polars-df 0.21.0-x86_64-linux-musl → 0.22.0-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +55 -48
  4. data/Cargo.toml +3 -0
  5. data/LICENSE-THIRD-PARTY.txt +23 -49
  6. data/README.md +12 -0
  7. data/lib/polars/3.2/polars.so +0 -0
  8. data/lib/polars/3.3/polars.so +0 -0
  9. data/lib/polars/3.4/polars.so +0 -0
  10. data/lib/polars/array_expr.rb +382 -3
  11. data/lib/polars/array_name_space.rb +281 -0
  12. data/lib/polars/binary_expr.rb +67 -0
  13. data/lib/polars/binary_name_space.rb +43 -0
  14. data/lib/polars/cat_expr.rb +224 -0
  15. data/lib/polars/cat_name_space.rb +138 -0
  16. data/lib/polars/config.rb +2 -2
  17. data/lib/polars/convert.rb +6 -6
  18. data/lib/polars/data_frame.rb +794 -27
  19. data/lib/polars/data_type_expr.rb +52 -0
  20. data/lib/polars/data_types.rb +26 -5
  21. data/lib/polars/date_time_expr.rb +252 -1
  22. data/lib/polars/date_time_name_space.rb +299 -0
  23. data/lib/polars/expr.rb +1248 -206
  24. data/lib/polars/functions/business.rb +95 -0
  25. data/lib/polars/functions/datatype.rb +21 -0
  26. data/lib/polars/functions/lazy.rb +14 -1
  27. data/lib/polars/io/csv.rb +1 -1
  28. data/lib/polars/io/iceberg.rb +27 -0
  29. data/lib/polars/io/json.rb +4 -4
  30. data/lib/polars/io/ndjson.rb +4 -4
  31. data/lib/polars/io/parquet.rb +32 -7
  32. data/lib/polars/io/scan_options.rb +4 -1
  33. data/lib/polars/lazy_frame.rb +1028 -28
  34. data/lib/polars/list_expr.rb +217 -17
  35. data/lib/polars/list_name_space.rb +231 -22
  36. data/lib/polars/meta_expr.rb +89 -0
  37. data/lib/polars/name_expr.rb +36 -0
  38. data/lib/polars/query_opt_flags.rb +50 -0
  39. data/lib/polars/scan_cast_options.rb +20 -1
  40. data/lib/polars/schema.rb +79 -3
  41. data/lib/polars/selector.rb +72 -0
  42. data/lib/polars/selectors.rb +3 -3
  43. data/lib/polars/series.rb +1053 -54
  44. data/lib/polars/string_expr.rb +436 -32
  45. data/lib/polars/string_name_space.rb +736 -50
  46. data/lib/polars/struct_expr.rb +103 -0
  47. data/lib/polars/struct_name_space.rb +19 -1
  48. data/lib/polars/utils/serde.rb +17 -0
  49. data/lib/polars/utils/various.rb +22 -1
  50. data/lib/polars/utils.rb +5 -1
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +6 -0
  53. metadata +8 -2
@@ -184,86 +184,81 @@ module Polars
184
184
  super
185
185
  end
186
186
 
187
- # Get length of the string values in the Series (as number of bytes).
187
+ # Convert a String column into a Decimal column.
188
188
  #
189
- # @return [Series]
189
+ # This method infers the needed parameters `precision` and `scale`.
190
190
  #
191
- # @note
192
- # The returned lengths are equal to the number of bytes in the UTF8 string. If you
193
- # need the length in terms of the number of characters, use `n_chars` instead.
191
+ # @param inference_length [Integer]
192
+ # Number of elements to parse to determine the `precision` and `scale`
193
+ #
194
+ # @return [Series]
194
195
  #
195
196
  # @example
196
- # s = Polars::Series.new(["Café", nil, "345", "東京"])
197
- # s.str.lengths
197
+ # s = Polars::Series.new(
198
+ # ["40.12", "3420.13", "120134.19", "3212.98", "12.90", "143.09", "143.9"]
199
+ # )
200
+ # s.str.to_decimal
198
201
  # # =>
199
- # # shape: (4,)
200
- # # Series: '' [u32]
202
+ # # shape: (7,)
203
+ # # Series: '' [decimal[*,2]]
201
204
  # # [
202
- # # 5
203
- # # null
204
- # # 3
205
- # # 6
205
+ # # 40.12
206
+ # # 3420.13
207
+ # # 120134.19
208
+ # # 3212.98
209
+ # # 12.90
210
+ # # 143.09
211
+ # # 143.90
206
212
  # # ]
207
- def lengths
208
- super
213
+ def to_decimal(inference_length = 100, scale: nil)
214
+ if !scale.nil?
215
+ raise Todo
216
+ end
217
+
218
+ Utils.wrap_s(_s.str_to_decimal_infer(inference_length))
209
219
  end
210
220
 
211
- # Get length of the string values in the Series (as number of chars).
221
+ # Return the length of each string as the number of bytes.
212
222
  #
213
223
  # @return [Series]
214
224
  #
215
- # @note
216
- # If you know that you are working with ASCII text, `lengths` will be
217
- # equivalent, and faster (returns length in terms of the number of bytes).
218
- #
219
225
  # @example
220
- # s = Polars::Series.new(["Café", nil, "345", "東京"])
221
- # s.str.n_chars
226
+ # s = Polars::Series.new(["Café", "345", "東京", nil])
227
+ # s.str.len_bytes
222
228
  # # =>
223
229
  # # shape: (4,)
224
230
  # # Series: '' [u32]
225
231
  # # [
226
- # # 4
227
- # # null
232
+ # # 5
228
233
  # # 3
229
- # # 2
234
+ # # 6
235
+ # # null
230
236
  # # ]
231
- def n_chars
237
+ def len_bytes
232
238
  super
233
239
  end
240
+ alias_method :lengths, :len_bytes
234
241
 
235
- # Vertically concat the values in the Series to a single string value.
236
- #
237
- # @param delimiter [String]
238
- # The delimiter to insert between consecutive string values.
239
- # @param ignore_nulls [Boolean]
240
- # Ignore null values (default).
241
- # If set to `False`, null values will be propagated. This means that
242
- # if the column contains any null values, the output is null.
242
+ # Return the length of each string as the number of characters.
243
243
  #
244
244
  # @return [Series]
245
245
  #
246
246
  # @example
247
- # Polars::Series.new([1, nil, 2]).str.join("-")
248
- # # =>
249
- # # shape: (1,)
250
- # # Series: '' [str]
251
- # # [
252
- # # "1-2"
253
- # # ]
254
- #
255
- # @example
256
- # Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
247
+ # s = Polars::Series.new(["Café", "345", "東京", nil])
248
+ # s.str.len_chars
257
249
  # # =>
258
- # # shape: (1,)
259
- # # Series: '' [str]
250
+ # # shape: (4,)
251
+ # # Series: '' [u32]
260
252
  # # [
253
+ # # 4
254
+ # # 3
255
+ # # 2
261
256
  # # null
262
257
  # # ]
263
- def join(delimiter = "-", ignore_nulls: true)
258
+ def len_chars
264
259
  super
265
260
  end
266
- alias_method :concat, :join
261
+ alias_method :n_chars, :len_chars
267
262
 
268
263
  # Check if strings in Series contain a substring that matches a regex.
269
264
  #
@@ -302,6 +297,65 @@ module Polars
302
297
  super
303
298
  end
304
299
 
300
+ # Return the byte offset of the first substring matching a pattern.
301
+ #
302
+ # If the pattern is not found, returns nil.
303
+ #
304
+ # @param pattern
305
+ # A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
306
+ # @param literal
307
+ # Treat `pattern` as a literal string, not as a regular expression.
308
+ # @param strict
309
+ # Raise an error if the underlying pattern is not a valid regex,
310
+ # otherwise mask out with a null value.
311
+ #
312
+ # @return [Series]
313
+ #
314
+ # @note
315
+ # To modify regular expression behaviour (such as case-sensitivity) with
316
+ # flags, use the inline `(?iLmsuxU)` syntax.
317
+ #
318
+ # @example Find the index of the first substring matching a regex pattern:
319
+ # s = Polars::Series.new("txt", ["Crab", "Lobster", nil, "Crustacean"])
320
+ # s.str.find("a|e").rename("idx_rx")
321
+ # # =>
322
+ # # shape: (4,)
323
+ # # Series: 'idx_rx' [u32]
324
+ # # [
325
+ # # 2
326
+ # # 5
327
+ # # null
328
+ # # 5
329
+ # # ]
330
+ #
331
+ # @example Find the index of the first substring matching a literal pattern:
332
+ # s.str.find("e", literal: true).rename("idx_lit")
333
+ # # =>
334
+ # # shape: (4,)
335
+ # # Series: 'idx_lit' [u32]
336
+ # # [
337
+ # # null
338
+ # # 5
339
+ # # null
340
+ # # 7
341
+ # # ]
342
+ #
343
+ # @example Match against a pattern found in another Series (or expression):
344
+ # p = Polars::Series.new("pat", ["a[bc]", "b.t", "[aeiuo]", "(?i)A[BC]"])
345
+ # s.str.find(p).rename("idx")
346
+ # # =>
347
+ # # shape: (4,)
348
+ # # Series: 'idx' [u32]
349
+ # # [
350
+ # # 2
351
+ # # 2
352
+ # # null
353
+ # # 5
354
+ # # ]
355
+ def find(pattern, literal: false, strict: true)
356
+ super
357
+ end
358
+
305
359
  # Check if string values end with a substring.
306
360
  #
307
361
  # @param sub [String]
@@ -395,6 +449,43 @@ module Polars
395
449
  super
396
450
  end
397
451
 
452
+ # Parse string values as JSON.
453
+ #
454
+ # Throws an error if invalid JSON strings are encountered.
455
+ #
456
+ # @param dtype [Object]
457
+ # The dtype to cast the extracted value to. If nil, the dtype will be
458
+ # inferred from the JSON value.
459
+ # @param infer_schema_length [Integer]
460
+ # The maximum number of rows to scan for schema inference.
461
+ # If set to `nil`, the full data may be scanned *(this is slow)*.
462
+ #
463
+ # @return [Series]
464
+ #
465
+ # @example
466
+ # s = Polars::Series.new("json", ['{"a":1, "b": true}', nil, '{"a":2, "b": false}'])
467
+ # s.str.json_decode
468
+ # # =>
469
+ # # shape: (3,)
470
+ # # Series: 'json' [struct[2]]
471
+ # # [
472
+ # # {1,true}
473
+ # # null
474
+ # # {2,false}
475
+ # # ]
476
+ def json_decode(dtype = nil, infer_schema_length: 100)
477
+ if !dtype.nil?
478
+ s = Utils.wrap_s(_s)
479
+ return (
480
+ s.to_frame
481
+ .select_seq(F.col(s.name).str.json_decode(dtype))
482
+ .to_series
483
+ )
484
+ end
485
+
486
+ Utils.wrap_s(_s.str_json_decode(infer_schema_length))
487
+ end
488
+
398
489
  # Extract the first match of json string with provided JSONPath expression.
399
490
  #
400
491
  # Throws an error if invalid JSON strings are encountered.
@@ -479,6 +570,39 @@ module Polars
479
570
  super
480
571
  end
481
572
 
573
+ # Extract all capture groups for the given regex pattern.
574
+ #
575
+ # @param pattern [String]
576
+ # A valid regular expression pattern containing at least one capture group,
577
+ # compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
578
+ #
579
+ # @return [Series]
580
+ #
581
+ # @note
582
+ # All group names are **strings**.
583
+ #
584
+ # @example
585
+ # s = Polars::Series.new(
586
+ # "url",
587
+ # [
588
+ # "http://vote.com/ballon_dor?candidate=messi&ref=python",
589
+ # "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
590
+ # "http://vote.com/ballon_dor?error=404&ref=rust"
591
+ # ]
592
+ # )
593
+ # s.str.extract_groups("candidate=(?<candidate>\\w+)&ref=(?<ref>\\w+)")
594
+ # # =>
595
+ # # shape: (3,)
596
+ # # Series: 'url' [struct[2]]
597
+ # # [
598
+ # # {"messi","python"}
599
+ # # {"weghorst","polars"}
600
+ # # {null,null}
601
+ # # ]
602
+ def extract_groups(pattern)
603
+ super
604
+ end
605
+
482
606
  # Count all successive non-overlapping regex matches.
483
607
  #
484
608
  # @param pattern [String]
@@ -488,7 +612,7 @@ module Polars
488
612
  #
489
613
  # @example
490
614
  # s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
491
- # s.str.count_match('\d')
615
+ # s.str.count_matches('\d')
492
616
  # # =>
493
617
  # # shape: (2,)
494
618
  # # Series: 'foo' [u32]
@@ -496,9 +620,10 @@ module Polars
496
620
  # # 5
497
621
  # # 6
498
622
  # # ]
499
- def count_match(pattern)
623
+ def count_matches(pattern)
500
624
  super
501
625
  end
626
+ alias_method :count_match, :count_matches
502
627
 
503
628
  # Split the string by a substring.
504
629
  #
@@ -728,6 +853,108 @@ module Polars
728
853
  end
729
854
  alias_method :rstrip, :strip_chars_end
730
855
 
856
+ # Remove prefix.
857
+ #
858
+ # The prefix will be removed from the string exactly once, if found.
859
+ #
860
+ # @param prefix [String]
861
+ # The prefix to be removed.
862
+ #
863
+ # @return [Series]
864
+ #
865
+ # @example
866
+ # s = Polars::Series.new(["foobar", "foofoobar", "foo", "bar"])
867
+ # s.str.strip_prefix("foo")
868
+ # # =>
869
+ # # shape: (4,)
870
+ # # Series: '' [str]
871
+ # # [
872
+ # # "bar"
873
+ # # "foobar"
874
+ # # ""
875
+ # # "bar"
876
+ # # ]
877
+ def strip_prefix(prefix)
878
+ super
879
+ end
880
+
881
+ # Remove suffix.
882
+ #
883
+ # The suffix will be removed from the string exactly once, if found.
884
+ #
885
+ # @param suffix [String]
886
+ # The suffix to be removed.
887
+ #
888
+ # @return [Series]
889
+ #
890
+ # @example
891
+ # s = Polars::Series.new(["foobar", "foobarbar", "foo", "bar"])
892
+ # s.str.strip_suffix("bar")
893
+ # # =>
894
+ # # shape: (4,)
895
+ # # Series: '' [str]
896
+ # # [
897
+ # # "foo"
898
+ # # "foobar"
899
+ # # "foo"
900
+ # # ""
901
+ # # ]
902
+ def strip_suffix(suffix)
903
+ super
904
+ end
905
+
906
+ # Pad the start of the string until it reaches the given length.
907
+ #
908
+ # @param length [Integer]
909
+ # Pad the string until it reaches this length. Strings with length equal to or
910
+ # greater than this value are returned as-is.
911
+ # @param fill_char [String]
912
+ # The character to pad the string with.
913
+ #
914
+ # @return [Series]
915
+ #
916
+ # @example
917
+ # s = Polars::Series.new("a", ["cow", "monkey", "hippopotamus", nil])
918
+ # s.str.pad_start(8, "*")
919
+ # # =>
920
+ # # shape: (4,)
921
+ # # Series: 'a' [str]
922
+ # # [
923
+ # # "*****cow"
924
+ # # "**monkey"
925
+ # # "hippopotamus"
926
+ # # null
927
+ # # ]
928
+ def pad_start(length, fill_char = " ")
929
+ super
930
+ end
931
+
932
+ # Pad the end of the string until it reaches the given length.
933
+ #
934
+ # @param length [Integer]
935
+ # Pad the string until it reaches this length. Strings with length equal to or
936
+ # greater than this value are returned as-is.
937
+ # @param fill_char [String]
938
+ # The character to pad the string with.
939
+ #
940
+ # @return [Series]
941
+ #
942
+ # @example
943
+ # s = Polars::Series.new(["cow", "monkey", "hippopotamus", nil])
944
+ # s.str.pad_end(8, "*")
945
+ # # =>
946
+ # # shape: (4,)
947
+ # # Series: '' [str]
948
+ # # [
949
+ # # "cow*****"
950
+ # # "monkey**"
951
+ # # "hippopotamus"
952
+ # # null
953
+ # # ]
954
+ def pad_end(length, fill_char = " ")
955
+ super
956
+ end
957
+
731
958
  # Fills the string with zeroes.
732
959
  #
733
960
  # Return a copy of the string left filled with ASCII '0' digits to make a string
@@ -850,6 +1077,25 @@ module Polars
850
1077
  super
851
1078
  end
852
1079
 
1080
+ # Returns string values in reversed order.
1081
+ #
1082
+ # @return [Series]
1083
+ #
1084
+ # @example
1085
+ # s = Polars::Series.new("text", ["foo", "bar", "man\u0303ana"])
1086
+ # s.str.reverse
1087
+ # # =>
1088
+ # # shape: (3,)
1089
+ # # Series: 'text' [str]
1090
+ # # [
1091
+ # # "oof"
1092
+ # # "rab"
1093
+ # # "anañam"
1094
+ # # ]
1095
+ def reverse
1096
+ super
1097
+ end
1098
+
853
1099
  # Create subslices of the string values of a Utf8 Series.
854
1100
  #
855
1101
  # @param offset [Integer]
@@ -888,5 +1134,445 @@ module Polars
888
1134
  s = Utils.wrap_s(_s)
889
1135
  s.to_frame.select(Polars.col(s.name).str.slice(offset, length)).to_series
890
1136
  end
1137
+
1138
+ # Return the first n characters of each string in a String Series.
1139
+ #
1140
+ # @param n [Object]
1141
+ # Length of the slice (integer or expression). Negative indexing is supported;
1142
+ # a negative `n` drops the last `|n|` characters (see the second example).
1143
+ #
1144
+ # @return [Series]
1145
+ #
1146
+ # @example Return up to the first 5 characters.
1147
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1148
+ # s.str.head(5)
1149
+ # # =>
1150
+ # # shape: (4,)
1151
+ # # Series: '' [str]
1152
+ # # [
1153
+ # # "pear"
1154
+ # # null
1155
+ # # "papay"
1156
+ # # "drago"
1157
+ # # ]
1158
+ #
1159
+ # @example Return up to the 3rd character from the end.
1160
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1161
+ # s.str.head(-3)
1162
+ # # =>
1163
+ # # shape: (4,)
1164
+ # # Series: '' [str]
1165
+ # # [
1166
+ # # "p"
1167
+ # # null
1168
+ # # "pap"
1169
+ # # "dragonfr"
1170
+ # # ]
1171
+ def head(n)
1172
+ super
1173
+ end
1174
+
1175
+ # Return the last n characters of each string in a String Series.
1176
+ #
1177
+ # @param n [Object]
1178
+ # Length of the slice (integer or expression). Negative indexing is supported;
1179
+ # a negative `n` skips the first `|n|` characters (see the second example).
1180
+ #
1181
+ # @return [Series]
1182
+ #
1183
+ # @example Return up to the last 5 characters:
1184
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1185
+ # s.str.tail(5)
1186
+ # # =>
1187
+ # # shape: (4,)
1188
+ # # Series: '' [str]
1189
+ # # [
1190
+ # # "pear"
1191
+ # # null
1192
+ # # "apaya"
1193
+ # # "fruit"
1194
+ # # ]
1195
+ #
1196
+ # @example Return from the 3rd character to the end:
1197
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1198
+ # s.str.tail(-3)
1199
+ # # =>
1200
+ # # shape: (4,)
1201
+ # # Series: '' [str]
1202
+ # # [
1203
+ # # "r"
1204
+ # # null
1205
+ # # "aya"
1206
+ # # "gonfruit"
1207
+ # # ]
1208
+ def tail(n)
1209
+ super
1210
+ end
1211
+
1212
+ # Convert a String column into an integer column of the given dtype, parsing with the given base (radix).
1213
+ #
1214
+ # @param base [Integer]
1215
+ # Positive integer or expression which is the base of the string
1216
+ # we are parsing.
1217
+ # Default: 10.
1218
+ # @param dtype [Object]
1219
+ # Polars integer type to cast to.
1220
+ # Default: `Int64`.
1221
+ # @param strict [Object]
1222
+ # If true (the default), raise any ParseError or overflow as a ComputeError.
1223
+ # If false, silently convert such values to null.
1224
+ #
1225
+ # @return [Series]
1226
+ #
1227
+ # @example
1228
+ # s = Polars::Series.new("bin", ["110", "101", "010", "invalid"])
1229
+ # s.str.to_integer(base: 2, dtype: Polars::Int32, strict: false)
1230
+ # # =>
1231
+ # # shape: (4,)
1232
+ # # Series: 'bin' [i32]
1233
+ # # [
1234
+ # # 6
1235
+ # # 5
1236
+ # # 2
1237
+ # # null
1238
+ # # ]
1239
+ #
1240
+ # @example
1241
+ # s = Polars::Series.new("hex", ["fa1e", "ff00", "cafe", nil])
1242
+ # s.str.to_integer(base: 16)
1243
+ # # =>
1244
+ # # shape: (4,)
1245
+ # # Series: 'hex' [i64]
1246
+ # # [
1247
+ # # 64030
1248
+ # # 65280
1249
+ # # 51966
1250
+ # # null
1251
+ # # ]
1252
+ def to_integer(
1253
+ base: 10,
1254
+ dtype: Int64,
1255
+ strict: true
1256
+ )
1257
+ super
1258
+ end
1259
+
1260
+ # Use the Aho-Corasick algorithm to find matches.
1261
+ #
1262
+ # Determines if any of the patterns are contained in the string.
1263
+ #
1264
+ # @param patterns [Object]
1265
+ # String patterns to search.
1266
+ # @param ascii_case_insensitive [Boolean]
1267
+ # Enable ASCII-aware case-insensitive matching.
1268
+ # When this option is enabled, searching will be performed without respect
1269
+ # to case for ASCII letters (a-z and A-Z) only.
1270
+ #
1271
+ # @return [Series]
1272
+ #
1273
+ # @note
1274
+ # This method supports matching on string literals only, and does not support
1275
+ # regular expression matching.
1276
+ #
1277
+ # @example
1278
+ # s = Polars::Series.new(
1279
+ # "lyrics",
1280
+ # [
1281
+ # "Everybody wants to rule the world",
1282
+ # "Tell me what you want, what you really really want",
1283
+ # "Can you feel the love tonight"
1284
+ # ]
1285
+ # )
1286
+ # s.str.contains_any(["you", "me"])
1287
+ # # =>
1288
+ # # shape: (3,)
1289
+ # # Series: 'lyrics' [bool]
1290
+ # # [
1291
+ # # false
1292
+ # # true
1293
+ # # true
1294
+ # # ]
1295
+ def contains_any(
1296
+ patterns,
1297
+ ascii_case_insensitive: false
1298
+ )
1299
+ super
1300
+ end
1301
+
1302
+ # Use the Aho-Corasick algorithm to replace many matches.
1303
+ #
1304
+ # @param patterns
1305
+ # String patterns to search and replace.
1306
+ # Also accepts a mapping of patterns to their replacement as syntactic sugar
1307
+ # for `replace_many(Polars::Series.new(mapping.keys), Polars::Series.new(mapping.values))`.
1308
+ # @param replace_with
1309
+ # Strings to replace where a pattern was a match.
1310
+ # Length must match the length of `patterns` or have length 1. This can be
1311
+ # broadcasted, so it supports many:one and many:many.
1312
+ # @param ascii_case_insensitive
1313
+ # Enable ASCII-aware case-insensitive matching.
1314
+ # When this option is enabled, searching will be performed without respect
1315
+ # to case for ASCII letters (a-z and A-Z) only.
1316
+ #
1317
+ # @return [Series]
1318
+ #
1319
+ # @note
1320
+ # This method supports matching on string literals only, and does not support
1321
+ # regular expression matching.
1322
+ #
1323
+ # @example Replace many patterns by passing lists of equal length to the `patterns` and `replace_with` parameters.
1324
+ # s = Polars::Series.new(
1325
+ # "lyrics",
1326
+ # [
1327
+ # "Everybody wants to rule the world",
1328
+ # "Tell me what you want, what you really really want",
1329
+ # "Can you feel the love tonight"
1330
+ # ]
1331
+ # )
1332
+ # s.str.replace_many(["you", "me"], ["me", "you"])
1333
+ # # =>
1334
+ # # shape: (3,)
1335
+ # # Series: 'lyrics' [str]
1336
+ # # [
1337
+ # # "Everybody wants to rule the wo…
1338
+ # # "Tell you what me want, what me…
1339
+ # # "Can me feel the love tonight"
1340
+ # # ]
1341
+ #
1342
+ # @example Broadcast a replacement for many patterns by passing a sequence of length 1 to the `replace_with` parameter.
1343
+ # s = Polars::Series.new(
1344
+ # "lyrics",
1345
+ # [
1346
+ # "Everybody wants to rule the world",
1347
+ # "Tell me what you want, what you really really want",
1348
+ # "Can you feel the love tonight",
1349
+ # ]
1350
+ # )
1351
+ # s.str.replace_many(["me", "you", "they"], [""])
1352
+ # # =>
1353
+ # # shape: (3,)
1354
+ # # Series: 'lyrics' [str]
1355
+ # # [
1356
+ # # "Everybody wants to rule the wo…
1357
+ # # "Tell what want, what really…
1358
+ # # "Can feel the love tonight"
1359
+ # # ]
1360
+ #
1361
+ # @example Passing a mapping with patterns and replacements is also supported as syntactic sugar.
1362
+ # s = Polars::Series.new(
1363
+ # "lyrics",
1364
+ # [
1365
+ # "Everybody wants to rule the world",
1366
+ # "Tell me what you want, what you really really want",
1367
+ # "Can you feel the love tonight"
1368
+ # ]
1369
+ # )
1370
+ # mapping = {"me" => "you", "you" => "me", "want" => "need"}
1371
+ # s.str.replace_many(mapping)
1372
+ # # =>
1373
+ # # shape: (3,)
1374
+ # # Series: 'lyrics' [str]
1375
+ # # [
1376
+ # # "Everybody needs to rule the wo…
1377
+ # # "Tell you what me need, what me…
1378
+ # # "Can me feel the love tonight"
1379
+ # # ]
1380
+ def replace_many(
1381
+ patterns,
1382
+ replace_with = Expr::NO_DEFAULT,
1383
+ ascii_case_insensitive: false
1384
+ )
1385
+ super
1386
+ end
1387
+
1388
+ # Use the Aho-Corasick algorithm to extract many matches.
1389
+ #
1390
+ # @param patterns [Object]
1391
+ # String patterns to search.
1392
+ # @param ascii_case_insensitive [Boolean]
1393
+ # Enable ASCII-aware case-insensitive matching.
1394
+ # When this option is enabled, searching will be performed without respect
1395
+ # to case for ASCII letters (a-z and A-Z) only.
1396
+ # @param overlapping [Boolean]
1397
+ # Whether matches may overlap.
1398
+ #
1399
+ # @return [Series]
1400
+ #
1401
+ # @note
1402
+ # This method supports matching on string literals only, and does not support
1403
+ # regular expression matching.
1404
+ #
1405
+ # @example
1406
+ # s = Polars::Series.new("values", ["discontent"])
1407
+ # patterns = ["winter", "disco", "onte", "discontent"]
1408
+ # s.str.extract_many(patterns, overlapping: true)
1409
+ # # =>
1410
+ # # shape: (1,)
1411
+ # # Series: 'values' [list[str]]
1412
+ # # [
1413
+ # # ["disco", "onte", "discontent"]
1414
+ # # ]
1415
+ def extract_many(
1416
+ patterns,
1417
+ ascii_case_insensitive: false,
1418
+ overlapping: false
1419
+ )
1420
+ super
1421
+ end
1422
+
1423
+ # Use the Aho-Corasick algorithm to find all matches.
1424
+ #
1425
+ # The function returns the byte offset of the start of each match.
1426
+ # The return type will be `List<UInt32>`.
1427
+ #
1428
+ # @param patterns [Object]
1429
+ # String patterns to search.
1430
+ # @param ascii_case_insensitive [Boolean]
1431
+ # Enable ASCII-aware case-insensitive matching.
1432
+ # When this option is enabled, searching will be performed without respect
1433
+ # to case for ASCII letters (a-z and A-Z) only.
1434
+ # @param overlapping [Boolean]
1435
+ # Whether matches may overlap.
1436
+ #
1437
+ # @return [Series]
1438
+ #
1439
+ # @note
1440
+ # This method supports matching on string literals only, and does not support
1441
+ # regular expression matching.
1442
+ #
1443
+ # @example
1444
+ # df = Polars::DataFrame.new({"values" => ["discontent"]})
1445
+ # patterns = ["winter", "disco", "onte", "discontent"]
1446
+ # df.with_columns(
1447
+ # Polars.col("values")
1448
+ # .str.extract_many(patterns, overlapping: false)
1449
+ # .alias("matches"),
1450
+ # Polars.col("values")
1451
+ # .str.extract_many(patterns, overlapping: true)
1452
+ # .alias("matches_overlapping")
1453
+ # )
1454
+ # # =>
1455
+ # # shape: (1, 3)
1456
+ # # ┌────────────┬───────────┬─────────────────────────────────┐
1457
+ # # │ values ┆ matches ┆ matches_overlapping │
1458
+ # # │ --- ┆ --- ┆ --- │
1459
+ # # │ str ┆ list[str] ┆ list[str] │
1460
+ # # ╞════════════╪═══════════╪═════════════════════════════════╡
1461
+ # # │ discontent ┆ ["disco"] ┆ ["disco", "onte", "discontent"… │
1462
+ # # └────────────┴───────────┴─────────────────────────────────┘
1463
+ #
1464
+ # @example
1465
+ # df = Polars::DataFrame.new(
1466
+ # {
1467
+ # "values" => ["discontent", "rhapsody"],
1468
+ # "patterns" => [
1469
+ # ["winter", "disco", "onte", "discontent"],
1470
+ # ["rhap", "ody", "coalesce"]
1471
+ # ]
1472
+ # }
1473
+ # )
1474
+ # df.select(Polars.col("values").str.find_many("patterns"))
1475
+ # # =>
1476
+ # # shape: (2, 1)
1477
+ # # ┌───────────┐
1478
+ # # │ values │
1479
+ # # │ --- │
1480
+ # # │ list[u32] │
1481
+ # # ╞═══════════╡
1482
+ # # │ [0] │
1483
+ # # │ [0, 5] │
1484
+ # # └───────────┘
1485
+ def find_many(
1486
+ patterns,
1487
+ ascii_case_insensitive: false,
1488
+ overlapping: false
1489
+ )
1490
+ super
1491
+ end
1492
+
1493
+ # Vertically concat the values in the Series to a single string value.
1494
+ #
1495
+ # @param delimiter [String]
1496
+ # The delimiter to insert between consecutive string values.
1497
+ # @param ignore_nulls [Boolean]
1498
+ # Ignore null values (default).
1499
+ # If set to `false`, null values will be propagated. This means that
1500
+ # if the column contains any null values, the output is null.
1501
+ #
1502
+ # @return [Series]
1503
+ #
1504
+ # @example
1505
+ # Polars::Series.new([1, nil, 2]).str.join("-")
1506
+ # # =>
1507
+ # # shape: (1,)
1508
+ # # Series: '' [str]
1509
+ # # [
1510
+ # # "1-2"
1511
+ # # ]
1512
+ #
1513
+ # @example
1514
+ # Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
1515
+ # # =>
1516
+ # # shape: (1,)
1517
+ # # Series: '' [str]
1518
+ # # [
1519
+ # # null
1520
+ # # ]
1521
+ def join(delimiter = "-", ignore_nulls: true)
1522
+ super
1523
+ end
1524
+ alias_method :concat, :join
1525
+
1526
+ # Returns string values with all regular expression meta characters escaped.
1527
+ #
1528
+ # @return [Series]
1529
+ #
1530
+ # @example
1531
+ # Polars::Series.new(["abc", "def", nil, "abc(\\w+)"]).str.escape_regex
1532
+ # # =>
1533
+ # # shape: (4,)
1534
+ # # Series: '' [str]
1535
+ # # [
1536
+ # # "abc"
1537
+ # # "def"
1538
+ # # null
1539
+ # # "abc\(\\w\+\)"
1540
+ # # ]
1541
+ def escape_regex
1542
+ super
1543
+ end
1544
+
1545
+ # Returns the Unicode normal form of the string values.
1546
+ #
1547
+ # This uses the forms described in Unicode Standard Annex 15: <https://www.unicode.org/reports/tr15/>.
1548
+ #
1549
+ # @param form ['NFC', 'NFKC', 'NFD', 'NFKD']
1550
+ # Unicode form to use.
1551
+ #
1552
+ # @return [Series]
1553
+ #
1554
+ # @example
1555
+ # s = Polars::Series.new(["01²", "KADOKAWA"])
1556
+ # s.str.normalize("NFC")
1557
+ # # =>
1558
+ # # shape: (2,)
1559
+ # # Series: '' [str]
1560
+ # # [
1561
+ # # "01²"
1562
+ # # "KADOKAWA"
1563
+ # # ]
1564
+ #
1565
+ # @example
1566
+ # s.str.normalize("NFKC")
1567
+ # # =>
1568
+ # # shape: (2,)
1569
+ # # Series: '' [str]
1570
+ # # [
1571
+ # # "012"
1572
+ # # "KADOKAWA"
1573
+ # # ]
1574
+ def normalize(form = "NFC")
1575
+ super
1576
+ end
891
1577
  end
892
1578
  end