polars-df 0.21.0 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +1 -1
  4. data/ext/polars/Cargo.toml +7 -1
  5. data/ext/polars/src/conversion/mod.rs +92 -4
  6. data/ext/polars/src/exceptions.rs +1 -0
  7. data/ext/polars/src/expr/array.rs +73 -4
  8. data/ext/polars/src/expr/binary.rs +26 -1
  9. data/ext/polars/src/expr/bitwise.rs +39 -0
  10. data/ext/polars/src/expr/categorical.rs +20 -0
  11. data/ext/polars/src/expr/datatype.rs +24 -1
  12. data/ext/polars/src/expr/datetime.rs +58 -0
  13. data/ext/polars/src/expr/general.rs +84 -5
  14. data/ext/polars/src/expr/list.rs +24 -0
  15. data/ext/polars/src/expr/meta.rs +11 -0
  16. data/ext/polars/src/expr/mod.rs +1 -0
  17. data/ext/polars/src/expr/name.rs +8 -0
  18. data/ext/polars/src/expr/rolling.rs +20 -0
  19. data/ext/polars/src/expr/string.rs +59 -0
  20. data/ext/polars/src/expr/struct.rs +9 -1
  21. data/ext/polars/src/functions/io.rs +19 -0
  22. data/ext/polars/src/functions/lazy.rs +4 -0
  23. data/ext/polars/src/lazyframe/general.rs +51 -0
  24. data/ext/polars/src/lib.rs +119 -10
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/map/series.rs +1 -1
  27. data/ext/polars/src/series/aggregation.rs +44 -0
  28. data/ext/polars/src/series/general.rs +64 -4
  29. data/lib/polars/array_expr.rb +382 -3
  30. data/lib/polars/array_name_space.rb +281 -0
  31. data/lib/polars/binary_expr.rb +67 -0
  32. data/lib/polars/binary_name_space.rb +43 -0
  33. data/lib/polars/cat_expr.rb +224 -0
  34. data/lib/polars/cat_name_space.rb +138 -0
  35. data/lib/polars/config.rb +2 -2
  36. data/lib/polars/convert.rb +6 -6
  37. data/lib/polars/data_frame.rb +684 -19
  38. data/lib/polars/data_type_expr.rb +52 -0
  39. data/lib/polars/data_types.rb +14 -2
  40. data/lib/polars/date_time_expr.rb +251 -0
  41. data/lib/polars/date_time_name_space.rb +299 -0
  42. data/lib/polars/expr.rb +1213 -180
  43. data/lib/polars/functions/datatype.rb +21 -0
  44. data/lib/polars/functions/lazy.rb +13 -0
  45. data/lib/polars/io/csv.rb +1 -1
  46. data/lib/polars/io/json.rb +4 -4
  47. data/lib/polars/io/ndjson.rb +4 -4
  48. data/lib/polars/io/parquet.rb +27 -5
  49. data/lib/polars/lazy_frame.rb +936 -20
  50. data/lib/polars/list_expr.rb +196 -4
  51. data/lib/polars/list_name_space.rb +201 -4
  52. data/lib/polars/meta_expr.rb +64 -0
  53. data/lib/polars/name_expr.rb +36 -0
  54. data/lib/polars/schema.rb +79 -3
  55. data/lib/polars/selector.rb +72 -0
  56. data/lib/polars/selectors.rb +3 -3
  57. data/lib/polars/series.rb +1051 -54
  58. data/lib/polars/string_expr.rb +411 -6
  59. data/lib/polars/string_name_space.rb +722 -49
  60. data/lib/polars/struct_expr.rb +103 -0
  61. data/lib/polars/struct_name_space.rb +19 -1
  62. data/lib/polars/utils/various.rb +18 -1
  63. data/lib/polars/utils.rb +5 -1
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +2 -0
  66. metadata +4 -1
@@ -184,86 +184,77 @@ module Polars
184
184
  super
185
185
  end
186
186
 
187
- # Get length of the string values in the Series (as number of bytes).
187
+ # Convert a String column into a Decimal column.
188
188
  #
189
- # @return [Series]
189
+ # This method infers the needed parameters `precision` and `scale`.
190
190
  #
191
- # @note
192
- # The returned lengths are equal to the number of bytes in the UTF8 string. If you
193
- # need the length in terms of the number of characters, use `n_chars` instead.
191
+ # @param inference_length [Integer]
192
+ # Number of elements to parse to determine the `precision` and `scale`
193
+ #
194
+ # @return [Series]
194
195
  #
195
196
  # @example
196
- # s = Polars::Series.new(["Café", nil, "345", "東京"])
197
- # s.str.lengths
197
+ # s = Polars::Series.new(
198
+ # ["40.12", "3420.13", "120134.19", "3212.98", "12.90", "143.09", "143.9"]
199
+ # )
200
+ # s.str.to_decimal
198
201
  # # =>
199
- # # shape: (4,)
200
- # # Series: '' [u32]
202
+ # # shape: (7,)
203
+ # # Series: '' [decimal[*,2]]
201
204
  # # [
202
- # # 5
203
- # # null
204
- # # 3
205
- # # 6
205
+ # # 40.12
206
+ # # 3420.13
207
+ # # 120134.19
208
+ # # 3212.98
209
+ # # 12.90
210
+ # # 143.09
211
+ # # 143.90
206
212
  # # ]
207
- def lengths
213
+ def to_decimal(inference_length = 100)
208
214
  super
209
215
  end
210
216
 
211
- # Get length of the string values in the Series (as number of chars).
217
+ # Return the length of each string as the number of bytes.
212
218
  #
213
219
  # @return [Series]
214
220
  #
215
- # @note
216
- # If you know that you are working with ASCII text, `lengths` will be
217
- # equivalent, and faster (returns length in terms of the number of bytes).
218
- #
219
221
  # @example
220
- # s = Polars::Series.new(["Café", nil, "345", "東京"])
221
- # s.str.n_chars
222
+ # s = Polars::Series.new(["Café", "345", "東京", nil])
223
+ # s.str.len_bytes
222
224
  # # =>
223
225
  # # shape: (4,)
224
226
  # # Series: '' [u32]
225
227
  # # [
226
- # # 4
227
- # # null
228
+ # # 5
228
229
  # # 3
229
- # # 2
230
+ # # 6
231
+ # # null
230
232
  # # ]
231
- def n_chars
233
+ def len_bytes
232
234
  super
233
235
  end
236
+ alias_method :lengths, :len_bytes
234
237
 
235
- # Vertically concat the values in the Series to a single string value.
236
- #
237
- # @param delimiter [String]
238
- # The delimiter to insert between consecutive string values.
239
- # @param ignore_nulls [Boolean]
240
- # Ignore null values (default).
241
- # If set to `False`, null values will be propagated. This means that
242
- # if the column contains any null values, the output is null.
238
+ # Return the length of each string as the number of characters.
243
239
  #
244
240
  # @return [Series]
245
241
  #
246
242
  # @example
247
- # Polars::Series.new([1, nil, 2]).str.join("-")
243
+ # s = Polars::Series.new(["Café", "345", "東京", nil])
244
+ # s.str.len_chars
248
245
  # # =>
249
- # # shape: (1,)
250
- # # Series: '' [str]
251
- # # [
252
- # # "1-2"
253
- # # ]
254
- #
255
- # @example
256
- # Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
257
- # # =>
258
- # # shape: (1,)
259
- # # Series: '' [str]
246
+ # # shape: (4,)
247
+ # # Series: '' [u32]
260
248
  # # [
249
+ # # 4
250
+ # # 3
251
+ # # 2
261
252
  # # null
262
253
  # # ]
263
- def join(delimiter = "-", ignore_nulls: true)
254
+ def len_chars
264
255
  super
265
256
  end
266
- alias_method :concat, :join
257
+ alias_method :n_chars, :len_chars
267
258
 
268
259
  # Check if strings in Series contain a substring that matches a regex.
269
260
  #
@@ -302,6 +293,65 @@ module Polars
302
293
  super
303
294
  end
304
295
 
296
+ # Return the byte offset of the first substring matching a pattern.
297
+ #
298
+ # If the pattern is not found, returns nil.
299
+ #
300
+ # @param pattern
301
+ # A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
302
+ # @param literal
303
+ # Treat `pattern` as a literal string, not as a regular expression.
304
+ # @param strict
305
+ # Raise an error if the underlying pattern is not a valid regex,
306
+ # otherwise mask out with a null value.
307
+ #
308
+ # @return [Series]
309
+ #
310
+ # @note
311
+ # To modify regular expression behaviour (such as case-sensitivity) with
312
+ # flags, use the inline `(?iLmsuxU)` syntax.
313
+ #
314
+ # @example Find the index of the first substring matching a regex pattern:
315
+ # s = Polars::Series.new("txt", ["Crab", "Lobster", nil, "Crustacean"])
316
+ # s.str.find("a|e").rename("idx_rx")
317
+ # # =>
318
+ # # shape: (4,)
319
+ # # Series: 'idx_rx' [u32]
320
+ # # [
321
+ # # 2
322
+ # # 5
323
+ # # null
324
+ # # 5
325
+ # # ]
326
+ #
327
+ # @example Find the index of the first substring matching a literal pattern:
328
+ # s.str.find("e", literal: true).rename("idx_lit")
329
+ # # =>
330
+ # # shape: (4,)
331
+ # # Series: 'idx_lit' [u32]
332
+ # # [
333
+ # # null
334
+ # # 5
335
+ # # null
336
+ # # 7
337
+ # # ]
338
+ #
339
+ # @example Match against a pattern found in another column or (expression):
340
+ # p = Polars::Series.new("pat", ["a[bc]", "b.t", "[aeiuo]", "(?i)A[BC]"])
341
+ # s.str.find(p).rename("idx")
342
+ # # =>
343
+ # # shape: (4,)
344
+ # # Series: 'idx' [u32]
345
+ # # [
346
+ # # 2
347
+ # # 2
348
+ # # null
349
+ # # 5
350
+ # # ]
351
+ def find(pattern, literal: false, strict: true)
352
+ super
353
+ end
354
+
305
355
  # Check if string values end with a substring.
306
356
  #
307
357
  # @param sub [String]
@@ -395,6 +445,34 @@ module Polars
395
445
  super
396
446
  end
397
447
 
448
+ # Parse string values as JSON.
449
+ #
450
+ # Throws an error if invalid JSON strings are encountered.
451
+ #
452
+ # @param dtype [Object]
453
+ # The dtype to cast the extracted value to. If nil, the dtype will be
454
+ # inferred from the JSON value.
455
+ # @param infer_schema_length [Integer]
456
+ # The maximum number of rows to scan for schema inference.
457
+ # If set to `nil`, the full data may be scanned *(this is slow)*.
458
+ #
459
+ # @return [Series]
460
+ #
461
+ # @example
462
+ # s = Polars::Series.new("json", ['{"a":1, "b": true}', nil, '{"a":2, "b": false}'])
463
+ # s.str.json_decode
464
+ # # =>
465
+ # # shape: (3,)
466
+ # # Series: 'json' [struct[2]]
467
+ # # [
468
+ # # {1,true}
469
+ # # null
470
+ # # {2,false}
471
+ # # ]
472
+ def json_decode(dtype = nil, infer_schema_length: 100)
473
+ super
474
+ end
475
+
398
476
  # Extract the first match of json string with provided JSONPath expression.
399
477
  #
400
478
  # Throw errors if encounter invalid json strings.
@@ -479,6 +557,39 @@ module Polars
479
557
  super
480
558
  end
481
559
 
560
+ # Extract all capture groups for the given regex pattern.
561
+ #
562
+ # @param pattern [String]
563
+ # A valid regular expression pattern containing at least one capture group,
564
+ # compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
565
+ #
566
+ # @return [Series]
567
+ #
568
+ # @note
569
+ # All group names are **strings**.
570
+ #
571
+ # @example
572
+ # s = Polars::Series.new(
573
+ # "url",
574
+ # [
575
+ # "http://vote.com/ballon_dor?candidate=messi&ref=python",
576
+ # "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
577
+ # "http://vote.com/ballon_dor?error=404&ref=rust"
578
+ # ]
579
+ # )
580
+ # s.str.extract_groups("candidate=(?<candidate>\\w+)&ref=(?<ref>\\w+)")
581
+ # # =>
582
+ # # shape: (3,)
583
+ # # Series: 'url' [struct[2]]
584
+ # # [
585
+ # # {"messi","python"}
586
+ # # {"weghorst","polars"}
587
+ # # {null,null}
588
+ # # ]
589
+ def extract_groups(pattern)
590
+ super
591
+ end
592
+
482
593
  # Count all successive non-overlapping regex matches.
483
594
  #
484
595
  # @param pattern [String]
@@ -488,7 +599,7 @@ module Polars
488
599
  #
489
600
  # @example
490
601
  # s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
491
- # s.str.count_match('\d')
602
+ # s.str.count_matches('\d')
492
603
  # # =>
493
604
  # # shape: (2,)
494
605
  # # Series: 'foo' [u32]
@@ -496,9 +607,10 @@ module Polars
496
607
  # # 5
497
608
  # # 6
498
609
  # # ]
499
- def count_match(pattern)
610
+ def count_matches(pattern)
500
611
  super
501
612
  end
613
+ alias_method :count_match, :count_matches
502
614
 
503
615
  # Split the string by a substring.
504
616
  #
@@ -728,6 +840,108 @@ module Polars
728
840
  end
729
841
  alias_method :rstrip, :strip_chars_end
730
842
 
843
+ # Remove prefix.
844
+ #
845
+ # The prefix will be removed from the string exactly once, if found.
846
+ #
847
+ # @param prefix [String]
848
+ # The prefix to be removed.
849
+ #
850
+ # @return [Series]
851
+ #
852
+ # @example
853
+ # s = Polars::Series.new(["foobar", "foofoobar", "foo", "bar"])
854
+ # s.str.strip_prefix("foo")
855
+ # # =>
856
+ # # shape: (4,)
857
+ # # Series: '' [str]
858
+ # # [
859
+ # # "bar"
860
+ # # "foobar"
861
+ # # ""
862
+ # # "bar"
863
+ # # ]
864
+ def strip_prefix(prefix)
865
+ super
866
+ end
867
+
868
+ # Remove suffix.
869
+ #
870
+ # The suffix will be removed from the string exactly once, if found.
871
+ #
872
+ # @param suffix [String]
873
+ # The suffix to be removed.
874
+ #
875
+ # @return [Series]
876
+ #
877
+ # @example
878
+ # s = Polars::Series.new(["foobar", "foobarbar", "foo", "bar"])
879
+ # s.str.strip_suffix("bar")
880
+ # # =>
881
+ # # shape: (4,)
882
+ # # Series: '' [str]
883
+ # # [
884
+ # # "foo"
885
+ # # "foobar"
886
+ # # "foo"
887
+ # # ""
888
+ # # ]
889
+ def strip_suffix(suffix)
890
+ super
891
+ end
892
+
893
+ # Pad the start of the string until it reaches the given length.
894
+ #
895
+ # @param length [Integer]
896
+ # Pad the string until it reaches this length. Strings with length equal to or
897
+ # greater than this value are returned as-is.
898
+ # @param fill_char [String]
899
+ # The character to pad the string with.
900
+ #
901
+ # @return [Series]
902
+ #
903
+ # @example
904
+ # s = Polars::Series.new("a", ["cow", "monkey", "hippopotamus", nil])
905
+ # s.str.pad_start(8, "*")
906
+ # # =>
907
+ # # shape: (4,)
908
+ # # Series: 'a' [str]
909
+ # # [
910
+ # # "*****cow"
911
+ # # "**monkey"
912
+ # # "hippopotamus"
913
+ # # null
914
+ # # ]
915
+ def pad_start(length, fill_char = " ")
916
+ super
917
+ end
918
+
919
+ # Pad the end of the string until it reaches the given length.
920
+ #
921
+ # @param length [Integer]
922
+ # Pad the string until it reaches this length. Strings with length equal to or
923
+ # greater than this value are returned as-is.
924
+ # @param fill_char [String]
925
+ # The character to pad the string with.
926
+ #
927
+ # @return [Series]
928
+ #
929
+ # @example
930
+ # s = Polars::Series.new(["cow", "monkey", "hippopotamus", nil])
931
+ # s.str.pad_end(8, "*")
932
+ # # =>
933
+ # # shape: (4,)
934
+ # # Series: '' [str]
935
+ # # [
936
+ # # "cow*****"
937
+ # # "monkey**"
938
+ # # "hippopotamus"
939
+ # # null
940
+ # # ]
941
+ def pad_end(length, fill_char = " ")
942
+ super
943
+ end
944
+
731
945
  # Fills the string with zeroes.
732
946
  #
733
947
  # Return a copy of the string left filled with ASCII '0' digits to make a string
@@ -850,6 +1064,25 @@ module Polars
850
1064
  super
851
1065
  end
852
1066
 
1067
+ # Returns string values in reversed order.
1068
+ #
1069
+ # @return [Series]
1070
+ #
1071
+ # @example
1072
+ # s = Polars::Series.new("text", ["foo", "bar", "man\u0303ana"])
1073
+ # s.str.reverse
1074
+ # # =>
1075
+ # # shape: (3,)
1076
+ # # Series: 'text' [str]
1077
+ # # [
1078
+ # # "oof"
1079
+ # # "rab"
1080
+ # # "anañam"
1081
+ # # ]
1082
+ def reverse
1083
+ super
1084
+ end
1085
+
853
1086
  # Create subslices of the string values of a Utf8 Series.
854
1087
  #
855
1088
  # @param offset [Integer]
@@ -888,5 +1121,445 @@ module Polars
888
1121
  s = Utils.wrap_s(_s)
889
1122
  s.to_frame.select(Polars.col(s.name).str.slice(offset, length)).to_series
890
1123
  end
1124
+
1125
+ # Return the first n characters of each string in a String Series.
1126
+ #
1127
+ # @param n [Object]
1128
+ # Length of the slice (integer or expression). Negative indexing is supported;
1129
+ # see the second example below.
1130
+ #
1131
+ # @return [Series]
1132
+ #
1133
+ # @example Return up to the first 5 characters.
1134
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1135
+ # s.str.head(5)
1136
+ # # =>
1137
+ # # shape: (4,)
1138
+ # # Series: '' [str]
1139
+ # # [
1140
+ # # "pear"
1141
+ # # null
1142
+ # # "papay"
1143
+ # # "drago"
1144
+ # # ]
1145
+ #
1146
+ # @example Return up to the 3rd character from the end.
1147
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1148
+ # s.str.head(-3)
1149
+ # # =>
1150
+ # # shape: (4,)
1151
+ # # Series: '' [str]
1152
+ # # [
1153
+ # # "p"
1154
+ # # null
1155
+ # # "pap"
1156
+ # # "dragonfr"
1157
+ # # ]
1158
+ def head(n)
1159
+ super
1160
+ end
1161
+
1162
+ # Return the last n characters of each string in a String Series.
1163
+ #
1164
+ # @param n [Object]
1165
+ # Length of the slice (integer or expression). Negative indexing is supported;
1166
+ # see the second example below.
1167
+ #
1168
+ # @return [Series]
1169
+ #
1170
+ # @example Return up to the last 5 characters:
1171
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1172
+ # s.str.tail(5)
1173
+ # # =>
1174
+ # # shape: (4,)
1175
+ # # Series: '' [str]
1176
+ # # [
1177
+ # # "pear"
1178
+ # # null
1179
+ # # "apaya"
1180
+ # # "fruit"
1181
+ # # ]
1182
+ #
1183
+ # @example Return from the 3rd character to the end:
1184
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
1185
+ # s.str.tail(-3)
1186
+ # # =>
1187
+ # # shape: (4,)
1188
+ # # Series: '' [str]
1189
+ # # [
1190
+ # # "r"
1191
+ # # null
1192
+ # # "aya"
1193
+ # # "gonfruit"
1194
+ # # ]
1195
+ def tail(n)
1196
+ super
1197
+ end
1198
+
1199
+ # Convert a String column into a column of dtype with base radix.
1200
+ #
1201
+ # @param base [Integer]
1202
+ # Positive integer or expression which is the base of the string
1203
+ # we are parsing.
1204
+ # Default: 10.
1205
+ # @param dtype [Object]
1206
+ # Polars integer type to cast to.
1207
+ # Default: `Int64`.
1208
+ # @param strict [Object]
1209
+ # Boolean, default true: raise any ParseError or overflow as ComputeError.
1210
+ # When false, silently convert such values to null instead.
1211
+ #
1212
+ # @return [Series]
1213
+ #
1214
+ # @example
1215
+ # s = Polars::Series.new("bin", ["110", "101", "010", "invalid"])
1216
+ # s.str.to_integer(base: 2, dtype: Polars::Int32, strict: false)
1217
+ # # =>
1218
+ # # shape: (4,)
1219
+ # # Series: 'bin' [i32]
1220
+ # # [
1221
+ # # 6
1222
+ # # 5
1223
+ # # 2
1224
+ # # null
1225
+ # # ]
1226
+ #
1227
+ # @example
1228
+ # s = Polars::Series.new("hex", ["fa1e", "ff00", "cafe", nil])
1229
+ # s.str.to_integer(base: 16)
1230
+ # # =>
1231
+ # # shape: (4,)
1232
+ # # Series: 'hex' [i64]
1233
+ # # [
1234
+ # # 64030
1235
+ # # 65280
1236
+ # # 51966
1237
+ # # null
1238
+ # # ]
1239
+ def to_integer(
1240
+ base: 10,
1241
+ dtype: Int64,
1242
+ strict: true
1243
+ )
1244
+ super
1245
+ end
1246
+
1247
+ # Use the Aho-Corasick algorithm to find matches.
1248
+ #
1249
+ # Determines if any of the patterns are contained in the string.
1250
+ #
1251
+ # @param patterns [Object]
1252
+ # String patterns to search.
1253
+ # @param ascii_case_insensitive [Boolean]
1254
+ # Enable ASCII-aware case-insensitive matching.
1255
+ # When this option is enabled, searching will be performed without respect
1256
+ # to case for ASCII letters (a-z and A-Z) only.
1257
+ #
1258
+ # @return [Series]
1259
+ #
1260
+ # @note
1261
+ # This method supports matching on string literals only, and does not support
1262
+ # regular expression matching.
1263
+ #
1264
+ # @example
1265
+ # s = Polars::Series.new(
1266
+ # "lyrics",
1267
+ # [
1268
+ # "Everybody wants to rule the world",
1269
+ # "Tell me what you want, what you really really want",
1270
+ # "Can you feel the love tonight"
1271
+ # ]
1272
+ # )
1273
+ # s.str.contains_any(["you", "me"])
1274
+ # # =>
1275
+ # # shape: (3,)
1276
+ # # Series: 'lyrics' [bool]
1277
+ # # [
1278
+ # # false
1279
+ # # true
1280
+ # # true
1281
+ # # ]
1282
+ def contains_any(
1283
+ patterns,
1284
+ ascii_case_insensitive: false
1285
+ )
1286
+ super
1287
+ end
1288
+
1289
+ # Use the Aho-Corasick algorithm to replace many matches.
1290
+ #
1291
+ # @param patterns
1292
+ # String patterns to search and replace.
1293
+ # Also accepts a mapping of patterns to their replacement as syntactic sugar
1294
+ # for `replace_many(Polars::Series.new(mapping.keys), Polars::Series.new(mapping.values))`.
1295
+ # @param replace_with
1296
+ # Strings to replace where a pattern was a match.
1297
+ # Length must match the length of `patterns` or have length 1. This can be
1298
+ # broadcasted, so it supports many:one and many:many.
1299
+ # @param ascii_case_insensitive
1300
+ # Enable ASCII-aware case-insensitive matching.
1301
+ # When this option is enabled, searching will be performed without respect
1302
+ # to case for ASCII letters (a-z and A-Z) only.
1303
+ #
1304
+ # @return [Series]
1305
+ #
1306
+ # @note
1307
+ # This method supports matching on string literals only, and does not support
1308
+ # regular expression matching.
1309
+ #
1310
+ # @example Replace many patterns by passing lists of equal length to the `patterns` and `replace_with` parameters.
1311
+ # s = Polars::Series.new(
1312
+ # "lyrics",
1313
+ # [
1314
+ # "Everybody wants to rule the world",
1315
+ # "Tell me what you want, what you really really want",
1316
+ # "Can you feel the love tonight"
1317
+ # ]
1318
+ # )
1319
+ # s.str.replace_many(["you", "me"], ["me", "you"])
1320
+ # # =>
1321
+ # # shape: (3,)
1322
+ # # Series: 'lyrics' [str]
1323
+ # # [
1324
+ # # "Everybody wants to rule the wo…
1325
+ # # "Tell you what me want, what me…
1326
+ # # "Can me feel the love tonight"
1327
+ # # ]
1328
+ #
1329
+ # @example Broadcast a replacement for many patterns by passing a sequence of length 1 to the `replace_with` parameter.
1330
+ # s = Polars::Series.new(
1331
+ # "lyrics",
1332
+ # [
1333
+ # "Everybody wants to rule the world",
1334
+ # "Tell me what you want, what you really really want",
1335
+ # "Can you feel the love tonight",
1336
+ # ]
1337
+ # )
1338
+ # s.str.replace_many(["me", "you", "they"], [""])
1339
+ # # =>
1340
+ # # shape: (3,)
1341
+ # # Series: 'lyrics' [str]
1342
+ # # [
1343
+ # # "Everybody wants to rule the wo…
1344
+ # # "Tell what want, what really…
1345
+ # # "Can feel the love tonight"
1346
+ # # ]
1347
+ #
1348
+ # @example Passing a mapping with patterns and replacements is also supported as syntactic sugar.
1349
+ # s = Polars::Series.new(
1350
+ # "lyrics",
1351
+ # [
1352
+ # "Everybody wants to rule the world",
1353
+ # "Tell me what you want, what you really really want",
1354
+ # "Can you feel the love tonight"
1355
+ # ]
1356
+ # )
1357
+ # mapping = {"me" => "you", "you" => "me", "want" => "need"}
1358
+ # s.str.replace_many(mapping)
1359
+ # # =>
1360
+ # # shape: (3,)
1361
+ # # Series: 'lyrics' [str]
1362
+ # # [
1363
+ # # "Everybody needs to rule the wo…
1364
+ # # "Tell you what me need, what me…
1365
+ # # "Can me feel the love tonight"
1366
+ # # ]
1367
+ def replace_many(
1368
+ patterns,
1369
+ replace_with = Expr::NO_DEFAULT,
1370
+ ascii_case_insensitive: false
1371
+ )
1372
+ super
1373
+ end
1374
+
1375
+ # Use the Aho-Corasick algorithm to extract many matches.
1376
+ #
1377
+ # @param patterns [Object]
1378
+ # String patterns to search.
1379
+ # @param ascii_case_insensitive [Boolean]
1380
+ # Enable ASCII-aware case-insensitive matching.
1381
+ # When this option is enabled, searching will be performed without respect
1382
+ # to case for ASCII letters (a-z and A-Z) only.
1383
+ # @param overlapping [Boolean]
1384
+ # Whether matches may overlap.
1385
+ #
1386
+ # @return [Series]
1387
+ #
1388
+ # @note
1389
+ # This method supports matching on string literals only, and does not support
1390
+ # regular expression matching.
1391
+ #
1392
+ # @example
1393
+ # s = Polars::Series.new("values", ["discontent"])
1394
+ # patterns = ["winter", "disco", "onte", "discontent"]
1395
+ # s.str.extract_many(patterns, overlapping: true)
1396
+ # # =>
1397
+ # # shape: (1,)
1398
+ # # Series: 'values' [list[str]]
1399
+ # # [
1400
+ # # ["disco", "onte", "discontent"]
1401
+ # # ]
1402
+ def extract_many(
1403
+ patterns,
1404
+ ascii_case_insensitive: false,
1405
+ overlapping: false
1406
+ )
1407
+ super
1408
+ end
1409
+
1410
+ # Use the Aho-Corasick algorithm to find all matches.
1411
+ #
1412
+ # The function returns the byte offset of the start of each match.
1413
+ # The return type will be `List<UInt32>`
1414
+ #
1415
+ # @param patterns [Object]
1416
+ # String patterns to search.
1417
+ # @param ascii_case_insensitive [Boolean]
1418
+ # Enable ASCII-aware case-insensitive matching.
1419
+ # When this option is enabled, searching will be performed without respect
1420
+ # to case for ASCII letters (a-z and A-Z) only.
1421
+ # @param overlapping [Boolean]
1422
+ # Whether matches may overlap.
1423
+ #
1424
+ # @return [Series]
1425
+ #
1426
+ # @note
1427
+ # This method supports matching on string literals only, and does not support
1428
+ # regular expression matching.
1429
+ #
1430
+ # @example
1431
+ # df = Polars::DataFrame.new({"values" => ["discontent"]})
1432
+ # patterns = ["winter", "disco", "onte", "discontent"]
1433
+ # df.with_columns(
1434
+ # Polars.col("values")
1435
+ # .str.extract_many(patterns, overlapping: false)
1436
+ # .alias("matches"),
1437
+ # Polars.col("values")
1438
+ # .str.extract_many(patterns, overlapping: true)
1439
+ # .alias("matches_overlapping")
1440
+ # )
1441
+ # # =>
1442
+ # # shape: (1, 3)
1443
+ # # ┌────────────┬───────────┬─────────────────────────────────┐
1444
+ # # │ values ┆ matches ┆ matches_overlapping │
1445
+ # # │ --- ┆ --- ┆ --- │
1446
+ # # │ str ┆ list[str] ┆ list[str] │
1447
+ # # ╞════════════╪═══════════╪═════════════════════════════════╡
1448
+ # # │ discontent ┆ ["disco"] ┆ ["disco", "onte", "discontent"… │
1449
+ # # └────────────┴───────────┴─────────────────────────────────┘
1450
+ #
1451
+ # @example
1452
+ # df = Polars::DataFrame.new(
1453
+ # {
1454
+ # "values" => ["discontent", "rhapsody"],
1455
+ # "patterns" => [
1456
+ # ["winter", "disco", "onte", "discontent"],
1457
+ # ["rhap", "ody", "coalesce"]
1458
+ # ]
1459
+ # }
1460
+ # )
1461
+ # df.select(Polars.col("values").str.find_many("patterns"))
1462
+ # # =>
1463
+ # # shape: (2, 1)
1464
+ # # ┌───────────┐
1465
+ # # │ values │
1466
+ # # │ --- │
1467
+ # # │ list[u32] │
1468
+ # # ╞═══════════╡
1469
+ # # │ [0] │
1470
+ # # │ [0, 5] │
1471
+ # # └───────────┘
1472
+ def find_many(
1473
+ patterns,
1474
+ ascii_case_insensitive: false,
1475
+ overlapping: false
1476
+ )
1477
+ super
1478
+ end
1479
+
1480
+ # Vertically concat the values in the Series to a single string value.
1481
+ #
1482
+ # @param delimiter [String]
1483
+ # The delimiter to insert between consecutive string values.
1484
+ # @param ignore_nulls [Boolean]
1485
+ # Ignore null values (default).
1486
+ # If set to `false`, null values will be propagated. This means that
1487
+ # if the column contains any null values, the output is null.
1488
+ #
1489
+ # @return [Series]
1490
+ #
1491
+ # @example
1492
+ # Polars::Series.new([1, nil, 2]).str.join("-")
1493
+ # # =>
1494
+ # # shape: (1,)
1495
+ # # Series: '' [str]
1496
+ # # [
1497
+ # # "1-2"
1498
+ # # ]
1499
+ #
1500
+ # @example
1501
+ # Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
1502
+ # # =>
1503
+ # # shape: (1,)
1504
+ # # Series: '' [str]
1505
+ # # [
1506
+ # # null
1507
+ # # ]
1508
+ def join(delimiter = "-", ignore_nulls: true)
1509
+ super
1510
+ end
1511
+ alias_method :concat, :join
1512
+
1513
+ # Returns string values with all regular expression meta characters escaped.
1514
+ #
1515
+ # @return [Series]
1516
+ #
1517
+ # @example
1518
+ # Polars::Series.new(["abc", "def", nil, "abc(\\w+)"]).str.escape_regex
1519
+ # # =>
1520
+ # # shape: (4,)
1521
+ # # Series: '' [str]
1522
+ # # [
1523
+ # # "abc"
1524
+ # # "def"
1525
+ # # null
1526
+ # # "abc\(\\w\+\)"
1527
+ # # ]
1528
+ def escape_regex
1529
+ super
1530
+ end
1531
+
1532
+ # Returns the Unicode normal form of the string values.
1533
+ #
1534
+ # This uses the forms described in Unicode Standard Annex 15: <https://www.unicode.org/reports/tr15/>.
1535
+ #
1536
+ # @param form ['NFC', 'NFKC', 'NFD', 'NFKD']
1537
+ # Unicode form to use.
1538
+ #
1539
+ # @return [Series]
1540
+ #
1541
+ # @example
1542
+ # s = Polars::Series.new(["01²", "KADOKAWA"])
1543
+ # s.str.normalize("NFC")
1544
+ # # =>
1545
+ # # shape: (2,)
1546
+ # # Series: '' [str]
1547
+ # # [
1548
+ # # "01²"
1549
+ # # "KADOKAWA"
1550
+ # # ]
1551
+ #
1552
+ # @example
1553
+ # s.str.normalize("NFKC")
1554
+ # # =>
1555
+ # # shape: (2,)
1556
+ # # Series: '' [str]
1557
+ # # [
1558
+ # # "012"
1559
+ # # "KADOKAWA"
1560
+ # # ]
1561
+ def normalize(form = "NFC")
1562
+ super
1563
+ end
891
1564
  end
892
1565
  end