polars-df 0.21.0-x86_64-linux-musl → 0.22.0-x86_64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +55 -48
- data/Cargo.toml +3 -0
- data/LICENSE-THIRD-PARTY.txt +23 -49
- data/README.md +12 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/3.4/polars.so +0 -0
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +138 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +6 -6
- data/lib/polars/data_frame.rb +794 -27
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +26 -5
- data/lib/polars/date_time_expr.rb +252 -1
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/expr.rb +1248 -206
- data/lib/polars/functions/business.rb +95 -0
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +14 -1
- data/lib/polars/io/csv.rb +1 -1
- data/lib/polars/io/iceberg.rb +27 -0
- data/lib/polars/io/json.rb +4 -4
- data/lib/polars/io/ndjson.rb +4 -4
- data/lib/polars/io/parquet.rb +32 -7
- data/lib/polars/io/scan_options.rb +4 -1
- data/lib/polars/lazy_frame.rb +1028 -28
- data/lib/polars/list_expr.rb +217 -17
- data/lib/polars/list_name_space.rb +231 -22
- data/lib/polars/meta_expr.rb +89 -0
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/query_opt_flags.rb +50 -0
- data/lib/polars/scan_cast_options.rb +20 -1
- data/lib/polars/schema.rb +79 -3
- data/lib/polars/selector.rb +72 -0
- data/lib/polars/selectors.rb +3 -3
- data/lib/polars/series.rb +1053 -54
- data/lib/polars/string_expr.rb +436 -32
- data/lib/polars/string_name_space.rb +736 -50
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/serde.rb +17 -0
- data/lib/polars/utils/various.rb +22 -1
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +8 -2
@@ -184,86 +184,81 @@ module Polars
|
|
184
184
|
super
|
185
185
|
end
|
186
186
|
|
187
|
-
#
|
187
|
+
# Convert a String column into a Decimal column.
|
188
188
|
#
|
189
|
-
#
|
189
|
+
# This method infers the needed parameters `precision` and `scale`.
|
190
190
|
#
|
191
|
-
# @
|
192
|
-
#
|
193
|
-
#
|
191
|
+
# @param inference_length [Integer]
|
192
|
+
# Number of elements to parse to determine the `precision` and `scale`
|
193
|
+
#
|
194
|
+
# @return [Series]
|
194
195
|
#
|
195
196
|
# @example
|
196
|
-
# s = Polars::Series.new(
|
197
|
-
#
|
197
|
+
# s = Polars::Series.new(
|
198
|
+
# ["40.12", "3420.13", "120134.19", "3212.98", "12.90", "143.09", "143.9"]
|
199
|
+
# )
|
200
|
+
# s.str.to_decimal
|
198
201
|
# # =>
|
199
|
-
# # shape: (
|
200
|
-
# # Series: '' [
|
202
|
+
# # shape: (7,)
|
203
|
+
# # Series: '' [decimal[*,2]]
|
201
204
|
# # [
|
202
|
-
# #
|
203
|
-
# #
|
204
|
-
# #
|
205
|
-
# #
|
205
|
+
# # 40.12
|
206
|
+
# # 3420.13
|
207
|
+
# # 120134.19
|
208
|
+
# # 3212.98
|
209
|
+
# # 12.90
|
210
|
+
# # 143.09
|
211
|
+
# # 143.90
|
206
212
|
# # ]
|
207
|
-
def
|
208
|
-
|
213
|
+
def to_decimal(inference_length = 100, scale: nil)
|
214
|
+
if !scale.nil?
|
215
|
+
raise Todo
|
216
|
+
end
|
217
|
+
|
218
|
+
Utils.wrap_s(_s.str_to_decimal_infer(inference_length))
|
209
219
|
end
|
210
220
|
|
211
|
-
#
|
221
|
+
# Return the length of each string as the number of bytes.
|
212
222
|
#
|
213
223
|
# @return [Series]
|
214
224
|
#
|
215
|
-
# @note
|
216
|
-
# If you know that you are working with ASCII text, `lengths` will be
|
217
|
-
# equivalent, and faster (returns length in terms of the number of bytes).
|
218
|
-
#
|
219
225
|
# @example
|
220
|
-
# s = Polars::Series.new(["Café",
|
221
|
-
# s.str.
|
226
|
+
# s = Polars::Series.new(["Café", "345", "東京", nil])
|
227
|
+
# s.str.len_bytes
|
222
228
|
# # =>
|
223
229
|
# # shape: (4,)
|
224
230
|
# # Series: '' [u32]
|
225
231
|
# # [
|
226
|
-
# #
|
227
|
-
# # null
|
232
|
+
# # 5
|
228
233
|
# # 3
|
229
|
-
# #
|
234
|
+
# # 6
|
235
|
+
# # null
|
230
236
|
# # ]
|
231
|
-
def
|
237
|
+
def len_bytes
|
232
238
|
super
|
233
239
|
end
|
240
|
+
alias_method :lengths, :len_bytes
|
234
241
|
|
235
|
-
#
|
236
|
-
#
|
237
|
-
# @param delimiter [String]
|
238
|
-
# The delimiter to insert between consecutive string values.
|
239
|
-
# @param ignore_nulls [Boolean]
|
240
|
-
# Ignore null values (default).
|
241
|
-
# If set to `False`, null values will be propagated. This means that
|
242
|
-
# if the column contains any null values, the output is null.
|
242
|
+
# Return the length of each string as the number of characters.
|
243
243
|
#
|
244
244
|
# @return [Series]
|
245
245
|
#
|
246
246
|
# @example
|
247
|
-
# Polars::Series.new([
|
248
|
-
#
|
249
|
-
# # shape: (1,)
|
250
|
-
# # Series: '' [str]
|
251
|
-
# # [
|
252
|
-
# # "1-2"
|
253
|
-
# # ]
|
254
|
-
#
|
255
|
-
# @example
|
256
|
-
# Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
|
247
|
+
# s = Polars::Series.new(["Café", "345", "東京", nil])
|
248
|
+
# s.str.len_chars
|
257
249
|
# # =>
|
258
|
-
# # shape: (
|
259
|
-
# # Series: '' [
|
250
|
+
# # shape: (4,)
|
251
|
+
# # Series: '' [u32]
|
260
252
|
# # [
|
253
|
+
# # 4
|
254
|
+
# # 3
|
255
|
+
# # 2
|
261
256
|
# # null
|
262
257
|
# # ]
|
263
|
-
def
|
258
|
+
def len_chars
|
264
259
|
super
|
265
260
|
end
|
266
|
-
alias_method :
|
261
|
+
alias_method :n_chars, :len_chars
|
267
262
|
|
268
263
|
# Check if strings in Series contain a substring that matches a regex.
|
269
264
|
#
|
@@ -302,6 +297,65 @@ module Polars
|
|
302
297
|
super
|
303
298
|
end
|
304
299
|
|
300
|
+
# Return the byte offset of the first substring matching a pattern.
|
301
|
+
#
|
302
|
+
# If the pattern is not found, returns nil.
|
303
|
+
#
|
304
|
+
# @param pattern
|
305
|
+
# A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
|
306
|
+
# @param literal
|
307
|
+
# Treat `pattern` as a literal string, not as a regular expression.
|
308
|
+
# @param strict
|
309
|
+
# Raise an error if the underlying pattern is not a valid regex,
|
310
|
+
# otherwise mask out with a null value.
|
311
|
+
#
|
312
|
+
# @return [Series]
|
313
|
+
#
|
314
|
+
# @note
|
315
|
+
# To modify regular expression behaviour (such as case-sensitivity) with
|
316
|
+
# flags, use the inline `(?iLmsuxU)` syntax.
|
317
|
+
#
|
318
|
+
# @example Find the index of the first substring matching a regex pattern:
|
319
|
+
# s = Polars::Series.new("txt", ["Crab", "Lobster", nil, "Crustacean"])
|
320
|
+
# s.str.find("a|e").rename("idx_rx")
|
321
|
+
# # =>
|
322
|
+
# # shape: (4,)
|
323
|
+
# # Series: 'idx_rx' [u32]
|
324
|
+
# # [
|
325
|
+
# # 2
|
326
|
+
# # 5
|
327
|
+
# # null
|
328
|
+
# # 5
|
329
|
+
# # ]
|
330
|
+
#
|
331
|
+
# @example Find the index of the first substring matching a literal pattern:
|
332
|
+
# s.str.find("e", literal: true).rename("idx_lit")
|
333
|
+
# # =>
|
334
|
+
# # shape: (4,)
|
335
|
+
# # Series: 'idx_lit' [u32]
|
336
|
+
# # [
|
337
|
+
# # null
|
338
|
+
# # 5
|
339
|
+
# # null
|
340
|
+
# # 7
|
341
|
+
# # ]
|
342
|
+
#
|
343
|
+
# @example Match against a pattern found in another column (or expression):
|
344
|
+
# p = Polars::Series.new("pat", ["a[bc]", "b.t", "[aeiuo]", "(?i)A[BC]"])
|
345
|
+
# s.str.find(p).rename("idx")
|
346
|
+
# # =>
|
347
|
+
# # shape: (4,)
|
348
|
+
# # Series: 'idx' [u32]
|
349
|
+
# # [
|
350
|
+
# # 2
|
351
|
+
# # 2
|
352
|
+
# # null
|
353
|
+
# # 5
|
354
|
+
# # ]
|
355
|
+
def find(pattern, literal: false, strict: true)
|
356
|
+
super
|
357
|
+
end
|
358
|
+
|
305
359
|
# Check if string values end with a substring.
|
306
360
|
#
|
307
361
|
# @param sub [String]
|
@@ -395,6 +449,43 @@ module Polars
|
|
395
449
|
super
|
396
450
|
end
|
397
451
|
|
452
|
+
# Parse string values as JSON.
|
453
|
+
#
|
454
|
+
# Throws an error if invalid JSON strings are encountered.
|
455
|
+
#
|
456
|
+
# @param dtype [Object]
|
457
|
+
# The dtype to cast the extracted value to. If nil, the dtype will be
|
458
|
+
# inferred from the JSON value.
|
459
|
+
# @param infer_schema_length [Integer]
|
460
|
+
# The maximum number of rows to scan for schema inference.
|
461
|
+
# If set to `nil`, the full data may be scanned *(this is slow)*.
|
462
|
+
#
|
463
|
+
# @return [Series]
|
464
|
+
#
|
465
|
+
# @example
|
466
|
+
# s = Polars::Series.new("json", ['{"a":1, "b": true}', nil, '{"a":2, "b": false}'])
|
467
|
+
# s.str.json_decode
|
468
|
+
# # =>
|
469
|
+
# # shape: (3,)
|
470
|
+
# # Series: 'json' [struct[2]]
|
471
|
+
# # [
|
472
|
+
# # {1,true}
|
473
|
+
# # null
|
474
|
+
# # {2,false}
|
475
|
+
# # ]
|
476
|
+
def json_decode(dtype = nil, infer_schema_length: 100)
|
477
|
+
if !dtype.nil?
|
478
|
+
s = Utils.wrap_s(_s)
|
479
|
+
return (
|
480
|
+
s.to_frame
|
481
|
+
.select_seq(F.col(s.name).str.json_decode(dtype))
|
482
|
+
.to_series
|
483
|
+
)
|
484
|
+
end
|
485
|
+
|
486
|
+
Utils.wrap_s(_s.str_json_decode(infer_schema_length))
|
487
|
+
end
|
488
|
+
|
398
489
|
# Extract the first match of json string with provided JSONPath expression.
|
399
490
|
#
|
400
491
|
# Throws an error if invalid JSON strings are encountered.
|
@@ -479,6 +570,39 @@ module Polars
|
|
479
570
|
super
|
480
571
|
end
|
481
572
|
|
573
|
+
# Extract all capture groups for the given regex pattern.
|
574
|
+
#
|
575
|
+
# @param pattern [String]
|
576
|
+
# A valid regular expression pattern containing at least one capture group,
|
577
|
+
# compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
|
578
|
+
#
|
579
|
+
# @return [Series]
|
580
|
+
#
|
581
|
+
# @note
|
582
|
+
# All group names are **strings**.
|
583
|
+
#
|
584
|
+
# @example
|
585
|
+
# s = Polars::Series.new(
|
586
|
+
# "url",
|
587
|
+
# [
|
588
|
+
# "http://vote.com/ballon_dor?candidate=messi&ref=python",
|
589
|
+
# "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
|
590
|
+
# "http://vote.com/ballon_dor?error=404&ref=rust"
|
591
|
+
# ]
|
592
|
+
# )
|
593
|
+
# s.str.extract_groups("candidate=(?<candidate>\\w+)&ref=(?<ref>\\w+)")
|
594
|
+
# # =>
|
595
|
+
# # shape: (3,)
|
596
|
+
# # Series: 'url' [struct[2]]
|
597
|
+
# # [
|
598
|
+
# # {"messi","python"}
|
599
|
+
# # {"weghorst","polars"}
|
600
|
+
# # {null,null}
|
601
|
+
# # ]
|
602
|
+
def extract_groups(pattern)
|
603
|
+
super
|
604
|
+
end
|
605
|
+
|
482
606
|
# Count all successive non-overlapping regex matches.
|
483
607
|
#
|
484
608
|
# @param pattern [String]
|
@@ -488,7 +612,7 @@ module Polars
|
|
488
612
|
#
|
489
613
|
# @example
|
490
614
|
# s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
|
491
|
-
# s.str.
|
615
|
+
# s.str.count_matches('\d')
|
492
616
|
# # =>
|
493
617
|
# # shape: (2,)
|
494
618
|
# # Series: 'foo' [u32]
|
@@ -496,9 +620,10 @@ module Polars
|
|
496
620
|
# # 5
|
497
621
|
# # 6
|
498
622
|
# # ]
|
499
|
-
def
|
623
|
+
def count_matches(pattern)
|
500
624
|
super
|
501
625
|
end
|
626
|
+
alias_method :count_match, :count_matches
|
502
627
|
|
503
628
|
# Split the string by a substring.
|
504
629
|
#
|
@@ -728,6 +853,108 @@ module Polars
|
|
728
853
|
end
|
729
854
|
alias_method :rstrip, :strip_chars_end
|
730
855
|
|
856
|
+
# Remove prefix.
|
857
|
+
#
|
858
|
+
# The prefix will be removed from the string exactly once, if found.
|
859
|
+
#
|
860
|
+
# @param prefix [String]
|
861
|
+
# The prefix to be removed.
|
862
|
+
#
|
863
|
+
# @return [Series]
|
864
|
+
#
|
865
|
+
# @example
|
866
|
+
# s = Polars::Series.new(["foobar", "foofoobar", "foo", "bar"])
|
867
|
+
# s.str.strip_prefix("foo")
|
868
|
+
# # =>
|
869
|
+
# # shape: (4,)
|
870
|
+
# # Series: '' [str]
|
871
|
+
# # [
|
872
|
+
# # "bar"
|
873
|
+
# # "foobar"
|
874
|
+
# # ""
|
875
|
+
# # "bar"
|
876
|
+
# # ]
|
877
|
+
def strip_prefix(prefix)
|
878
|
+
super
|
879
|
+
end
|
880
|
+
|
881
|
+
# Remove suffix.
|
882
|
+
#
|
883
|
+
# The suffix will be removed from the string exactly once, if found.
|
884
|
+
#
|
885
|
+
# @param suffix [String]
|
886
|
+
# The suffix to be removed.
|
887
|
+
#
|
888
|
+
# @return [Series]
|
889
|
+
#
|
890
|
+
# @example
|
891
|
+
# s = Polars::Series.new(["foobar", "foobarbar", "foo", "bar"])
|
892
|
+
# s.str.strip_suffix("bar")
|
893
|
+
# # =>
|
894
|
+
# # shape: (4,)
|
895
|
+
# # Series: '' [str]
|
896
|
+
# # [
|
897
|
+
# # "foo"
|
898
|
+
# # "foobar"
|
899
|
+
# # "foo"
|
900
|
+
# # ""
|
901
|
+
# # ]
|
902
|
+
def strip_suffix(suffix)
|
903
|
+
super
|
904
|
+
end
|
905
|
+
|
906
|
+
# Pad the start of the string until it reaches the given length.
|
907
|
+
#
|
908
|
+
# @param length [Integer]
|
909
|
+
# Pad the string until it reaches this length. Strings with length equal to or
|
910
|
+
# greater than this value are returned as-is.
|
911
|
+
# @param fill_char [String]
|
912
|
+
# The character to pad the string with.
|
913
|
+
#
|
914
|
+
# @return [Series]
|
915
|
+
#
|
916
|
+
# @example
|
917
|
+
# s = Polars::Series.new("a", ["cow", "monkey", "hippopotamus", nil])
|
918
|
+
# s.str.pad_start(8, "*")
|
919
|
+
# # =>
|
920
|
+
# # shape: (4,)
|
921
|
+
# # Series: 'a' [str]
|
922
|
+
# # [
|
923
|
+
# # "*****cow"
|
924
|
+
# # "**monkey"
|
925
|
+
# # "hippopotamus"
|
926
|
+
# # null
|
927
|
+
# # ]
|
928
|
+
def pad_start(length, fill_char = " ")
|
929
|
+
super
|
930
|
+
end
|
931
|
+
|
932
|
+
# Pad the end of the string until it reaches the given length.
|
933
|
+
#
|
934
|
+
# @param length [Integer]
|
935
|
+
# Pad the string until it reaches this length. Strings with length equal to or
|
936
|
+
# greater than this value are returned as-is.
|
937
|
+
# @param fill_char [String]
|
938
|
+
# The character to pad the string with.
|
939
|
+
#
|
940
|
+
# @return [Series]
|
941
|
+
#
|
942
|
+
# @example
|
943
|
+
# s = Polars::Series.new(["cow", "monkey", "hippopotamus", nil])
|
944
|
+
# s.str.pad_end(8, "*")
|
945
|
+
# # =>
|
946
|
+
# # shape: (4,)
|
947
|
+
# # Series: '' [str]
|
948
|
+
# # [
|
949
|
+
# # "cow*****"
|
950
|
+
# # "monkey**"
|
951
|
+
# # "hippopotamus"
|
952
|
+
# # null
|
953
|
+
# # ]
|
954
|
+
def pad_end(length, fill_char = " ")
|
955
|
+
super
|
956
|
+
end
|
957
|
+
|
731
958
|
# Fills the string with zeroes.
|
732
959
|
#
|
733
960
|
# Return a copy of the string left filled with ASCII '0' digits to make a string
|
@@ -850,6 +1077,25 @@ module Polars
|
|
850
1077
|
super
|
851
1078
|
end
|
852
1079
|
|
1080
|
+
# Returns string values in reversed order.
|
1081
|
+
#
|
1082
|
+
# @return [Series]
|
1083
|
+
#
|
1084
|
+
# @example
|
1085
|
+
# s = Polars::Series.new("text", ["foo", "bar", "man\u0303ana"])
|
1086
|
+
# s.str.reverse
|
1087
|
+
# # =>
|
1088
|
+
# # shape: (3,)
|
1089
|
+
# # Series: 'text' [str]
|
1090
|
+
# # [
|
1091
|
+
# # "oof"
|
1092
|
+
# # "rab"
|
1093
|
+
# # "anañam"
|
1094
|
+
# # ]
|
1095
|
+
def reverse
|
1096
|
+
super
|
1097
|
+
end
|
1098
|
+
|
853
1099
|
# Create subslices of the string values of a Utf8 Series.
|
854
1100
|
#
|
855
1101
|
# @param offset [Integer]
|
@@ -888,5 +1134,445 @@ module Polars
|
|
888
1134
|
s = Utils.wrap_s(_s)
|
889
1135
|
s.to_frame.select(Polars.col(s.name).str.slice(offset, length)).to_series
|
890
1136
|
end
|
1137
|
+
|
1138
|
+
# Return the first n characters of each string in a String Series.
|
1139
|
+
#
|
1140
|
+
# @param n [Object]
|
1141
|
+
# Length of the slice (integer or expression). Negative indexing is supported;
|
1142
|
+
# a negative `n` returns everything except the last `n.abs` characters.
|
1143
|
+
#
|
1144
|
+
# @return [Series]
|
1145
|
+
#
|
1146
|
+
# @example Return up to the first 5 characters.
|
1147
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1148
|
+
# s.str.head(5)
|
1149
|
+
# # =>
|
1150
|
+
# # shape: (4,)
|
1151
|
+
# # Series: '' [str]
|
1152
|
+
# # [
|
1153
|
+
# # "pear"
|
1154
|
+
# # null
|
1155
|
+
# # "papay"
|
1156
|
+
# # "drago"
|
1157
|
+
# # ]
|
1158
|
+
#
|
1159
|
+
# @example Return up to the 3rd character from the end.
|
1160
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1161
|
+
# s.str.head(-3)
|
1162
|
+
# # =>
|
1163
|
+
# # shape: (4,)
|
1164
|
+
# # Series: '' [str]
|
1165
|
+
# # [
|
1166
|
+
# # "p"
|
1167
|
+
# # null
|
1168
|
+
# # "pap"
|
1169
|
+
# # "dragonfr"
|
1170
|
+
# # ]
|
1171
|
+
def head(n)
|
1172
|
+
super
|
1173
|
+
end
|
1174
|
+
|
1175
|
+
# Return the last n characters of each string in a String Series.
|
1176
|
+
#
|
1177
|
+
# @param n [Object]
|
1178
|
+
# Length of the slice (integer or expression). Negative indexing is supported;
|
1179
|
+
# a negative `n` returns everything except the first `n.abs` characters.
|
1180
|
+
#
|
1181
|
+
# @return [Series]
|
1182
|
+
#
|
1183
|
+
# @example Return up to the last 5 characters:
|
1184
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1185
|
+
# s.str.tail(5)
|
1186
|
+
# # =>
|
1187
|
+
# # shape: (4,)
|
1188
|
+
# # Series: '' [str]
|
1189
|
+
# # [
|
1190
|
+
# # "pear"
|
1191
|
+
# # null
|
1192
|
+
# # "apaya"
|
1193
|
+
# # "fruit"
|
1194
|
+
# # ]
|
1195
|
+
#
|
1196
|
+
# @example Return from the 3rd character to the end:
|
1197
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1198
|
+
# s.str.tail(-3)
|
1199
|
+
# # =>
|
1200
|
+
# # shape: (4,)
|
1201
|
+
# # Series: '' [str]
|
1202
|
+
# # [
|
1203
|
+
# # "r"
|
1204
|
+
# # null
|
1205
|
+
# # "aya"
|
1206
|
+
# # "gonfruit"
|
1207
|
+
# # ]
|
1208
|
+
def tail(n)
|
1209
|
+
super
|
1210
|
+
end
|
1211
|
+
|
1212
|
+
# Convert a String column into an integer column, parsing each value in the given base (radix).
|
1213
|
+
#
|
1214
|
+
# @param base [Integer]
|
1215
|
+
# Positive integer or expression which is the base of the string
|
1216
|
+
# we are parsing.
|
1217
|
+
# Default: 10.
|
1218
|
+
# @param dtype [Object]
|
1219
|
+
# Polars integer type to cast to.
|
1220
|
+
# Default: `Int64`.
|
1221
|
+
# @param strict [Object]
|
1222
|
+
# Boolean, default `true`: any parse error or overflow is raised as a ComputeError.
|
1223
|
+
# If `false`, values that cannot be parsed are silently converted to null.
|
1224
|
+
#
|
1225
|
+
# @return [Series]
|
1226
|
+
#
|
1227
|
+
# @example
|
1228
|
+
# s = Polars::Series.new("bin", ["110", "101", "010", "invalid"])
|
1229
|
+
# s.str.to_integer(base: 2, dtype: Polars::Int32, strict: false)
|
1230
|
+
# # =>
|
1231
|
+
# # shape: (4,)
|
1232
|
+
# # Series: 'bin' [i32]
|
1233
|
+
# # [
|
1234
|
+
# # 6
|
1235
|
+
# # 5
|
1236
|
+
# # 2
|
1237
|
+
# # null
|
1238
|
+
# # ]
|
1239
|
+
#
|
1240
|
+
# @example
|
1241
|
+
# s = Polars::Series.new("hex", ["fa1e", "ff00", "cafe", nil])
|
1242
|
+
# s.str.to_integer(base: 16)
|
1243
|
+
# # =>
|
1244
|
+
# # shape: (4,)
|
1245
|
+
# # Series: 'hex' [i64]
|
1246
|
+
# # [
|
1247
|
+
# # 64030
|
1248
|
+
# # 65280
|
1249
|
+
# # 51966
|
1250
|
+
# # null
|
1251
|
+
# # ]
|
1252
|
+
def to_integer(
|
1253
|
+
base: 10,
|
1254
|
+
dtype: Int64,
|
1255
|
+
strict: true
|
1256
|
+
)
|
1257
|
+
super
|
1258
|
+
end
|
1259
|
+
|
1260
|
+
# Use the Aho-Corasick algorithm to find matches.
|
1261
|
+
#
|
1262
|
+
# Determines if any of the patterns are contained in the string.
|
1263
|
+
#
|
1264
|
+
# @param patterns [Object]
|
1265
|
+
# String patterns to search.
|
1266
|
+
# @param ascii_case_insensitive [Boolean]
|
1267
|
+
# Enable ASCII-aware case-insensitive matching.
|
1268
|
+
# When this option is enabled, searching will be performed without respect
|
1269
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1270
|
+
#
|
1271
|
+
# @return [Series]
|
1272
|
+
#
|
1273
|
+
# @note
|
1274
|
+
# This method supports matching on string literals only, and does not support
|
1275
|
+
# regular expression matching.
|
1276
|
+
#
|
1277
|
+
# @example
|
1278
|
+
# s = Polars::Series.new(
|
1279
|
+
# "lyrics",
|
1280
|
+
# [
|
1281
|
+
# "Everybody wants to rule the world",
|
1282
|
+
# "Tell me what you want, what you really really want",
|
1283
|
+
# "Can you feel the love tonight"
|
1284
|
+
# ]
|
1285
|
+
# )
|
1286
|
+
# s.str.contains_any(["you", "me"])
|
1287
|
+
# # =>
|
1288
|
+
# # shape: (3,)
|
1289
|
+
# # Series: 'lyrics' [bool]
|
1290
|
+
# # [
|
1291
|
+
# # false
|
1292
|
+
# # true
|
1293
|
+
# # true
|
1294
|
+
# # ]
|
1295
|
+
def contains_any(
|
1296
|
+
patterns,
|
1297
|
+
ascii_case_insensitive: false
|
1298
|
+
)
|
1299
|
+
super
|
1300
|
+
end
|
1301
|
+
|
1302
|
+
# Use the Aho-Corasick algorithm to replace many matches.
|
1303
|
+
#
|
1304
|
+
# @param patterns
|
1305
|
+
# String patterns to search and replace.
|
1306
|
+
# Also accepts a mapping of patterns to their replacement as syntactic sugar
|
1307
|
+
# for `replace_many(Polars::Series.new(mapping.keys), Polars::Series.new(mapping.values))`.
|
1308
|
+
# @param replace_with
|
1309
|
+
# Strings to replace where a pattern was a match.
|
1310
|
+
# Length must match the length of `patterns` or have length 1. This can be
|
1311
|
+
# broadcasted, so it supports many:one and many:many.
|
1312
|
+
# @param ascii_case_insensitive
|
1313
|
+
# Enable ASCII-aware case-insensitive matching.
|
1314
|
+
# When this option is enabled, searching will be performed without respect
|
1315
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1316
|
+
#
|
1317
|
+
# @return [Series]
|
1318
|
+
#
|
1319
|
+
# @note
|
1320
|
+
# This method supports matching on string literals only, and does not support
|
1321
|
+
# regular expression matching.
|
1322
|
+
#
|
1323
|
+
# @example Replace many patterns by passing lists of equal length to the `patterns` and `replace_with` parameters.
|
1324
|
+
# s = Polars::Series.new(
|
1325
|
+
# "lyrics",
|
1326
|
+
# [
|
1327
|
+
# "Everybody wants to rule the world",
|
1328
|
+
# "Tell me what you want, what you really really want",
|
1329
|
+
# "Can you feel the love tonight"
|
1330
|
+
# ]
|
1331
|
+
# )
|
1332
|
+
# s.str.replace_many(["you", "me"], ["me", "you"])
|
1333
|
+
# # =>
|
1334
|
+
# # shape: (3,)
|
1335
|
+
# # Series: 'lyrics' [str]
|
1336
|
+
# # [
|
1337
|
+
# # "Everybody wants to rule the wo…
|
1338
|
+
# # "Tell you what me want, what me…
|
1339
|
+
# # "Can me feel the love tonight"
|
1340
|
+
# # ]
|
1341
|
+
#
|
1342
|
+
# @example Broadcast a replacement for many patterns by passing a sequence of length 1 to the `replace_with` parameter.
|
1343
|
+
# s = Polars::Series.new(
|
1344
|
+
# "lyrics",
|
1345
|
+
# [
|
1346
|
+
# "Everybody wants to rule the world",
|
1347
|
+
# "Tell me what you want, what you really really want",
|
1348
|
+
# "Can you feel the love tonight",
|
1349
|
+
# ]
|
1350
|
+
# )
|
1351
|
+
# s.str.replace_many(["me", "you", "they"], [""])
|
1352
|
+
# # =>
|
1353
|
+
# # shape: (3,)
|
1354
|
+
# # Series: 'lyrics' [str]
|
1355
|
+
# # [
|
1356
|
+
# # "Everybody wants to rule the wo…
|
1357
|
+
# # "Tell what want, what really…
|
1358
|
+
# # "Can feel the love tonight"
|
1359
|
+
# # ]
|
1360
|
+
#
|
1361
|
+
# @example Passing a mapping with patterns and replacements is also supported as syntactic sugar.
|
1362
|
+
# s = Polars::Series.new(
|
1363
|
+
# "lyrics",
|
1364
|
+
# [
|
1365
|
+
# "Everybody wants to rule the world",
|
1366
|
+
# "Tell me what you want, what you really really want",
|
1367
|
+
# "Can you feel the love tonight"
|
1368
|
+
# ]
|
1369
|
+
# )
|
1370
|
+
# mapping = {"me" => "you", "you" => "me", "want" => "need"}
|
1371
|
+
# s.str.replace_many(mapping)
|
1372
|
+
# # =>
|
1373
|
+
# # shape: (3,)
|
1374
|
+
# # Series: 'lyrics' [str]
|
1375
|
+
# # [
|
1376
|
+
# # "Everybody needs to rule the wo…
|
1377
|
+
# # "Tell you what me need, what me…
|
1378
|
+
# # "Can me feel the love tonight"
|
1379
|
+
# # ]
|
1380
|
+
def replace_many(
|
1381
|
+
patterns,
|
1382
|
+
replace_with = Expr::NO_DEFAULT,
|
1383
|
+
ascii_case_insensitive: false
|
1384
|
+
)
|
1385
|
+
super
|
1386
|
+
end
|
1387
|
+
|
1388
|
+
# Use the Aho-Corasick algorithm to extract many matches.
|
1389
|
+
#
|
1390
|
+
# @param patterns [Object]
|
1391
|
+
# String patterns to search.
|
1392
|
+
# @param ascii_case_insensitive [Boolean]
|
1393
|
+
# Enable ASCII-aware case-insensitive matching.
|
1394
|
+
# When this option is enabled, searching will be performed without respect
|
1395
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1396
|
+
# @param overlapping [Boolean]
|
1397
|
+
# Whether matches may overlap.
|
1398
|
+
#
|
1399
|
+
# @return [Series]
|
1400
|
+
#
|
1401
|
+
# @note
|
1402
|
+
# This method supports matching on string literals only, and does not support
|
1403
|
+
# regular expression matching.
|
1404
|
+
#
|
1405
|
+
# @example
|
1406
|
+
# s = Polars::Series.new("values", ["discontent"])
|
1407
|
+
# patterns = ["winter", "disco", "onte", "discontent"]
|
1408
|
+
# s.str.extract_many(patterns, overlapping: true)
|
1409
|
+
# # =>
|
1410
|
+
# # shape: (1,)
|
1411
|
+
# # Series: 'values' [list[str]]
|
1412
|
+
# # [
|
1413
|
+
# # ["disco", "onte", "discontent"]
|
1414
|
+
# # ]
|
1415
|
+
def extract_many(
|
1416
|
+
patterns,
|
1417
|
+
ascii_case_insensitive: false,
|
1418
|
+
overlapping: false
|
1419
|
+
)
|
1420
|
+
super
|
1421
|
+
end
|
1422
|
+
|
1423
|
+
# Use the Aho-Corasick algorithm to find all matches.
|
1424
|
+
#
|
1425
|
+
# The function returns the byte offset of the start of each match.
|
1426
|
+
# The return type will be `List<UInt32>`
|
1427
|
+
#
|
1428
|
+
# @param patterns [Object]
|
1429
|
+
# String patterns to search.
|
1430
|
+
# @param ascii_case_insensitive [Boolean]
|
1431
|
+
# Enable ASCII-aware case-insensitive matching.
|
1432
|
+
# When this option is enabled, searching will be performed without respect
|
1433
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1434
|
+
# @param overlapping [Boolean]
|
1435
|
+
# Whether matches may overlap.
|
1436
|
+
#
|
1437
|
+
# @return [Series]
|
1438
|
+
#
|
1439
|
+
# @note
|
1440
|
+
# This method supports matching on string literals only, and does not support
|
1441
|
+
# regular expression matching.
|
1442
|
+
#
|
1443
|
+
# @example
|
1444
|
+
# df = Polars::DataFrame.new({"values" => ["discontent"]})
|
1445
|
+
# patterns = ["winter", "disco", "onte", "discontent"]
|
1446
|
+
# df.with_columns(
|
1447
|
+
# Polars.col("values")
|
1448
|
+
# .str.extract_many(patterns, overlapping: false)
|
1449
|
+
# .alias("matches"),
|
1450
|
+
# Polars.col("values")
|
1451
|
+
# .str.extract_many(patterns, overlapping: true)
|
1452
|
+
# .alias("matches_overlapping")
|
1453
|
+
# )
|
1454
|
+
# # =>
|
1455
|
+
# # shape: (1, 3)
|
1456
|
+
# # ┌────────────┬───────────┬─────────────────────────────────┐
|
1457
|
+
# # │ values ┆ matches ┆ matches_overlapping │
|
1458
|
+
# # │ --- ┆ --- ┆ --- │
|
1459
|
+
# # │ str ┆ list[str] ┆ list[str] │
|
1460
|
+
# # ╞════════════╪═══════════╪═════════════════════════════════╡
|
1461
|
+
# # │ discontent ┆ ["disco"] ┆ ["disco", "onte", "discontent"… │
|
1462
|
+
# # └────────────┴───────────┴─────────────────────────────────┘
|
1463
|
+
#
|
1464
|
+
# @example
|
1465
|
+
# df = Polars::DataFrame.new(
|
1466
|
+
# {
|
1467
|
+
# "values" => ["discontent", "rhapsody"],
|
1468
|
+
# "patterns" => [
|
1469
|
+
# ["winter", "disco", "onte", "discontent"],
|
1470
|
+
# ["rhap", "ody", "coalesce"]
|
1471
|
+
# ]
|
1472
|
+
# }
|
1473
|
+
# )
|
1474
|
+
# df.select(Polars.col("values").str.find_many("patterns"))
|
1475
|
+
# # =>
|
1476
|
+
# # shape: (2, 1)
|
1477
|
+
# # ┌───────────┐
|
1478
|
+
# # │ values │
|
1479
|
+
# # │ --- │
|
1480
|
+
# # │ list[u32] │
|
1481
|
+
# # ╞═══════════╡
|
1482
|
+
# # │ [0] │
|
1483
|
+
# # │ [0, 5] │
|
1484
|
+
# # └───────────┘
|
1485
|
+
def find_many(
|
1486
|
+
patterns,
|
1487
|
+
ascii_case_insensitive: false,
|
1488
|
+
overlapping: false
|
1489
|
+
)
|
1490
|
+
super
|
1491
|
+
end
|
1492
|
+
|
1493
|
+
# Vertically concat the values in the Series to a single string value.
|
1494
|
+
#
|
1495
|
+
# @param delimiter [String]
|
1496
|
+
# The delimiter to insert between consecutive string values.
|
1497
|
+
# @param ignore_nulls [Boolean]
|
1498
|
+
# Ignore null values (default).
|
1499
|
+
# If set to `false`, null values will be propagated. This means that
|
1500
|
+
# if the column contains any null values, the output is null.
|
1501
|
+
#
|
1502
|
+
# @return [Series]
|
1503
|
+
#
|
1504
|
+
# @example
|
1505
|
+
# Polars::Series.new([1, nil, 2]).str.join("-")
|
1506
|
+
# # =>
|
1507
|
+
# # shape: (1,)
|
1508
|
+
# # Series: '' [str]
|
1509
|
+
# # [
|
1510
|
+
# # "1-2"
|
1511
|
+
# # ]
|
1512
|
+
#
|
1513
|
+
# @example
|
1514
|
+
# Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
|
1515
|
+
# # =>
|
1516
|
+
# # shape: (1,)
|
1517
|
+
# # Series: '' [str]
|
1518
|
+
# # [
|
1519
|
+
# # null
|
1520
|
+
# # ]
|
1521
|
+
def join(delimiter = "-", ignore_nulls: true)
|
1522
|
+
super
|
1523
|
+
end
|
1524
|
+
alias_method :concat, :join
|
1525
|
+
|
1526
|
+
# Returns string values with all regular expression meta characters escaped.
|
1527
|
+
#
|
1528
|
+
# @return [Series]
|
1529
|
+
#
|
1530
|
+
# @example
|
1531
|
+
# Polars::Series.new(["abc", "def", nil, "abc(\\w+)"]).str.escape_regex
|
1532
|
+
# # =>
|
1533
|
+
# # shape: (4,)
|
1534
|
+
# # Series: '' [str]
|
1535
|
+
# # [
|
1536
|
+
# # "abc"
|
1537
|
+
# # "def"
|
1538
|
+
# # null
|
1539
|
+
# # "abc\(\\w\+\)"
|
1540
|
+
# # ]
|
1541
|
+
def escape_regex
|
1542
|
+
super
|
1543
|
+
end
|
1544
|
+
|
1545
|
+
# Returns the Unicode normal form of the string values.
|
1546
|
+
#
|
1547
|
+
# This uses the forms described in Unicode Standard Annex 15: <https://www.unicode.org/reports/tr15/>.
|
1548
|
+
#
|
1549
|
+
# @param form ['NFC', 'NFKC', 'NFD', 'NFKD']
|
1550
|
+
# Unicode form to use.
|
1551
|
+
#
|
1552
|
+
# @return [Series]
|
1553
|
+
#
|
1554
|
+
# @example
|
1555
|
+
# s = Polars::Series.new(["01²", "KADOKAWA"])
|
1556
|
+
# s.str.normalize("NFC")
|
1557
|
+
# # =>
|
1558
|
+
# # shape: (2,)
|
1559
|
+
# # Series: '' [str]
|
1560
|
+
# # [
|
1561
|
+
# # "01²"
|
1562
|
+
# # "KADOKAWA"
|
1563
|
+
# # ]
|
1564
|
+
#
|
1565
|
+
# @example
|
1566
|
+
# s.str.normalize("NFKC")
|
1567
|
+
# # =>
|
1568
|
+
# # shape: (2,)
|
1569
|
+
# # Series: '' [str]
|
1570
|
+
# # [
|
1571
|
+
# # "012"
|
1572
|
+
# # "KADOKAWA"
|
1573
|
+
# # ]
|
1574
|
+
def normalize(form = "NFC")
|
1575
|
+
super
|
1576
|
+
end
|
891
1577
|
end
|
892
1578
|
end
|