polars-df 0.20.0-x64-mingw-ucrt → 0.21.1-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +192 -186
- data/LICENSE-THIRD-PARTY.txt +2153 -2532
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/3.4/polars.so +0 -0
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +130 -32
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +12 -2
- data/lib/polars/data_frame.rb +834 -48
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +61 -5
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +1247 -211
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +127 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +19 -1
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +70 -66
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +1099 -95
- data/lib/polars/list_expr.rb +400 -11
- data/lib/polars/list_name_space.rb +321 -5
- data/lib/polars/meta_expr.rb +71 -22
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +84 -3
- data/lib/polars/selector.rb +210 -0
- data/lib/polars/selectors.rb +932 -203
- data/lib/polars/series.rb +1083 -63
- data/lib/polars/string_expr.rb +435 -9
- data/lib/polars/string_name_space.rb +729 -45
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +9 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +10 -0
- metadata +12 -2
@@ -63,6 +63,13 @@ module Polars
|
|
63
63
|
# in the target string.
|
64
64
|
# @param cache [Boolean]
|
65
65
|
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
# @param ambiguous ['raise', 'earliest', 'latest', 'null']
|
67
|
+
# Determine how to deal with ambiguous datetimes:
|
68
|
+
#
|
69
|
+
# - `'raise'` (default): raise
|
70
|
+
# - `'earliest'`: use the earliest datetime
|
71
|
+
# - `'latest'`: use the latest datetime
|
72
|
+
# - `'null'`: set to null
|
66
73
|
#
|
67
74
|
# @return [Series]
|
68
75
|
#
|
@@ -177,82 +184,77 @@ module Polars
|
|
177
184
|
super
|
178
185
|
end
|
179
186
|
|
180
|
-
#
|
187
|
+
# Convert a String column into a Decimal column.
|
181
188
|
#
|
182
|
-
#
|
189
|
+
# This method infers the needed parameters `precision` and `scale`.
|
183
190
|
#
|
184
|
-
# @
|
185
|
-
#
|
186
|
-
#
|
191
|
+
# @param inference_length [Integer]
|
192
|
+
# Number of elements to parse to determine the `precision` and `scale`
|
193
|
+
#
|
194
|
+
# @return [Series]
|
187
195
|
#
|
188
196
|
# @example
|
189
|
-
# s = Polars::Series.new(
|
190
|
-
#
|
197
|
+
# s = Polars::Series.new(
|
198
|
+
# ["40.12", "3420.13", "120134.19", "3212.98", "12.90", "143.09", "143.9"]
|
199
|
+
# )
|
200
|
+
# s.str.to_decimal
|
191
201
|
# # =>
|
192
|
-
# # shape: (
|
193
|
-
# # Series: '' [
|
202
|
+
# # shape: (7,)
|
203
|
+
# # Series: '' [decimal[*,2]]
|
194
204
|
# # [
|
195
|
-
# #
|
196
|
-
# #
|
197
|
-
# #
|
198
|
-
# #
|
205
|
+
# # 40.12
|
206
|
+
# # 3420.13
|
207
|
+
# # 120134.19
|
208
|
+
# # 3212.98
|
209
|
+
# # 12.90
|
210
|
+
# # 143.09
|
211
|
+
# # 143.90
|
199
212
|
# # ]
|
200
|
-
def
|
213
|
+
def to_decimal(inference_length = 100)
|
201
214
|
super
|
202
215
|
end
|
203
216
|
|
204
|
-
#
|
217
|
+
# Return the length of each string as the number of bytes.
|
205
218
|
#
|
206
219
|
# @return [Series]
|
207
220
|
#
|
208
|
-
# @note
|
209
|
-
# If you know that you are working with ASCII text, `lengths` will be
|
210
|
-
# equivalent, and faster (returns length in terms of the number of bytes).
|
211
|
-
#
|
212
221
|
# @example
|
213
|
-
# s = Polars::Series.new(["Café",
|
214
|
-
# s.str.
|
222
|
+
# s = Polars::Series.new(["Café", "345", "東京", nil])
|
223
|
+
# s.str.len_bytes
|
215
224
|
# # =>
|
216
225
|
# # shape: (4,)
|
217
226
|
# # Series: '' [u32]
|
218
227
|
# # [
|
219
|
-
# #
|
220
|
-
# # null
|
228
|
+
# # 5
|
221
229
|
# # 3
|
222
|
-
# #
|
230
|
+
# # 6
|
231
|
+
# # null
|
223
232
|
# # ]
|
224
|
-
def
|
233
|
+
def len_bytes
|
225
234
|
super
|
226
235
|
end
|
236
|
+
alias_method :lengths, :len_bytes
|
227
237
|
|
228
|
-
#
|
229
|
-
#
|
230
|
-
# @param delimiter [String]
|
231
|
-
# The delimiter to insert between consecutive string values.
|
238
|
+
# Return the length of each string as the number of characters.
|
232
239
|
#
|
233
240
|
# @return [Series]
|
234
241
|
#
|
235
242
|
# @example
|
236
|
-
# Polars::Series.new([
|
237
|
-
#
|
238
|
-
# # shape: (1,)
|
239
|
-
# # Series: '' [str]
|
240
|
-
# # [
|
241
|
-
# # "1-2"
|
242
|
-
# # ]
|
243
|
-
#
|
244
|
-
# @example
|
245
|
-
# Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
|
243
|
+
# s = Polars::Series.new(["Café", "345", "東京", nil])
|
244
|
+
# s.str.len_chars
|
246
245
|
# # =>
|
247
|
-
# # shape: (
|
248
|
-
# # Series: '' [
|
246
|
+
# # shape: (4,)
|
247
|
+
# # Series: '' [u32]
|
249
248
|
# # [
|
249
|
+
# # 4
|
250
|
+
# # 3
|
251
|
+
# # 2
|
250
252
|
# # null
|
251
253
|
# # ]
|
252
|
-
def
|
254
|
+
def len_chars
|
253
255
|
super
|
254
256
|
end
|
255
|
-
alias_method :
|
257
|
+
alias_method :n_chars, :len_chars
|
256
258
|
|
257
259
|
# Check if strings in Series contain a substring that matches a regex.
|
258
260
|
#
|
@@ -291,6 +293,65 @@ module Polars
|
|
291
293
|
super
|
292
294
|
end
|
293
295
|
|
296
|
+
# Return the byte offset of the first substring matching a pattern.
|
297
|
+
#
|
298
|
+
# If the pattern is not found, returns nil.
|
299
|
+
#
|
300
|
+
# @param pattern
|
301
|
+
# A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
|
302
|
+
# @param literal
|
303
|
+
# Treat `pattern` as a literal string, not as a regular expression.
|
304
|
+
# @param strict
|
305
|
+
# Raise an error if the underlying pattern is not a valid regex,
|
306
|
+
# otherwise mask out with a null value.
|
307
|
+
#
|
308
|
+
# @return [Series]
|
309
|
+
#
|
310
|
+
# @note
|
311
|
+
# To modify regular expression behaviour (such as case-sensitivity) with
|
312
|
+
# flags, use the inline `(?iLmsuxU)` syntax.
|
313
|
+
#
|
314
|
+
# @example Find the index of the first substring matching a regex pattern:
|
315
|
+
# s = Polars::Series.new("txt", ["Crab", "Lobster", nil, "Crustacean"])
|
316
|
+
# s.str.find("a|e").rename("idx_rx")
|
317
|
+
# # =>
|
318
|
+
# # shape: (4,)
|
319
|
+
# # Series: 'idx_rx' [u32]
|
320
|
+
# # [
|
321
|
+
# # 2
|
322
|
+
# # 5
|
323
|
+
# # null
|
324
|
+
# # 5
|
325
|
+
# # ]
|
326
|
+
#
|
327
|
+
# @example Find the index of the first substring matching a literal pattern:
|
328
|
+
# s.str.find("e", literal: true).rename("idx_lit")
|
329
|
+
# # =>
|
330
|
+
# # shape: (4,)
|
331
|
+
# # Series: 'idx_lit' [u32]
|
332
|
+
# # [
|
333
|
+
# # null
|
334
|
+
# # 5
|
335
|
+
# # null
|
336
|
+
# # 7
|
337
|
+
# # ]
|
338
|
+
#
|
339
|
+
# @example Match against a pattern found in another column (or expression):
|
340
|
+
# p = Polars::Series.new("pat", ["a[bc]", "b.t", "[aeiuo]", "(?i)A[BC]"])
|
341
|
+
# s.str.find(p).rename("idx")
|
342
|
+
# # =>
|
343
|
+
# # shape: (4,)
|
344
|
+
# # Series: 'idx' [u32]
|
345
|
+
# # [
|
346
|
+
# # 2
|
347
|
+
# # 2
|
348
|
+
# # null
|
349
|
+
# # 5
|
350
|
+
# # ]
|
351
|
+
def find(pattern, literal: false, strict: true)
|
352
|
+
super
|
353
|
+
end
|
354
|
+
|
294
355
|
# Check if string values end with a substring.
|
295
356
|
#
|
296
357
|
# @param sub [String]
|
@@ -384,6 +445,34 @@ module Polars
|
|
384
445
|
super
|
385
446
|
end
|
386
447
|
|
448
|
+
# Parse string values as JSON.
|
449
|
+
#
|
450
|
+
# Throws an error if invalid JSON strings are encountered.
|
451
|
+
#
|
452
|
+
# @param dtype [Object]
|
453
|
+
# The dtype to cast the extracted value to. If nil, the dtype will be
|
454
|
+
# inferred from the JSON value.
|
455
|
+
# @param infer_schema_length [Integer]
|
456
|
+
# The maximum number of rows to scan for schema inference.
|
457
|
+
# If set to `nil`, the full data may be scanned *(this is slow)*.
|
458
|
+
#
|
459
|
+
# @return [Series]
|
460
|
+
#
|
461
|
+
# @example
|
462
|
+
# s = Polars::Series.new("json", ['{"a":1, "b": true}', nil, '{"a":2, "b": false}'])
|
463
|
+
# s.str.json_decode
|
464
|
+
# # =>
|
465
|
+
# # shape: (3,)
|
466
|
+
# # Series: 'json' [struct[2]]
|
467
|
+
# # [
|
468
|
+
# # {1,true}
|
469
|
+
# # null
|
470
|
+
# # {2,false}
|
471
|
+
# # ]
|
472
|
+
def json_decode(dtype = nil, infer_schema_length: 100)
|
473
|
+
super
|
474
|
+
end
|
475
|
+
|
387
476
|
# Extract the first match of json string with provided JSONPath expression.
|
388
477
|
#
|
389
478
|
# Throws an error if invalid JSON strings are encountered.
|
@@ -468,6 +557,39 @@ module Polars
|
|
468
557
|
super
|
469
558
|
end
|
470
559
|
|
560
|
+
# Extract all capture groups for the given regex pattern.
|
561
|
+
#
|
562
|
+
# @param pattern [String]
|
563
|
+
# A valid regular expression pattern containing at least one capture group,
|
564
|
+
# compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
|
565
|
+
#
|
566
|
+
# @return [Series]
|
567
|
+
#
|
568
|
+
# @note
|
569
|
+
# All group names are **strings**.
|
570
|
+
#
|
571
|
+
# @example
|
572
|
+
# s = Polars::Series.new(
|
573
|
+
# "url",
|
574
|
+
# [
|
575
|
+
# "http://vote.com/ballon_dor?candidate=messi&ref=python",
|
576
|
+
# "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
|
577
|
+
# "http://vote.com/ballon_dor?error=404&ref=rust"
|
578
|
+
# ]
|
579
|
+
# )
|
580
|
+
# s.str.extract_groups("candidate=(?<candidate>\\w+)&ref=(?<ref>\\w+)")
|
581
|
+
# # =>
|
582
|
+
# # shape: (3,)
|
583
|
+
# # Series: 'url' [struct[2]]
|
584
|
+
# # [
|
585
|
+
# # {"messi","python"}
|
586
|
+
# # {"weghorst","polars"}
|
587
|
+
# # {null,null}
|
588
|
+
# # ]
|
589
|
+
def extract_groups(pattern)
|
590
|
+
super
|
591
|
+
end
|
592
|
+
|
471
593
|
# Count all successive non-overlapping regex matches.
|
472
594
|
#
|
473
595
|
# @param pattern [String]
|
@@ -477,7 +599,7 @@ module Polars
|
|
477
599
|
#
|
478
600
|
# @example
|
479
601
|
# s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
|
480
|
-
# s.str.
|
602
|
+
# s.str.count_matches('\d')
|
481
603
|
# # =>
|
482
604
|
# # shape: (2,)
|
483
605
|
# # Series: 'foo' [u32]
|
@@ -485,9 +607,10 @@ module Polars
|
|
485
607
|
# # 5
|
486
608
|
# # 6
|
487
609
|
# # ]
|
488
|
-
def
|
610
|
+
def count_matches(pattern)
|
489
611
|
super
|
490
612
|
end
|
613
|
+
alias_method :count_match, :count_matches
|
491
614
|
|
492
615
|
# Split the string by a substring.
|
493
616
|
#
|
@@ -717,6 +840,108 @@ module Polars
|
|
717
840
|
end
|
718
841
|
alias_method :rstrip, :strip_chars_end
|
719
842
|
|
843
|
+
# Remove prefix.
|
844
|
+
#
|
845
|
+
# The prefix will be removed from the string exactly once, if found.
|
846
|
+
#
|
847
|
+
# @param prefix [String]
|
848
|
+
# The prefix to be removed.
|
849
|
+
#
|
850
|
+
# @return [Series]
|
851
|
+
#
|
852
|
+
# @example
|
853
|
+
# s = Polars::Series.new(["foobar", "foofoobar", "foo", "bar"])
|
854
|
+
# s.str.strip_prefix("foo")
|
855
|
+
# # =>
|
856
|
+
# # shape: (4,)
|
857
|
+
# # Series: '' [str]
|
858
|
+
# # [
|
859
|
+
# # "bar"
|
860
|
+
# # "foobar"
|
861
|
+
# # ""
|
862
|
+
# # "bar"
|
863
|
+
# # ]
|
864
|
+
def strip_prefix(prefix)
|
865
|
+
super
|
866
|
+
end
|
867
|
+
|
868
|
+
# Remove suffix.
|
869
|
+
#
|
870
|
+
# The suffix will be removed from the string exactly once, if found.
|
871
|
+
#
|
872
|
+
# @param suffix [String]
|
873
|
+
# The suffix to be removed.
|
874
|
+
#
|
875
|
+
# @return [Series]
|
876
|
+
#
|
877
|
+
# @example
|
878
|
+
# s = Polars::Series.new(["foobar", "foobarbar", "foo", "bar"])
|
879
|
+
# s.str.strip_suffix("bar")
|
880
|
+
# # =>
|
881
|
+
# # shape: (4,)
|
882
|
+
# # Series: '' [str]
|
883
|
+
# # [
|
884
|
+
# # "foo"
|
885
|
+
# # "foobar"
|
886
|
+
# # "foo"
|
887
|
+
# # ""
|
888
|
+
# # ]
|
889
|
+
def strip_suffix(suffix)
|
890
|
+
super
|
891
|
+
end
|
892
|
+
|
893
|
+
# Pad the start of the string until it reaches the given length.
|
894
|
+
#
|
895
|
+
# @param length [Integer]
|
896
|
+
# Pad the string until it reaches this length. Strings with length equal to or
|
897
|
+
# greater than this value are returned as-is.
|
898
|
+
# @param fill_char [String]
|
899
|
+
# The character to pad the string with.
|
900
|
+
#
|
901
|
+
# @return [Series]
|
902
|
+
#
|
903
|
+
# @example
|
904
|
+
# s = Polars::Series.new("a", ["cow", "monkey", "hippopotamus", nil])
|
905
|
+
# s.str.pad_start(8, "*")
|
906
|
+
# # =>
|
907
|
+
# # shape: (4,)
|
908
|
+
# # Series: 'a' [str]
|
909
|
+
# # [
|
910
|
+
# # "*****cow"
|
911
|
+
# # "**monkey"
|
912
|
+
# # "hippopotamus"
|
913
|
+
# # null
|
914
|
+
# # ]
|
915
|
+
def pad_start(length, fill_char = " ")
|
916
|
+
super
|
917
|
+
end
|
918
|
+
|
919
|
+
# Pad the end of the string until it reaches the given length.
|
920
|
+
#
|
921
|
+
# @param length [Integer]
|
922
|
+
# Pad the string until it reaches this length. Strings with length equal to or
|
923
|
+
# greater than this value are returned as-is.
|
924
|
+
# @param fill_char [String]
|
925
|
+
# The character to pad the string with.
|
926
|
+
#
|
927
|
+
# @return [Series]
|
928
|
+
#
|
929
|
+
# @example
|
930
|
+
# s = Polars::Series.new(["cow", "monkey", "hippopotamus", nil])
|
931
|
+
# s.str.pad_end(8, "*")
|
932
|
+
# # =>
|
933
|
+
# # shape: (4,)
|
934
|
+
# # Series: '' [str]
|
935
|
+
# # [
|
936
|
+
# # "cow*****"
|
937
|
+
# # "monkey**"
|
938
|
+
# # "hippopotamus"
|
939
|
+
# # null
|
940
|
+
# # ]
|
941
|
+
def pad_end(length, fill_char = " ")
|
942
|
+
super
|
943
|
+
end
|
944
|
+
|
720
945
|
# Fills the string with zeroes.
|
721
946
|
#
|
722
947
|
# Return a copy of the string left filled with ASCII '0' digits to make a string
|
@@ -839,6 +1064,25 @@ module Polars
|
|
839
1064
|
super
|
840
1065
|
end
|
841
1066
|
|
1067
|
+
# Returns string values in reversed order.
|
1068
|
+
#
|
1069
|
+
# @return [Series]
|
1070
|
+
#
|
1071
|
+
# @example
|
1072
|
+
# s = Polars::Series.new("text", ["foo", "bar", "man\u0303ana"])
|
1073
|
+
# s.str.reverse
|
1074
|
+
# # =>
|
1075
|
+
# # shape: (3,)
|
1076
|
+
# # Series: 'text' [str]
|
1077
|
+
# # [
|
1078
|
+
# # "oof"
|
1079
|
+
# # "rab"
|
1080
|
+
# # "anañam"
|
1081
|
+
# # ]
|
1082
|
+
def reverse
|
1083
|
+
super
|
1084
|
+
end
|
1085
|
+
|
842
1086
|
# Create subslices of the string values of a Utf8 Series.
|
843
1087
|
#
|
844
1088
|
# @param offset [Integer]
|
@@ -877,5 +1121,445 @@ module Polars
|
|
877
1121
|
s = Utils.wrap_s(_s)
|
878
1122
|
s.to_frame.select(Polars.col(s.name).str.slice(offset, length)).to_series
|
879
1123
|
end
|
1124
|
+
|
1125
|
+
# Return the first n characters of each string in a String Series.
|
1126
|
+
#
|
1127
|
+
# @param n [Object]
|
1128
|
+
# Length of the slice (integer or expression). Negative indexing is supported;
|
1129
|
+
# see the second example below.
|
1130
|
+
#
|
1131
|
+
# @return [Series]
|
1132
|
+
#
|
1133
|
+
# @example Return up to the first 5 characters.
|
1134
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1135
|
+
# s.str.head(5)
|
1136
|
+
# # =>
|
1137
|
+
# # shape: (4,)
|
1138
|
+
# # Series: '' [str]
|
1139
|
+
# # [
|
1140
|
+
# # "pear"
|
1141
|
+
# # null
|
1142
|
+
# # "papay"
|
1143
|
+
# # "drago"
|
1144
|
+
# # ]
|
1145
|
+
#
|
1146
|
+
# @example Return up to the 3rd character from the end.
|
1147
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1148
|
+
# s.str.head(-3)
|
1149
|
+
# # =>
|
1150
|
+
# # shape: (4,)
|
1151
|
+
# # Series: '' [str]
|
1152
|
+
# # [
|
1153
|
+
# # "p"
|
1154
|
+
# # null
|
1155
|
+
# # "pap"
|
1156
|
+
# # "dragonfr"
|
1157
|
+
# # ]
|
1158
|
+
def head(n)
|
1159
|
+
super
|
1160
|
+
end
|
1161
|
+
|
1162
|
+
# Return the last n characters of each string in a String Series.
|
1163
|
+
#
|
1164
|
+
# @param n [Object]
|
1165
|
+
# Length of the slice (integer or expression). Negative indexing is supported;
|
1166
|
+
# see the second example below.
|
1167
|
+
#
|
1168
|
+
# @return [Series]
|
1169
|
+
#
|
1170
|
+
# @example Return up to the last 5 characters:
|
1171
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1172
|
+
# s.str.tail(5)
|
1173
|
+
# # =>
|
1174
|
+
# # shape: (4,)
|
1175
|
+
# # Series: '' [str]
|
1176
|
+
# # [
|
1177
|
+
# # "pear"
|
1178
|
+
# # null
|
1179
|
+
# # "apaya"
|
1180
|
+
# # "fruit"
|
1181
|
+
# # ]
|
1182
|
+
#
|
1183
|
+
# @example Return from the 3rd character to the end:
|
1184
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1185
|
+
# s.str.tail(-3)
|
1186
|
+
# # =>
|
1187
|
+
# # shape: (4,)
|
1188
|
+
# # Series: '' [str]
|
1189
|
+
# # [
|
1190
|
+
# # "r"
|
1191
|
+
# # null
|
1192
|
+
# # "aya"
|
1193
|
+
# # "gonfruit"
|
1194
|
+
# # ]
|
1195
|
+
def tail(n)
|
1196
|
+
super
|
1197
|
+
end
|
1198
|
+
|
1199
|
+
# Convert a String column into an integer column of `dtype`, parsing values in the given base (radix).
|
1200
|
+
#
|
1201
|
+
# @param base [Integer]
|
1202
|
+
# Positive integer or expression which is the base of the string
|
1203
|
+
# we are parsing.
|
1204
|
+
# Default: 10.
|
1205
|
+
# @param dtype [Object]
|
1206
|
+
# Polars integer type to cast to.
|
1207
|
+
# Default: `Int64`.
|
1208
|
+
# @param strict [Object]
|
1209
|
+
# If true (default), any parse error or overflow is raised as a ComputeError.
|
1210
|
+
# If false, such values are silently converted to null.
|
1211
|
+
#
|
1212
|
+
# @return [Series]
|
1213
|
+
#
|
1214
|
+
# @example
|
1215
|
+
# s = Polars::Series.new("bin", ["110", "101", "010", "invalid"])
|
1216
|
+
# s.str.to_integer(base: 2, dtype: Polars::Int32, strict: false)
|
1217
|
+
# # =>
|
1218
|
+
# # shape: (4,)
|
1219
|
+
# # Series: 'bin' [i32]
|
1220
|
+
# # [
|
1221
|
+
# # 6
|
1222
|
+
# # 5
|
1223
|
+
# # 2
|
1224
|
+
# # null
|
1225
|
+
# # ]
|
1226
|
+
#
|
1227
|
+
# @example
|
1228
|
+
# s = Polars::Series.new("hex", ["fa1e", "ff00", "cafe", nil])
|
1229
|
+
# s.str.to_integer(base: 16)
|
1230
|
+
# # =>
|
1231
|
+
# # shape: (4,)
|
1232
|
+
# # Series: 'hex' [i64]
|
1233
|
+
# # [
|
1234
|
+
# # 64030
|
1235
|
+
# # 65280
|
1236
|
+
# # 51966
|
1237
|
+
# # null
|
1238
|
+
# # ]
|
1239
|
+
def to_integer(
|
1240
|
+
base: 10,
|
1241
|
+
dtype: Int64,
|
1242
|
+
strict: true
|
1243
|
+
)
|
1244
|
+
super
|
1245
|
+
end
|
1246
|
+
|
1247
|
+
# Use the Aho-Corasick algorithm to find matches.
|
1248
|
+
#
|
1249
|
+
# Determines if any of the patterns are contained in the string.
|
1250
|
+
#
|
1251
|
+
# @param patterns [Object]
|
1252
|
+
# String patterns to search.
|
1253
|
+
# @param ascii_case_insensitive [Boolean]
|
1254
|
+
# Enable ASCII-aware case-insensitive matching.
|
1255
|
+
# When this option is enabled, searching will be performed without respect
|
1256
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1257
|
+
#
|
1258
|
+
# @return [Series]
|
1259
|
+
#
|
1260
|
+
# @note
|
1261
|
+
# This method supports matching on string literals only, and does not support
|
1262
|
+
# regular expression matching.
|
1263
|
+
#
|
1264
|
+
# @example
|
1265
|
+
# s = Polars::Series.new(
|
1266
|
+
# "lyrics",
|
1267
|
+
# [
|
1268
|
+
# "Everybody wants to rule the world",
|
1269
|
+
# "Tell me what you want, what you really really want",
|
1270
|
+
# "Can you feel the love tonight"
|
1271
|
+
# ]
|
1272
|
+
# )
|
1273
|
+
# s.str.contains_any(["you", "me"])
|
1274
|
+
# # =>
|
1275
|
+
# # shape: (3,)
|
1276
|
+
# # Series: 'lyrics' [bool]
|
1277
|
+
# # [
|
1278
|
+
# # false
|
1279
|
+
# # true
|
1280
|
+
# # true
|
1281
|
+
# # ]
|
1282
|
+
def contains_any(
|
1283
|
+
patterns,
|
1284
|
+
ascii_case_insensitive: false
|
1285
|
+
)
|
1286
|
+
super
|
1287
|
+
end
|
1288
|
+
|
1289
|
+
# Use the Aho-Corasick algorithm to replace many matches.
|
1290
|
+
#
|
1291
|
+
# @param patterns
|
1292
|
+
# String patterns to search and replace.
|
1293
|
+
# Also accepts a mapping of patterns to their replacement as syntactic sugar
|
1294
|
+
# for `replace_many(Polars::Series.new(mapping.keys), Polars::Series.new(mapping.values))`.
|
1295
|
+
# @param replace_with
|
1296
|
+
# Strings to replace where a pattern was a match.
|
1297
|
+
# Length must match the length of `patterns` or have length 1. This can be
|
1298
|
+
# broadcasted, so it supports many:one and many:many.
|
1299
|
+
# @param ascii_case_insensitive
|
1300
|
+
# Enable ASCII-aware case-insensitive matching.
|
1301
|
+
# When this option is enabled, searching will be performed without respect
|
1302
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1303
|
+
#
|
1304
|
+
# @return [Series]
|
1305
|
+
#
|
1306
|
+
# @note
|
1307
|
+
# This method supports matching on string literals only, and does not support
|
1308
|
+
# regular expression matching.
|
1309
|
+
#
|
1310
|
+
# @example Replace many patterns by passing lists of equal length to the `patterns` and `replace_with` parameters.
|
1311
|
+
# s = Polars::Series.new(
|
1312
|
+
# "lyrics",
|
1313
|
+
# [
|
1314
|
+
# "Everybody wants to rule the world",
|
1315
|
+
# "Tell me what you want, what you really really want",
|
1316
|
+
# "Can you feel the love tonight"
|
1317
|
+
# ]
|
1318
|
+
# )
|
1319
|
+
# s.str.replace_many(["you", "me"], ["me", "you"])
|
1320
|
+
# # =>
|
1321
|
+
# # shape: (3,)
|
1322
|
+
# # Series: 'lyrics' [str]
|
1323
|
+
# # [
|
1324
|
+
# # "Everybody wants to rule the wo…
|
1325
|
+
# # "Tell you what me want, what me…
|
1326
|
+
# # "Can me feel the love tonight"
|
1327
|
+
# # ]
|
1328
|
+
#
|
1329
|
+
# @example Broadcast a replacement for many patterns by passing a sequence of length 1 to the `replace_with` parameter.
|
1330
|
+
# s = Polars::Series.new(
|
1331
|
+
# "lyrics",
|
1332
|
+
# [
|
1333
|
+
# "Everybody wants to rule the world",
|
1334
|
+
# "Tell me what you want, what you really really want",
|
1335
|
+
# "Can you feel the love tonight",
|
1336
|
+
# ]
|
1337
|
+
# )
|
1338
|
+
# s.str.replace_many(["me", "you", "they"], [""])
|
1339
|
+
# # =>
|
1340
|
+
# # shape: (3,)
|
1341
|
+
# # Series: 'lyrics' [str]
|
1342
|
+
# # [
|
1343
|
+
# # "Everybody wants to rule the wo…
|
1344
|
+
# # "Tell what want, what really…
|
1345
|
+
# # "Can feel the love tonight"
|
1346
|
+
# # ]
|
1347
|
+
#
|
1348
|
+
# @example Passing a mapping with patterns and replacements is also supported as syntactic sugar.
|
1349
|
+
# s = Polars::Series.new(
|
1350
|
+
# "lyrics",
|
1351
|
+
# [
|
1352
|
+
# "Everybody wants to rule the world",
|
1353
|
+
# "Tell me what you want, what you really really want",
|
1354
|
+
# "Can you feel the love tonight"
|
1355
|
+
# ]
|
1356
|
+
# )
|
1357
|
+
# mapping = {"me" => "you", "you" => "me", "want" => "need"}
|
1358
|
+
# s.str.replace_many(mapping)
|
1359
|
+
# # =>
|
1360
|
+
# # shape: (3,)
|
1361
|
+
# # Series: 'lyrics' [str]
|
1362
|
+
# # [
|
1363
|
+
# # "Everybody needs to rule the wo…
|
1364
|
+
# # "Tell you what me need, what me…
|
1365
|
+
# # "Can me feel the love tonight"
|
1366
|
+
# # ]
|
1367
|
+
def replace_many(
|
1368
|
+
patterns,
|
1369
|
+
replace_with = Expr::NO_DEFAULT,
|
1370
|
+
ascii_case_insensitive: false
|
1371
|
+
)
|
1372
|
+
super
|
1373
|
+
end
|
1374
|
+
|
1375
|
+
# Use the Aho-Corasick algorithm to extract many matches.
|
1376
|
+
#
|
1377
|
+
# @param patterns [Object]
|
1378
|
+
# String patterns to search.
|
1379
|
+
# @param ascii_case_insensitive [Boolean]
|
1380
|
+
# Enable ASCII-aware case-insensitive matching.
|
1381
|
+
# When this option is enabled, searching will be performed without respect
|
1382
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1383
|
+
# @param overlapping [Boolean]
|
1384
|
+
# Whether matches may overlap.
|
1385
|
+
#
|
1386
|
+
# @return [Series]
|
1387
|
+
#
|
1388
|
+
# @note
|
1389
|
+
# This method supports matching on string literals only, and does not support
|
1390
|
+
# regular expression matching.
|
1391
|
+
#
|
1392
|
+
# @example
|
1393
|
+
# s = Polars::Series.new("values", ["discontent"])
|
1394
|
+
# patterns = ["winter", "disco", "onte", "discontent"]
|
1395
|
+
# s.str.extract_many(patterns, overlapping: true)
|
1396
|
+
# # =>
|
1397
|
+
# # shape: (1,)
|
1398
|
+
# # Series: 'values' [list[str]]
|
1399
|
+
# # [
|
1400
|
+
# # ["disco", "onte", "discontent"]
|
1401
|
+
# # ]
|
1402
|
+
def extract_many(
|
1403
|
+
patterns,
|
1404
|
+
ascii_case_insensitive: false,
|
1405
|
+
overlapping: false
|
1406
|
+
)
|
1407
|
+
super
|
1408
|
+
end
|
1409
|
+
|
1410
|
+
# Use the Aho-Corasick algorithm to find all matches.
|
1411
|
+
#
|
1412
|
+
# The function returns the byte offset of the start of each match.
|
1413
|
+
# The return type will be `List<UInt32>`
|
1414
|
+
#
|
1415
|
+
# @param patterns [Object]
|
1416
|
+
# String patterns to search.
|
1417
|
+
# @param ascii_case_insensitive [Boolean]
|
1418
|
+
# Enable ASCII-aware case-insensitive matching.
|
1419
|
+
# When this option is enabled, searching will be performed without respect
|
1420
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1421
|
+
# @param overlapping [Boolean]
|
1422
|
+
# Whether matches may overlap.
|
1423
|
+
#
|
1424
|
+
# @return [Series]
|
1425
|
+
#
|
1426
|
+
# @note
|
1427
|
+
# This method supports matching on string literals only, and does not support
|
1428
|
+
# regular expression matching.
|
1429
|
+
#
|
1430
|
+
# @example
|
1431
|
+
# df = Polars::DataFrame.new({"values" => ["discontent"]})
|
1432
|
+
# patterns = ["winter", "disco", "onte", "discontent"]
|
1433
|
+
# df.with_columns(
|
1434
|
+
# Polars.col("values")
|
1435
|
+
# .str.extract_many(patterns, overlapping: false)
|
1436
|
+
# .alias("matches"),
|
1437
|
+
# Polars.col("values")
|
1438
|
+
# .str.extract_many(patterns, overlapping: true)
|
1439
|
+
# .alias("matches_overlapping")
|
1440
|
+
# )
|
1441
|
+
# # =>
|
1442
|
+
# # shape: (1, 3)
|
1443
|
+
# # ┌────────────┬───────────┬─────────────────────────────────┐
|
1444
|
+
# # │ values ┆ matches ┆ matches_overlapping │
|
1445
|
+
# # │ --- ┆ --- ┆ --- │
|
1446
|
+
# # │ str ┆ list[str] ┆ list[str] │
|
1447
|
+
# # ╞════════════╪═══════════╪═════════════════════════════════╡
|
1448
|
+
# # │ discontent ┆ ["disco"] ┆ ["disco", "onte", "discontent"… │
|
1449
|
+
# # └────────────┴───────────┴─────────────────────────────────┘
|
1450
|
+
#
|
1451
|
+
# @example
|
1452
|
+
# df = Polars::DataFrame.new(
|
1453
|
+
# {
|
1454
|
+
# "values" => ["discontent", "rhapsody"],
|
1455
|
+
# "patterns" => [
|
1456
|
+
# ["winter", "disco", "onte", "discontent"],
|
1457
|
+
# ["rhap", "ody", "coalesce"]
|
1458
|
+
# ]
|
1459
|
+
# }
|
1460
|
+
# )
|
1461
|
+
# df.select(Polars.col("values").str.find_many("patterns"))
|
1462
|
+
# # =>
|
1463
|
+
# # shape: (2, 1)
|
1464
|
+
# # ┌───────────┐
|
1465
|
+
# # │ values │
|
1466
|
+
# # │ --- │
|
1467
|
+
# # │ list[u32] │
|
1468
|
+
# # ╞═══════════╡
|
1469
|
+
# # │ [0] │
|
1470
|
+
# # │ [0, 5] │
|
1471
|
+
# # └───────────┘
|
1472
|
+
def find_many(
|
1473
|
+
patterns,
|
1474
|
+
ascii_case_insensitive: false,
|
1475
|
+
overlapping: false
|
1476
|
+
)
|
1477
|
+
super
|
1478
|
+
end
|
1479
|
+
|
1480
|
+
# Vertically concat the values in the Series to a single string value.
|
1481
|
+
#
|
1482
|
+
# @param delimiter [String]
|
1483
|
+
# The delimiter to insert between consecutive string values.
|
1484
|
+
# @param ignore_nulls [Boolean]
|
1485
|
+
# Ignore null values (default).
|
1486
|
+
# If set to `false`, null values will be propagated. This means that
|
1487
|
+
# if the column contains any null values, the output is null.
|
1488
|
+
#
|
1489
|
+
# @return [Series]
|
1490
|
+
#
|
1491
|
+
# @example
|
1492
|
+
# Polars::Series.new([1, nil, 2]).str.join("-")
|
1493
|
+
# # =>
|
1494
|
+
# # shape: (1,)
|
1495
|
+
# # Series: '' [str]
|
1496
|
+
# # [
|
1497
|
+
# # "1-2"
|
1498
|
+
# # ]
|
1499
|
+
#
|
1500
|
+
# @example
|
1501
|
+
# Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
|
1502
|
+
# # =>
|
1503
|
+
# # shape: (1,)
|
1504
|
+
# # Series: '' [str]
|
1505
|
+
# # [
|
1506
|
+
# # null
|
1507
|
+
# # ]
|
1508
|
+
def join(delimiter = "-", ignore_nulls: true)
|
1509
|
+
super
|
1510
|
+
end
|
1511
|
+
alias_method :concat, :join
|
1512
|
+
|
1513
|
+
# Returns string values with all regular expression meta characters escaped.
|
1514
|
+
#
|
1515
|
+
# @return [Series]
|
1516
|
+
#
|
1517
|
+
# @example
|
1518
|
+
# Polars::Series.new(["abc", "def", nil, "abc(\\w+)"]).str.escape_regex
|
1519
|
+
# # =>
|
1520
|
+
# # shape: (4,)
|
1521
|
+
# # Series: '' [str]
|
1522
|
+
# # [
|
1523
|
+
# # "abc"
|
1524
|
+
# # "def"
|
1525
|
+
# # null
|
1526
|
+
# # "abc\(\\w\+\)"
|
1527
|
+
# # ]
|
1528
|
+
def escape_regex
|
1529
|
+
super
|
1530
|
+
end
|
1531
|
+
|
1532
|
+
# Returns the Unicode normal form of the string values.
|
1533
|
+
#
|
1534
|
+
# This uses the forms described in Unicode Standard Annex 15: <https://www.unicode.org/reports/tr15/>.
|
1535
|
+
#
|
1536
|
+
# @param form ['NFC', 'NFKC', 'NFD', 'NFKD']
|
1537
|
+
# Unicode form to use.
|
1538
|
+
#
|
1539
|
+
# @return [Series]
|
1540
|
+
#
|
1541
|
+
# @example
|
1542
|
+
# s = Polars::Series.new(["01²", "KADOKAWA"])
|
1543
|
+
# s.str.normalize("NFC")
|
1544
|
+
# # =>
|
1545
|
+
# # shape: (2,)
|
1546
|
+
# # Series: '' [str]
|
1547
|
+
# # [
|
1548
|
+
# # "01²"
|
1549
|
+
# # "KADOKAWA"
|
1550
|
+
# # ]
|
1551
|
+
#
|
1552
|
+
# @example
|
1553
|
+
# s.str.normalize("NFKC")
|
1554
|
+
# # =>
|
1555
|
+
# # shape: (2,)
|
1556
|
+
# # Series: '' [str]
|
1557
|
+
# # [
|
1558
|
+
# # "012"
|
1559
|
+
# # "KADOKAWA"
|
1560
|
+
# # ]
|
1561
|
+
def normalize(form = "NFC")
|
1562
|
+
super
|
1563
|
+
end
|
880
1564
|
end
|
881
1565
|
end
|