polars-df 0.21.0 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion/mod.rs +92 -4
- data/ext/polars/src/exceptions.rs +1 -0
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +24 -1
- data/ext/polars/src/expr/datetime.rs +58 -0
- data/ext/polars/src/expr/general.rs +84 -5
- data/ext/polars/src/expr/list.rs +24 -0
- data/ext/polars/src/expr/meta.rs +11 -0
- data/ext/polars/src/expr/mod.rs +1 -0
- data/ext/polars/src/expr/name.rs +8 -0
- data/ext/polars/src/expr/rolling.rs +20 -0
- data/ext/polars/src/expr/string.rs +59 -0
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/functions/io.rs +19 -0
- data/ext/polars/src/functions/lazy.rs +4 -0
- data/ext/polars/src/lazyframe/general.rs +51 -0
- data/ext/polars/src/lib.rs +119 -10
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/series.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +44 -0
- data/ext/polars/src/series/general.rs +64 -4
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +138 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +6 -6
- data/lib/polars/data_frame.rb +684 -19
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +14 -2
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/expr.rb +1213 -180
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +13 -0
- data/lib/polars/io/csv.rb +1 -1
- data/lib/polars/io/json.rb +4 -4
- data/lib/polars/io/ndjson.rb +4 -4
- data/lib/polars/io/parquet.rb +27 -5
- data/lib/polars/lazy_frame.rb +936 -20
- data/lib/polars/list_expr.rb +196 -4
- data/lib/polars/list_name_space.rb +201 -4
- data/lib/polars/meta_expr.rb +64 -0
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/schema.rb +79 -3
- data/lib/polars/selector.rb +72 -0
- data/lib/polars/selectors.rb +3 -3
- data/lib/polars/series.rb +1051 -54
- data/lib/polars/string_expr.rb +411 -6
- data/lib/polars/string_name_space.rb +722 -49
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -0
- metadata +4 -1
@@ -184,86 +184,77 @@ module Polars
|
|
184
184
|
super
|
185
185
|
end
|
186
186
|
|
187
|
-
#
|
187
|
+
# Convert a String column into a Decimal column.
|
188
188
|
#
|
189
|
-
#
|
189
|
+
# This method infers the needed parameters `precision` and `scale`.
|
190
190
|
#
|
191
|
-
# @
|
192
|
-
#
|
193
|
-
#
|
191
|
+
# @param inference_length [Integer]
|
192
|
+
# Number of elements to parse to determine the `precision` and `scale`
|
193
|
+
#
|
194
|
+
# @return [Series]
|
194
195
|
#
|
195
196
|
# @example
|
196
|
-
# s = Polars::Series.new(
|
197
|
-
#
|
197
|
+
# s = Polars::Series.new(
|
198
|
+
# ["40.12", "3420.13", "120134.19", "3212.98", "12.90", "143.09", "143.9"]
|
199
|
+
# )
|
200
|
+
# s.str.to_decimal
|
198
201
|
# # =>
|
199
|
-
# # shape: (
|
200
|
-
# # Series: '' [
|
202
|
+
# # shape: (7,)
|
203
|
+
# # Series: '' [decimal[*,2]]
|
201
204
|
# # [
|
202
|
-
# #
|
203
|
-
# #
|
204
|
-
# #
|
205
|
-
# #
|
205
|
+
# # 40.12
|
206
|
+
# # 3420.13
|
207
|
+
# # 120134.19
|
208
|
+
# # 3212.98
|
209
|
+
# # 12.90
|
210
|
+
# # 143.09
|
211
|
+
# # 143.90
|
206
212
|
# # ]
|
207
|
-
def
|
213
|
+
def to_decimal(inference_length = 100)
|
208
214
|
super
|
209
215
|
end
|
210
216
|
|
211
|
-
#
|
217
|
+
# Return the length of each string as the number of bytes.
|
212
218
|
#
|
213
219
|
# @return [Series]
|
214
220
|
#
|
215
|
-
# @note
|
216
|
-
# If you know that you are working with ASCII text, `lengths` will be
|
217
|
-
# equivalent, and faster (returns length in terms of the number of bytes).
|
218
|
-
#
|
219
221
|
# @example
|
220
|
-
# s = Polars::Series.new(["Café",
|
221
|
-
# s.str.
|
222
|
+
# s = Polars::Series.new(["Café", "345", "東京", nil])
|
223
|
+
# s.str.len_bytes
|
222
224
|
# # =>
|
223
225
|
# # shape: (4,)
|
224
226
|
# # Series: '' [u32]
|
225
227
|
# # [
|
226
|
-
# #
|
227
|
-
# # null
|
228
|
+
# # 5
|
228
229
|
# # 3
|
229
|
-
# #
|
230
|
+
# # 6
|
231
|
+
# # null
|
230
232
|
# # ]
|
231
|
-
def
|
233
|
+
def len_bytes
|
232
234
|
super
|
233
235
|
end
|
236
|
+
alias_method :lengths, :len_bytes
|
234
237
|
|
235
|
-
#
|
236
|
-
#
|
237
|
-
# @param delimiter [String]
|
238
|
-
# The delimiter to insert between consecutive string values.
|
239
|
-
# @param ignore_nulls [Boolean]
|
240
|
-
# Ignore null values (default).
|
241
|
-
# If set to `False`, null values will be propagated. This means that
|
242
|
-
# if the column contains any null values, the output is null.
|
238
|
+
# Return the length of each string as the number of characters.
|
243
239
|
#
|
244
240
|
# @return [Series]
|
245
241
|
#
|
246
242
|
# @example
|
247
|
-
# Polars::Series.new([
|
243
|
+
# s = Polars::Series.new(["Café", "345", "東京", nil])
|
244
|
+
# s.str.len_chars
|
248
245
|
# # =>
|
249
|
-
# # shape: (
|
250
|
-
# # Series: '' [
|
251
|
-
# # [
|
252
|
-
# # "1-2"
|
253
|
-
# # ]
|
254
|
-
#
|
255
|
-
# @example
|
256
|
-
# Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
|
257
|
-
# # =>
|
258
|
-
# # shape: (1,)
|
259
|
-
# # Series: '' [str]
|
246
|
+
# # shape: (4,)
|
247
|
+
# # Series: '' [u32]
|
260
248
|
# # [
|
249
|
+
# # 4
|
250
|
+
# # 3
|
251
|
+
# # 2
|
261
252
|
# # null
|
262
253
|
# # ]
|
263
|
-
def
|
254
|
+
def len_chars
|
264
255
|
super
|
265
256
|
end
|
266
|
-
alias_method :
|
257
|
+
alias_method :n_chars, :len_chars
|
267
258
|
|
268
259
|
# Check if strings in Series contain a substring that matches a regex.
|
269
260
|
#
|
@@ -302,6 +293,65 @@ module Polars
|
|
302
293
|
super
|
303
294
|
end
|
304
295
|
|
296
|
+
# Return the byte offset of the first substring matching a pattern.
|
297
|
+
#
|
298
|
+
# If the pattern is not found, returns nil.
|
299
|
+
#
|
300
|
+
# @param pattern
|
301
|
+
# A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
|
302
|
+
# @param literal
|
303
|
+
# Treat `pattern` as a literal string, not as a regular expression.
|
304
|
+
# @param strict
|
305
|
+
# Raise an error if the underlying pattern is not a valid regex,
|
306
|
+
# otherwise mask out with a null value.
|
307
|
+
#
|
308
|
+
# @return [Series]
|
309
|
+
#
|
310
|
+
# @note
|
311
|
+
# To modify regular expression behaviour (such as case-sensitivity) with
|
312
|
+
# flags, use the inline `(?iLmsuxU)` syntax.
|
313
|
+
#
|
314
|
+
# @example Find the index of the first substring matching a regex pattern:
|
315
|
+
# s = Polars::Series.new("txt", ["Crab", "Lobster", nil, "Crustacean"])
|
316
|
+
# s.str.find("a|e").rename("idx_rx")
|
317
|
+
# # =>
|
318
|
+
# # shape: (4,)
|
319
|
+
# # Series: 'idx_rx' [u32]
|
320
|
+
# # [
|
321
|
+
# # 2
|
322
|
+
# # 5
|
323
|
+
# # null
|
324
|
+
# # 5
|
325
|
+
# # ]
|
326
|
+
#
|
327
|
+
# @example Find the index of the first substring matching a literal pattern:
|
328
|
+
# s.str.find("e", literal: true).rename("idx_lit")
|
329
|
+
# # =>
|
330
|
+
# # shape: (4,)
|
331
|
+
# # Series: 'idx_lit' [u32]
|
332
|
+
# # [
|
333
|
+
# # null
|
334
|
+
# # 5
|
335
|
+
# # null
|
336
|
+
# # 7
|
337
|
+
# # ]
|
338
|
+
#
|
339
|
+
# @example Match against patterns taken from another Series (or expression):
|
340
|
+
# p = Polars::Series.new("pat", ["a[bc]", "b.t", "[aeiuo]", "(?i)A[BC]"])
|
341
|
+
# s.str.find(p).rename("idx")
|
342
|
+
# # =>
|
343
|
+
# # shape: (4,)
|
344
|
+
# # Series: 'idx' [u32]
|
345
|
+
# # [
|
346
|
+
# # 2
|
347
|
+
# # 2
|
348
|
+
# # null
|
349
|
+
# # 5
|
350
|
+
# # ]
|
351
|
+
def find(pattern, literal: false, strict: true)
|
352
|
+
super
|
353
|
+
end
|
354
|
+
|
305
355
|
# Check if string values end with a substring.
|
306
356
|
#
|
307
357
|
# @param sub [String]
|
@@ -395,6 +445,34 @@ module Polars
|
|
395
445
|
super
|
396
446
|
end
|
397
447
|
|
448
|
+
# Parse string values as JSON.
|
449
|
+
#
|
450
|
+
# Throws an error if invalid JSON strings are encountered.
|
451
|
+
#
|
452
|
+
# @param dtype [Object]
|
453
|
+
# The dtype to cast the extracted value to. If nil, the dtype will be
|
454
|
+
# inferred from the JSON value.
|
455
|
+
# @param infer_schema_length [Integer]
|
456
|
+
# The maximum number of rows to scan for schema inference.
|
457
|
+
# If set to `nil`, the full data may be scanned *(this is slow)*.
|
458
|
+
#
|
459
|
+
# @return [Series]
|
460
|
+
#
|
461
|
+
# @example
|
462
|
+
# s = Polars::Series.new("json", ['{"a":1, "b": true}', nil, '{"a":2, "b": false}'])
|
463
|
+
# s.str.json_decode
|
464
|
+
# # =>
|
465
|
+
# # shape: (3,)
|
466
|
+
# # Series: 'json' [struct[2]]
|
467
|
+
# # [
|
468
|
+
# # {1,true}
|
469
|
+
# # null
|
470
|
+
# # {2,false}
|
471
|
+
# # ]
|
472
|
+
def json_decode(dtype = nil, infer_schema_length: 100)
|
473
|
+
super
|
474
|
+
end
|
475
|
+
|
398
476
|
# Extract the first match of json string with provided JSONPath expression.
|
399
477
|
#
|
400
478
|
# Raises an error if invalid JSON strings are encountered.
|
@@ -479,6 +557,39 @@ module Polars
|
|
479
557
|
super
|
480
558
|
end
|
481
559
|
|
560
|
+
# Extract all capture groups for the given regex pattern.
|
561
|
+
#
|
562
|
+
# @param pattern [String]
|
563
|
+
# A valid regular expression pattern containing at least one capture group,
|
564
|
+
# compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
|
565
|
+
#
|
566
|
+
# @return [Series]
|
567
|
+
#
|
568
|
+
# @note
|
569
|
+
# All group names are **strings**.
|
570
|
+
#
|
571
|
+
# @example
|
572
|
+
# s = Polars::Series.new(
|
573
|
+
# "url",
|
574
|
+
# [
|
575
|
+
# "http://vote.com/ballon_dor?candidate=messi&ref=python",
|
576
|
+
# "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
|
577
|
+
# "http://vote.com/ballon_dor?error=404&ref=rust"
|
578
|
+
# ]
|
579
|
+
# )
|
580
|
+
# s.str.extract_groups("candidate=(?<candidate>\\w+)&ref=(?<ref>\\w+)")
|
581
|
+
# # =>
|
582
|
+
# # shape: (3,)
|
583
|
+
# # Series: 'url' [struct[2]]
|
584
|
+
# # [
|
585
|
+
# # {"messi","python"}
|
586
|
+
# # {"weghorst","polars"}
|
587
|
+
# # {null,null}
|
588
|
+
# # ]
|
589
|
+
def extract_groups(pattern)
|
590
|
+
super
|
591
|
+
end
|
592
|
+
|
482
593
|
# Count all successive non-overlapping regex matches.
|
483
594
|
#
|
484
595
|
# @param pattern [String]
|
@@ -488,7 +599,7 @@ module Polars
|
|
488
599
|
#
|
489
600
|
# @example
|
490
601
|
# s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
|
491
|
-
# s.str.
|
602
|
+
# s.str.count_matches('\d')
|
492
603
|
# # =>
|
493
604
|
# # shape: (2,)
|
494
605
|
# # Series: 'foo' [u32]
|
@@ -496,9 +607,10 @@ module Polars
|
|
496
607
|
# # 5
|
497
608
|
# # 6
|
498
609
|
# # ]
|
499
|
-
def
|
610
|
+
def count_matches(pattern)
|
500
611
|
super
|
501
612
|
end
|
613
|
+
alias_method :count_match, :count_matches
|
502
614
|
|
503
615
|
# Split the string by a substring.
|
504
616
|
#
|
@@ -728,6 +840,108 @@ module Polars
|
|
728
840
|
end
|
729
841
|
alias_method :rstrip, :strip_chars_end
|
730
842
|
|
843
|
+
# Remove prefix.
|
844
|
+
#
|
845
|
+
# The prefix will be removed from the string exactly once, if found.
|
846
|
+
#
|
847
|
+
# @param prefix [String]
|
848
|
+
# The prefix to be removed.
|
849
|
+
#
|
850
|
+
# @return [Series]
|
851
|
+
#
|
852
|
+
# @example
|
853
|
+
# s = Polars::Series.new(["foobar", "foofoobar", "foo", "bar"])
|
854
|
+
# s.str.strip_prefix("foo")
|
855
|
+
# # =>
|
856
|
+
# # shape: (4,)
|
857
|
+
# # Series: '' [str]
|
858
|
+
# # [
|
859
|
+
# # "bar"
|
860
|
+
# # "foobar"
|
861
|
+
# # ""
|
862
|
+
# # "bar"
|
863
|
+
# # ]
|
864
|
+
def strip_prefix(prefix)
|
865
|
+
super
|
866
|
+
end
|
867
|
+
|
868
|
+
# Remove suffix.
|
869
|
+
#
|
870
|
+
# The suffix will be removed from the string exactly once, if found.
|
871
|
+
#
|
872
|
+
# @param suffix [String]
|
873
|
+
# The suffix to be removed.
|
874
|
+
#
|
875
|
+
# @return [Series]
|
876
|
+
#
|
877
|
+
# @example
|
878
|
+
# s = Polars::Series.new(["foobar", "foobarbar", "foo", "bar"])
|
879
|
+
# s.str.strip_suffix("bar")
|
880
|
+
# # =>
|
881
|
+
# # shape: (4,)
|
882
|
+
# # Series: '' [str]
|
883
|
+
# # [
|
884
|
+
# # "foo"
|
885
|
+
# # "foobar"
|
886
|
+
# # "foo"
|
887
|
+
# # ""
|
888
|
+
# # ]
|
889
|
+
def strip_suffix(suffix)
|
890
|
+
super
|
891
|
+
end
|
892
|
+
|
893
|
+
# Pad the start of the string until it reaches the given length.
|
894
|
+
#
|
895
|
+
# @param length [Integer]
|
896
|
+
# Pad the string until it reaches this length. Strings with length equal to or
|
897
|
+
# greater than this value are returned as-is.
|
898
|
+
# @param fill_char [String]
|
899
|
+
# The character to pad the string with.
|
900
|
+
#
|
901
|
+
# @return [Series]
|
902
|
+
#
|
903
|
+
# @example
|
904
|
+
# s = Polars::Series.new("a", ["cow", "monkey", "hippopotamus", nil])
|
905
|
+
# s.str.pad_start(8, "*")
|
906
|
+
# # =>
|
907
|
+
# # shape: (4,)
|
908
|
+
# # Series: 'a' [str]
|
909
|
+
# # [
|
910
|
+
# # "*****cow"
|
911
|
+
# # "**monkey"
|
912
|
+
# # "hippopotamus"
|
913
|
+
# # null
|
914
|
+
# # ]
|
915
|
+
def pad_start(length, fill_char = " ")
|
916
|
+
super
|
917
|
+
end
|
918
|
+
|
919
|
+
# Pad the end of the string until it reaches the given length.
|
920
|
+
#
|
921
|
+
# @param length [Integer]
|
922
|
+
# Pad the string until it reaches this length. Strings with length equal to or
|
923
|
+
# greater than this value are returned as-is.
|
924
|
+
# @param fill_char [String]
|
925
|
+
# The character to pad the string with.
|
926
|
+
#
|
927
|
+
# @return [Series]
|
928
|
+
#
|
929
|
+
# @example
|
930
|
+
# s = Polars::Series.new(["cow", "monkey", "hippopotamus", nil])
|
931
|
+
# s.str.pad_end(8, "*")
|
932
|
+
# # =>
|
933
|
+
# # shape: (4,)
|
934
|
+
# # Series: '' [str]
|
935
|
+
# # [
|
936
|
+
# # "cow*****"
|
937
|
+
# # "monkey**"
|
938
|
+
# # "hippopotamus"
|
939
|
+
# # null
|
940
|
+
# # ]
|
941
|
+
def pad_end(length, fill_char = " ")
|
942
|
+
super
|
943
|
+
end
|
944
|
+
|
731
945
|
# Fills the string with zeroes.
|
732
946
|
#
|
733
947
|
# Return a copy of the string left filled with ASCII '0' digits to make a string
|
@@ -850,6 +1064,25 @@ module Polars
|
|
850
1064
|
super
|
851
1065
|
end
|
852
1066
|
|
1067
|
+
# Returns string values in reversed order.
|
1068
|
+
#
|
1069
|
+
# @return [Series]
|
1070
|
+
#
|
1071
|
+
# @example
|
1072
|
+
# s = Polars::Series.new("text", ["foo", "bar", "man\u0303ana"])
|
1073
|
+
# s.str.reverse
|
1074
|
+
# # =>
|
1075
|
+
# # shape: (3,)
|
1076
|
+
# # Series: 'text' [str]
|
1077
|
+
# # [
|
1078
|
+
# # "oof"
|
1079
|
+
# # "rab"
|
1080
|
+
# # "anañam"
|
1081
|
+
# # ]
|
1082
|
+
def reverse
|
1083
|
+
super
|
1084
|
+
end
|
1085
|
+
|
853
1086
|
# Create subslices of the string values of a Utf8 Series.
|
854
1087
|
#
|
855
1088
|
# @param offset [Integer]
|
@@ -888,5 +1121,445 @@ module Polars
|
|
888
1121
|
s = Utils.wrap_s(_s)
|
889
1122
|
s.to_frame.select(Polars.col(s.name).str.slice(offset, length)).to_series
|
890
1123
|
end
|
1124
|
+
|
1125
|
+
# Return the first n characters of each string in a String Series.
|
1126
|
+
#
|
1127
|
+
# @param n [Object]
|
1128
|
+
# Length of the slice (integer or expression). Negative indexing is supported;
|
1129
|
+
# a negative `n` returns all but the last `n.abs` characters of each string.
|
1130
|
+
#
|
1131
|
+
# @return [Series]
|
1132
|
+
#
|
1133
|
+
# @example Return up to the first 5 characters.
|
1134
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1135
|
+
# s.str.head(5)
|
1136
|
+
# # =>
|
1137
|
+
# # shape: (4,)
|
1138
|
+
# # Series: '' [str]
|
1139
|
+
# # [
|
1140
|
+
# # "pear"
|
1141
|
+
# # null
|
1142
|
+
# # "papay"
|
1143
|
+
# # "drago"
|
1144
|
+
# # ]
|
1145
|
+
#
|
1146
|
+
# @example Return up to the 3rd character from the end.
|
1147
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1148
|
+
# s.str.head(-3)
|
1149
|
+
# # =>
|
1150
|
+
# # shape: (4,)
|
1151
|
+
# # Series: '' [str]
|
1152
|
+
# # [
|
1153
|
+
# # "p"
|
1154
|
+
# # null
|
1155
|
+
# # "pap"
|
1156
|
+
# # "dragonfr"
|
1157
|
+
# # ]
|
1158
|
+
def head(n)
|
1159
|
+
super
|
1160
|
+
end
|
1161
|
+
|
1162
|
+
# Return the last n characters of each string in a String Series.
|
1163
|
+
#
|
1164
|
+
# @param n [Object]
|
1165
|
+
# Length of the slice (integer or expression). Negative indexing is supported;
|
1166
|
+
# a negative `n` returns all but the first `n.abs` characters of each string.
|
1167
|
+
#
|
1168
|
+
# @return [Series]
|
1169
|
+
#
|
1170
|
+
# @example Return up to the last 5 characters:
|
1171
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1172
|
+
# s.str.tail(5)
|
1173
|
+
# # =>
|
1174
|
+
# # shape: (4,)
|
1175
|
+
# # Series: '' [str]
|
1176
|
+
# # [
|
1177
|
+
# # "pear"
|
1178
|
+
# # null
|
1179
|
+
# # "apaya"
|
1180
|
+
# # "fruit"
|
1181
|
+
# # ]
|
1182
|
+
#
|
1183
|
+
# @example Return everything after the first 3 characters:
|
1184
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
|
1185
|
+
# s.str.tail(-3)
|
1186
|
+
# # =>
|
1187
|
+
# # shape: (4,)
|
1188
|
+
# # Series: '' [str]
|
1189
|
+
# # [
|
1190
|
+
# # "r"
|
1191
|
+
# # null
|
1192
|
+
# # "aya"
|
1193
|
+
# # "gonfruit"
|
1194
|
+
# # ]
|
1195
|
+
def tail(n)
|
1196
|
+
super
|
1197
|
+
end
|
1198
|
+
|
1199
|
+
# Convert a String column into an integer column of the given dtype, parsing each value in the given base (radix).
|
1200
|
+
#
|
1201
|
+
# @param base [Integer]
|
1202
|
+
# Positive integer or expression which is the base of the string
|
1203
|
+
# we are parsing.
|
1204
|
+
# Default: 10.
|
1205
|
+
# @param dtype [Object]
|
1206
|
+
# Polars integer type to cast to.
|
1207
|
+
# Default: `Int64`.
|
1208
|
+
# @param strict [Object]
|
1209
|
+
# Boolean (default: true). When true, any parse error or overflow is raised as a ComputeError.
|
1210
|
+
# When false, values that fail to parse or that overflow are silently converted to null.
|
1211
|
+
#
|
1212
|
+
# @return [Series]
|
1213
|
+
#
|
1214
|
+
# @example
|
1215
|
+
# s = Polars::Series.new("bin", ["110", "101", "010", "invalid"])
|
1216
|
+
# s.str.to_integer(base: 2, dtype: Polars::Int32, strict: false)
|
1217
|
+
# # =>
|
1218
|
+
# # shape: (4,)
|
1219
|
+
# # Series: 'bin' [i32]
|
1220
|
+
# # [
|
1221
|
+
# # 6
|
1222
|
+
# # 5
|
1223
|
+
# # 2
|
1224
|
+
# # null
|
1225
|
+
# # ]
|
1226
|
+
#
|
1227
|
+
# @example
|
1228
|
+
# s = Polars::Series.new("hex", ["fa1e", "ff00", "cafe", nil])
|
1229
|
+
# s.str.to_integer(base: 16)
|
1230
|
+
# # =>
|
1231
|
+
# # shape: (4,)
|
1232
|
+
# # Series: 'hex' [i64]
|
1233
|
+
# # [
|
1234
|
+
# # 64030
|
1235
|
+
# # 65280
|
1236
|
+
# # 51966
|
1237
|
+
# # null
|
1238
|
+
# # ]
|
1239
|
+
def to_integer(
|
1240
|
+
base: 10,
|
1241
|
+
dtype: Int64,
|
1242
|
+
strict: true
|
1243
|
+
)
|
1244
|
+
super
|
1245
|
+
end
|
1246
|
+
|
1247
|
+
# Use the Aho-Corasick algorithm to find matches.
|
1248
|
+
#
|
1249
|
+
# Determines if any of the patterns are contained in the string.
|
1250
|
+
#
|
1251
|
+
# @param patterns [Object]
|
1252
|
+
# String patterns to search.
|
1253
|
+
# @param ascii_case_insensitive [Boolean]
|
1254
|
+
# Enable ASCII-aware case-insensitive matching.
|
1255
|
+
# When this option is enabled, searching will be performed without respect
|
1256
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1257
|
+
#
|
1258
|
+
# @return [Series]
|
1259
|
+
#
|
1260
|
+
# @note
|
1261
|
+
# This method supports matching on string literals only, and does not support
|
1262
|
+
# regular expression matching.
|
1263
|
+
#
|
1264
|
+
# @example
|
1265
|
+
# s = Polars::Series.new(
|
1266
|
+
# "lyrics",
|
1267
|
+
# [
|
1268
|
+
# "Everybody wants to rule the world",
|
1269
|
+
# "Tell me what you want, what you really really want",
|
1270
|
+
# "Can you feel the love tonight"
|
1271
|
+
# ]
|
1272
|
+
# )
|
1273
|
+
# s.str.contains_any(["you", "me"])
|
1274
|
+
# # =>
|
1275
|
+
# # shape: (3,)
|
1276
|
+
# # Series: 'lyrics' [bool]
|
1277
|
+
# # [
|
1278
|
+
# # false
|
1279
|
+
# # true
|
1280
|
+
# # true
|
1281
|
+
# # ]
|
1282
|
+
def contains_any(
|
1283
|
+
patterns,
|
1284
|
+
ascii_case_insensitive: false
|
1285
|
+
)
|
1286
|
+
super
|
1287
|
+
end
|
1288
|
+
|
1289
|
+
# Use the Aho-Corasick algorithm to replace many matches.
|
1290
|
+
#
|
1291
|
+
# @param patterns
|
1292
|
+
# String patterns to search and replace.
|
1293
|
+
# Also accepts a mapping of patterns to their replacement as syntactic sugar
|
1294
|
+
# for `replace_many(Polars::Series.new(mapping.keys), Polars::Series.new(mapping.values))`.
|
1295
|
+
# @param replace_with
|
1296
|
+
# Strings to replace where a pattern was a match.
|
1297
|
+
# Length must match the length of `patterns` or have length 1. This can be
|
1298
|
+
# broadcasted, so it supports many:one and many:many.
|
1299
|
+
# @param ascii_case_insensitive
|
1300
|
+
# Enable ASCII-aware case-insensitive matching.
|
1301
|
+
# When this option is enabled, searching will be performed without respect
|
1302
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1303
|
+
#
|
1304
|
+
# @return [Series]
|
1305
|
+
#
|
1306
|
+
# @note
|
1307
|
+
# This method supports matching on string literals only, and does not support
|
1308
|
+
# regular expression matching.
|
1309
|
+
#
|
1310
|
+
# @example Replace many patterns by passing lists of equal length to the `patterns` and `replace_with` parameters.
|
1311
|
+
# s = Polars::Series.new(
|
1312
|
+
# "lyrics",
|
1313
|
+
# [
|
1314
|
+
# "Everybody wants to rule the world",
|
1315
|
+
# "Tell me what you want, what you really really want",
|
1316
|
+
# "Can you feel the love tonight"
|
1317
|
+
# ]
|
1318
|
+
# )
|
1319
|
+
# s.str.replace_many(["you", "me"], ["me", "you"])
|
1320
|
+
# # =>
|
1321
|
+
# # shape: (3,)
|
1322
|
+
# # Series: 'lyrics' [str]
|
1323
|
+
# # [
|
1324
|
+
# # "Everybody wants to rule the wo…
|
1325
|
+
# # "Tell you what me want, what me…
|
1326
|
+
# # "Can me feel the love tonight"
|
1327
|
+
# # ]
|
1328
|
+
#
|
1329
|
+
# @example Broadcast a replacement for many patterns by passing a sequence of length 1 to the `replace_with` parameter.
|
1330
|
+
# s = Polars::Series.new(
|
1331
|
+
# "lyrics",
|
1332
|
+
# [
|
1333
|
+
# "Everybody wants to rule the world",
|
1334
|
+
# "Tell me what you want, what you really really want",
|
1335
|
+
# "Can you feel the love tonight",
|
1336
|
+
# ]
|
1337
|
+
# )
|
1338
|
+
# s.str.replace_many(["me", "you", "they"], [""])
|
1339
|
+
# # =>
|
1340
|
+
# # shape: (3,)
|
1341
|
+
# # Series: 'lyrics' [str]
|
1342
|
+
# # [
|
1343
|
+
# # "Everybody wants to rule the wo…
|
1344
|
+
# # "Tell what want, what really…
|
1345
|
+
# # "Can feel the love tonight"
|
1346
|
+
# # ]
|
1347
|
+
#
|
1348
|
+
# @example Passing a mapping with patterns and replacements is also supported as syntactic sugar.
|
1349
|
+
# s = Polars::Series.new(
|
1350
|
+
# "lyrics",
|
1351
|
+
# [
|
1352
|
+
# "Everybody wants to rule the world",
|
1353
|
+
# "Tell me what you want, what you really really want",
|
1354
|
+
# "Can you feel the love tonight"
|
1355
|
+
# ]
|
1356
|
+
# )
|
1357
|
+
# mapping = {"me" => "you", "you" => "me", "want" => "need"}
|
1358
|
+
# s.str.replace_many(mapping)
|
1359
|
+
# # =>
|
1360
|
+
# # shape: (3,)
|
1361
|
+
# # Series: 'lyrics' [str]
|
1362
|
+
# # [
|
1363
|
+
# # "Everybody needs to rule the wo…
|
1364
|
+
# # "Tell you what me need, what me…
|
1365
|
+
# # "Can me feel the love tonight"
|
1366
|
+
# # ]
|
1367
|
+
def replace_many(
|
1368
|
+
patterns,
|
1369
|
+
replace_with = Expr::NO_DEFAULT,
|
1370
|
+
ascii_case_insensitive: false
|
1371
|
+
)
|
1372
|
+
super
|
1373
|
+
end
|
1374
|
+
|
1375
|
+
# Use the Aho-Corasick algorithm to extract many matches.
|
1376
|
+
#
|
1377
|
+
# @param patterns [Object]
|
1378
|
+
# String patterns to search.
|
1379
|
+
# @param ascii_case_insensitive [Boolean]
|
1380
|
+
# Enable ASCII-aware case-insensitive matching.
|
1381
|
+
# When this option is enabled, searching will be performed without respect
|
1382
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1383
|
+
# @param overlapping [Boolean]
|
1384
|
+
# Whether matches may overlap.
|
1385
|
+
#
|
1386
|
+
# @return [Series]
|
1387
|
+
#
|
1388
|
+
# @note
|
1389
|
+
# This method supports matching on string literals only, and does not support
|
1390
|
+
# regular expression matching.
|
1391
|
+
#
|
1392
|
+
# @example
|
1393
|
+
# s = Polars::Series.new("values", ["discontent"])
|
1394
|
+
# patterns = ["winter", "disco", "onte", "discontent"]
|
1395
|
+
# s.str.extract_many(patterns, overlapping: true)
|
1396
|
+
# # =>
|
1397
|
+
# # shape: (1,)
|
1398
|
+
# # Series: 'values' [list[str]]
|
1399
|
+
# # [
|
1400
|
+
# # ["disco", "onte", "discontent"]
|
1401
|
+
# # ]
|
1402
|
+
def extract_many(
|
1403
|
+
patterns,
|
1404
|
+
ascii_case_insensitive: false,
|
1405
|
+
overlapping: false
|
1406
|
+
)
|
1407
|
+
super
|
1408
|
+
end
|
1409
|
+
|
1410
|
+
# Use the Aho-Corasick algorithm to find all matches.
|
1411
|
+
#
|
1412
|
+
# The function returns the byte offset of the start of each match.
|
1413
|
+
# The return type will be `List<UInt32>`
|
1414
|
+
#
|
1415
|
+
# @param patterns [Object]
|
1416
|
+
# String patterns to search.
|
1417
|
+
# @param ascii_case_insensitive [Boolean]
|
1418
|
+
# Enable ASCII-aware case-insensitive matching.
|
1419
|
+
# When this option is enabled, searching will be performed without respect
|
1420
|
+
# to case for ASCII letters (a-z and A-Z) only.
|
1421
|
+
# @param overlapping [Boolean]
|
1422
|
+
# Whether matches may overlap.
|
1423
|
+
#
|
1424
|
+
# @return [Series]
|
1425
|
+
#
|
1426
|
+
# @note
|
1427
|
+
# This method supports matching on string literals only, and does not support
|
1428
|
+
# regular expression matching.
|
1429
|
+
#
|
1430
|
+
# @example
|
1431
|
+
# df = Polars::DataFrame.new({"values" => ["discontent"]})
|
1432
|
+
# patterns = ["winter", "disco", "onte", "discontent"]
|
1433
|
+
# df.with_columns(
|
1434
|
+
# Polars.col("values")
|
1435
|
+
# .str.extract_many(patterns, overlapping: false)
|
1436
|
+
# .alias("matches"),
|
1437
|
+
# Polars.col("values")
|
1438
|
+
# .str.extract_many(patterns, overlapping: true)
|
1439
|
+
# .alias("matches_overlapping")
|
1440
|
+
# )
|
1441
|
+
# # =>
|
1442
|
+
# # shape: (1, 3)
|
1443
|
+
# # ┌────────────┬───────────┬─────────────────────────────────┐
|
1444
|
+
# # │ values ┆ matches ┆ matches_overlapping │
|
1445
|
+
# # │ --- ┆ --- ┆ --- │
|
1446
|
+
# # │ str ┆ list[str] ┆ list[str] │
|
1447
|
+
# # ╞════════════╪═══════════╪═════════════════════════════════╡
|
1448
|
+
# # │ discontent ┆ ["disco"] ┆ ["disco", "onte", "discontent"… │
|
1449
|
+
# # └────────────┴───────────┴─────────────────────────────────┘
|
1450
|
+
#
|
1451
|
+
# @example
|
1452
|
+
# df = Polars::DataFrame.new(
|
1453
|
+
# {
|
1454
|
+
# "values" => ["discontent", "rhapsody"],
|
1455
|
+
# "patterns" => [
|
1456
|
+
# ["winter", "disco", "onte", "discontent"],
|
1457
|
+
# ["rhap", "ody", "coalesce"]
|
1458
|
+
# ]
|
1459
|
+
# }
|
1460
|
+
# )
|
1461
|
+
# df.select(Polars.col("values").str.find_many("patterns"))
|
1462
|
+
# # =>
|
1463
|
+
# # shape: (2, 1)
|
1464
|
+
# # ┌───────────┐
|
1465
|
+
# # │ values │
|
1466
|
+
# # │ --- │
|
1467
|
+
# # │ list[u32] │
|
1468
|
+
# # ╞═══════════╡
|
1469
|
+
# # │ [0] │
|
1470
|
+
# # │ [0, 5] │
|
1471
|
+
# # └───────────┘
|
1472
|
+
def find_many(
|
1473
|
+
patterns,
|
1474
|
+
ascii_case_insensitive: false,
|
1475
|
+
overlapping: false
|
1476
|
+
)
|
1477
|
+
super
|
1478
|
+
end
|
1479
|
+
|
1480
|
+
# Vertically concat the values in the Series to a single string value.
|
1481
|
+
#
|
1482
|
+
# @param delimiter [String]
|
1483
|
+
# The delimiter to insert between consecutive string values.
|
1484
|
+
# @param ignore_nulls [Boolean]
|
1485
|
+
# Ignore null values (default).
|
1486
|
+
# If set to `false`, null values will be propagated. This means that
|
1487
|
+
# if the column contains any null values, the output is null.
|
1488
|
+
#
|
1489
|
+
# @return [Series]
|
1490
|
+
#
|
1491
|
+
# @example
|
1492
|
+
# Polars::Series.new([1, nil, 2]).str.join("-")
|
1493
|
+
# # =>
|
1494
|
+
# # shape: (1,)
|
1495
|
+
# # Series: '' [str]
|
1496
|
+
# # [
|
1497
|
+
# # "1-2"
|
1498
|
+
# # ]
|
1499
|
+
#
|
1500
|
+
# @example
|
1501
|
+
# Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
|
1502
|
+
# # =>
|
1503
|
+
# # shape: (1,)
|
1504
|
+
# # Series: '' [str]
|
1505
|
+
# # [
|
1506
|
+
# # null
|
1507
|
+
# # ]
|
1508
|
+
def join(delimiter = "-", ignore_nulls: true)
|
1509
|
+
super
|
1510
|
+
end
|
1511
|
+
alias_method :concat, :join
|
1512
|
+
|
1513
|
+
# Returns string values with all regular expression meta characters escaped.
|
1514
|
+
#
|
1515
|
+
# @return [Series]
|
1516
|
+
#
|
1517
|
+
# @example
|
1518
|
+
# Polars::Series.new(["abc", "def", nil, "abc(\\w+)"]).str.escape_regex
|
1519
|
+
# # =>
|
1520
|
+
# # shape: (4,)
|
1521
|
+
# # Series: '' [str]
|
1522
|
+
# # [
|
1523
|
+
# # "abc"
|
1524
|
+
# # "def"
|
1525
|
+
# # null
|
1526
|
+
# # "abc\(\\w\+\)"
|
1527
|
+
# # ]
|
1528
|
+
def escape_regex
|
1529
|
+
super
|
1530
|
+
end
|
1531
|
+
|
1532
|
+
# Returns the Unicode normal form of the string values.
|
1533
|
+
#
|
1534
|
+
# This uses the forms described in Unicode Standard Annex 15: <https://www.unicode.org/reports/tr15/>.
|
1535
|
+
#
|
1536
|
+
# @param form ['NFC', 'NFKC', 'NFD', 'NFKD']
|
1537
|
+
# Unicode form to use.
|
1538
|
+
#
|
1539
|
+
# @return [Series]
|
1540
|
+
#
|
1541
|
+
# @example
|
1542
|
+
# s = Polars::Series.new(["01²", "KADOKAWA"])
|
1543
|
+
# s.str.normalize("NFC")
|
1544
|
+
# # =>
|
1545
|
+
# # shape: (2,)
|
1546
|
+
# # Series: '' [str]
|
1547
|
+
# # [
|
1548
|
+
# # "01²"
|
1549
|
+
# # "KADOKAWA"
|
1550
|
+
# # ]
|
1551
|
+
#
|
1552
|
+
# @example
|
1553
|
+
# s.str.normalize("NFKC")
|
1554
|
+
# # =>
|
1555
|
+
# # shape: (2,)
|
1556
|
+
# # Series: '' [str]
|
1557
|
+
# # [
|
1558
|
+
# # "012"
|
1559
|
+
# # "KADOKAWA"
|
1560
|
+
# # ]
|
1561
|
+
def normalize(form = "NFC")
|
1562
|
+
super
|
1563
|
+
end
|
891
1564
|
end
|
892
1565
|
end
|