polars-df 0.21.0-aarch64-linux → 0.22.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +55 -48
  4. data/Cargo.toml +3 -0
  5. data/LICENSE-THIRD-PARTY.txt +23 -49
  6. data/README.md +12 -0
  7. data/lib/polars/3.2/polars.so +0 -0
  8. data/lib/polars/3.3/polars.so +0 -0
  9. data/lib/polars/3.4/polars.so +0 -0
  10. data/lib/polars/array_expr.rb +382 -3
  11. data/lib/polars/array_name_space.rb +281 -0
  12. data/lib/polars/binary_expr.rb +67 -0
  13. data/lib/polars/binary_name_space.rb +43 -0
  14. data/lib/polars/cat_expr.rb +224 -0
  15. data/lib/polars/cat_name_space.rb +138 -0
  16. data/lib/polars/config.rb +2 -2
  17. data/lib/polars/convert.rb +6 -6
  18. data/lib/polars/data_frame.rb +794 -27
  19. data/lib/polars/data_type_expr.rb +52 -0
  20. data/lib/polars/data_types.rb +26 -5
  21. data/lib/polars/date_time_expr.rb +252 -1
  22. data/lib/polars/date_time_name_space.rb +299 -0
  23. data/lib/polars/expr.rb +1248 -206
  24. data/lib/polars/functions/business.rb +95 -0
  25. data/lib/polars/functions/datatype.rb +21 -0
  26. data/lib/polars/functions/lazy.rb +14 -1
  27. data/lib/polars/io/csv.rb +1 -1
  28. data/lib/polars/io/iceberg.rb +27 -0
  29. data/lib/polars/io/json.rb +4 -4
  30. data/lib/polars/io/ndjson.rb +4 -4
  31. data/lib/polars/io/parquet.rb +32 -7
  32. data/lib/polars/io/scan_options.rb +4 -1
  33. data/lib/polars/lazy_frame.rb +1028 -28
  34. data/lib/polars/list_expr.rb +217 -17
  35. data/lib/polars/list_name_space.rb +231 -22
  36. data/lib/polars/meta_expr.rb +89 -0
  37. data/lib/polars/name_expr.rb +36 -0
  38. data/lib/polars/query_opt_flags.rb +50 -0
  39. data/lib/polars/scan_cast_options.rb +20 -1
  40. data/lib/polars/schema.rb +79 -3
  41. data/lib/polars/selector.rb +72 -0
  42. data/lib/polars/selectors.rb +3 -3
  43. data/lib/polars/series.rb +1053 -54
  44. data/lib/polars/string_expr.rb +436 -32
  45. data/lib/polars/string_name_space.rb +736 -50
  46. data/lib/polars/struct_expr.rb +103 -0
  47. data/lib/polars/struct_name_space.rb +19 -1
  48. data/lib/polars/utils/serde.rb +17 -0
  49. data/lib/polars/utils/various.rb +22 -1
  50. data/lib/polars/utils.rb +5 -1
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +6 -0
  53. metadata +8 -2
@@ -222,10 +222,8 @@ module Polars
222
222
 
223
223
  # Convert a String column into a Decimal column.
224
224
  #
225
- # This method infers the needed parameters `precision` and `scale`.
226
- #
227
- # @param inference_length [Integer]
228
- # Number of elements to parse to determine the `precision` and `scale`.
225
+ # @param scale [Integer]
226
+ # Number of digits after the decimal point to use for the decimals.
229
227
  #
230
228
  # @return [Expr]
231
229
  #
@@ -243,7 +241,7 @@ module Polars
243
241
  # ]
244
242
  # }
245
243
  # )
246
- # df.with_columns(numbers_decimal: Polars.col("numbers").str.to_decimal)
244
+ # df.with_columns(numbers_decimal: Polars.col("numbers").str.to_decimal(scale: 2))
247
245
  # # =>
248
246
  # # shape: (7, 2)
249
247
  # # ┌───────────┬─────────────────┐
@@ -259,8 +257,8 @@ module Polars
259
257
  # # │ 143.09 ┆ 143.09 │
260
258
  # # │ 143.9 ┆ 143.90 │
261
259
  # # └───────────┴─────────────────┘
262
- def to_decimal(inference_length = 100)
263
- Utils.wrap_expr(_rbexpr.str_to_decimal(inference_length))
260
+ def to_decimal(scale:)
261
+ Utils.wrap_expr(_rbexpr.str_to_decimal(scale))
264
262
  end
265
263
 
266
264
  # Get length of the strings as `:u32` (as number of bytes).
@@ -368,6 +366,71 @@ module Polars
368
366
  end
369
367
  alias_method :concat, :join
370
368
 
369
+ # Returns string values with all regular expression meta characters escaped.
370
+ #
371
+ # @return [Expr]
372
+ #
373
+ # @example
374
+ # df = Polars::DataFrame.new({"text" => ["abc", "def", nil, "abc(\\w+)"]})
375
+ # df.with_columns(Polars.col("text").str.escape_regex.alias("escaped"))
376
+ # # =>
377
+ # # shape: (4, 2)
378
+ # # ┌──────────┬──────────────┐
379
+ # # │ text ┆ escaped │
380
+ # # │ --- ┆ --- │
381
+ # # │ str ┆ str │
382
+ # # ╞══════════╪══════════════╡
383
+ # # │ abc ┆ abc │
384
+ # # │ def ┆ def │
385
+ # # │ null ┆ null │
386
+ # # │ abc(\w+) ┆ abc\(\\w\+\) │
387
+ # # └──────────┴──────────────┘
388
+ def escape_regex
389
+ Utils.wrap_expr(_rbexpr.str_escape_regex)
390
+ end
391
+
392
+ # Returns the Unicode normal form of the string values.
393
+ #
394
+ # This uses the forms described in Unicode Standard Annex 15: <https://www.unicode.org/reports/tr15/>.
395
+ #
396
+ # @param form ['NFC', 'NFKC', 'NFD', 'NFKD']
397
+ # Unicode form to use.
398
+ #
399
+ # @return [Expr]
400
+ #
401
+ # @example
402
+ # df = Polars::DataFrame.new({"text" => ["01²", "KADOKAWA"]})
403
+ # new = df.with_columns(
404
+ # nfc: Polars.col("text").str.normalize("NFC"),
405
+ # nfkc: Polars.col("text").str.normalize("NFKC")
406
+ # )
407
+ # # =>
408
+ # # shape: (2, 3)
409
+ # # ┌──────────────────┬──────────────────┬──────────┐
410
+ # # │ text ┆ nfc ┆ nfkc │
411
+ # # │ --- ┆ --- ┆ --- │
412
+ # # │ str ┆ str ┆ str │
413
+ # # ╞══════════════════╪══════════════════╪══════════╡
414
+ # # │ 01² ┆ 01² ┆ 012 │
415
+ # # │ KADOKAWA ┆ KADOKAWA ┆ KADOKAWA │
416
+ # # └──────────────────┴──────────────────┴──────────┘
417
+ #
418
+ # @example
419
+ # new.select(Polars.all.str.len_bytes)
420
+ # # =>
421
+ # # shape: (2, 3)
422
+ # # ┌──────┬─────┬──────┐
423
+ # # │ text ┆ nfc ┆ nfkc │
424
+ # # │ --- ┆ --- ┆ --- │
425
+ # # │ u32 ┆ u32 ┆ u32 │
426
+ # # ╞══════╪═════╪══════╡
427
+ # # │ 4 ┆ 4 ┆ 3 │
428
+ # # │ 24 ┆ 24 ┆ 8 │
429
+ # # └──────┴─────┴──────┘
430
+ def normalize(form = "NFC")
431
+ Utils.wrap_expr(_rbexpr.str_normalize(form))
432
+ end
433
+
371
434
  # Transform to uppercase variant.
372
435
  #
373
436
  # @return [Expr]
@@ -707,6 +770,68 @@ module Polars
707
770
  Utils.wrap_expr(_rbexpr.str_contains(pattern, literal, strict))
708
771
  end
709
772
 
773
+ # Return the byte offset of the first substring matching a pattern.
774
+ #
775
+ # If the pattern is not found, returns nil.
776
+ #
777
+ # @param pattern [String]
778
+ # A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
779
+ # @param literal [Boolean]
780
+ # Treat `pattern` as a literal string, not as a regular expression.
781
+ # @param strict [Boolean]
782
+ # Raise an error if the underlying pattern is not a valid regex,
783
+ # otherwise mask out with a null value.
784
+ #
785
+ # @return [Expr]
786
+ #
787
+ # @note
788
+ # To modify regular expression behaviour (such as case-sensitivity) with
789
+ # flags, use the inline `(?iLmsuxU)` syntax.
790
+ #
791
+ # @example Find the index of the first substring matching a regex or literal pattern:
792
+ # df = Polars::DataFrame.new(
793
+ # {
794
+ # "txt" => ["Crab", "Lobster", nil, "Crustacean"],
795
+ # "pat" => ["a[bc]", "b.t", "[aeiuo]", "(?i)A[BC]"]
796
+ # }
797
+ # )
798
+ # df.select(
799
+ # Polars.col("txt"),
800
+ # Polars.col("txt").str.find("a|e").alias("a|e (regex)"),
801
+ # Polars.col("txt").str.find("e", literal: true).alias("e (lit)"),
802
+ # )
803
+ # # =>
804
+ # # shape: (4, 3)
805
+ # # ┌────────────┬─────────────┬─────────┐
806
+ # # │ txt ┆ a|e (regex) ┆ e (lit) │
807
+ # # │ --- ┆ --- ┆ --- │
808
+ # # │ str ┆ u32 ┆ u32 │
809
+ # # ╞════════════╪═════════════╪═════════╡
810
+ # # │ Crab ┆ 2 ┆ null │
811
+ # # │ Lobster ┆ 5 ┆ 5 │
812
+ # # │ null ┆ null ┆ null │
813
+ # # │ Crustacean ┆ 5 ┆ 7 │
814
+ # # └────────────┴─────────────┴─────────┘
815
+ #
816
+ # @example Match against a pattern found in another column (or expression):
817
+ # df.with_columns(Polars.col("txt").str.find(Polars.col("pat")).alias("find_pat"))
818
+ # # =>
819
+ # # shape: (4, 3)
820
+ # # ┌────────────┬───────────┬──────────┐
821
+ # # │ txt ┆ pat ┆ find_pat │
822
+ # # │ --- ┆ --- ┆ --- │
823
+ # # │ str ┆ str ┆ u32 │
824
+ # # ╞════════════╪═══════════╪══════════╡
825
+ # # │ Crab ┆ a[bc] ┆ 2 │
826
+ # # │ Lobster ┆ b.t ┆ 2 │
827
+ # # │ null ┆ [aeiuo] ┆ null │
828
+ # # │ Crustacean ┆ (?i)A[BC] ┆ 5 │
829
+ # # └────────────┴───────────┴──────────┘
830
+ def find(pattern, literal: false, strict: true)
831
+ pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
832
+ Utils.wrap_expr(_rbexpr.str_find(pattern, literal, strict))
833
+ end
834
+
710
835
  # Check if string values end with a substring.
711
836
  #
712
837
  # @param sub [String]
@@ -792,11 +917,9 @@ module Polars
792
917
  # Throw an error if an invalid JSON string is encountered.
793
918
  #
794
919
  # @param dtype [Object]
795
- # The dtype to cast the extracted value to. If nil, the dtype will be
796
- # inferred from the JSON value.
920
+ # The dtype to cast the extracted value to.
797
921
  # @param infer_schema_length [Integer]
798
- # The maximum number of rows to scan for schema inference.
799
- # If set to `nil`, the full data may be scanned *(this is slow)*.
922
+ # Deprecated and ignored.
800
923
  #
801
924
  # @return [Expr]
802
925
  #
@@ -805,23 +928,26 @@ module Polars
805
928
  # {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
806
929
  # )
807
930
  # dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
808
- # df.select(Polars.col("json").str.json_decode(dtype))
931
+ # df.with_columns(decoded: Polars.col("json").str.json_decode(dtype))
809
932
  # # =>
810
- # # shape: (3, 1)
811
- # # ┌───────────┐
812
- # # │ json
813
- # # │ --- │
814
- # # │ struct[2] │
815
- # # ╞═══════════╡
816
- # # │ {1,true} │
817
- # # │ null │
818
- # # │ {2,false} │
819
- # # └───────────┘
820
- def json_decode(dtype = nil, infer_schema_length: 100)
821
- if !dtype.nil?
822
- dtype = Utils.rb_type_to_dtype(dtype)
933
+ # # shape: (3, 2)
934
+ # # ┌─────────────────────┬───────────┐
935
+ # # │ json ┆ decoded │
936
+ # # │ --- ┆ --- │
937
+ # # │ str ┆ struct[2] │
938
+ # # ╞═════════════════════╪═══════════╡
939
+ # # │ {"a":1, "b": true} ┆ {1,true} │
940
+ # # │ null ┆ null │
941
+ # # │ {"a":2, "b": false} ┆ {2,false} │
942
+ # # └─────────────────────┴───────────┘
943
+ def json_decode(dtype, infer_schema_length: nil)
944
+ if dtype.nil?
945
+ msg = "`Expr.str.json_decode` needs an explicitly given `dtype` otherwise Polars is not able to determine the output type. If you want to eagerly infer datatype you can use `Series.str.json_decode`."
946
+ raise TypeError, msg
823
947
  end
824
- Utils.wrap_expr(_rbexpr.str_json_decode(dtype, infer_schema_length))
948
+
949
+ dtype_expr = Utils.parse_into_datatype_expr(dtype)._rbdatatype_expr
950
+ Utils.wrap_expr(_rbexpr.str_json_decode(dtype_expr))
825
951
  end
826
952
  alias_method :json_extract, :json_decode
827
953
 
@@ -1307,6 +1433,130 @@ module Polars
1307
1433
  Utils.wrap_expr(_rbexpr.str_slice(offset, length))
1308
1434
  end
1309
1435
 
1436
+ # Return the first n characters of each string in a String Series.
1437
+ #
1438
+ # @param n [Integer]
1439
+ # Length of the slice (integer or expression). Negative indexing is supported;
1440
+ # see note (2) below.
1441
+ #
1442
+ # @return [Expr]
1443
+ #
1444
+ # @note
1445
+ # 1) The `n` input is defined in terms of the number of characters in the (UTF8)
1446
+ # string. A character is defined as a [Unicode scalar value](https://www.unicode.org/glossary/#unicode_scalar_value). A single
1447
+ # character is represented by a single byte when working with ASCII text, and a
1448
+ # maximum of 4 bytes otherwise.
1449
+ #
1450
+ # 2) When the `n` input is negative, `head` returns characters up to the `n`th
1451
+ # from the end of the string. For example, if `n = -3`, then all characters
1452
+ # except the last three are returned.
1453
+ #
1454
+ # 3) If the string has fewer than `n` characters, the full string is
1455
+ # returned.
1456
+ #
1457
+ # @example Return up to the first 5 characters:
1458
+ # df = Polars::DataFrame.new({"s" => ["pear", nil, "papaya", "dragonfruit"]})
1459
+ # df.with_columns(Polars.col("s").str.head(5).alias("s_head_5"))
1460
+ # # =>
1461
+ # # shape: (4, 2)
1462
+ # # ┌─────────────┬──────────┐
1463
+ # # │ s ┆ s_head_5 │
1464
+ # # │ --- ┆ --- │
1465
+ # # │ str ┆ str │
1466
+ # # ╞═════════════╪══════════╡
1467
+ # # │ pear ┆ pear │
1468
+ # # │ null ┆ null │
1469
+ # # │ papaya ┆ papay │
1470
+ # # │ dragonfruit ┆ drago │
1471
+ # # └─────────────┴──────────┘
1472
+ #
1473
+ # @example Return characters determined by column `n`:
1474
+ # df = Polars::DataFrame.new(
1475
+ # {
1476
+ # "s" => ["pear", nil, "papaya", "dragonfruit"],
1477
+ # "n" => [3, 4, -2, -5]
1478
+ # }
1479
+ # )
1480
+ # df.with_columns(Polars.col("s").str.head("n").alias("s_head_n"))
1481
+ # # =>
1482
+ # # shape: (4, 3)
1483
+ # # ┌─────────────┬─────┬──────────┐
1484
+ # # │ s ┆ n ┆ s_head_n │
1485
+ # # │ --- ┆ --- ┆ --- │
1486
+ # # │ str ┆ i64 ┆ str │
1487
+ # # ╞═════════════╪═════╪══════════╡
1488
+ # # │ pear ┆ 3 ┆ pea │
1489
+ # # │ null ┆ 4 ┆ null │
1490
+ # # │ papaya ┆ -2 ┆ papa │
1491
+ # # │ dragonfruit ┆ -5 ┆ dragon │
1492
+ # # └─────────────┴─────┴──────────┘
1493
+ def head(n)
1494
+ n = Utils.parse_into_expression(n)
1495
+ Utils.wrap_expr(_rbexpr.str_head(n))
1496
+ end
1497
+
1498
+ # Return the last n characters of each string in a String Series.
1499
+ #
1500
+ # @param n [Integer]
1501
+ # Length of the slice (integer or expression). Negative indexing is supported;
1502
+ # see note (2) below.
1503
+ #
1504
+ # @return [Expr]
1505
+ #
1506
+ # @note
1507
+ # 1) The `n` input is defined in terms of the number of characters in the (UTF8)
1508
+ # string. A character is defined as a [Unicode scalar value](https://www.unicode.org/glossary/#unicode_scalar_value). A single
1509
+ # character is represented by a single byte when working with ASCII text, and a
1510
+ # maximum of 4 bytes otherwise.
1511
+ #
1512
+ # 2) When the `n` input is negative, `tail` returns characters starting from the
1513
+ # `n`th from the beginning of the string. For example, if `n = -3`, then all
1514
+ # characters except the first three are returned.
1515
+ #
1516
+ # 3) If the string has fewer than `n` characters, the full string is
1517
+ # returned.
1518
+ #
1519
+ # @example Return up to the last 5 characters:
1520
+ # df = Polars::DataFrame.new({"s" => ["pear", nil, "papaya", "dragonfruit"]})
1521
+ # df.with_columns(Polars.col("s").str.tail(5).alias("s_tail_5"))
1522
+ # # =>
1523
+ # # shape: (4, 2)
1524
+ # # ┌─────────────┬──────────┐
1525
+ # # │ s ┆ s_tail_5 │
1526
+ # # │ --- ┆ --- │
1527
+ # # │ str ┆ str │
1528
+ # # ╞═════════════╪══════════╡
1529
+ # # │ pear ┆ pear │
1530
+ # # │ null ┆ null │
1531
+ # # │ papaya ┆ apaya │
1532
+ # # │ dragonfruit ┆ fruit │
1533
+ # # └─────────────┴──────────┘
1534
+ #
1535
+ # @example Return characters determined by column `n`:
1536
+ # df = Polars::DataFrame.new(
1537
+ # {
1538
+ # "s" => ["pear", nil, "papaya", "dragonfruit"],
1539
+ # "n" => [3, 4, -2, -5]
1540
+ # }
1541
+ # )
1542
+ # df.with_columns(Polars.col("s").str.tail("n").alias("s_tail_n"))
1543
+ # # =>
1544
+ # # shape: (4, 3)
1545
+ # # ┌─────────────┬─────┬──────────┐
1546
+ # # │ s ┆ n ┆ s_tail_n │
1547
+ # # │ --- ┆ --- ┆ --- │
1548
+ # # │ str ┆ i64 ┆ str │
1549
+ # # ╞═════════════╪═════╪══════════╡
1550
+ # # │ pear ┆ 3 ┆ ear │
1551
+ # # │ null ┆ 4 ┆ null │
1552
+ # # │ papaya ┆ -2 ┆ paya │
1553
+ # # │ dragonfruit ┆ -5 ┆ nfruit │
1554
+ # # └─────────────┴─────┴──────────┘
1555
+ def tail(n)
1556
+ n = Utils.parse_into_expression(n)
1557
+ Utils.wrap_expr(_rbexpr.str_tail(n))
1558
+ end
1559
+
1310
1560
  # Convert a Utf8 column into an Int64 column with base radix.
1311
1561
  #
1312
1562
  # @param base [Integer]
@@ -1432,9 +1682,9 @@ module Polars
1432
1682
 
1433
1683
  # Use the aho-corasick algorithm to replace many matches.
1434
1684
  #
1435
- # @param patterns [String]
1685
+ # @param patterns [Object]
1436
1686
  # String patterns to search and replace.
1437
- # @param replace_with [String]
1687
+ # @param replace_with [Object]
1438
1688
  # Strings to replace where a pattern was a match.
1439
1689
  # This can be broadcasted. So it supports many:one and many:many.
1440
1690
  # @param ascii_case_insensitive [Boolean]
@@ -1494,11 +1744,22 @@ module Polars
1494
1744
  # # │ Tell me what you want, what yo… ┆ Tell you what me want, what me… │
1495
1745
  # # │ Can you feel the love tonight ┆ Can me feel the love tonight │
1496
1746
  # # └─────────────────────────────────┴─────────────────────────────────┘
1497
- def replace_many(patterns, replace_with, ascii_case_insensitive: false)
1747
+ def replace_many(patterns, replace_with = Expr::NO_DEFAULT, ascii_case_insensitive: false)
1748
+ if replace_with == Expr::NO_DEFAULT
1749
+ if !patterns.is_a?(Hash)
1750
+ msg = "`replace_with` argument is required if `patterns` argument is not a Hash type"
1751
+ raise TypeError, msg
1752
+ end
1753
+ # Early return in case of an empty mapping.
1754
+ if patterns.empty?
1755
+ return Utils.wrap_expr(_rbexpr)
1756
+ end
1757
+ replace_with = patterns.values
1758
+ patterns = patterns.keys
1759
+ end
1760
+
1498
1761
  patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
1499
- replace_with = Utils.parse_into_expression(
1500
- replace_with, str_as_lit: true
1501
- )
1762
+ replace_with = Utils.parse_into_expression(replace_with, str_as_lit: true)
1502
1763
  Utils.wrap_expr(
1503
1764
  _rbexpr.str_replace_many(
1504
1765
  patterns, replace_with, ascii_case_insensitive
@@ -1506,6 +1767,149 @@ module Polars
1506
1767
  )
1507
1768
  end
1508
1769
 
1770
+ # Use the Aho-Corasick algorithm to extract many matches.
1771
+ #
1772
+ # @param patterns [Object]
1773
+ # String patterns to search.
1774
+ # @param ascii_case_insensitive [Boolean]
1775
+ # Enable ASCII-aware case-insensitive matching.
1776
+ # When this option is enabled, searching will be performed without respect
1777
+ # to case for ASCII letters (a-z and A-Z) only.
1778
+ # @param overlapping [Boolean]
1779
+ # Whether matches may overlap.
1780
+ #
1781
+ # @return [Expr]
1782
+ #
1783
+ # @note
1784
+ # This method supports matching on string literals only, and does not support
1785
+ # regular expression matching.
1786
+ #
1787
+ # @example
1788
+ # df = Polars::DataFrame.new({"values" => ["discontent"]})
1789
+ # patterns = ["winter", "disco", "onte", "discontent"]
1790
+ # df.with_columns(
1791
+ # Polars.col("values")
1792
+ # .str.extract_many(patterns, overlapping: false)
1793
+ # .alias("matches"),
1794
+ # Polars.col("values")
1795
+ # .str.extract_many(patterns, overlapping: true)
1796
+ # .alias("matches_overlapping"),
1797
+ # )
1798
+ # # =>
1799
+ # # shape: (1, 3)
1800
+ # # ┌────────────┬───────────┬─────────────────────────────────┐
1801
+ # # │ values ┆ matches ┆ matches_overlapping │
1802
+ # # │ --- ┆ --- ┆ --- │
1803
+ # # │ str ┆ list[str] ┆ list[str] │
1804
+ # # ╞════════════╪═══════════╪═════════════════════════════════╡
1805
+ # # │ discontent ┆ ["disco"] ┆ ["disco", "onte", "discontent"… │
1806
+ # # └────────────┴───────────┴─────────────────────────────────┘
1807
+ #
1808
+ # @example
1809
+ # df = Polars::DataFrame.new(
1810
+ # {
1811
+ # "values" => ["discontent", "rhapsody"],
1812
+ # "patterns" => [
1813
+ # ["winter", "disco", "onte", "discontent"],
1814
+ # ["rhap", "ody", "coalesce"]
1815
+ # ]
1816
+ # }
1817
+ # )
1818
+ # df.select(Polars.col("values").str.extract_many("patterns"))
1819
+ # # =>
1820
+ # # shape: (2, 1)
1821
+ # # ┌─────────────────┐
1822
+ # # │ values │
1823
+ # # │ --- │
1824
+ # # │ list[str] │
1825
+ # # ╞═════════════════╡
1826
+ # # │ ["disco"] │
1827
+ # # │ ["rhap", "ody"] │
1828
+ # # └─────────────────┘
1829
+ def extract_many(
1830
+ patterns,
1831
+ ascii_case_insensitive: false,
1832
+ overlapping: false
1833
+ )
1834
+ patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
1835
+ Utils.wrap_expr(
1836
+ _rbexpr.str_extract_many(patterns, ascii_case_insensitive, overlapping)
1837
+ )
1838
+ end
1839
+
1840
+ # Use the Aho-Corasick algorithm to find many matches.
1841
+ #
1842
+ # The function will return the byte offset of the start of each match.
1843
+ # The return type will be `List<UInt32>`.
1844
+ #
1845
+ # @param patterns [Object]
1846
+ # String patterns to search.
1847
+ # @param ascii_case_insensitive [Boolean]
1848
+ # Enable ASCII-aware case-insensitive matching.
1849
+ # When this option is enabled, searching will be performed without respect
1850
+ # to case for ASCII letters (a-z and A-Z) only.
1851
+ # @param overlapping [Boolean]
1852
+ # Whether matches may overlap.
1853
+ #
1854
+ # @return [Expr]
1855
+ #
1856
+ # @note
1857
+ # This method supports matching on string literals only, and does not support
1858
+ # regular expression matching.
1859
+ #
1860
+ # @example
1861
+ # df = Polars::DataFrame.new({"values" => ["discontent"]})
1862
+ # patterns = ["winter", "disco", "onte", "discontent"]
1863
+ # df.with_columns(
1864
+ # Polars.col("values")
1865
+ # .str.extract_many(patterns, overlapping: false)
1866
+ # .alias("matches"),
1867
+ # Polars.col("values")
1868
+ # .str.extract_many(patterns, overlapping: true)
1869
+ # .alias("matches_overlapping"),
1870
+ # )
1871
+ # # =>
1872
+ # # shape: (1, 3)
1873
+ # # ┌────────────┬───────────┬─────────────────────────────────┐
1874
+ # # │ values ┆ matches ┆ matches_overlapping │
1875
+ # # │ --- ┆ --- ┆ --- │
1876
+ # # │ str ┆ list[str] ┆ list[str] │
1877
+ # # ╞════════════╪═══════════╪═════════════════════════════════╡
1878
+ # # │ discontent ┆ ["disco"] ┆ ["disco", "onte", "discontent"… │
1879
+ # # └────────────┴───────────┴─────────────────────────────────┘
1880
+ #
1881
+ # @example
1882
+ # df = Polars::DataFrame.new(
1883
+ # {
1884
+ # "values" => ["discontent", "rhapsody"],
1885
+ # "patterns" => [
1886
+ # ["winter", "disco", "onte", "discontent"],
1887
+ # ["rhap", "ody", "coalesce"]
1888
+ # ]
1889
+ # }
1890
+ # )
1891
+ # df.select(Polars.col("values").str.find_many("patterns"))
1892
+ # # =>
1893
+ # # shape: (2, 1)
1894
+ # # ┌───────────┐
1895
+ # # │ values │
1896
+ # # │ --- │
1897
+ # # │ list[u32] │
1898
+ # # ╞═══════════╡
1899
+ # # │ [0] │
1900
+ # # │ [0, 5] │
1901
+ # # └───────────┘
1902
+ def find_many(
1903
+ patterns,
1904
+ ascii_case_insensitive: false,
1905
+ overlapping: false
1906
+ )
1907
+ patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
1908
+ Utils.wrap_expr(
1909
+ _rbexpr.str_find_many(patterns, ascii_case_insensitive, overlapping)
1910
+ )
1911
+ end
1912
+
1509
1913
  private
1510
1914
 
1511
1915
  def _validate_format_argument(format)