re2js 2.7.1 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -513,7 +513,7 @@ RE2JS.compile('(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)')
513
513
  Note that the replacement string can include references to capturing groups from the pattern
514
514
 
515
515
  Parameters:
516
- - `replacement (String)`: The string that replaces the substrings found. Capture groups and special characters in the replacement string have special behavior. For example:
516
+ - `replacement (String | Function)`: The string that replaces the substrings found, or a function invoked to create the new substring. When passing a string, capture groups and special characters have special behavior. For example:
517
517
  - `$&` refers to the entire matched substring
518
518
  - `$1, $2, ...` refer to the corresponding capture groups in the pattern
519
519
  - `$$` inserts a literal `$`
@@ -556,7 +556,42 @@ RE2JS.compile('(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)')
556
556
  .replaceFirst('$10$20') // 'jb0nopqrstuvwxyz123'
557
557
  ```
558
558
 
559
- The function supports a second argument, `javaMode`, which works the same way as it does for the `replaceAll` function
559
+ The function supports a second argument, `javaMode`, which works the same way as it does for the `replaceAll` function.
560
+
561
+ #### Using a Replacer Function
562
+
563
+ For a more modern JavaScript developer experience, RE2JS supports passing a **replacer function** to `replaceAll()` and `replaceFirst()`, perfectly mirroring native `String.prototype.replace(regex, replacer)` behavior while taking advantage of the high-speed linear-time engine.
564
+
565
+ The replacer function is invoked for each match, and its return value is used as the replacement string. The function receives the following arguments:
566
+
567
+ 1. `match`: The matched substring.
568
+ 2. `p1, p2, ...`: The string found by a capture group (if any). Unmatched optional groups evaluate to `undefined`.
569
+ 3. `offset`: The offset of the matched substring within the whole string.
570
+ 4. `string`: The original input string (or byte array).
571
+ 5. `groups`: A dictionary object of named capture groups (if any exist in the pattern).
572
+
573
+ ```js
574
+ import { RE2JS } from 're2js'
575
+
576
+ // Example 1: Dynamic replacements
577
+ const re1 = RE2JS.compile('\\d+');
578
+ const m1 = re1.matcher('Numbers: 1, 2, 3');
579
+
580
+ m1.replaceAll((match) => String(Number(match) * 10));
581
+ // 'Numbers: 10, 20, 30'
582
+
583
+
584
+ // Example 2: Using named capture groups and function signature
585
+ const re2 = RE2JS.compile('(?P<first>\\w+) (?:(?P<middle>\\w+) )?(?P<last>\\w+)');
586
+ const m2 = re2.matcher('Hello World');
587
+
588
+ m2.replaceFirst((match, p1, p2, p3, offset, string, groups) => {
589
+ // 'middle' didn't match, so p2 and groups.middle will be undefined
590
+ return `${groups.last}, ${groups.first}`;
591
+ });
592
+ // 'World, Hello'
593
+
594
+ ```
560
595
 
561
596
  ### Safe Replacements
562
597
 
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.7.1
5
+ * @version v2.8.0
6
6
  * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -1411,6 +1411,13 @@ class RE2JSInternalException extends RE2JSException {
1411
1411
  */
1412
1412
 
1413
1413
  class Matcher {
1414
+ /**
1415
+ * V8 and WebKit have historical hard limits on the number of arguments
1416
+ * that can be passed to a function. We cap replacer arguments to prevent
1417
+ * Call Stack Overflow (DoS) vulnerabilities on patterns with massive capture group counts.
1418
+ */
1419
+ static MAX_REPLACER_ARGS = 65535;
1420
+
1414
1421
  /**
1415
1422
  * Quotes '\' and '$' in {@code s}, so that the returned string could be used in
1416
1423
  * {@link #appendReplacement} as a literal replacement of {@code s}.
@@ -1707,16 +1714,14 @@ class Matcher {
1707
1714
  * @private
1708
1715
  */
1709
1716
  genMatch(startByte, anchor) {
1710
- const hasLookbehinds = this.patternInput.re2().prog.numLb > 0;
1711
- const ngroup = hasLookbehinds ? 1 + this.patternGroupCount : 1;
1712
- const res = this.patternInput.re2().matchMachineInput(this.matcherInput, startByte, this.matcherInputLength, anchor, ngroup);
1717
+ const res = this.patternInput.re2().matchMachineInput(this.matcherInput, startByte, this.matcherInputLength, anchor, 1);
1713
1718
  const ok = res[0];
1714
1719
  if (!ok) {
1715
1720
  return false;
1716
1721
  }
1717
1722
  this.groups = res[1];
1718
1723
  this.hasMatch = true;
1719
- this.hasGroups = hasLookbehinds || this.patternGroupCount === 0;
1724
+ this.hasGroups = this.patternGroupCount === 0;
1720
1725
  this.anchorFlag = anchor;
1721
1726
  return true;
1722
1727
  }
@@ -1969,7 +1974,7 @@ class Matcher {
1969
1974
  * Returns the input with all matches replaced by {@code replacement}, interpreted as for
1970
1975
  * {@code appendReplacement}.
1971
1976
  *
1972
- * @param {string} replacement - the replacement string
1977
+ * @param {string|Function} replacement - the replacement string or a replacer function
1973
1978
  * @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
1974
1979
  * @returns {string} the input string with the matches replaced
1975
1980
  * @throws IndexOutOfBoundsException if replacement refers to an invalid group and javaMode is true
@@ -1982,7 +1987,7 @@ class Matcher {
1982
1987
  * Returns the input with the first match replaced by {@code replacement}, interpreted as for
1983
1988
  * {@code appendReplacement}.
1984
1989
  *
1985
- * @param {string} replacement - the replacement string
1990
+ * @param {string|Function} replacement - the replacement string or a replacer function
1986
1991
  * @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
1987
1992
  * @returns {string} the input string with the first match replaced
1988
1993
  * @throws IndexOutOfBoundsException if replacement refers to an invalid group and javaMode is true
@@ -1993,7 +1998,7 @@ class Matcher {
1993
1998
 
1994
1999
  /**
1995
2000
  * Helper: replaceAll/replaceFirst hybrid.
1996
- * @param {string} replacement - the replacement string
2001
+ * @param {string|Function} replacement - the replacement string or a replacer function
1997
2002
  * @param {boolean} [all=true] - replace all matches
1998
2003
  * @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
1999
2004
  * @returns {string}
@@ -2002,8 +2007,21 @@ class Matcher {
2002
2007
  replace(replacement, all = true, javaMode = false) {
2003
2008
  let res = '';
2004
2009
  this.reset();
2010
+ const isFunc = typeof replacement === 'function';
2011
+
2012
+ // Cache named groups check to avoid GC thrashing on every match
2013
+ const hasNamedGroups = Object.keys(this.namedGroups).length > 0;
2014
+ let originalInput = null;
2015
+ if (isFunc) {
2016
+ // Prevent V8 Call Stack Overflow (DoS vector) on massive capture group counts
2017
+ if (this.groupCount() >= Matcher.MAX_REPLACER_ARGS) {
2018
+ throw new RE2JSGroupException('Too many capture groups to safely invoke replacer function');
2019
+ }
2020
+ // Resolve the original input reference exactly once outside the hot loop
2021
+ originalInput = this.matcherInput.isUTF8Encoding() ? this.matcherInput.asBytes() : this.matcherInput.asCharSequence();
2022
+ }
2005
2023
  while (this.find()) {
2006
- res += this.appendReplacement(replacement, javaMode);
2024
+ res += isFunc ? this.appendReplacementFunc(replacement, hasNamedGroups, originalInput) : this.appendReplacement(replacement, javaMode);
2007
2025
  if (!all) {
2008
2026
  break;
2009
2027
  }
@@ -2011,6 +2029,66 @@ class Matcher {
2011
2029
  res += this.appendTail();
2012
2030
  return res;
2013
2031
  }
2032
+
2033
+ /**
2034
+ * Evaluates a replacer function for the current match and appends the result,
2035
+ * along with any un-matched preceding text, advancing the append position.
2036
+ * @param {Function} replacer - the replacer function
2037
+ * @param {boolean} hasNamedGroups - cached flag if pattern has named groups
2038
+ * @param {string|Uint8Array|number[]} originalInput - the cached original input reference
2039
+ * @returns {string} the evaluated string to append
2040
+ * @private
2041
+ */
2042
+ appendReplacementFunc(replacer, hasNamedGroups, originalInput) {
2043
+ let res = '';
2044
+ const s = this.start();
2045
+ const e = this.end();
2046
+ if (this.appendPos < s) {
2047
+ res += this.substring(this.appendPos, s);
2048
+ }
2049
+ this.appendPos = e;
2050
+ const args = this.buildReplacerArgs(s, hasNamedGroups, originalInput);
2051
+ res += String(replacer(...args));
2052
+ return res;
2053
+ }
2054
+
2055
+ /**
2056
+ * Builds the argument array for the replacer function matching the standard
2057
+ * JS String.prototype.replace(regex, replacer) signature.
2058
+ * @param {number} matchStart - the start index of the match
2059
+ * @param {boolean} hasNamedGroups - cached flag if pattern has named groups
2060
+ * @param {string|Uint8Array|number[]} originalInput - the cached original input reference
2061
+ * @returns {Array} array of arguments
2062
+ * @private
2063
+ */
2064
+ buildReplacerArgs(matchStart, hasNamedGroups, originalInput) {
2065
+ const args = [this.group(0)]; // match
2066
+
2067
+ const numGroups = this.groupCount();
2068
+ // Fast-path capture group extraction
2069
+ for (let i = 1; i <= numGroups; i++) {
2070
+ const start = this.start(i);
2071
+ if (start < 0) {
2072
+ args.push(void 0);
2073
+ } else {
2074
+ args.push(this.substring(start, this.end(i)));
2075
+ }
2076
+ }
2077
+ args.push(matchStart); // offset
2078
+ args.push(originalInput); // original string (cached)
2079
+
2080
+ // Append named groups object if pattern contains them
2081
+ if (hasNamedGroups) {
2082
+ const parsedGroups = this.getNamedGroups();
2083
+ for (const key in parsedGroups) {
2084
+ if (parsedGroups[key] === null) {
2085
+ parsedGroups[key] = void 0;
2086
+ }
2087
+ }
2088
+ args.push(parsedGroups);
2089
+ }
2090
+ return args;
2091
+ }
2014
2092
  }
2015
2093
 
2016
2094
  /**