re2js 2.7.1 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.7.1
5
+ * @version v2.8.0
6
6
  * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -1415,6 +1415,13 @@
1415
1415
  */
1416
1416
 
1417
1417
  class Matcher {
1418
+ /**
1419
+ * V8 and WebKit have historical hard limits on the number of arguments
1420
+ * that can be passed to a function. We cap replacer arguments to prevent
1421
+ * Call Stack Overflow (DoS) vulnerabilities on massive ASTs.
1422
+ */
1423
+ static MAX_REPLACER_ARGS = 65535;
1424
+
1418
1425
  /**
1419
1426
  * Quotes '\' and '$' in {@code s}, so that the returned string could be used in
1420
1427
  * {@link #appendReplacement} as a literal replacement of {@code s}.
@@ -1711,16 +1718,14 @@
1711
1718
  * @private
1712
1719
  */
1713
1720
  genMatch(startByte, anchor) {
1714
- const hasLookbehinds = this.patternInput.re2().prog.numLb > 0;
1715
- const ngroup = hasLookbehinds ? 1 + this.patternGroupCount : 1;
1716
- const res = this.patternInput.re2().matchMachineInput(this.matcherInput, startByte, this.matcherInputLength, anchor, ngroup);
1721
+ const res = this.patternInput.re2().matchMachineInput(this.matcherInput, startByte, this.matcherInputLength, anchor, 1);
1717
1722
  const ok = res[0];
1718
1723
  if (!ok) {
1719
1724
  return false;
1720
1725
  }
1721
1726
  this.groups = res[1];
1722
1727
  this.hasMatch = true;
1723
- this.hasGroups = hasLookbehinds || this.patternGroupCount === 0;
1728
+ this.hasGroups = this.patternGroupCount === 0;
1724
1729
  this.anchorFlag = anchor;
1725
1730
  return true;
1726
1731
  }
@@ -1973,7 +1978,7 @@
1973
1978
  * Returns the input with all matches replaced by {@code replacement}, interpreted as for
1974
1979
  * {@code appendReplacement}.
1975
1980
  *
1976
- * @param {string} replacement - the replacement string
1981
+ * @param {string|Function} replacement - the replacement string or a replacer function
1977
1982
  * @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
1978
1983
  * @returns {string} the input string with the matches replaced
1979
1984
  * @throws IndexOutOfBoundsException if replacement refers to an invalid group and javaMode is true
@@ -1986,7 +1991,7 @@
1986
1991
  * Returns the input with the first match replaced by {@code replacement}, interpreted as for
1987
1992
  * {@code appendReplacement}.
1988
1993
  *
1989
- * @param {string} replacement - the replacement string
1994
+ * @param {string|Function} replacement - the replacement string or a replacer function
1990
1995
  * @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
1991
1996
  * @returns {string} the input string with the first match replaced
1992
1997
  * @throws IndexOutOfBoundsException if replacement refers to an invalid group and javaMode is true
@@ -1997,7 +2002,7 @@
1997
2002
 
1998
2003
  /**
1999
2004
  * Helper: replaceAll/replaceFirst hybrid.
2000
- * @param {string} replacement - the replacement string
2005
+ * @param {string|Function} replacement - the replacement string or a replacer function
2001
2006
  * @param {boolean} [all=true] - replace all matches
2002
2007
  * @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
2003
2008
  * @returns {string}
@@ -2006,8 +2011,21 @@
2006
2011
  replace(replacement, all = true, javaMode = false) {
2007
2012
  let res = '';
2008
2013
  this.reset();
2014
+ const isFunc = typeof replacement === 'function';
2015
+
2016
+ // Cache named groups check to avoid GC thrashing on every match
2017
+ const hasNamedGroups = Object.keys(this.namedGroups).length > 0;
2018
+ let originalInput = null;
2019
+ if (isFunc) {
2020
+ // Prevent V8 Call Stack Overflow (DoS vector) on massive capture group counts
2021
+ if (this.groupCount() >= Matcher.MAX_REPLACER_ARGS) {
2022
+ throw new RE2JSGroupException('Too many capture groups to safely invoke replacer function');
2023
+ }
2024
+ // Resolve the original input reference exactly once outside the hot loop
2025
+ originalInput = this.matcherInput.isUTF8Encoding() ? this.matcherInput.asBytes() : this.matcherInput.asCharSequence();
2026
+ }
2009
2027
  while (this.find()) {
2010
- res += this.appendReplacement(replacement, javaMode);
2028
+ res += isFunc ? this.appendReplacementFunc(replacement, hasNamedGroups, originalInput) : this.appendReplacement(replacement, javaMode);
2011
2029
  if (!all) {
2012
2030
  break;
2013
2031
  }
@@ -2015,6 +2033,66 @@
2015
2033
  res += this.appendTail();
2016
2034
  return res;
2017
2035
  }
2036
+
2037
+ /**
2038
+ * Evaluates a replacer function for the current match and appends the result,
2039
+ * along with any un-matched preceding text, advancing the append position.
2040
+ * @param {Function} replacer - the replacer function
2041
+ * @param {boolean} hasNamedGroups - cached flag if pattern has named groups
2042
+ * @param {string|Uint8Array|number[]} originalInput - the cached original input reference
2043
+ * @returns {string} the evaluated string to append
2044
+ * @private
2045
+ */
2046
+ appendReplacementFunc(replacer, hasNamedGroups, originalInput) {
2047
+ let res = '';
2048
+ const s = this.start();
2049
+ const e = this.end();
2050
+ if (this.appendPos < s) {
2051
+ res += this.substring(this.appendPos, s);
2052
+ }
2053
+ this.appendPos = e;
2054
+ const args = this.buildReplacerArgs(s, hasNamedGroups, originalInput);
2055
+ res += String(replacer(...args));
2056
+ return res;
2057
+ }
2058
+
2059
+ /**
2060
+ * Builds the argument array for the replacer function matching the standard
2061
+ * JS String.prototype.replace(regex, replacer) signature.
2062
+ * @param {number} matchStart - the start index of the match
2063
+ * @param {boolean} hasNamedGroups - cached flag if pattern has named groups
2064
+ * @param {string|Uint8Array|number[]} originalInput - the cached original input reference
2065
+ * @returns {Array} array of arguments
2066
+ * @private
2067
+ */
2068
+ buildReplacerArgs(matchStart, hasNamedGroups, originalInput) {
2069
+ const args = [this.group(0)]; // match
2070
+
2071
+ const numGroups = this.groupCount();
2072
+ // Fast-path capture group extraction
2073
+ for (let i = 1; i <= numGroups; i++) {
2074
+ const start = this.start(i);
2075
+ if (start < 0) {
2076
+ args.push(void 0);
2077
+ } else {
2078
+ args.push(this.substring(start, this.end(i)));
2079
+ }
2080
+ }
2081
+ args.push(matchStart); // offset
2082
+ args.push(originalInput); // original string (cached)
2083
+
2084
+ // Append named groups object if pattern contains them
2085
+ if (hasNamedGroups) {
2086
+ const parsedGroups = this.getNamedGroups();
2087
+ for (const key in parsedGroups) {
2088
+ if (parsedGroups[key] === null) {
2089
+ parsedGroups[key] = void 0;
2090
+ }
2091
+ }
2092
+ args.push(parsedGroups);
2093
+ }
2094
+ return args;
2095
+ }
2018
2096
  }
2019
2097
 
2020
2098
  /**