re2js 2.7.1 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -2
- package/build/index.cjs.cjs +87 -9
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +31 -5
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +87 -9
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +87 -9
- package/build/index.umd.js.map +1 -1
- package/package.json +6 -6
package/README.md
CHANGED
|
@@ -513,7 +513,7 @@ RE2JS.compile('(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)')
|
|
|
513
513
|
Note that the replacement string can include references to capturing groups from the pattern
|
|
514
514
|
|
|
515
515
|
Parameters:
|
|
516
|
-
- `replacement (String)`: The string that replaces the substrings found
|
|
516
|
+
- `replacement (String | Function)`: The string that replaces the substrings found, or a function invoked to create the new substring. When passing a string, capture groups and special characters have special behavior. For example:
|
|
517
517
|
- `$&` refers to the entire matched substring
|
|
518
518
|
- `$1, $2, ...` refer to the corresponding capture groups in the pattern
|
|
519
519
|
- `$$` inserts a literal `$`
|
|
@@ -556,7 +556,42 @@ RE2JS.compile('(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)')
|
|
|
556
556
|
.replaceFirst('$10$20') // 'jb0nopqrstuvwxyz123'
|
|
557
557
|
```
|
|
558
558
|
|
|
559
|
-
Function support second argument `javaMode`, which work in the same way, as for `replaceAll` function
|
|
559
|
+
The function supports a second argument, `javaMode`, which works the same way as it does for the `replaceAll` function.
|
|
560
|
+
|
|
561
|
+
#### Using a Replacer Function
|
|
562
|
+
|
|
563
|
+
For a more modern JavaScript developer experience, RE2JS supports passing a **replacer function** to `replaceAll()` and `replaceFirst()`, perfectly mirroring native `String.prototype.replace(regex, replacer)` behavior while taking advantage of the high-speed linear-time engine.
|
|
564
|
+
|
|
565
|
+
The replacer function is invoked for each match, and its return value is used as the replacement string. The function receives the following arguments:
|
|
566
|
+
|
|
567
|
+
1. `match`: The matched substring.
|
|
568
|
+
2. `p1, p2, ...`: The string found by a capture group (if any). Unmatched optional groups evaluate to `undefined`.
|
|
569
|
+
3. `offset`: The offset of the matched substring within the whole string.
|
|
570
|
+
4. `string`: The original input string (or byte array).
|
|
571
|
+
5. `groups`: A dictionary object of named capture groups (if any exist in the pattern).
|
|
572
|
+
|
|
573
|
+
```js
|
|
574
|
+
import { RE2JS } from 're2js'
|
|
575
|
+
|
|
576
|
+
// Example 1: Dynamic replacements
|
|
577
|
+
const re1 = RE2JS.compile('\\d+');
|
|
578
|
+
const m1 = re1.matcher('Numbers: 1, 2, 3');
|
|
579
|
+
|
|
580
|
+
m1.replaceAll((match) => String(Number(match) * 10));
|
|
581
|
+
// 'Numbers: 10, 20, 30'
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
// Example 2: Using named capture groups and function signature
|
|
585
|
+
const re2 = RE2JS.compile('(?P<first>\\w+) (?:(?P<middle>\\w+) )?(?P<last>\\w+)');
|
|
586
|
+
const m2 = re2.matcher('Hello World');
|
|
587
|
+
|
|
588
|
+
m2.replaceFirst((match, p1, p2, p3, offset, string, groups) => {
|
|
589
|
+
// 'middle' didn't match, so p2 and groups.middle will be undefined
|
|
590
|
+
return `${groups.last}, ${groups.first}`;
|
|
591
|
+
});
|
|
592
|
+
// 'World, Hello'
|
|
593
|
+
|
|
594
|
+
```
|
|
560
595
|
|
|
561
596
|
### Safe Replacements
|
|
562
597
|
|
package/build/index.cjs.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.7.1
|
|
5
|
+
* @version v2.8.0
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -1411,6 +1411,13 @@ class RE2JSInternalException extends RE2JSException {
|
|
|
1411
1411
|
*/
|
|
1412
1412
|
|
|
1413
1413
|
class Matcher {
|
|
1414
|
+
/**
|
|
1415
|
+
* V8 and WebKit have historical hard limits on the number of arguments
|
|
1416
|
+
* that can be passed to a function. We cap replacer arguments to prevent
|
|
1417
|
+
* Call Stack Overflow (DoS) vulnerabilities on patterns with massive capture group counts.
|
|
1418
|
+
*/
|
|
1419
|
+
static MAX_REPLACER_ARGS = 65535;
|
|
1420
|
+
|
|
1414
1421
|
/**
|
|
1415
1422
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
1416
1423
|
* {@link #appendReplacement} as a literal replacement of {@code s}.
|
|
@@ -1707,16 +1714,14 @@ class Matcher {
|
|
|
1707
1714
|
* @private
|
|
1708
1715
|
*/
|
|
1709
1716
|
genMatch(startByte, anchor) {
|
|
1710
|
-
const
|
|
1711
|
-
const ngroup = hasLookbehinds ? 1 + this.patternGroupCount : 1;
|
|
1712
|
-
const res = this.patternInput.re2().matchMachineInput(this.matcherInput, startByte, this.matcherInputLength, anchor, ngroup);
|
|
1717
|
+
const res = this.patternInput.re2().matchMachineInput(this.matcherInput, startByte, this.matcherInputLength, anchor, 1);
|
|
1713
1718
|
const ok = res[0];
|
|
1714
1719
|
if (!ok) {
|
|
1715
1720
|
return false;
|
|
1716
1721
|
}
|
|
1717
1722
|
this.groups = res[1];
|
|
1718
1723
|
this.hasMatch = true;
|
|
1719
|
-
this.hasGroups =
|
|
1724
|
+
this.hasGroups = this.patternGroupCount === 0;
|
|
1720
1725
|
this.anchorFlag = anchor;
|
|
1721
1726
|
return true;
|
|
1722
1727
|
}
|
|
@@ -1969,7 +1974,7 @@ class Matcher {
|
|
|
1969
1974
|
* Returns the input with all matches replaced by {@code replacement}, interpreted as for
|
|
1970
1975
|
* {@code appendReplacement}.
|
|
1971
1976
|
*
|
|
1972
|
-
* @param {string} replacement - the replacement string
|
|
1977
|
+
* @param {string|Function} replacement - the replacement string or a replacer function
|
|
1973
1978
|
* @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
|
|
1974
1979
|
* @returns {string} the input string with the matches replaced
|
|
1975
1980
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and javaMode is true
|
|
@@ -1982,7 +1987,7 @@ class Matcher {
|
|
|
1982
1987
|
* Returns the input with the first match replaced by {@code replacement}, interpreted as for
|
|
1983
1988
|
* {@code appendReplacement}.
|
|
1984
1989
|
*
|
|
1985
|
-
* @param {string} replacement - the replacement string
|
|
1990
|
+
* @param {string|Function} replacement - the replacement string or a replacer function
|
|
1986
1991
|
* @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
|
|
1987
1992
|
* @returns {string} the input string with the first match replaced
|
|
1988
1993
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and javaMode is true
|
|
@@ -1993,7 +1998,7 @@ class Matcher {
|
|
|
1993
1998
|
|
|
1994
1999
|
/**
|
|
1995
2000
|
* Helper: replaceAll/replaceFirst hybrid.
|
|
1996
|
-
* @param {string} replacement - the replacement string
|
|
2001
|
+
* @param {string|Function} replacement - the replacement string or a replacer function
|
|
1997
2002
|
* @param {boolean} [all=true] - replace all matches
|
|
1998
2003
|
* @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
|
|
1999
2004
|
* @returns {string}
|
|
@@ -2002,8 +2007,21 @@ class Matcher {
|
|
|
2002
2007
|
replace(replacement, all = true, javaMode = false) {
|
|
2003
2008
|
let res = '';
|
|
2004
2009
|
this.reset();
|
|
2010
|
+
const isFunc = typeof replacement === 'function';
|
|
2011
|
+
|
|
2012
|
+
// Cache named groups check to avoid GC thrashing on every match
|
|
2013
|
+
const hasNamedGroups = Object.keys(this.namedGroups).length > 0;
|
|
2014
|
+
let originalInput = null;
|
|
2015
|
+
if (isFunc) {
|
|
2016
|
+
// Prevent V8 Call Stack Overflow (DoS vector) on massive capture group counts
|
|
2017
|
+
if (this.groupCount() >= Matcher.MAX_REPLACER_ARGS) {
|
|
2018
|
+
throw new RE2JSGroupException('Too many capture groups to safely invoke replacer function');
|
|
2019
|
+
}
|
|
2020
|
+
// Resolve the original input reference exactly once outside the hot loop
|
|
2021
|
+
originalInput = this.matcherInput.isUTF8Encoding() ? this.matcherInput.asBytes() : this.matcherInput.asCharSequence();
|
|
2022
|
+
}
|
|
2005
2023
|
while (this.find()) {
|
|
2006
|
-
res += this.appendReplacement(replacement, javaMode);
|
|
2024
|
+
res += isFunc ? this.appendReplacementFunc(replacement, hasNamedGroups, originalInput) : this.appendReplacement(replacement, javaMode);
|
|
2007
2025
|
if (!all) {
|
|
2008
2026
|
break;
|
|
2009
2027
|
}
|
|
@@ -2011,6 +2029,66 @@ class Matcher {
|
|
|
2011
2029
|
res += this.appendTail();
|
|
2012
2030
|
return res;
|
|
2013
2031
|
}
|
|
2032
|
+
|
|
2033
|
+
/**
|
|
2034
|
+
* Evaluates a replacer function for the current match and appends the result,
|
|
2035
|
+
* along with any un-matched preceding text, advancing the append position.
|
|
2036
|
+
* @param {Function} replacer - the replacer function
|
|
2037
|
+
* @param {boolean} hasNamedGroups - cached flag if pattern has named groups
|
|
2038
|
+
* @param {string|Uint8Array|number[]} originalInput - the cached original input reference
|
|
2039
|
+
* @returns {string} the evaluated string to append
|
|
2040
|
+
* @private
|
|
2041
|
+
*/
|
|
2042
|
+
appendReplacementFunc(replacer, hasNamedGroups, originalInput) {
|
|
2043
|
+
let res = '';
|
|
2044
|
+
const s = this.start();
|
|
2045
|
+
const e = this.end();
|
|
2046
|
+
if (this.appendPos < s) {
|
|
2047
|
+
res += this.substring(this.appendPos, s);
|
|
2048
|
+
}
|
|
2049
|
+
this.appendPos = e;
|
|
2050
|
+
const args = this.buildReplacerArgs(s, hasNamedGroups, originalInput);
|
|
2051
|
+
res += String(replacer(...args));
|
|
2052
|
+
return res;
|
|
2053
|
+
}
|
|
2054
|
+
|
|
2055
|
+
/**
|
|
2056
|
+
* Builds the argument array for the replacer function matching the standard
|
|
2057
|
+
* JS String.prototype.replace(regex, replacer) signature.
|
|
2058
|
+
* @param {number} matchStart - the start index of the match
|
|
2059
|
+
* @param {boolean} hasNamedGroups - cached flag if pattern has named groups
|
|
2060
|
+
* @param {string|Uint8Array|number[]} originalInput - the cached original input reference
|
|
2061
|
+
* @returns {Array} array of arguments
|
|
2062
|
+
* @private
|
|
2063
|
+
*/
|
|
2064
|
+
buildReplacerArgs(matchStart, hasNamedGroups, originalInput) {
|
|
2065
|
+
const args = [this.group(0)]; // match
|
|
2066
|
+
|
|
2067
|
+
const numGroups = this.groupCount();
|
|
2068
|
+
// Fast-path capture group extraction
|
|
2069
|
+
for (let i = 1; i <= numGroups; i++) {
|
|
2070
|
+
const start = this.start(i);
|
|
2071
|
+
if (start < 0) {
|
|
2072
|
+
args.push(void 0);
|
|
2073
|
+
} else {
|
|
2074
|
+
args.push(this.substring(start, this.end(i)));
|
|
2075
|
+
}
|
|
2076
|
+
}
|
|
2077
|
+
args.push(matchStart); // offset
|
|
2078
|
+
args.push(originalInput); // original string (cached)
|
|
2079
|
+
|
|
2080
|
+
// Append named groups object if pattern contains them
|
|
2081
|
+
if (hasNamedGroups) {
|
|
2082
|
+
const parsedGroups = this.getNamedGroups();
|
|
2083
|
+
for (const key in parsedGroups) {
|
|
2084
|
+
if (parsedGroups[key] === null) {
|
|
2085
|
+
parsedGroups[key] = void 0;
|
|
2086
|
+
}
|
|
2087
|
+
}
|
|
2088
|
+
args.push(parsedGroups);
|
|
2089
|
+
}
|
|
2090
|
+
return args;
|
|
2091
|
+
}
|
|
2014
2092
|
}
|
|
2015
2093
|
|
|
2016
2094
|
/**
|