re2js 2.7.0 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -2
- package/build/index.cjs.cjs +144 -43
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +31 -5
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +144 -43
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +144 -43
- package/build/index.umd.js.map +1 -1
- package/package.json +6 -6
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
 * @version v2.7.0
|
|
5
|
+
* @version v2.8.0
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -1415,6 +1415,13 @@
|
|
|
1415
1415
|
*/
|
|
1416
1416
|
|
|
1417
1417
|
class Matcher {
|
|
1418
|
+
/**
|
|
1419
|
+
* V8 and WebKit have historical hard limits on the number of arguments
|
|
1420
|
+
* that can be passed to a function. We cap replacer arguments to prevent
|
|
1421
|
+
* Call Stack Overflow (DoS) vulnerabilities on massive ASTs.
|
|
1422
|
+
*/
|
|
1423
|
+
static MAX_REPLACER_ARGS = 65535;
|
|
1424
|
+
|
|
1418
1425
|
/**
|
|
1419
1426
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
1420
1427
|
* {@link #appendReplacement} as a literal replacement of {@code s}.
|
|
@@ -1711,16 +1718,14 @@
|
|
|
1711
1718
|
* @private
|
|
1712
1719
|
*/
|
|
1713
1720
|
genMatch(startByte, anchor) {
|
|
1714
|
-
const
|
|
1715
|
-
const ngroup = hasLookbehinds ? 1 + this.patternGroupCount : 1;
|
|
1716
|
-
const res = this.patternInput.re2().matchMachineInput(this.matcherInput, startByte, this.matcherInputLength, anchor, ngroup);
|
|
1721
|
+
const res = this.patternInput.re2().matchMachineInput(this.matcherInput, startByte, this.matcherInputLength, anchor, 1);
|
|
1717
1722
|
const ok = res[0];
|
|
1718
1723
|
if (!ok) {
|
|
1719
1724
|
return false;
|
|
1720
1725
|
}
|
|
1721
1726
|
this.groups = res[1];
|
|
1722
1727
|
this.hasMatch = true;
|
|
1723
|
-
this.hasGroups =
|
|
1728
|
+
this.hasGroups = this.patternGroupCount === 0;
|
|
1724
1729
|
this.anchorFlag = anchor;
|
|
1725
1730
|
return true;
|
|
1726
1731
|
}
|
|
@@ -1973,7 +1978,7 @@
|
|
|
1973
1978
|
* Returns the input with all matches replaced by {@code replacement}, interpreted as for
|
|
1974
1979
|
* {@code appendReplacement}.
|
|
1975
1980
|
*
|
|
1976
|
-
* @param {string} replacement - the replacement string
|
|
1981
|
+
* @param {string|Function} replacement - the replacement string or a replacer function
|
|
1977
1982
|
* @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
|
|
1978
1983
|
* @returns {string} the input string with the matches replaced
|
|
1979
1984
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and javaMode is true
|
|
@@ -1986,7 +1991,7 @@
|
|
|
1986
1991
|
* Returns the input with the first match replaced by {@code replacement}, interpreted as for
|
|
1987
1992
|
* {@code appendReplacement}.
|
|
1988
1993
|
*
|
|
1989
|
-
* @param {string} replacement - the replacement string
|
|
1994
|
+
* @param {string|Function} replacement - the replacement string or a replacer function
|
|
1990
1995
|
* @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
|
|
1991
1996
|
* @returns {string} the input string with the first match replaced
|
|
1992
1997
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and javaMode is true
|
|
@@ -1997,7 +2002,7 @@
|
|
|
1997
2002
|
|
|
1998
2003
|
/**
|
|
1999
2004
|
* Helper: replaceAll/replaceFirst hybrid.
|
|
2000
|
-
* @param {string} replacement - the replacement string
|
|
2005
|
+
* @param {string|Function} replacement - the replacement string or a replacer function
|
|
2001
2006
|
* @param {boolean} [all=true] - replace all matches
|
|
2002
2007
|
* @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
|
|
2003
2008
|
* @returns {string}
|
|
@@ -2006,8 +2011,21 @@
|
|
|
2006
2011
|
replace(replacement, all = true, javaMode = false) {
|
|
2007
2012
|
let res = '';
|
|
2008
2013
|
this.reset();
|
|
2014
|
+
const isFunc = typeof replacement === 'function';
|
|
2015
|
+
|
|
2016
|
+
// Cache named groups check to avoid GC thrashing on every match
|
|
2017
|
+
const hasNamedGroups = Object.keys(this.namedGroups).length > 0;
|
|
2018
|
+
let originalInput = null;
|
|
2019
|
+
if (isFunc) {
|
|
2020
|
+
// Prevent V8 Call Stack Overflow (DoS vector) on massive capture group counts
|
|
2021
|
+
if (this.groupCount() >= Matcher.MAX_REPLACER_ARGS) {
|
|
2022
|
+
throw new RE2JSGroupException('Too many capture groups to safely invoke replacer function');
|
|
2023
|
+
}
|
|
2024
|
+
// Resolve the original input reference exactly once outside the hot loop
|
|
2025
|
+
originalInput = this.matcherInput.isUTF8Encoding() ? this.matcherInput.asBytes() : this.matcherInput.asCharSequence();
|
|
2026
|
+
}
|
|
2009
2027
|
while (this.find()) {
|
|
2010
|
-
res += this.appendReplacement(replacement, javaMode);
|
|
2028
|
+
res += isFunc ? this.appendReplacementFunc(replacement, hasNamedGroups, originalInput) : this.appendReplacement(replacement, javaMode);
|
|
2011
2029
|
if (!all) {
|
|
2012
2030
|
break;
|
|
2013
2031
|
}
|
|
@@ -2015,6 +2033,66 @@
|
|
|
2015
2033
|
res += this.appendTail();
|
|
2016
2034
|
return res;
|
|
2017
2035
|
}
|
|
2036
|
+
|
|
2037
|
+
/**
|
|
2038
|
+
* Evaluates a replacer function for the current match and appends the result,
|
|
2039
|
+
* along with any un-matched preceding text, advancing the append position.
|
|
2040
|
+
* @param {Function} replacer - the replacer function
|
|
2041
|
+
* @param {boolean} hasNamedGroups - cached flag if pattern has named groups
|
|
2042
|
+
* @param {string|Uint8Array|number[]} originalInput - the cached original input reference
|
|
2043
|
+
* @returns {string} the evaluated string to append
|
|
2044
|
+
* @private
|
|
2045
|
+
*/
|
|
2046
|
+
appendReplacementFunc(replacer, hasNamedGroups, originalInput) {
|
|
2047
|
+
let res = '';
|
|
2048
|
+
const s = this.start();
|
|
2049
|
+
const e = this.end();
|
|
2050
|
+
if (this.appendPos < s) {
|
|
2051
|
+
res += this.substring(this.appendPos, s);
|
|
2052
|
+
}
|
|
2053
|
+
this.appendPos = e;
|
|
2054
|
+
const args = this.buildReplacerArgs(s, hasNamedGroups, originalInput);
|
|
2055
|
+
res += String(replacer(...args));
|
|
2056
|
+
return res;
|
|
2057
|
+
}
|
|
2058
|
+
|
|
2059
|
+
/**
|
|
2060
|
+
* Builds the argument array for the replacer function matching the standard
|
|
2061
|
+
* JS String.prototype.replace(regex, replacer) signature.
|
|
2062
|
+
* @param {number} matchStart - the start index of the match
|
|
2063
|
+
* @param {boolean} hasNamedGroups - cached flag if pattern has named groups
|
|
2064
|
+
* @param {string|Uint8Array|number[]} originalInput - the cached original input reference
|
|
2065
|
+
* @returns {Array} array of arguments
|
|
2066
|
+
* @private
|
|
2067
|
+
*/
|
|
2068
|
+
buildReplacerArgs(matchStart, hasNamedGroups, originalInput) {
|
|
2069
|
+
const args = [this.group(0)]; // match
|
|
2070
|
+
|
|
2071
|
+
const numGroups = this.groupCount();
|
|
2072
|
+
// Fast-path capture group extraction
|
|
2073
|
+
for (let i = 1; i <= numGroups; i++) {
|
|
2074
|
+
const start = this.start(i);
|
|
2075
|
+
if (start < 0) {
|
|
2076
|
+
args.push(void 0);
|
|
2077
|
+
} else {
|
|
2078
|
+
args.push(this.substring(start, this.end(i)));
|
|
2079
|
+
}
|
|
2080
|
+
}
|
|
2081
|
+
args.push(matchStart); // offset
|
|
2082
|
+
args.push(originalInput); // original string (cached)
|
|
2083
|
+
|
|
2084
|
+
// Append named groups object if pattern contains them
|
|
2085
|
+
if (hasNamedGroups) {
|
|
2086
|
+
const parsedGroups = this.getNamedGroups();
|
|
2087
|
+
for (const key in parsedGroups) {
|
|
2088
|
+
if (parsedGroups[key] === null) {
|
|
2089
|
+
parsedGroups[key] = void 0;
|
|
2090
|
+
}
|
|
2091
|
+
}
|
|
2092
|
+
args.push(parsedGroups);
|
|
2093
|
+
}
|
|
2094
|
+
return args;
|
|
2095
|
+
}
|
|
2018
2096
|
}
|
|
2019
2097
|
|
|
2020
2098
|
/**
|
|
@@ -2341,30 +2419,35 @@
|
|
|
2341
2419
|
}
|
|
2342
2420
|
this.matched = false;
|
|
2343
2421
|
this.matchcap.fill(-1);
|
|
2422
|
+
|
|
2423
|
+
// Lookbehinds must scan from the beginning of the string to build their state table,
|
|
2424
|
+
// even if the main pattern search is requested to start mid-string.
|
|
2425
|
+
let currentPos = this.prog.numLb > 0 ? 0 : pos;
|
|
2426
|
+
let matchStartPos = pos;
|
|
2344
2427
|
let runq = this.q0;
|
|
2345
2428
|
let nextq = this.q1;
|
|
2346
|
-
let r = input.step(pos);
|
|
2429
|
+
let r = input.step(currentPos);
|
|
2347
2430
|
let rune = r >> 3;
|
|
2348
2431
|
let width = r & 7;
|
|
2349
2432
|
let rune1 = -1;
|
|
2350
2433
|
let width1 = 0;
|
|
2351
2434
|
if (r !== MachineInputBase.EOF()) {
|
|
2352
|
-
r = input.step(pos + width);
|
|
2435
|
+
r = input.step(currentPos + width);
|
|
2353
2436
|
rune1 = r >> 3;
|
|
2354
2437
|
width1 = r & 7;
|
|
2355
2438
|
}
|
|
2356
2439
|
let flag;
|
|
2357
|
-
if (
|
|
2440
|
+
if (currentPos === 0) {
|
|
2358
2441
|
flag = Utils.emptyOpContext(-1, rune);
|
|
2359
2442
|
} else {
|
|
2360
|
-
flag = input.context(
|
|
2443
|
+
flag = input.context(currentPos);
|
|
2361
2444
|
}
|
|
2362
2445
|
while (true) {
|
|
2363
2446
|
if (runq.isEmpty()) {
|
|
2364
|
-
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 &&
|
|
2447
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && currentPos !== 0) {
|
|
2365
2448
|
break;
|
|
2366
2449
|
}
|
|
2367
|
-
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) &&
|
|
2450
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && currentPos !== 0) {
|
|
2368
2451
|
break;
|
|
2369
2452
|
}
|
|
2370
2453
|
if (this.matched) {
|
|
@@ -2374,43 +2457,50 @@
|
|
|
2374
2457
|
// Fast-forwarding the string pointer will skip over the positions where
|
|
2375
2458
|
// the parallel lookbehind automata need to be spawned.
|
|
2376
2459
|
if (this.prog.numLb === 0 && !(this.re2.prefix.length === 0) && rune1 !== this.re2.prefixRune && input.canCheckPrefix()) {
|
|
2377
|
-
const advance = input.index(this.re2,
|
|
2460
|
+
const advance = input.index(this.re2, currentPos);
|
|
2378
2461
|
if (advance < 0) {
|
|
2379
2462
|
break;
|
|
2380
2463
|
}
|
|
2381
|
-
|
|
2382
|
-
r = input.step(
|
|
2464
|
+
currentPos += advance;
|
|
2465
|
+
r = input.step(currentPos);
|
|
2383
2466
|
rune = r >> 3;
|
|
2384
2467
|
width = r & 7;
|
|
2385
|
-
r = input.step(
|
|
2468
|
+
r = input.step(currentPos + width);
|
|
2386
2469
|
rune1 = r >> 3;
|
|
2387
2470
|
width1 = r & 7;
|
|
2388
2471
|
}
|
|
2389
2472
|
}
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
// Spawn Lookbehind threads BEFORE the main pattern
|
|
2473
|
+
|
|
2474
|
+
// Optimize lookbehind spawning. Because lookbehinds are prefixed with `.*` by the compiler,
|
|
2475
|
+
// they only need to be spawned exactly once at the beginning of the string (currentPos === 0).
|
|
2476
|
+
if (currentPos === 0 && this.prog.numLb > 0) {
|
|
2395
2477
|
for (let i = 0; i < this.prog.lbStarts.length; i++) {
|
|
2396
|
-
this.add(runq, this.prog.lbStarts[i],
|
|
2478
|
+
this.add(runq, this.prog.lbStarts[i], currentPos, this.matchcap, flag, null);
|
|
2479
|
+
}
|
|
2480
|
+
}
|
|
2481
|
+
if (!this.matched && (currentPos === 0 || anchor === RE2Flags.UNANCHORED)) {
|
|
2482
|
+
// ONLY spawn the main pattern if we have reached the requested search start boundary
|
|
2483
|
+
if (currentPos >= matchStartPos) {
|
|
2484
|
+
if (this.ncap > 0) {
|
|
2485
|
+
this.matchcap[0] = currentPos;
|
|
2486
|
+
}
|
|
2487
|
+
this.add(runq, this.prog.start, currentPos, this.matchcap, flag, null);
|
|
2397
2488
|
}
|
|
2398
|
-
this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
|
|
2399
2489
|
}
|
|
2400
|
-
const nextPos = pos + width;
|
|
2490
|
+
const nextPos = currentPos + width;
|
|
2401
2491
|
flag = input.context(nextPos);
|
|
2402
|
-
this.step(runq, nextq,
|
|
2492
|
+
this.step(runq, nextq, currentPos, nextPos, rune, flag, anchor, currentPos === input.endPos());
|
|
2403
2493
|
if (width === 0) {
|
|
2404
2494
|
break;
|
|
2405
2495
|
}
|
|
2406
2496
|
if (this.ncap === 0 && this.matched) {
|
|
2407
2497
|
break;
|
|
2408
2498
|
}
|
|
2409
|
-
|
|
2499
|
+
currentPos += width;
|
|
2410
2500
|
rune = rune1;
|
|
2411
2501
|
width = width1;
|
|
2412
2502
|
if (rune !== -1) {
|
|
2413
|
-
r = input.step(
|
|
2503
|
+
r = input.step(currentPos + width);
|
|
2414
2504
|
rune1 = r >> 3;
|
|
2415
2505
|
width1 = r & 7;
|
|
2416
2506
|
}
|
|
@@ -2427,35 +2517,46 @@
|
|
|
2427
2517
|
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2428
2518
|
return [];
|
|
2429
2519
|
}
|
|
2520
|
+
|
|
2521
|
+
// Lookbehinds must scan from the beginning of the string to build their state table,
|
|
2522
|
+
// even if the main pattern search is requested to start mid-string.
|
|
2523
|
+
let currentPos = this.prog.numLb > 0 ? 0 : pos;
|
|
2524
|
+
let matchStartPos = pos;
|
|
2430
2525
|
let runq = this.q0;
|
|
2431
2526
|
let nextq = this.q1;
|
|
2432
|
-
let r = input.step(pos);
|
|
2527
|
+
let r = input.step(currentPos);
|
|
2433
2528
|
let rune = r >> 3;
|
|
2434
2529
|
let width = r & 7;
|
|
2435
2530
|
let rune1 = -1;
|
|
2436
2531
|
let width1 = 0;
|
|
2437
2532
|
if (r !== MachineInputBase.EOF()) {
|
|
2438
|
-
r = input.step(pos + width);
|
|
2533
|
+
r = input.step(currentPos + width);
|
|
2439
2534
|
rune1 = r >> 3;
|
|
2440
2535
|
width1 = r & 7;
|
|
2441
2536
|
}
|
|
2442
|
-
let flag =
|
|
2537
|
+
let flag = currentPos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(currentPos);
|
|
2443
2538
|
const matches = new Set();
|
|
2444
2539
|
while (true) {
|
|
2445
2540
|
if (runq.isEmpty()) {
|
|
2446
|
-
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 &&
|
|
2447
|
-
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) &&
|
|
2541
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && currentPos !== 0) break;
|
|
2542
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && currentPos !== 0) {
|
|
2448
2543
|
break;
|
|
2449
2544
|
}
|
|
2450
2545
|
}
|
|
2451
|
-
|
|
2452
|
-
|
|
2546
|
+
|
|
2547
|
+
// Optimize lookbehind spawning to exactly once at BOF
|
|
2548
|
+
if (currentPos === 0 && this.prog.numLb > 0) {
|
|
2453
2549
|
for (let i = 0; i < this.prog.lbStarts.length; i++) {
|
|
2454
|
-
this.add(runq, this.prog.lbStarts[i],
|
|
2550
|
+
this.add(runq, this.prog.lbStarts[i], currentPos, this.matchcap, flag, null);
|
|
2455
2551
|
}
|
|
2456
|
-
this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
|
|
2457
2552
|
}
|
|
2458
|
-
|
|
2553
|
+
if (currentPos === 0 || anchor === RE2Flags.UNANCHORED) {
|
|
2554
|
+
// ONLY spawn the main pattern if we have reached the requested search start boundary
|
|
2555
|
+
if (currentPos >= matchStartPos) {
|
|
2556
|
+
this.add(runq, this.prog.start, currentPos, this.matchcap, flag, null);
|
|
2557
|
+
}
|
|
2558
|
+
}
|
|
2559
|
+
const nextPos = currentPos + width;
|
|
2459
2560
|
flag = input.context(nextPos);
|
|
2460
2561
|
for (let j = 0; j < runq.size; j++) {
|
|
2461
2562
|
let t = runq.denseThreads[j];
|
|
@@ -2464,7 +2565,7 @@
|
|
|
2464
2565
|
let add = false;
|
|
2465
2566
|
switch (i.op) {
|
|
2466
2567
|
case Inst.MATCH:
|
|
2467
|
-
if (anchor === RE2Flags.ANCHOR_BOTH &&
|
|
2568
|
+
if (anchor === RE2Flags.ANCHOR_BOTH && currentPos !== input.endPos()) break;
|
|
2468
2569
|
matches.add(i.arg); // Record the matched Set ID
|
|
2469
2570
|
break;
|
|
2470
2571
|
case Inst.RUNE:
|
|
@@ -2492,11 +2593,11 @@
|
|
|
2492
2593
|
}
|
|
2493
2594
|
runq.clear();
|
|
2494
2595
|
if (width === 0) break;
|
|
2495
|
-
|
|
2596
|
+
currentPos += width;
|
|
2496
2597
|
rune = rune1;
|
|
2497
2598
|
width = width1;
|
|
2498
2599
|
if (rune !== -1) {
|
|
2499
|
-
r = input.step(
|
|
2600
|
+
r = input.step(currentPos + width);
|
|
2500
2601
|
rune1 = r >> 3;
|
|
2501
2602
|
width1 = r & 7;
|
|
2502
2603
|
}
|