re2js 2.6.1 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build/index.cjs.cjs +26 -10
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +26 -10
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +26 -10
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -733,7 +733,7 @@ negative.test('foobar'); // false
|
|
|
733
733
|
|
|
734
734
|
1. **Performance Overhead:** If a regex contains a lookbehind, the engine is forced to safely bypass the ultra-fast Lazy DFA and OnePass engines. It evaluates the lookbehinds using parallel automata running on the NFA (Pike VM). While execution remains mathematically safe and linear $O(n)$, the NFA engine is generally slower than the DFA fast-paths. Use lookbehinds only when necessary.
|
|
735
735
|
2. **Prefix Acceleration is Disabled:** To ensure the parallel tracking automata initialize correctly, high-speed string prefix skipping (e.g., using `indexOf` to jump to a starting literal) is disabled when lookbehinds are present.
|
|
736
|
-
3. **Captureless Guarantee:** To prevent state-explosion vulnerabilities, lookbehinds are strictly evaluated as *captureless*. If you include a capturing group inside a lookbehind (e.g., `(?<=(foo))bar`), the engine will
|
|
736
|
+
3. **Captureless Guarantee:** To prevent state-explosion vulnerabilities and maintain strict safety invariants, lookbehinds are strictly evaluated as *captureless*. If you attempt to include a capturing group inside a lookbehind (e.g., `(?<=(foo))bar`), the engine will proactively throw a `SyntaxError` at compile time. Use non-capturing groups `(?:...)` instead.
|
|
737
737
|
|
|
738
738
|
|
|
739
739
|
## Development
|
package/build/index.cjs.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.6.1
|
|
5
|
+
* @version v2.7.0
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -2155,9 +2155,9 @@ class Inst {
|
|
|
2155
2155
|
case Inst.NOP:
|
|
2156
2156
|
return `nop -> ${this.out}`;
|
|
2157
2157
|
case Inst.LB_WRITE:
|
|
2158
|
-
return `lbwrite ${this.
|
|
2158
|
+
return `lbwrite ${this.arg} -> ${this.out}`;
|
|
2159
2159
|
case Inst.LB_CHECK:
|
|
2160
|
-
return `lbcheck ${this.
|
|
2160
|
+
return `lbcheck ${this.arg} -> ${this.out}`;
|
|
2161
2161
|
case Inst.RUNE:
|
|
2162
2162
|
if (this.runes === null) {
|
|
2163
2163
|
return 'rune <null>';
|
|
@@ -2596,17 +2596,17 @@ class Machine {
|
|
|
2596
2596
|
continue;
|
|
2597
2597
|
}
|
|
2598
2598
|
case Inst.LB_WRITE:
|
|
2599
|
-
this.lbTable[Math.abs(inst.
|
|
2599
|
+
this.lbTable[Math.abs(inst.arg)] = pos;
|
|
2600
2600
|
pc = inst.out;
|
|
2601
2601
|
continue;
|
|
2602
2602
|
case Inst.LB_CHECK:
|
|
2603
|
-
if (inst.
|
|
2603
|
+
if (inst.arg > 0) {
|
|
2604
2604
|
// Positive Lookbehind
|
|
2605
|
-
if (this.lbTable[inst.
|
|
2605
|
+
if (this.lbTable[inst.arg] === pos) {
|
|
2606
2606
|
pc = inst.out; // Flattened tail recursion
|
|
2607
2607
|
continue;
|
|
2608
2608
|
}
|
|
2609
|
-
} else if (this.lbTable[-inst.
|
|
2609
|
+
} else if (this.lbTable[-inst.arg] !== pos) {
|
|
2610
2610
|
// Negative Lookbehind
|
|
2611
2611
|
pc = inst.out; // Flattened tail recursion
|
|
2612
2612
|
continue;
|
|
@@ -4686,7 +4686,7 @@ class Compiler {
|
|
|
4686
4686
|
}
|
|
4687
4687
|
lookBehind(a, lb) {
|
|
4688
4688
|
const id = this.newInst(Inst.LB_WRITE);
|
|
4689
|
-
this.prog.getInst(id.i).
|
|
4689
|
+
this.prog.getInst(id.i).arg = lb;
|
|
4690
4690
|
|
|
4691
4691
|
// Create the prefix wildcard `.*` for the lookbehind automaton
|
|
4692
4692
|
const any = this.rune(Compiler.ANY_RUNE(), 0);
|
|
@@ -4694,7 +4694,7 @@ class Compiler {
|
|
|
4694
4694
|
const lbAutomaton = this.cat(dotStar, a);
|
|
4695
4695
|
this.prog.patch(lbAutomaton.out, id.i);
|
|
4696
4696
|
const checkId = this.newInst(Inst.LB_CHECK);
|
|
4697
|
-
this.prog.getInst(checkId.i).
|
|
4697
|
+
this.prog.getInst(checkId.i).arg = lb;
|
|
4698
4698
|
|
|
4699
4699
|
// Save the starting point of this lookbehind automaton
|
|
4700
4700
|
this.prog.lbStarts.push(lbAutomaton.i);
|
|
@@ -5469,6 +5469,7 @@ class Parser {
|
|
|
5469
5469
|
static ERR_UNEXPECTED_PAREN = 'unexpected )';
|
|
5470
5470
|
static ERR_NESTING_DEPTH = 'expression nests too deeply';
|
|
5471
5471
|
static ERR_LARGE = 'expression too large';
|
|
5472
|
+
static ERR_INVALID_CAPTURE_IN_LOOKBEHIND = 'invalid capture in lookbehind';
|
|
5472
5473
|
|
|
5473
5474
|
// maxHeight is the maximum height of a regexp parse tree.
|
|
5474
5475
|
// It is somewhat arbitrarily chosen, but the idea is to be large enough
|
|
@@ -5872,6 +5873,18 @@ class Parser {
|
|
|
5872
5873
|
}
|
|
5873
5874
|
return x;
|
|
5874
5875
|
}
|
|
5876
|
+
|
|
5877
|
+
// recursively check for captures
|
|
5878
|
+
static hasCapture(re) {
|
|
5879
|
+
if (re === null) return false;
|
|
5880
|
+
if (re.op === Regexp.Op.CAPTURE) return true;
|
|
5881
|
+
if (re.subs) {
|
|
5882
|
+
for (let sub of re.subs) {
|
|
5883
|
+
if (Parser.hasCapture(sub)) return true;
|
|
5884
|
+
}
|
|
5885
|
+
}
|
|
5886
|
+
return false;
|
|
5887
|
+
}
|
|
5875
5888
|
constructor(wholeRegexp, flags = 0) {
|
|
5876
5889
|
this.wholeRegexp = wholeRegexp;
|
|
5877
5890
|
// Flags control the behavior of the parser and record information about
|
|
@@ -6555,7 +6568,7 @@ class Parser {
|
|
|
6555
6568
|
case 1:
|
|
6556
6569
|
// Impossible but handle.
|
|
6557
6570
|
re.op = Regexp.Op.EMPTY_MATCH;
|
|
6558
|
-
re.subs =
|
|
6571
|
+
re.subs = Regexp.emptySubs();
|
|
6559
6572
|
break;
|
|
6560
6573
|
case 2:
|
|
6561
6574
|
{
|
|
@@ -7006,6 +7019,9 @@ class Parser {
|
|
|
7006
7019
|
|
|
7007
7020
|
// Handle lookbehinds
|
|
7008
7021
|
if (re2.lb !== 0) {
|
|
7022
|
+
if (Parser.hasCapture(re1)) {
|
|
7023
|
+
throw new RE2JSSyntaxException(Parser.ERR_INVALID_CAPTURE_IN_LOOKBEHIND, this.wholeRegexp);
|
|
7024
|
+
}
|
|
7009
7025
|
if (re2.lb > 0) {
|
|
7010
7026
|
re2.op = Regexp.Op.PLB;
|
|
7011
7027
|
} else {
|