re2js 2.6.1 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -733,7 +733,7 @@ negative.test('foobar'); // false
733
733
 
734
734
  1. **Performance Overhead:** If a regex contains a lookbehind, the engine is forced to safely bypass the ultra-fast Lazy DFA and OnePass engines. It evaluates the lookbehinds using parallel automata running on the NFA (Pike VM). While execution remains mathematically safe and linear $O(n)$, the NFA engine is generally slower than the DFA fast-paths. Use lookbehinds only when necessary.
735
735
  2. **Prefix Acceleration is Disabled:** To ensure the parallel tracking automata initialize correctly, high-speed string prefix skipping (e.g., using `indexOf` to jump to a starting literal) is disabled when lookbehinds are present.
736
- 3. **Captureless Guarantee:** To prevent state-explosion vulnerabilities, lookbehinds are strictly evaluated as *captureless*. If you include a capturing group inside a lookbehind (e.g., `(?<=(foo))bar`), the engine will match successfully, but `group(1)` will safely return `null`.
736
+ 3. **Captureless Guarantee:** To prevent state-explosion vulnerabilities and maintain strict safety invariants, lookbehinds are strictly evaluated as *captureless*. If you attempt to include a capturing group inside a lookbehind (e.g., `(?<=(foo))bar`), the engine will proactively throw a `SyntaxError` at compile time. Use non-capturing groups `(?:...)` instead.
737
737
 
738
738
 
739
739
  ## Development
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.6.1
5
+ * @version v2.7.0
6
6
  * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -2155,9 +2155,9 @@ class Inst {
2155
2155
  case Inst.NOP:
2156
2156
  return `nop -> ${this.out}`;
2157
2157
  case Inst.LB_WRITE:
2158
- return `lbwrite ${this.lb} -> ${this.out}`;
2158
+ return `lbwrite ${this.arg} -> ${this.out}`;
2159
2159
  case Inst.LB_CHECK:
2160
- return `lbcheck ${this.lb} -> ${this.out}, ${this.arg}`;
2160
+ return `lbcheck ${this.arg} -> ${this.out}`;
2161
2161
  case Inst.RUNE:
2162
2162
  if (this.runes === null) {
2163
2163
  return 'rune <null>';
@@ -2596,17 +2596,17 @@ class Machine {
2596
2596
  continue;
2597
2597
  }
2598
2598
  case Inst.LB_WRITE:
2599
- this.lbTable[Math.abs(inst.lb)] = pos;
2599
+ this.lbTable[Math.abs(inst.arg)] = pos;
2600
2600
  pc = inst.out;
2601
2601
  continue;
2602
2602
  case Inst.LB_CHECK:
2603
- if (inst.lb > 0) {
2603
+ if (inst.arg > 0) {
2604
2604
  // Positive Lookbehind
2605
- if (this.lbTable[inst.lb] === pos) {
2605
+ if (this.lbTable[inst.arg] === pos) {
2606
2606
  pc = inst.out; // Flattened tail recursion
2607
2607
  continue;
2608
2608
  }
2609
- } else if (this.lbTable[-inst.lb] !== pos) {
2609
+ } else if (this.lbTable[-inst.arg] !== pos) {
2610
2610
  // Negative Lookbehind
2611
2611
  pc = inst.out; // Flattened tail recursion
2612
2612
  continue;
@@ -4686,7 +4686,7 @@ class Compiler {
4686
4686
  }
4687
4687
  lookBehind(a, lb) {
4688
4688
  const id = this.newInst(Inst.LB_WRITE);
4689
- this.prog.getInst(id.i).lb = lb;
4689
+ this.prog.getInst(id.i).arg = lb;
4690
4690
 
4691
4691
  // Create the prefix wildcard `.*` for the lookbehind automaton
4692
4692
  const any = this.rune(Compiler.ANY_RUNE(), 0);
@@ -4694,7 +4694,7 @@ class Compiler {
4694
4694
  const lbAutomaton = this.cat(dotStar, a);
4695
4695
  this.prog.patch(lbAutomaton.out, id.i);
4696
4696
  const checkId = this.newInst(Inst.LB_CHECK);
4697
- this.prog.getInst(checkId.i).lb = lb;
4697
+ this.prog.getInst(checkId.i).arg = lb;
4698
4698
 
4699
4699
  // Save the starting point of this lookbehind automaton
4700
4700
  this.prog.lbStarts.push(lbAutomaton.i);
@@ -5469,6 +5469,7 @@ class Parser {
5469
5469
  static ERR_UNEXPECTED_PAREN = 'unexpected )';
5470
5470
  static ERR_NESTING_DEPTH = 'expression nests too deeply';
5471
5471
  static ERR_LARGE = 'expression too large';
5472
+ static ERR_INVALID_CAPTURE_IN_LOOKBEHIND = 'invalid capture in lookbehind';
5472
5473
 
5473
5474
  // maxHeight is the maximum height of a regexp parse tree.
5474
5475
  // It is somewhat arbitrarily chosen, but the idea is to be large enough
@@ -5872,6 +5873,18 @@ class Parser {
5872
5873
  }
5873
5874
  return x;
5874
5875
  }
5876
+
5877
+ // recursively check for captures
5878
+ static hasCapture(re) {
5879
+ if (re === null) return false;
5880
+ if (re.op === Regexp.Op.CAPTURE) return true;
5881
+ if (re.subs) {
5882
+ for (let sub of re.subs) {
5883
+ if (Parser.hasCapture(sub)) return true;
5884
+ }
5885
+ }
5886
+ return false;
5887
+ }
5875
5888
  constructor(wholeRegexp, flags = 0) {
5876
5889
  this.wholeRegexp = wholeRegexp;
5877
5890
  // Flags control the behavior of the parser and record information about
@@ -6555,7 +6568,7 @@ class Parser {
6555
6568
  case 1:
6556
6569
  // Impossible but handle.
6557
6570
  re.op = Regexp.Op.EMPTY_MATCH;
6558
- re.subs = null;
6571
+ re.subs = Regexp.emptySubs();
6559
6572
  break;
6560
6573
  case 2:
6561
6574
  {
@@ -7006,6 +7019,9 @@ class Parser {
7006
7019
 
7007
7020
  // Handle lookbehinds
7008
7021
  if (re2.lb !== 0) {
7022
+ if (Parser.hasCapture(re1)) {
7023
+ throw new RE2JSSyntaxException(Parser.ERR_INVALID_CAPTURE_IN_LOOKBEHIND, this.wholeRegexp);
7024
+ }
7009
7025
  if (re2.lb > 0) {
7010
7026
  re2.op = Regexp.Op.PLB;
7011
7027
  } else {