re2js 2.6.1 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build/index.cjs.cjs +83 -44
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +83 -44
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +83 -44
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.6.1
|
|
5
|
+
* @version v2.7.1
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -2159,9 +2159,9 @@
|
|
|
2159
2159
|
case Inst.NOP:
|
|
2160
2160
|
return `nop -> ${this.out}`;
|
|
2161
2161
|
case Inst.LB_WRITE:
|
|
2162
|
-
return `lbwrite ${this.
|
|
2162
|
+
return `lbwrite ${this.arg} -> ${this.out}`;
|
|
2163
2163
|
case Inst.LB_CHECK:
|
|
2164
|
-
return `lbcheck ${this.
|
|
2164
|
+
return `lbcheck ${this.arg} -> ${this.out}`;
|
|
2165
2165
|
case Inst.RUNE:
|
|
2166
2166
|
if (this.runes === null) {
|
|
2167
2167
|
return 'rune <null>';
|
|
@@ -2341,30 +2341,35 @@
|
|
|
2341
2341
|
}
|
|
2342
2342
|
this.matched = false;
|
|
2343
2343
|
this.matchcap.fill(-1);
|
|
2344
|
+
|
|
2345
|
+
// Lookbehinds must scan from the beginning of the string to build their state table,
|
|
2346
|
+
// even if the main pattern search is requested to start mid-string.
|
|
2347
|
+
let currentPos = this.prog.numLb > 0 ? 0 : pos;
|
|
2348
|
+
let matchStartPos = pos;
|
|
2344
2349
|
let runq = this.q0;
|
|
2345
2350
|
let nextq = this.q1;
|
|
2346
|
-
let r = input.step(
|
|
2351
|
+
let r = input.step(currentPos);
|
|
2347
2352
|
let rune = r >> 3;
|
|
2348
2353
|
let width = r & 7;
|
|
2349
2354
|
let rune1 = -1;
|
|
2350
2355
|
let width1 = 0;
|
|
2351
2356
|
if (r !== MachineInputBase.EOF()) {
|
|
2352
|
-
r = input.step(
|
|
2357
|
+
r = input.step(currentPos + width);
|
|
2353
2358
|
rune1 = r >> 3;
|
|
2354
2359
|
width1 = r & 7;
|
|
2355
2360
|
}
|
|
2356
2361
|
let flag;
|
|
2357
|
-
if (
|
|
2362
|
+
if (currentPos === 0) {
|
|
2358
2363
|
flag = Utils.emptyOpContext(-1, rune);
|
|
2359
2364
|
} else {
|
|
2360
|
-
flag = input.context(
|
|
2365
|
+
flag = input.context(currentPos);
|
|
2361
2366
|
}
|
|
2362
2367
|
while (true) {
|
|
2363
2368
|
if (runq.isEmpty()) {
|
|
2364
|
-
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 &&
|
|
2369
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && currentPos !== 0) {
|
|
2365
2370
|
break;
|
|
2366
2371
|
}
|
|
2367
|
-
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) &&
|
|
2372
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && currentPos !== 0) {
|
|
2368
2373
|
break;
|
|
2369
2374
|
}
|
|
2370
2375
|
if (this.matched) {
|
|
@@ -2374,43 +2379,50 @@
|
|
|
2374
2379
|
// Fast-forwarding the string pointer will skip over the positions where
|
|
2375
2380
|
// the parallel lookbehind automata need to be spawned.
|
|
2376
2381
|
if (this.prog.numLb === 0 && !(this.re2.prefix.length === 0) && rune1 !== this.re2.prefixRune && input.canCheckPrefix()) {
|
|
2377
|
-
const advance = input.index(this.re2,
|
|
2382
|
+
const advance = input.index(this.re2, currentPos);
|
|
2378
2383
|
if (advance < 0) {
|
|
2379
2384
|
break;
|
|
2380
2385
|
}
|
|
2381
|
-
|
|
2382
|
-
r = input.step(
|
|
2386
|
+
currentPos += advance;
|
|
2387
|
+
r = input.step(currentPos);
|
|
2383
2388
|
rune = r >> 3;
|
|
2384
2389
|
width = r & 7;
|
|
2385
|
-
r = input.step(
|
|
2390
|
+
r = input.step(currentPos + width);
|
|
2386
2391
|
rune1 = r >> 3;
|
|
2387
2392
|
width1 = r & 7;
|
|
2388
2393
|
}
|
|
2389
2394
|
}
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
// Spawn Lookbehind threads BEFORE the main pattern
|
|
2395
|
+
|
|
2396
|
+
// Optimize lookbehind spawning. Because lookbehinds are prefixed with `.*` by the compiler,
|
|
2397
|
+
// they only need to be spawned exactly once at the beginning of the string (currentPos === 0).
|
|
2398
|
+
if (currentPos === 0 && this.prog.numLb > 0) {
|
|
2395
2399
|
for (let i = 0; i < this.prog.lbStarts.length; i++) {
|
|
2396
|
-
this.add(runq, this.prog.lbStarts[i],
|
|
2400
|
+
this.add(runq, this.prog.lbStarts[i], currentPos, this.matchcap, flag, null);
|
|
2401
|
+
}
|
|
2402
|
+
}
|
|
2403
|
+
if (!this.matched && (currentPos === 0 || anchor === RE2Flags.UNANCHORED)) {
|
|
2404
|
+
// ONLY spawn the main pattern if we have reached the requested search start boundary
|
|
2405
|
+
if (currentPos >= matchStartPos) {
|
|
2406
|
+
if (this.ncap > 0) {
|
|
2407
|
+
this.matchcap[0] = currentPos;
|
|
2408
|
+
}
|
|
2409
|
+
this.add(runq, this.prog.start, currentPos, this.matchcap, flag, null);
|
|
2397
2410
|
}
|
|
2398
|
-
this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
|
|
2399
2411
|
}
|
|
2400
|
-
const nextPos =
|
|
2412
|
+
const nextPos = currentPos + width;
|
|
2401
2413
|
flag = input.context(nextPos);
|
|
2402
|
-
this.step(runq, nextq,
|
|
2414
|
+
this.step(runq, nextq, currentPos, nextPos, rune, flag, anchor, currentPos === input.endPos());
|
|
2403
2415
|
if (width === 0) {
|
|
2404
2416
|
break;
|
|
2405
2417
|
}
|
|
2406
2418
|
if (this.ncap === 0 && this.matched) {
|
|
2407
2419
|
break;
|
|
2408
2420
|
}
|
|
2409
|
-
|
|
2421
|
+
currentPos += width;
|
|
2410
2422
|
rune = rune1;
|
|
2411
2423
|
width = width1;
|
|
2412
2424
|
if (rune !== -1) {
|
|
2413
|
-
r = input.step(
|
|
2425
|
+
r = input.step(currentPos + width);
|
|
2414
2426
|
rune1 = r >> 3;
|
|
2415
2427
|
width1 = r & 7;
|
|
2416
2428
|
}
|
|
@@ -2427,35 +2439,46 @@
|
|
|
2427
2439
|
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2428
2440
|
return [];
|
|
2429
2441
|
}
|
|
2442
|
+
|
|
2443
|
+
// Lookbehinds must scan from the beginning of the string to build their state table,
|
|
2444
|
+
// even if the main pattern search is requested to start mid-string.
|
|
2445
|
+
let currentPos = this.prog.numLb > 0 ? 0 : pos;
|
|
2446
|
+
let matchStartPos = pos;
|
|
2430
2447
|
let runq = this.q0;
|
|
2431
2448
|
let nextq = this.q1;
|
|
2432
|
-
let r = input.step(
|
|
2449
|
+
let r = input.step(currentPos);
|
|
2433
2450
|
let rune = r >> 3;
|
|
2434
2451
|
let width = r & 7;
|
|
2435
2452
|
let rune1 = -1;
|
|
2436
2453
|
let width1 = 0;
|
|
2437
2454
|
if (r !== MachineInputBase.EOF()) {
|
|
2438
|
-
r = input.step(
|
|
2455
|
+
r = input.step(currentPos + width);
|
|
2439
2456
|
rune1 = r >> 3;
|
|
2440
2457
|
width1 = r & 7;
|
|
2441
2458
|
}
|
|
2442
|
-
let flag =
|
|
2459
|
+
let flag = currentPos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(currentPos);
|
|
2443
2460
|
const matches = new Set();
|
|
2444
2461
|
while (true) {
|
|
2445
2462
|
if (runq.isEmpty()) {
|
|
2446
|
-
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 &&
|
|
2447
|
-
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) &&
|
|
2463
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && currentPos !== 0) break;
|
|
2464
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && currentPos !== 0) {
|
|
2448
2465
|
break;
|
|
2449
2466
|
}
|
|
2450
2467
|
}
|
|
2451
|
-
|
|
2452
|
-
|
|
2468
|
+
|
|
2469
|
+
// Optimize lookbehind spawning to exactly once at BOF
|
|
2470
|
+
if (currentPos === 0 && this.prog.numLb > 0) {
|
|
2453
2471
|
for (let i = 0; i < this.prog.lbStarts.length; i++) {
|
|
2454
|
-
this.add(runq, this.prog.lbStarts[i],
|
|
2472
|
+
this.add(runq, this.prog.lbStarts[i], currentPos, this.matchcap, flag, null);
|
|
2473
|
+
}
|
|
2474
|
+
}
|
|
2475
|
+
if (currentPos === 0 || anchor === RE2Flags.UNANCHORED) {
|
|
2476
|
+
// ONLY spawn the main pattern if we have reached the requested search start boundary
|
|
2477
|
+
if (currentPos >= matchStartPos) {
|
|
2478
|
+
this.add(runq, this.prog.start, currentPos, this.matchcap, flag, null);
|
|
2455
2479
|
}
|
|
2456
|
-
this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
|
|
2457
2480
|
}
|
|
2458
|
-
const nextPos =
|
|
2481
|
+
const nextPos = currentPos + width;
|
|
2459
2482
|
flag = input.context(nextPos);
|
|
2460
2483
|
for (let j = 0; j < runq.size; j++) {
|
|
2461
2484
|
let t = runq.denseThreads[j];
|
|
@@ -2464,7 +2487,7 @@
|
|
|
2464
2487
|
let add = false;
|
|
2465
2488
|
switch (i.op) {
|
|
2466
2489
|
case Inst.MATCH:
|
|
2467
|
-
if (anchor === RE2Flags.ANCHOR_BOTH &&
|
|
2490
|
+
if (anchor === RE2Flags.ANCHOR_BOTH && currentPos !== input.endPos()) break;
|
|
2468
2491
|
matches.add(i.arg); // Record the matched Set ID
|
|
2469
2492
|
break;
|
|
2470
2493
|
case Inst.RUNE:
|
|
@@ -2492,11 +2515,11 @@
|
|
|
2492
2515
|
}
|
|
2493
2516
|
runq.clear();
|
|
2494
2517
|
if (width === 0) break;
|
|
2495
|
-
|
|
2518
|
+
currentPos += width;
|
|
2496
2519
|
rune = rune1;
|
|
2497
2520
|
width = width1;
|
|
2498
2521
|
if (rune !== -1) {
|
|
2499
|
-
r = input.step(
|
|
2522
|
+
r = input.step(currentPos + width);
|
|
2500
2523
|
rune1 = r >> 3;
|
|
2501
2524
|
width1 = r & 7;
|
|
2502
2525
|
}
|
|
@@ -2600,17 +2623,17 @@
|
|
|
2600
2623
|
continue;
|
|
2601
2624
|
}
|
|
2602
2625
|
case Inst.LB_WRITE:
|
|
2603
|
-
this.lbTable[Math.abs(inst.
|
|
2626
|
+
this.lbTable[Math.abs(inst.arg)] = pos;
|
|
2604
2627
|
pc = inst.out;
|
|
2605
2628
|
continue;
|
|
2606
2629
|
case Inst.LB_CHECK:
|
|
2607
|
-
if (inst.
|
|
2630
|
+
if (inst.arg > 0) {
|
|
2608
2631
|
// Positive Lookbehind
|
|
2609
|
-
if (this.lbTable[inst.
|
|
2632
|
+
if (this.lbTable[inst.arg] === pos) {
|
|
2610
2633
|
pc = inst.out; // Flattened tail recursion
|
|
2611
2634
|
continue;
|
|
2612
2635
|
}
|
|
2613
|
-
} else if (this.lbTable[-inst.
|
|
2636
|
+
} else if (this.lbTable[-inst.arg] !== pos) {
|
|
2614
2637
|
// Negative Lookbehind
|
|
2615
2638
|
pc = inst.out; // Flattened tail recursion
|
|
2616
2639
|
continue;
|
|
@@ -4690,7 +4713,7 @@
|
|
|
4690
4713
|
}
|
|
4691
4714
|
lookBehind(a, lb) {
|
|
4692
4715
|
const id = this.newInst(Inst.LB_WRITE);
|
|
4693
|
-
this.prog.getInst(id.i).
|
|
4716
|
+
this.prog.getInst(id.i).arg = lb;
|
|
4694
4717
|
|
|
4695
4718
|
// Create the prefix wildcard `.*` for the lookbehind automaton
|
|
4696
4719
|
const any = this.rune(Compiler.ANY_RUNE(), 0);
|
|
@@ -4698,7 +4721,7 @@
|
|
|
4698
4721
|
const lbAutomaton = this.cat(dotStar, a);
|
|
4699
4722
|
this.prog.patch(lbAutomaton.out, id.i);
|
|
4700
4723
|
const checkId = this.newInst(Inst.LB_CHECK);
|
|
4701
|
-
this.prog.getInst(checkId.i).
|
|
4724
|
+
this.prog.getInst(checkId.i).arg = lb;
|
|
4702
4725
|
|
|
4703
4726
|
// Save the starting point of this lookbehind automaton
|
|
4704
4727
|
this.prog.lbStarts.push(lbAutomaton.i);
|
|
@@ -5473,6 +5496,7 @@
|
|
|
5473
5496
|
static ERR_UNEXPECTED_PAREN = 'unexpected )';
|
|
5474
5497
|
static ERR_NESTING_DEPTH = 'expression nests too deeply';
|
|
5475
5498
|
static ERR_LARGE = 'expression too large';
|
|
5499
|
+
static ERR_INVALID_CAPTURE_IN_LOOKBEHIND = 'invalid capture in lookbehind';
|
|
5476
5500
|
|
|
5477
5501
|
// maxHeight is the maximum height of a regexp parse tree.
|
|
5478
5502
|
// It is somewhat arbitrarily chosen, but the idea is to be large enough
|
|
@@ -5876,6 +5900,18 @@
|
|
|
5876
5900
|
}
|
|
5877
5901
|
return x;
|
|
5878
5902
|
}
|
|
5903
|
+
|
|
5904
|
+
// recursively check for captures
|
|
5905
|
+
static hasCapture(re) {
|
|
5906
|
+
if (re === null) return false;
|
|
5907
|
+
if (re.op === Regexp.Op.CAPTURE) return true;
|
|
5908
|
+
if (re.subs) {
|
|
5909
|
+
for (let sub of re.subs) {
|
|
5910
|
+
if (Parser.hasCapture(sub)) return true;
|
|
5911
|
+
}
|
|
5912
|
+
}
|
|
5913
|
+
return false;
|
|
5914
|
+
}
|
|
5879
5915
|
constructor(wholeRegexp, flags = 0) {
|
|
5880
5916
|
this.wholeRegexp = wholeRegexp;
|
|
5881
5917
|
// Flags control the behavior of the parser and record information about
|
|
@@ -6559,7 +6595,7 @@
|
|
|
6559
6595
|
case 1:
|
|
6560
6596
|
// Impossible but handle.
|
|
6561
6597
|
re.op = Regexp.Op.EMPTY_MATCH;
|
|
6562
|
-
re.subs =
|
|
6598
|
+
re.subs = Regexp.emptySubs();
|
|
6563
6599
|
break;
|
|
6564
6600
|
case 2:
|
|
6565
6601
|
{
|
|
@@ -7010,6 +7046,9 @@
|
|
|
7010
7046
|
|
|
7011
7047
|
// Handle lookbehinds
|
|
7012
7048
|
if (re2.lb !== 0) {
|
|
7049
|
+
if (Parser.hasCapture(re1)) {
|
|
7050
|
+
throw new RE2JSSyntaxException(Parser.ERR_INVALID_CAPTURE_IN_LOOKBEHIND, this.wholeRegexp);
|
|
7051
|
+
}
|
|
7013
7052
|
if (re2.lb > 0) {
|
|
7014
7053
|
re2.op = Regexp.Op.PLB;
|
|
7015
7054
|
} else {
|