re2js 0.4.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -0
- package/build/index.cjs.cjs +291 -22
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +23 -0
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +291 -22
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +291 -22
- package/build/index.umd.js.map +1 -1
- package/package.json +13 -13
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version
|
|
5
|
+
* @version v1.0.0
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -328,7 +328,7 @@
|
|
|
328
328
|
// Minimum and maximum runes involved in folding.
|
|
329
329
|
// Checked during test.
|
|
330
330
|
static MIN_FOLD = 0x0041;
|
|
331
|
-
static MAX_FOLD =
|
|
331
|
+
static MAX_FOLD = 0x1e943;
|
|
332
332
|
|
|
333
333
|
// is32 uses binary search to test whether rune is in the specified
|
|
334
334
|
// slice of 32-bit ranges.
|
|
@@ -932,6 +932,7 @@
|
|
|
932
932
|
// The group indexes, in [start, end) pairs. Zeroth pair is overall match.
|
|
933
933
|
this.groups = [];
|
|
934
934
|
this.namedGroups = re2.namedGroups;
|
|
935
|
+
this.numberOfInstructions = re2.numberOfInstructions();
|
|
935
936
|
if (input instanceof MatcherInputBase) {
|
|
936
937
|
this.resetMatcherInput(input);
|
|
937
938
|
} else if (Array.isArray(input)) {
|
|
@@ -1018,6 +1019,20 @@
|
|
|
1018
1019
|
return this.groups[2 * group + 1];
|
|
1019
1020
|
}
|
|
1020
1021
|
|
|
1022
|
+
/**
|
|
1023
|
+
* Returns the program size of this pattern.
|
|
1024
|
+
*
|
|
1025
|
+
* <p>
|
|
1026
|
+
* Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
|
|
1027
|
+
* "cost". Larger numbers are more expensive than smaller numbers.
|
|
1028
|
+
* </p>
|
|
1029
|
+
*
|
|
1030
|
+
* @return the program size of this pattern
|
|
1031
|
+
*/
|
|
1032
|
+
programSize() {
|
|
1033
|
+
return this.numberOfInstructions;
|
|
1034
|
+
}
|
|
1035
|
+
|
|
1021
1036
|
/**
|
|
1022
1037
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
1023
1038
|
* @param {string|number} [group=0]
|
|
@@ -1658,7 +1673,7 @@
|
|
|
1658
1673
|
// subexpressions, if any. Never null.
|
|
1659
1674
|
// subs[0] is used as the freelist.
|
|
1660
1675
|
this.subs = Regexp.emptySubs();
|
|
1661
|
-
this.runes =
|
|
1676
|
+
this.runes = []; // matched runes, for LITERAL, CHAR_CLASS
|
|
1662
1677
|
this.min = 0; // min for REPEAT
|
|
1663
1678
|
this.max = 0; // max for REPEAT
|
|
1664
1679
|
this.cap = 0; // capturing index, for CAPTURE
|
|
@@ -1668,7 +1683,7 @@
|
|
|
1668
1683
|
reinit() {
|
|
1669
1684
|
this.flags = 0;
|
|
1670
1685
|
this.subs = Regexp.emptySubs();
|
|
1671
|
-
this.runes =
|
|
1686
|
+
this.runes = [];
|
|
1672
1687
|
this.cap = 0;
|
|
1673
1688
|
this.min = 0;
|
|
1674
1689
|
this.max = 0;
|
|
@@ -1969,7 +1984,7 @@
|
|
|
1969
1984
|
this.arg = 0; // ALT, ALT_MATCH, CAPTURE, EMPTY_WIDTH
|
|
1970
1985
|
// length==1 => exact match
|
|
1971
1986
|
// otherwise a list of [lo,hi] pairs. hi is *inclusive*.
|
|
1972
|
-
this.runes =
|
|
1987
|
+
this.runes = [];
|
|
1973
1988
|
}
|
|
1974
1989
|
|
|
1975
1990
|
// MatchRune returns true if the instruction matches (and consumes) r.
|
|
@@ -2363,7 +2378,7 @@
|
|
|
2363
2378
|
i.runes = runes;
|
|
2364
2379
|
flags &= RE2Flags.FOLD_CASE;
|
|
2365
2380
|
if (runes.length !== 1 || Unicode.simpleFold(runes[0]) === runes[0]) {
|
|
2366
|
-
flags &=
|
|
2381
|
+
flags &= -2;
|
|
2367
2382
|
}
|
|
2368
2383
|
i.arg = flags;
|
|
2369
2384
|
f.out = f.i << 1;
|
|
@@ -2480,7 +2495,7 @@
|
|
|
2480
2495
|
const nsub = Simplify.simplify(sub);
|
|
2481
2496
|
if (nre === re && nsub !== sub) {
|
|
2482
2497
|
nre = Regexp.fromRegexp(re);
|
|
2483
|
-
nre.runes =
|
|
2498
|
+
nre.runes = [];
|
|
2484
2499
|
nre.subs = re.subs.slice(0, re.subs.length);
|
|
2485
2500
|
}
|
|
2486
2501
|
if (nre !== re) {
|
|
@@ -2621,7 +2636,7 @@
|
|
|
2621
2636
|
const code1 = [0x30, 0x39];
|
|
2622
2637
|
const code2 = [0x9, 0xa, 0xc, 0xd, 0x20, 0x20];
|
|
2623
2638
|
const code3 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a];
|
|
2624
|
-
const PERL_GROUPS = new Map([['\\d', new CharGroup(
|
|
2639
|
+
const PERL_GROUPS = new Map([['\\d', new CharGroup(1, code1)], ['\\D', new CharGroup(-1, code1)], ['\\s', new CharGroup(1, code2)], ['\\S', new CharGroup(-1, code2)], ['\\w', new CharGroup(1, code3)], ['\\W', new CharGroup(-1, code3)]]);
|
|
2625
2640
|
const code4 = [0x30, 0x39, 0x41, 0x5a, 0x61, 0x7a];
|
|
2626
2641
|
const code5 = [0x41, 0x5a, 0x61, 0x7a];
|
|
2627
2642
|
const code6 = [0x0, 0x7f];
|
|
@@ -2636,7 +2651,7 @@
|
|
|
2636
2651
|
const code15 = [0x41, 0x5a];
|
|
2637
2652
|
const code16 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a];
|
|
2638
2653
|
const code17 = [0x30, 0x39, 0x41, 0x46, 0x61, 0x66];
|
|
2639
|
-
const POSIX_GROUPS = new Map([['[:alnum:]', new CharGroup(
|
|
2654
|
+
const POSIX_GROUPS = new Map([['[:alnum:]', new CharGroup(1, code4)], ['[:^alnum:]', new CharGroup(-1, code4)], ['[:alpha:]', new CharGroup(1, code5)], ['[:^alpha:]', new CharGroup(-1, code5)], ['[:ascii:]', new CharGroup(1, code6)], ['[:^ascii:]', new CharGroup(-1, code6)], ['[:blank:]', new CharGroup(1, code7)], ['[:^blank:]', new CharGroup(-1, code7)], ['[:cntrl:]', new CharGroup(1, code8)], ['[:^cntrl:]', new CharGroup(-1, code8)], ['[:digit:]', new CharGroup(1, code9)], ['[:^digit:]', new CharGroup(-1, code9)], ['[:graph:]', new CharGroup(1, code10)], ['[:^graph:]', new CharGroup(-1, code10)], ['[:lower:]', new CharGroup(1, code11)], ['[:^lower:]', new CharGroup(-1, code11)], ['[:print:]', new CharGroup(1, code12)], ['[:^print:]', new CharGroup(-1, code12)], ['[:punct:]', new CharGroup(1, code13)], ['[:^punct:]', new CharGroup(-1, code13)], ['[:space:]', new CharGroup(1, code14)], ['[:^space:]', new CharGroup(-1, code14)], ['[:upper:]', new CharGroup(1, code15)], ['[:^upper:]', new CharGroup(-1, code15)], ['[:word:]', new CharGroup(1, code16)], ['[:^word:]', new CharGroup(-1, code16)], ['[:xdigit:]', new CharGroup(1, code17)], ['[:^xdigit:]', new CharGroup(-1, code17)]]);
|
|
2640
2655
|
|
|
2641
2656
|
/**
|
|
2642
2657
|
* A "builder"-style helper class for manipulating character classes represented as an array of
|
|
@@ -2675,7 +2690,7 @@
|
|
|
2675
2690
|
// qsortIntPair() quicksorts pairs of ints in |array| according to lt().
|
|
2676
2691
|
// Precondition: |left|, |right|, |this.len| must all be even; |this.len > 1|.
|
|
2677
2692
|
static qsortIntPair(array, left, right) {
|
|
2678
|
-
const pivotIndex = ((left + right) / 2 | 0) &
|
|
2693
|
+
const pivotIndex = ((left + right) / 2 | 0) & -2;
|
|
2679
2694
|
const pivotFrom = array[pivotIndex];
|
|
2680
2695
|
const pivotTo = array[pivotIndex + 1];
|
|
2681
2696
|
let i = left;
|
|
@@ -3060,6 +3075,48 @@
|
|
|
3060
3075
|
static ERR_MISSING_REPEAT_ARGUMENT = 'missing argument to repetition operator';
|
|
3061
3076
|
static ERR_TRAILING_BACKSLASH = 'trailing backslash at end of expression';
|
|
3062
3077
|
static ERR_DUPLICATE_NAMED_CAPTURE = 'duplicate capture group name';
|
|
3078
|
+
static ERR_UNEXPECTED_PAREN = 'unexpected )';
|
|
3079
|
+
static ERR_NESTING_DEPTH = 'expression nests too deeply';
|
|
3080
|
+
static ERR_LARGE = 'expression too large';
|
|
3081
|
+
|
|
3082
|
+
// MAX_HEIGHT is the maximum height of a regexp parse tree.
|
|
3083
|
+
// It is somewhat arbitrarily chosen, but the idea is to be large enough
|
|
3084
|
+
// that no one will actually hit in real use but at the same time small enough
|
|
3085
|
+
// that recursion on the Regexp tree will not hit the 1GB Go stack limit.
|
|
3086
|
+
// The maximum amount of stack for a single recursive frame is probably
|
|
3087
|
+
// closer to 1kB, so this could potentially be raised, but it seems unlikely
|
|
3088
|
+
// that people have regexps nested even this deeply.
|
|
3089
|
+
// We ran a test on Google's C++ code base and turned up only
|
|
3090
|
+
// a single use case with depth > 100; it had depth 128.
|
|
3091
|
+
// Using depth 1000 should be plenty of margin.
|
|
3092
|
+
// As an optimization, we don't even bother calculating heights
|
|
3093
|
+
// until we've allocated at least MAX_HEIGHT Regexp structures.
|
|
3094
|
+
static MAX_HEIGHT = 1000;
|
|
3095
|
+
|
|
3096
|
+
// MAX_SIZE is the maximum size of a compiled regexp in Insts.
|
|
3097
|
+
// It too is somewhat arbitrarily chosen, but the idea is to be large enough
|
|
3098
|
+
// to allow significant regexps while at the same time small enough that
|
|
3099
|
+
// the compiled form will not take up too much memory.
|
|
3100
|
+
// 128 MB is enough for 3.3 million Inst structures, which roughly
|
|
3101
|
+
// corresponds to a 3.3 MB regexp.
|
|
3102
|
+
static MAX_SIZE = 3355443; // 128 << 20 / (5 * 8) (instSize = byte, 2 uint32, slice is 5 64-bit words)
|
|
3103
|
+
|
|
3104
|
+
// MAX_RUNES is the maximum number of runes allowed in a regexp tree,
|
|
3105
|
+
// counting the runes in all the nodes.
|
|
3106
|
+
// Ignoring character classes, this.numRunes is always less than the length of the regexp.
|
|
3107
|
+
// Character classes can make it much larger: each \pL adds 1292 runes.
|
|
3108
|
+
// 128 MB is enough for 32M runes, which is over 26k \pL instances.
|
|
3109
|
+
// Note that repetitions do not make copies of the rune slices,
|
|
3110
|
+
// so \pL{1000} is only one rune slice, not 1000.
|
|
3111
|
+
// We could keep a cache of character classes we've seen,
|
|
3112
|
+
// so that all the \pL we see use the same rune list,
|
|
3113
|
+
// but that doesn't remove the problem entirely:
|
|
3114
|
+
// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].
|
|
3115
|
+
// And because the Rune slice is exposed directly in the Regexp,
|
|
3116
|
+
// there is not an opportunity to change the representation to allow
|
|
3117
|
+
// partial sharing between different character classes.
|
|
3118
|
+
// So the limit is the best we can do.
|
|
3119
|
+
static MAX_RUNES = 33554432; // 128 << 20 / 4 (runeSize, int32 is 4 bytes)
|
|
3063
3120
|
|
|
3064
3121
|
// RangeTables are represented as int[][], a list of triples (start, end,
|
|
3065
3122
|
// stride).
|
|
@@ -3358,7 +3415,7 @@
|
|
|
3358
3415
|
case Codepoint.CODES.get('v'):
|
|
3359
3416
|
return Codepoint.CODES.get('\v');
|
|
3360
3417
|
default:
|
|
3361
|
-
if (!Utils.isalnum(c)) {
|
|
3418
|
+
if (c <= Unicode.MAX_ASCII && !Utils.isalnum(c)) {
|
|
3362
3419
|
return c;
|
|
3363
3420
|
}
|
|
3364
3421
|
break;
|
|
@@ -3392,6 +3449,12 @@
|
|
|
3392
3449
|
// Stack of parsed expressions.
|
|
3393
3450
|
this.stack = [];
|
|
3394
3451
|
this.free = null;
|
|
3452
|
+
// State used by the parser limit checks (see checkLimits).
|
|
3453
|
+
this.numRegexp = 0; // number of regexps allocated
|
|
3454
|
+
this.numRunes = 0; // number of runes in char classes
|
|
3455
|
+
this.repeats = 0; // product of all repetitions seen
|
|
3456
|
+
this.height = null; // regexp height, for height limit check
|
|
3457
|
+
this.size = null; // regexp compiled size, for size limit check
|
|
3395
3458
|
}
|
|
3396
3459
|
|
|
3397
3460
|
// Allocate a Regexp, from the free list if possible.
|
|
@@ -3403,15 +3466,159 @@
|
|
|
3403
3466
|
re.op = op;
|
|
3404
3467
|
} else {
|
|
3405
3468
|
re = new Regexp(op);
|
|
3469
|
+
this.numRegexp += 1;
|
|
3406
3470
|
}
|
|
3407
3471
|
return re;
|
|
3408
3472
|
}
|
|
3409
3473
|
reuse(re) {
|
|
3474
|
+
if (this.height !== null && Object.prototype.hasOwnProperty.call(this.height, re)) {
|
|
3475
|
+
delete this.height[re];
|
|
3476
|
+
}
|
|
3410
3477
|
if (re.subs !== null && re.subs.length > 0) {
|
|
3411
3478
|
re.subs[0] = this.free;
|
|
3412
3479
|
}
|
|
3413
3480
|
this.free = re;
|
|
3414
3481
|
}
|
|
3482
|
+
checkLimits(re) {
|
|
3483
|
+
if (this.numRunes > Parser.MAX_RUNES) {
|
|
3484
|
+
throw new RE2JSSyntaxException(Parser.ERR_LARGE);
|
|
3485
|
+
}
|
|
3486
|
+
this.checkSize(re);
|
|
3487
|
+
this.checkHeight(re);
|
|
3488
|
+
}
|
|
3489
|
+
checkSize(re) {
|
|
3490
|
+
if (this.size === null) {
|
|
3491
|
+
// We haven't started tracking size yet.
|
|
3492
|
+
// Do a relatively cheap check to see if we need to start.
|
|
3493
|
+
// Maintain the product of all the repeats we've seen
|
|
3494
|
+
// and don't track if the total number of regexp nodes
|
|
3495
|
+
// we've seen times the repeat product is in budget.
|
|
3496
|
+
if (this.repeats === 0) {
|
|
3497
|
+
this.repeats = 1;
|
|
3498
|
+
}
|
|
3499
|
+
if (re.op === Regexp.Op.REPEAT) {
|
|
3500
|
+
let n = re.max;
|
|
3501
|
+
if (n === -1) {
|
|
3502
|
+
n = re.min;
|
|
3503
|
+
}
|
|
3504
|
+
if (n <= 0) {
|
|
3505
|
+
n = 1;
|
|
3506
|
+
}
|
|
3507
|
+
if (n > Parser.MAX_SIZE / this.repeats) {
|
|
3508
|
+
this.repeats = Parser.MAX_SIZE;
|
|
3509
|
+
} else {
|
|
3510
|
+
this.repeats *= n;
|
|
3511
|
+
}
|
|
3512
|
+
}
|
|
3513
|
+
if (this.numRegexp < Parser.MAX_SIZE / this.repeats) {
|
|
3514
|
+
return;
|
|
3515
|
+
}
|
|
3516
|
+
|
|
3517
|
+
// We need to start tracking size.
|
|
3518
|
+
// Make the map and belatedly populate it
|
|
3519
|
+
// with info about everything we've constructed so far.
|
|
3520
|
+
this.size = {};
|
|
3521
|
+
for (let reEx of this.stack) {
|
|
3522
|
+
this.checkSize(reEx);
|
|
3523
|
+
}
|
|
3524
|
+
}
|
|
3525
|
+
if (this.calcSize(re, true) > Parser.MAX_SIZE) {
|
|
3526
|
+
throw new RE2JSSyntaxException(Parser.ERR_LARGE);
|
|
3527
|
+
}
|
|
3528
|
+
}
|
|
3529
|
+
calcSize(re, force = false) {
|
|
3530
|
+
if (!force) {
|
|
3531
|
+
if (Object.prototype.hasOwnProperty.call(this.size, re)) {
|
|
3532
|
+
return this.size[re];
|
|
3533
|
+
}
|
|
3534
|
+
}
|
|
3535
|
+
let size = 0;
|
|
3536
|
+
switch (re.op) {
|
|
3537
|
+
case Regexp.Op.LITERAL:
|
|
3538
|
+
{
|
|
3539
|
+
size = re.runes.length;
|
|
3540
|
+
break;
|
|
3541
|
+
}
|
|
3542
|
+
case Regexp.Op.CAPTURE:
|
|
3543
|
+
case Regexp.Op.STAR:
|
|
3544
|
+
{
|
|
3545
|
+
// star can be 1+ or 2+; assume 2 pessimistically
|
|
3546
|
+
size = 2 + this.calcSize(re.subs[0]);
|
|
3547
|
+
break;
|
|
3548
|
+
}
|
|
3549
|
+
case Regexp.Op.PLUS:
|
|
3550
|
+
case Regexp.Op.QUEST:
|
|
3551
|
+
{
|
|
3552
|
+
size = 1 + this.calcSize(re.subs[0]);
|
|
3553
|
+
break;
|
|
3554
|
+
}
|
|
3555
|
+
case Regexp.Op.CONCAT:
|
|
3556
|
+
{
|
|
3557
|
+
for (let sub of re.subs) {
|
|
3558
|
+
size = size + this.calcSize(sub);
|
|
3559
|
+
}
|
|
3560
|
+
break;
|
|
3561
|
+
}
|
|
3562
|
+
case Regexp.Op.ALTERNATE:
|
|
3563
|
+
{
|
|
3564
|
+
for (let sub of re.subs) {
|
|
3565
|
+
size = size + this.calcSize(sub);
|
|
3566
|
+
}
|
|
3567
|
+
if (re.subs.length > 1) {
|
|
3568
|
+
size = size + re.subs.length - 1;
|
|
3569
|
+
}
|
|
3570
|
+
break;
|
|
3571
|
+
}
|
|
3572
|
+
case Regexp.Op.REPEAT:
|
|
3573
|
+
{
|
|
3574
|
+
let sub = this.calcSize(re.subs[0]);
|
|
3575
|
+
if (re.max === -1) {
|
|
3576
|
+
if (re.min === 0) {
|
|
3577
|
+
size = 2 + sub; // x*
|
|
3578
|
+
} else {
|
|
3579
|
+
size = 1 + re.min * sub; // xxx+
|
|
3580
|
+
}
|
|
3581
|
+
break;
|
|
3582
|
+
}
|
|
3583
|
+
// x{2,5} = xx(x(x(x)?)?)?
|
|
3584
|
+
size = re.max * sub + (re.max - re.min);
|
|
3585
|
+
break;
|
|
3586
|
+
}
|
|
3587
|
+
}
|
|
3588
|
+
size = Math.max(1, size);
|
|
3589
|
+
this.size[re] = size;
|
|
3590
|
+
return size;
|
|
3591
|
+
}
|
|
3592
|
+
checkHeight(re) {
|
|
3593
|
+
if (this.numRegexp < Parser.MAX_HEIGHT) {
|
|
3594
|
+
return;
|
|
3595
|
+
}
|
|
3596
|
+
if (this.height === null) {
|
|
3597
|
+
this.height = {};
|
|
3598
|
+
for (let reEx of this.stack) {
|
|
3599
|
+
this.checkHeight(reEx);
|
|
3600
|
+
}
|
|
3601
|
+
}
|
|
3602
|
+
if (this.calcHeight(re, true) > Parser.MAX_HEIGHT) {
|
|
3603
|
+
throw new RE2JSSyntaxException(Parser.ERR_NESTING_DEPTH);
|
|
3604
|
+
}
|
|
3605
|
+
}
|
|
3606
|
+
calcHeight(re, force = false) {
|
|
3607
|
+
if (!force) {
|
|
3608
|
+
if (Object.prototype.hasOwnProperty.call(this.height, re)) {
|
|
3609
|
+
return this.height[re];
|
|
3610
|
+
}
|
|
3611
|
+
}
|
|
3612
|
+
let h = 1;
|
|
3613
|
+
for (let sub of re.subs) {
|
|
3614
|
+
const hsub = this.calcHeight(sub);
|
|
3615
|
+
if (h < 1 + hsub) {
|
|
3616
|
+
h = 1 + hsub;
|
|
3617
|
+
}
|
|
3618
|
+
}
|
|
3619
|
+
this.height[re] = h;
|
|
3620
|
+
return h;
|
|
3621
|
+
}
|
|
3415
3622
|
|
|
3416
3623
|
// Parse stack manipulation.
|
|
3417
3624
|
|
|
@@ -3432,13 +3639,14 @@
|
|
|
3432
3639
|
// push pushes the regexp re onto the parse stack and returns the regexp.
|
|
3433
3640
|
// Returns null for a CHAR_CLASS that can be merged with the top-of-stack.
|
|
3434
3641
|
push(re) {
|
|
3642
|
+
this.numRunes += re.runes.length;
|
|
3435
3643
|
if (re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 2 && re.runes[0] === re.runes[1]) {
|
|
3436
|
-
if (this.maybeConcat(re.runes[0], this.flags &
|
|
3644
|
+
if (this.maybeConcat(re.runes[0], this.flags & -2)) {
|
|
3437
3645
|
return null;
|
|
3438
3646
|
}
|
|
3439
3647
|
re.op = Regexp.Op.LITERAL;
|
|
3440
3648
|
re.runes = [re.runes[0]];
|
|
3441
|
-
re.flags = this.flags &
|
|
3649
|
+
re.flags = this.flags & -2;
|
|
3442
3650
|
} else if (re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 4 && re.runes[0] === re.runes[1] && re.runes[2] === re.runes[3] && Unicode.simpleFold(re.runes[0]) === re.runes[2] && Unicode.simpleFold(re.runes[2]) === re.runes[0] || re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 2 && re.runes[0] + 1 === re.runes[1] && Unicode.simpleFold(re.runes[0]) === re.runes[1] && Unicode.simpleFold(re.runes[1]) === re.runes[0]) {
|
|
3443
3651
|
// Case-insensitive rune like [Aa] or [Δδ].
|
|
3444
3652
|
if (this.maybeConcat(re.runes[0], this.flags | RE2Flags.FOLD_CASE)) {
|
|
@@ -3453,6 +3661,7 @@
|
|
|
3453
3661
|
this.maybeConcat(-1, 0);
|
|
3454
3662
|
}
|
|
3455
3663
|
this.stack.push(re);
|
|
3664
|
+
this.checkLimits(re);
|
|
3456
3665
|
return re;
|
|
3457
3666
|
}
|
|
3458
3667
|
|
|
@@ -3546,6 +3755,43 @@
|
|
|
3546
3755
|
re.flags = flags;
|
|
3547
3756
|
re.subs = [sub];
|
|
3548
3757
|
this.stack[n - 1] = re;
|
|
3758
|
+
this.checkLimits(re);
|
|
3759
|
+
if (op === Regexp.Op.REPEAT && (min >= 2 || max >= 2) && !this.repeatIsValid(re, 1000)) {
|
|
3760
|
+
throw new RE2JSSyntaxException(Parser.ERR_INVALID_REPEAT_SIZE, t.from(beforePos));
|
|
3761
|
+
}
|
|
3762
|
+
}
|
|
3763
|
+
|
|
3764
|
+
// repeatIsValid reports whether the repetition re is valid.
|
|
3765
|
+
// Valid means that the combination of the top-level repetition
|
|
3766
|
+
// and any inner repetitions does not exceed n copies of the
|
|
3767
|
+
// innermost thing.
|
|
3768
|
+
// This function rewalks the regexp tree and is called for every repetition,
|
|
3769
|
+
// so we have to worry about inducing quadratic behavior in the parser.
|
|
3770
|
+
// We avoid this by only calling repeatIsValid when min or max >= 2.
|
|
3771
|
+
// In that case the depth of any >= 2 nesting can only get to 9 without
|
|
3772
|
+
// triggering a parse error, so each subtree can only be rewalked 9 times.
|
|
3773
|
+
repeatIsValid(re, n) {
|
|
3774
|
+
if (re.op === Regexp.Op.REPEAT) {
|
|
3775
|
+
let m = re.max;
|
|
3776
|
+
if (m === 0) {
|
|
3777
|
+
return true;
|
|
3778
|
+
}
|
|
3779
|
+
if (m < 0) {
|
|
3780
|
+
m = re.min;
|
|
3781
|
+
}
|
|
3782
|
+
if (m > n) {
|
|
3783
|
+
return false;
|
|
3784
|
+
}
|
|
3785
|
+
if (m > 0) {
|
|
3786
|
+
n = Math.trunc(n / m);
|
|
3787
|
+
}
|
|
3788
|
+
}
|
|
3789
|
+
for (let sub of re.subs) {
|
|
3790
|
+
if (!this.repeatIsValid(sub, n)) {
|
|
3791
|
+
return false;
|
|
3792
|
+
}
|
|
3793
|
+
}
|
|
3794
|
+
return true;
|
|
3549
3795
|
}
|
|
3550
3796
|
|
|
3551
3797
|
// concat replaces the top of the stack (above the topmost '|' or '(') with
|
|
@@ -3583,10 +3829,10 @@
|
|
|
3583
3829
|
if (re.op === Regexp.Op.CHAR_CLASS) {
|
|
3584
3830
|
re.runes = new CharClass(re.runes).cleanClass().toArray();
|
|
3585
3831
|
if (re.runes.length === 2 && re.runes[0] === 0 && re.runes[1] === Unicode.MAX_RUNE) {
|
|
3586
|
-
re.runes =
|
|
3832
|
+
re.runes = [];
|
|
3587
3833
|
re.op = Regexp.Op.ANY_CHAR;
|
|
3588
3834
|
} else if (re.runes.length === 4 && re.runes[0] === 0 && re.runes[1] === Codepoint.CODES.get('\n') - 1 && re.runes[2] === Codepoint.CODES.get('\n') + 1 && re.runes[3] === Unicode.MAX_RUNE) {
|
|
3589
|
-
re.runes =
|
|
3835
|
+
re.runes = [];
|
|
3590
3836
|
re.op = Regexp.Op.ANY_CHAR_NOT_NL;
|
|
3591
3837
|
}
|
|
3592
3838
|
}
|
|
@@ -3721,6 +3967,7 @@
|
|
|
3721
3967
|
prefix.runes = str.slice(0, strlen);
|
|
3722
3968
|
for (let j = start; j < i; j++) {
|
|
3723
3969
|
array[s + j] = this.removeLeadingString(array[s + j], strlen);
|
|
3970
|
+
this.checkLimits(array[s + j]);
|
|
3724
3971
|
}
|
|
3725
3972
|
// Recurse.
|
|
3726
3973
|
const suffix = this.collapse(array.slice(s + start, s + i), Regexp.Op.ALTERNATE);
|
|
@@ -3770,6 +4017,7 @@
|
|
|
3770
4017
|
for (let j = start; j < i; j++) {
|
|
3771
4018
|
const reuse = j !== start; // prefix came from sub[start]
|
|
3772
4019
|
array[s + j] = this.removeLeadingRegexp(array[s + j], reuse);
|
|
4020
|
+
this.checkLimits(array[s + j]);
|
|
3773
4021
|
}
|
|
3774
4022
|
// recurse
|
|
3775
4023
|
const suffix = this.collapse(array.slice(s + start, s + i), Regexp.Op.ALTERNATE);
|
|
@@ -4153,7 +4401,7 @@
|
|
|
4153
4401
|
t.skip(2); // "(?"
|
|
4154
4402
|
|
|
4155
4403
|
let flags = this.flags;
|
|
4156
|
-
let sign =
|
|
4404
|
+
let sign = 1;
|
|
4157
4405
|
let sawFlag = false;
|
|
4158
4406
|
loop: while (t.more()) {
|
|
4159
4407
|
{
|
|
@@ -4164,7 +4412,7 @@
|
|
|
4164
4412
|
sawFlag = true;
|
|
4165
4413
|
break;
|
|
4166
4414
|
case Codepoint.CODES.get('m'):
|
|
4167
|
-
flags &=
|
|
4415
|
+
flags &= -17;
|
|
4168
4416
|
sawFlag = true;
|
|
4169
4417
|
break;
|
|
4170
4418
|
case Codepoint.CODES.get('s'):
|
|
@@ -4270,12 +4518,12 @@
|
|
|
4270
4518
|
this.alternate();
|
|
4271
4519
|
const n = this.stack.length;
|
|
4272
4520
|
if (n < 2) {
|
|
4273
|
-
throw new RE2JSSyntaxException(Parser.
|
|
4521
|
+
throw new RE2JSSyntaxException(Parser.ERR_UNEXPECTED_PAREN, this.wholeRegexp);
|
|
4274
4522
|
}
|
|
4275
4523
|
const re1 = this.pop();
|
|
4276
4524
|
const re2 = this.pop();
|
|
4277
4525
|
if (re2.op !== Regexp.Op.LEFT_PAREN) {
|
|
4278
|
-
throw new RE2JSSyntaxException(Parser.
|
|
4526
|
+
throw new RE2JSSyntaxException(Parser.ERR_UNEXPECTED_PAREN, this.wholeRegexp);
|
|
4279
4527
|
}
|
|
4280
4528
|
// Restore flags at time of paren.
|
|
4281
4529
|
this.flags = re2.flags;
|
|
@@ -4345,7 +4593,7 @@
|
|
|
4345
4593
|
}
|
|
4346
4594
|
t.skip(1); // '\\'
|
|
4347
4595
|
// Committed to parse or throw exception.
|
|
4348
|
-
let sign =
|
|
4596
|
+
let sign = 1;
|
|
4349
4597
|
let c = t.pop(); // 'p' or 'P'
|
|
4350
4598
|
if (c === Codepoint.CODES.get('P')) {
|
|
4351
4599
|
sign = -1;
|
|
@@ -4409,7 +4657,7 @@
|
|
|
4409
4657
|
const re = this.newRegexp(Regexp.Op.CHAR_CLASS);
|
|
4410
4658
|
re.flags = this.flags;
|
|
4411
4659
|
const cc = new CharClass();
|
|
4412
|
-
let sign =
|
|
4660
|
+
let sign = 1;
|
|
4413
4661
|
if (t.more() && t.lookingAt('^')) {
|
|
4414
4662
|
sign = -1;
|
|
4415
4663
|
t.skip(1); // '^'
|
|
@@ -4952,6 +5200,13 @@
|
|
|
4952
5200
|
return this.numSubexp;
|
|
4953
5201
|
}
|
|
4954
5202
|
|
|
5203
|
+
/**
|
|
5204
|
+
* Returns the number of instructions in this compiled regular expression program.
|
|
5205
|
+
*/
|
|
5206
|
+
numberOfInstructions() {
|
|
5207
|
+
return this.prog.numInst();
|
|
5208
|
+
}
|
|
5209
|
+
|
|
4955
5210
|
// get() returns a machine to use for matching |this|. It uses |this|'s
|
|
4956
5211
|
// machine cache if possible, to avoid unnecessary allocation.
|
|
4957
5212
|
get() {
|
|
@@ -5584,7 +5839,7 @@
|
|
|
5584
5839
|
}
|
|
5585
5840
|
let re2Flags = RE2Flags.PERL;
|
|
5586
5841
|
if ((flags & RE2JS.DISABLE_UNICODE_GROUPS) !== 0) {
|
|
5587
|
-
re2Flags &=
|
|
5842
|
+
re2Flags &= -129;
|
|
5588
5843
|
}
|
|
5589
5844
|
const p = new RE2JS(regex, flags);
|
|
5590
5845
|
// The compiled RE2 regexp.
|
|
@@ -5752,6 +6007,20 @@
|
|
|
5752
6007
|
return this.patternInput;
|
|
5753
6008
|
}
|
|
5754
6009
|
|
|
6010
|
+
/**
|
|
6011
|
+
* Returns the program size of this pattern.
|
|
6012
|
+
*
|
|
6013
|
+
* <p>
|
|
6014
|
+
* Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
|
|
6015
|
+
* "cost". Larger numbers are more expensive than smaller numbers.
|
|
6016
|
+
* </p>
|
|
6017
|
+
*
|
|
6018
|
+
* @return the program size of this pattern
|
|
6019
|
+
*/
|
|
6020
|
+
programSize() {
|
|
6021
|
+
return this.re2Input.numberOfInstructions();
|
|
6022
|
+
}
|
|
6023
|
+
|
|
5755
6024
|
/**
|
|
5756
6025
|
* Returns the number of capturing groups in this matcher's pattern. Group zero denotes the entire
|
|
5757
6026
|
* pattern and is excluded from this count.
|