re2js 0.4.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v0.4.2
5
+ * @version v1.0.0
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -328,7 +328,7 @@
328
328
  // Minimum and maximum runes involved in folding.
329
329
  // Checked during test.
330
330
  static MIN_FOLD = 0x0041;
331
- static MAX_FOLD = 0x1044f;
331
+ static MAX_FOLD = 0x1e943;
332
332
 
333
333
  // is32 uses binary search to test whether rune is in the specified
334
334
  // slice of 32-bit ranges.
@@ -932,6 +932,7 @@
932
932
  // The group indexes, in [start, end) pairs. Zeroth pair is overall match.
933
933
  this.groups = [];
934
934
  this.namedGroups = re2.namedGroups;
935
+ this.numberOfInstructions = re2.numberOfInstructions();
935
936
  if (input instanceof MatcherInputBase) {
936
937
  this.resetMatcherInput(input);
937
938
  } else if (Array.isArray(input)) {
@@ -1018,6 +1019,20 @@
1018
1019
  return this.groups[2 * group + 1];
1019
1020
  }
1020
1021
 
1022
+ /**
1023
+ * Returns the program size of this pattern.
1024
+ *
1025
+ * <p>
1026
+ * Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
1027
+ * "cost". Larger numbers are more expensive than smaller numbers.
1028
+ * </p>
1029
+ *
1030
+ * @return the program size of this pattern
1031
+ */
1032
+ programSize() {
1033
+ return this.numberOfInstructions;
1034
+ }
1035
+
1021
1036
  /**
1022
1037
  * Returns the named group of the most recent match, or {@code null} if the group was not matched.
1023
1038
  * @param {string|number} [group=0]
@@ -1658,7 +1673,7 @@
1658
1673
  // subexpressions, if any. Never null.
1659
1674
  // subs[0] is used as the freelist.
1660
1675
  this.subs = Regexp.emptySubs();
1661
- this.runes = null; // matched runes, for LITERAL, CHAR_CLASS
1676
+ this.runes = []; // matched runes, for LITERAL, CHAR_CLASS
1662
1677
  this.min = 0; // min for REPEAT
1663
1678
  this.max = 0; // max for REPEAT
1664
1679
  this.cap = 0; // capturing index, for CAPTURE
@@ -1668,7 +1683,7 @@
1668
1683
  reinit() {
1669
1684
  this.flags = 0;
1670
1685
  this.subs = Regexp.emptySubs();
1671
- this.runes = null;
1686
+ this.runes = [];
1672
1687
  this.cap = 0;
1673
1688
  this.min = 0;
1674
1689
  this.max = 0;
@@ -1969,7 +1984,7 @@
1969
1984
  this.arg = 0; // ALT, ALT_MATCH, CAPTURE, EMPTY_WIDTH
1970
1985
  // length==1 => exact match
1971
1986
  // otherwise a list of [lo,hi] pairs. hi is *inclusive*.
1972
- this.runes = null;
1987
+ this.runes = [];
1973
1988
  }
1974
1989
 
1975
1990
  // MatchRune returns true if the instruction matches (and consumes) r.
@@ -2363,7 +2378,7 @@
2363
2378
  i.runes = runes;
2364
2379
  flags &= RE2Flags.FOLD_CASE;
2365
2380
  if (runes.length !== 1 || Unicode.simpleFold(runes[0]) === runes[0]) {
2366
- flags &= ~RE2Flags.FOLD_CASE;
2381
+ flags &= -2;
2367
2382
  }
2368
2383
  i.arg = flags;
2369
2384
  f.out = f.i << 1;
@@ -2480,7 +2495,7 @@
2480
2495
  const nsub = Simplify.simplify(sub);
2481
2496
  if (nre === re && nsub !== sub) {
2482
2497
  nre = Regexp.fromRegexp(re);
2483
- nre.runes = null;
2498
+ nre.runes = [];
2484
2499
  nre.subs = re.subs.slice(0, re.subs.length);
2485
2500
  }
2486
2501
  if (nre !== re) {
@@ -2621,7 +2636,7 @@
2621
2636
  const code1 = [0x30, 0x39];
2622
2637
  const code2 = [0x9, 0xa, 0xc, 0xd, 0x20, 0x20];
2623
2638
  const code3 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a];
2624
- const PERL_GROUPS = new Map([['\\d', new CharGroup(+1, code1)], ['\\D', new CharGroup(-1, code1)], ['\\s', new CharGroup(+1, code2)], ['\\S', new CharGroup(-1, code2)], ['\\w', new CharGroup(+1, code3)], ['\\W', new CharGroup(-1, code3)]]);
2639
+ const PERL_GROUPS = new Map([['\\d', new CharGroup(1, code1)], ['\\D', new CharGroup(-1, code1)], ['\\s', new CharGroup(1, code2)], ['\\S', new CharGroup(-1, code2)], ['\\w', new CharGroup(1, code3)], ['\\W', new CharGroup(-1, code3)]]);
2625
2640
  const code4 = [0x30, 0x39, 0x41, 0x5a, 0x61, 0x7a];
2626
2641
  const code5 = [0x41, 0x5a, 0x61, 0x7a];
2627
2642
  const code6 = [0x0, 0x7f];
@@ -2636,7 +2651,7 @@
2636
2651
  const code15 = [0x41, 0x5a];
2637
2652
  const code16 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a];
2638
2653
  const code17 = [0x30, 0x39, 0x41, 0x46, 0x61, 0x66];
2639
- const POSIX_GROUPS = new Map([['[:alnum:]', new CharGroup(+1, code4)], ['[:^alnum:]', new CharGroup(-1, code4)], ['[:alpha:]', new CharGroup(+1, code5)], ['[:^alpha:]', new CharGroup(-1, code5)], ['[:ascii:]', new CharGroup(+1, code6)], ['[:^ascii:]', new CharGroup(-1, code6)], ['[:blank:]', new CharGroup(+1, code7)], ['[:^blank:]', new CharGroup(-1, code7)], ['[:cntrl:]', new CharGroup(+1, code8)], ['[:^cntrl:]', new CharGroup(-1, code8)], ['[:digit:]', new CharGroup(+1, code9)], ['[:^digit:]', new CharGroup(-1, code9)], ['[:graph:]', new CharGroup(+1, code10)], ['[:^graph:]', new CharGroup(-1, code10)], ['[:lower:]', new CharGroup(+1, code11)], ['[:^lower:]', new CharGroup(-1, code11)], ['[:print:]', new CharGroup(+1, code12)], ['[:^print:]', new CharGroup(-1, code12)], ['[:punct:]', new CharGroup(+1, code13)], ['[:^punct:]', new CharGroup(-1, code13)], ['[:space:]', new CharGroup(+1, code14)], ['[:^space:]', new CharGroup(-1, code14)], ['[:upper:]', new CharGroup(+1, code15)], ['[:^upper:]', new CharGroup(-1, code15)], ['[:word:]', new CharGroup(+1, code16)], ['[:^word:]', new CharGroup(-1, code16)], ['[:xdigit:]', new CharGroup(+1, code17)], ['[:^xdigit:]', new CharGroup(-1, code17)]]);
2654
+ const POSIX_GROUPS = new Map([['[:alnum:]', new CharGroup(1, code4)], ['[:^alnum:]', new CharGroup(-1, code4)], ['[:alpha:]', new CharGroup(1, code5)], ['[:^alpha:]', new CharGroup(-1, code5)], ['[:ascii:]', new CharGroup(1, code6)], ['[:^ascii:]', new CharGroup(-1, code6)], ['[:blank:]', new CharGroup(1, code7)], ['[:^blank:]', new CharGroup(-1, code7)], ['[:cntrl:]', new CharGroup(1, code8)], ['[:^cntrl:]', new CharGroup(-1, code8)], ['[:digit:]', new CharGroup(1, code9)], ['[:^digit:]', new CharGroup(-1, code9)], ['[:graph:]', new CharGroup(1, code10)], ['[:^graph:]', new CharGroup(-1, code10)], ['[:lower:]', new CharGroup(1, code11)], ['[:^lower:]', new CharGroup(-1, code11)], ['[:print:]', new CharGroup(1, code12)], ['[:^print:]', new CharGroup(-1, code12)], ['[:punct:]', new CharGroup(1, code13)], ['[:^punct:]', new CharGroup(-1, code13)], ['[:space:]', new CharGroup(1, code14)], ['[:^space:]', new CharGroup(-1, code14)], ['[:upper:]', new CharGroup(1, code15)], ['[:^upper:]', new CharGroup(-1, code15)], ['[:word:]', new CharGroup(1, code16)], ['[:^word:]', new CharGroup(-1, code16)], ['[:xdigit:]', new CharGroup(1, code17)], ['[:^xdigit:]', new CharGroup(-1, code17)]]);
2640
2655
 
2641
2656
  /**
2642
2657
  * A "builder"-style helper class for manipulating character classes represented as an array of
@@ -2675,7 +2690,7 @@
2675
2690
  // qsortIntPair() quicksorts pairs of ints in |array| according to lt().
2676
2691
  // Precondition: |left|, |right|, |this.len| must all be even; |this.len > 1|.
2677
2692
  static qsortIntPair(array, left, right) {
2678
- const pivotIndex = ((left + right) / 2 | 0) & ~1;
2693
+ const pivotIndex = ((left + right) / 2 | 0) & -2;
2679
2694
  const pivotFrom = array[pivotIndex];
2680
2695
  const pivotTo = array[pivotIndex + 1];
2681
2696
  let i = left;
@@ -3060,6 +3075,48 @@
3060
3075
  static ERR_MISSING_REPEAT_ARGUMENT = 'missing argument to repetition operator';
3061
3076
  static ERR_TRAILING_BACKSLASH = 'trailing backslash at end of expression';
3062
3077
  static ERR_DUPLICATE_NAMED_CAPTURE = 'duplicate capture group name';
3078
+ static ERR_UNEXPECTED_PAREN = 'unexpected )';
3079
+ static ERR_NESTING_DEPTH = 'expression nests too deeply';
3080
+ static ERR_LARGE = 'expression too large';
3081
+
3082
+ // maxHeight is the maximum height of a regexp parse tree.
3083
+ // It is somewhat arbitrarily chosen, but the idea is to be large enough
3084
+ // that no one will actually hit in real use but at the same time small enough
3085
+ // that recursion on the Regexp tree will not hit the 1GB Go stack limit.
3086
+ // The maximum amount of stack for a single recursive frame is probably
3087
+ // closer to 1kB, so this could potentially be raised, but it seems unlikely
3088
+ // that people have regexps nested even this deeply.
3089
+ // We ran a test on Google's C++ code base and turned up only
3090
+ // a single use case with depth > 100; it had depth 128.
3091
+ // Using depth 1000 should be plenty of margin.
3092
+ // As an optimization, we don't even bother calculating heights
3093
+ // until we've allocated at least maxHeight Regexp structures.
3094
+ static MAX_HEIGHT = 1000;
3095
+
3096
+ // maxSize is the maximum size of a compiled regexp in Insts.
3097
+ // It too is somewhat arbitrarily chosen, but the idea is to be large enough
3098
+ // to allow significant regexps while at the same time small enough that
3099
+ // the compiled form will not take up too much memory.
3100
+ // 128 MB is enough for 3.3 million Inst structures, which roughly
3101
+ // corresponds to a 3.3 MB regexp.
3102
+ static MAX_SIZE = 3355443; // 128 << 20 / (5 * 8) (instSize = byte, 2 uint32, slice is 5 64-bit words)
3103
+
3104
+ // maxRunes is the maximum number of runes allowed in a regexp tree
3105
+ // counting the runes in all the nodes.
3106
+ // Ignoring character classes, p.numRunes is always less than the length of the regexp.
3107
+ // Character classes can make it much larger: each \pL adds 1292 runes.
3108
+ // 128 MB is enough for 32M runes, which is over 26k \pL instances.
3109
+ // Note that repetitions do not make copies of the rune slices,
3110
+ // so \pL{1000} is only one rune slice, not 1000.
3111
+ // We could keep a cache of character classes we've seen,
3112
+ // so that all the \pL we see use the same rune list,
3113
+ // but that doesn't remove the problem entirely:
3114
+ // consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].
3115
+ // And because the Rune slice is exposed directly in the Regexp,
3116
+ // there is not an opportunity to change the representation to allow
3117
+ // partial sharing between different character classes.
3118
+ // So the limit is the best we can do.
3119
+ static MAX_RUNES = 33554432; // 128 << 20 / 4 (runeSize, int32 is 4 bytes)
3063
3120
 
3064
3121
  // RangeTables are represented as int[][], a list of triples (start, end,
3065
3122
  // stride).
@@ -3358,7 +3415,7 @@
3358
3415
  case Codepoint.CODES.get('v'):
3359
3416
  return Codepoint.CODES.get('\v');
3360
3417
  default:
3361
- if (!Utils.isalnum(c)) {
3418
+ if (c <= Unicode.MAX_ASCII && !Utils.isalnum(c)) {
3362
3419
  return c;
3363
3420
  }
3364
3421
  break;
@@ -3392,6 +3449,12 @@
3392
3449
  // Stack of parsed expressions.
3393
3450
  this.stack = [];
3394
3451
  this.free = null;
3452
+ // checks
3453
+ this.numRegexp = 0; // number of regexps allocated
3454
+ this.numRunes = 0; // number of runes in char classes
3455
+ this.repeats = 0; // product of all repetitions seen
3456
+ this.height = null; // regexp height, for height limit check
3457
+ this.size = null; // regexp compiled size, for size limit check
3395
3458
  }
3396
3459
 
3397
3460
  // Allocate a Regexp, from the free list if possible.
@@ -3403,15 +3466,159 @@
3403
3466
  re.op = op;
3404
3467
  } else {
3405
3468
  re = new Regexp(op);
3469
+ this.numRegexp += 1;
3406
3470
  }
3407
3471
  return re;
3408
3472
  }
3409
3473
  reuse(re) {
3474
+ if (this.height !== null && Object.prototype.hasOwnProperty.call(this.height, re)) {
3475
+ delete this.height[re];
3476
+ }
3410
3477
  if (re.subs !== null && re.subs.length > 0) {
3411
3478
  re.subs[0] = this.free;
3412
3479
  }
3413
3480
  this.free = re;
3414
3481
  }
3482
+ checkLimits(re) {
3483
+ if (this.numRunes > Parser.MAX_RUNES) {
3484
+ throw new RE2JSSyntaxException(Parser.ERR_LARGE);
3485
+ }
3486
+ this.checkSize(re);
3487
+ this.checkHeight(re);
3488
+ }
3489
+ checkSize(re) {
3490
+ if (this.size === null) {
3491
+ // We haven't started tracking size yet.
3492
+ // Do a relatively cheap check to see if we need to start.
3493
+ // Maintain the product of all the repeats we've seen
3494
+ // and don't track if the total number of regexp nodes
3495
+ // we've seen times the repeat product is in budget.
3496
+ if (this.repeats === 0) {
3497
+ this.repeats = 1;
3498
+ }
3499
+ if (re.op === Regexp.Op.REPEAT) {
3500
+ let n = re.max;
3501
+ if (n === -1) {
3502
+ n = re.min;
3503
+ }
3504
+ if (n <= 0) {
3505
+ n = 1;
3506
+ }
3507
+ if (n > Parser.MAX_SIZE / this.repeats) {
3508
+ this.repeats = Parser.MAX_SIZE;
3509
+ } else {
3510
+ this.repeats *= n;
3511
+ }
3512
+ }
3513
+ if (this.numRegexp < Parser.MAX_SIZE / this.repeats) {
3514
+ return;
3515
+ }
3516
+
3517
+ // We need to start tracking size.
3518
+ // Make the map and belatedly populate it
3519
+ // with info about everything we've constructed so far.
3520
+ this.size = {};
3521
+ for (let reEx of this.stack) {
3522
+ this.checkSize(reEx);
3523
+ }
3524
+ }
3525
+ if (this.calcSize(re, true) > Parser.MAX_SIZE) {
3526
+ throw new RE2JSSyntaxException(Parser.ERR_LARGE);
3527
+ }
3528
+ }
3529
+ calcSize(re, force = false) {
3530
+ if (!force) {
3531
+ if (Object.prototype.hasOwnProperty.call(this.size, re)) {
3532
+ return this.size[re];
3533
+ }
3534
+ }
3535
+ let size = 0;
3536
+ switch (re.op) {
3537
+ case Regexp.Op.LITERAL:
3538
+ {
3539
+ size = re.runes.length;
3540
+ break;
3541
+ }
3542
+ case Regexp.Op.CAPTURE:
3543
+ case Regexp.Op.STAR:
3544
+ {
3545
+ // star can be 1+ or 2+; assume 2 pessimistically
3546
+ size = 2 + this.calcSize(re.subs[0]);
3547
+ break;
3548
+ }
3549
+ case Regexp.Op.PLUS:
3550
+ case Regexp.Op.QUEST:
3551
+ {
3552
+ size = 1 + this.calcSize(re.subs[0]);
3553
+ break;
3554
+ }
3555
+ case Regexp.Op.CONCAT:
3556
+ {
3557
+ for (let sub of re.subs) {
3558
+ size = size + this.calcSize(sub);
3559
+ }
3560
+ break;
3561
+ }
3562
+ case Regexp.Op.ALTERNATE:
3563
+ {
3564
+ for (let sub of re.subs) {
3565
+ size = size + this.calcSize(sub);
3566
+ }
3567
+ if (re.subs.length > 1) {
3568
+ size = size + re.subs.length - 1;
3569
+ }
3570
+ break;
3571
+ }
3572
+ case Regexp.Op.REPEAT:
3573
+ {
3574
+ let sub = this.calcSize(re.subs[0]);
3575
+ if (re.max === -1) {
3576
+ if (re.min === 0) {
3577
+ size = 2 + sub; // x*
3578
+ } else {
3579
+ size = 1 + re.min * sub; // xxx+
3580
+ }
3581
+ break;
3582
+ }
3583
+ // x{2,5} = xx(x(x(x)?)?)?
3584
+ size = re.max * sub + (re.max - re.min);
3585
+ break;
3586
+ }
3587
+ }
3588
+ size = Math.max(1, size);
3589
+ this.size[re] = size;
3590
+ return size;
3591
+ }
3592
+ checkHeight(re) {
3593
+ if (this.numRegexp < Parser.MAX_HEIGHT) {
3594
+ return;
3595
+ }
3596
+ if (this.height === null) {
3597
+ this.height = {};
3598
+ for (let reEx of this.stack) {
3599
+ this.checkHeight(reEx);
3600
+ }
3601
+ }
3602
+ if (this.calcHeight(re, true) > Parser.MAX_HEIGHT) {
3603
+ throw new RE2JSSyntaxException(Parser.ERR_NESTING_DEPTH);
3604
+ }
3605
+ }
3606
+ calcHeight(re, force = false) {
3607
+ if (!force) {
3608
+ if (Object.prototype.hasOwnProperty.call(this.height, re)) {
3609
+ return this.height[re];
3610
+ }
3611
+ }
3612
+ let h = 1;
3613
+ for (let sub of re.subs) {
3614
+ const hsub = this.calcHeight(sub);
3615
+ if (h < 1 + hsub) {
3616
+ h = 1 + hsub;
3617
+ }
3618
+ }
3619
+ this.height[re] = h;
3620
+ return h;
3621
+ }
3415
3622
 
3416
3623
  // Parse stack manipulation.
3417
3624
 
@@ -3432,13 +3639,14 @@
3432
3639
  // push pushes the regexp re onto the parse stack and returns the regexp.
3433
3640
  // Returns null for a CHAR_CLASS that can be merged with the top-of-stack.
3434
3641
  push(re) {
3642
+ this.numRunes += re.runes.length;
3435
3643
  if (re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 2 && re.runes[0] === re.runes[1]) {
3436
- if (this.maybeConcat(re.runes[0], this.flags & ~RE2Flags.FOLD_CASE)) {
3644
+ if (this.maybeConcat(re.runes[0], this.flags & -2)) {
3437
3645
  return null;
3438
3646
  }
3439
3647
  re.op = Regexp.Op.LITERAL;
3440
3648
  re.runes = [re.runes[0]];
3441
- re.flags = this.flags & ~RE2Flags.FOLD_CASE;
3649
+ re.flags = this.flags & -2;
3442
3650
  } else if (re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 4 && re.runes[0] === re.runes[1] && re.runes[2] === re.runes[3] && Unicode.simpleFold(re.runes[0]) === re.runes[2] && Unicode.simpleFold(re.runes[2]) === re.runes[0] || re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 2 && re.runes[0] + 1 === re.runes[1] && Unicode.simpleFold(re.runes[0]) === re.runes[1] && Unicode.simpleFold(re.runes[1]) === re.runes[0]) {
3443
3651
  // Case-insensitive rune like [Aa] or [Δδ].
3444
3652
  if (this.maybeConcat(re.runes[0], this.flags | RE2Flags.FOLD_CASE)) {
@@ -3453,6 +3661,7 @@
3453
3661
  this.maybeConcat(-1, 0);
3454
3662
  }
3455
3663
  this.stack.push(re);
3664
+ this.checkLimits(re);
3456
3665
  return re;
3457
3666
  }
3458
3667
 
@@ -3546,6 +3755,43 @@
3546
3755
  re.flags = flags;
3547
3756
  re.subs = [sub];
3548
3757
  this.stack[n - 1] = re;
3758
+ this.checkLimits(re);
3759
+ if (op === Regexp.Op.REPEAT && (min >= 2 || max >= 2) && !this.repeatIsValid(re, 1000)) {
3760
+ throw new RE2JSSyntaxException(Parser.ERR_INVALID_REPEAT_SIZE, t.from(beforePos));
3761
+ }
3762
+ }
3763
+
3764
+ // repeatIsValid reports whether the repetition re is valid.
3765
+ // Valid means that the combination of the top-level repetition
3766
+ // and any inner repetitions does not exceed n copies of the
3767
+ // innermost thing.
3768
+ // This function rewalks the regexp tree and is called for every repetition,
3769
+ // so we have to worry about inducing quadratic behavior in the parser.
3770
+ // We avoid this by only calling repeatIsValid when min or max >= 2.
3771
+ // In that case the depth of any >= 2 nesting can only get to 9 without
3772
+ // triggering a parse error, so each subtree can only be rewalked 9 times.
3773
+ repeatIsValid(re, n) {
3774
+ if (re.op === Regexp.Op.REPEAT) {
3775
+ let m = re.max;
3776
+ if (m === 0) {
3777
+ return true;
3778
+ }
3779
+ if (m < 0) {
3780
+ m = re.min;
3781
+ }
3782
+ if (m > n) {
3783
+ return false;
3784
+ }
3785
+ if (m > 0) {
3786
+ n = Math.trunc(n / m);
3787
+ }
3788
+ }
3789
+ for (let sub of re.subs) {
3790
+ if (!this.repeatIsValid(sub, n)) {
3791
+ return false;
3792
+ }
3793
+ }
3794
+ return true;
3549
3795
  }
3550
3796
 
3551
3797
  // concat replaces the top of the stack (above the topmost '|' or '(') with
@@ -3583,10 +3829,10 @@
3583
3829
  if (re.op === Regexp.Op.CHAR_CLASS) {
3584
3830
  re.runes = new CharClass(re.runes).cleanClass().toArray();
3585
3831
  if (re.runes.length === 2 && re.runes[0] === 0 && re.runes[1] === Unicode.MAX_RUNE) {
3586
- re.runes = null;
3832
+ re.runes = [];
3587
3833
  re.op = Regexp.Op.ANY_CHAR;
3588
3834
  } else if (re.runes.length === 4 && re.runes[0] === 0 && re.runes[1] === Codepoint.CODES.get('\n') - 1 && re.runes[2] === Codepoint.CODES.get('\n') + 1 && re.runes[3] === Unicode.MAX_RUNE) {
3589
- re.runes = null;
3835
+ re.runes = [];
3590
3836
  re.op = Regexp.Op.ANY_CHAR_NOT_NL;
3591
3837
  }
3592
3838
  }
@@ -3721,6 +3967,7 @@
3721
3967
  prefix.runes = str.slice(0, strlen);
3722
3968
  for (let j = start; j < i; j++) {
3723
3969
  array[s + j] = this.removeLeadingString(array[s + j], strlen);
3970
+ this.checkLimits(array[s + j]);
3724
3971
  }
3725
3972
  // Recurse.
3726
3973
  const suffix = this.collapse(array.slice(s + start, s + i), Regexp.Op.ALTERNATE);
@@ -3770,6 +4017,7 @@
3770
4017
  for (let j = start; j < i; j++) {
3771
4018
  const reuse = j !== start; // prefix came from sub[start]
3772
4019
  array[s + j] = this.removeLeadingRegexp(array[s + j], reuse);
4020
+ this.checkLimits(array[s + j]);
3773
4021
  }
3774
4022
  // recurse
3775
4023
  const suffix = this.collapse(array.slice(s + start, s + i), Regexp.Op.ALTERNATE);
@@ -4153,7 +4401,7 @@
4153
4401
  t.skip(2); // "(?"
4154
4402
 
4155
4403
  let flags = this.flags;
4156
- let sign = +1;
4404
+ let sign = 1;
4157
4405
  let sawFlag = false;
4158
4406
  loop: while (t.more()) {
4159
4407
  {
@@ -4164,7 +4412,7 @@
4164
4412
  sawFlag = true;
4165
4413
  break;
4166
4414
  case Codepoint.CODES.get('m'):
4167
- flags &= ~RE2Flags.ONE_LINE;
4415
+ flags &= -17;
4168
4416
  sawFlag = true;
4169
4417
  break;
4170
4418
  case Codepoint.CODES.get('s'):
@@ -4270,12 +4518,12 @@
4270
4518
  this.alternate();
4271
4519
  const n = this.stack.length;
4272
4520
  if (n < 2) {
4273
- throw new RE2JSSyntaxException(Parser.ERR_INTERNAL_ERROR, 'stack underflow');
4521
+ throw new RE2JSSyntaxException(Parser.ERR_UNEXPECTED_PAREN, this.wholeRegexp);
4274
4522
  }
4275
4523
  const re1 = this.pop();
4276
4524
  const re2 = this.pop();
4277
4525
  if (re2.op !== Regexp.Op.LEFT_PAREN) {
4278
- throw new RE2JSSyntaxException(Parser.ERR_MISSING_PAREN, this.wholeRegexp);
4526
+ throw new RE2JSSyntaxException(Parser.ERR_UNEXPECTED_PAREN, this.wholeRegexp);
4279
4527
  }
4280
4528
  // Restore flags at time of paren.
4281
4529
  this.flags = re2.flags;
@@ -4345,7 +4593,7 @@
4345
4593
  }
4346
4594
  t.skip(1); // '\\'
4347
4595
  // Committed to parse or throw exception.
4348
- let sign = +1;
4596
+ let sign = 1;
4349
4597
  let c = t.pop(); // 'p' or 'P'
4350
4598
  if (c === Codepoint.CODES.get('P')) {
4351
4599
  sign = -1;
@@ -4409,7 +4657,7 @@
4409
4657
  const re = this.newRegexp(Regexp.Op.CHAR_CLASS);
4410
4658
  re.flags = this.flags;
4411
4659
  const cc = new CharClass();
4412
- let sign = +1;
4660
+ let sign = 1;
4413
4661
  if (t.more() && t.lookingAt('^')) {
4414
4662
  sign = -1;
4415
4663
  t.skip(1); // '^'
@@ -4952,6 +5200,13 @@
4952
5200
  return this.numSubexp;
4953
5201
  }
4954
5202
 
5203
+ /**
5204
+ * Returns the number of instructions in this compiled regular expression program.
5205
+ */
5206
+ numberOfInstructions() {
5207
+ return this.prog.numInst();
5208
+ }
5209
+
4955
5210
  // get() returns a machine to use for matching |this|. It uses |this|'s
4956
5211
  // machine cache if possible, to avoid unnecessary allocation.
4957
5212
  get() {
@@ -5584,7 +5839,7 @@
5584
5839
  }
5585
5840
  let re2Flags = RE2Flags.PERL;
5586
5841
  if ((flags & RE2JS.DISABLE_UNICODE_GROUPS) !== 0) {
5587
- re2Flags &= ~RE2Flags.UNICODE_GROUPS;
5842
+ re2Flags &= -129;
5588
5843
  }
5589
5844
  const p = new RE2JS(regex, flags);
5590
5845
  // The compiled RE2 regexp.
@@ -5752,6 +6007,20 @@
5752
6007
  return this.patternInput;
5753
6008
  }
5754
6009
 
6010
+ /**
6011
+ * Returns the program size of this pattern.
6012
+ *
6013
+ * <p>
6014
+ * Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
6015
+ * "cost". Larger numbers are more expensive than smaller numbers.
6016
+ * </p>
6017
+ *
6018
+ * @return the program size of this pattern
6019
+ */
6020
+ programSize() {
6021
+ return this.re2Input.numberOfInstructions();
6022
+ }
6023
+
5755
6024
  /**
5756
6025
  * Returns the number of capturing groups in this matcher's pattern. Group zero denotes the entire
5757
6026
  * pattern and is excluded from this count.