re2js 0.4.3 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -120,6 +120,17 @@ export class RE2JS {
120
120
  * @returns {string}
121
121
  */
122
122
  toString(): string;
123
+ /**
124
+ * Returns the program size of this pattern.
125
+ *
126
+ * <p>
127
+ * Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
128
+ * "cost". Larger numbers are more expensive than smaller numbers.
129
+ * </p>
130
+ *
131
+ * @returns {number} the program size of this pattern
132
+ */
133
+ programSize(): number;
123
134
  /**
124
135
  * Returns the number of capturing groups in this matcher's pattern. Group zero denotes the entire
125
136
  * pattern and is excluded from this count.
@@ -215,6 +226,7 @@ declare class Matcher {
215
226
  patternGroupCount: any;
216
227
  groups: any[];
217
228
  namedGroups: any;
229
+ numberOfInstructions: any;
218
230
  /**
219
231
  * Returns the {@code RE2JS} associated with this {@code Matcher}.
220
232
  * @returns {RE2JS}
@@ -251,6 +263,17 @@ declare class Matcher {
251
263
  * @returns {string}
252
264
  */
253
265
  end(group?: string | number): string;
266
+ /**
267
+ * Returns the program size of this pattern.
268
+ *
269
+ * <p>
270
+ * Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
271
+ * "cost". Larger numbers are more expensive than smaller numbers.
272
+ * </p>
273
+ *
274
+ * @returns {number} the program size of this pattern
275
+ */
276
+ programSize(): any;
254
277
  /**
255
278
  * Returns the named group of the most recent match, or {@code null} if the group was not matched.
256
279
  * @param {string|number} [group=0]
@@ -1 +1 @@
1
- {"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"AAs4KA;;;;;;;;;GASG;AACH;IACE;;OAEG;IACH,gCAA4B;IAC5B;;OAEG;IACH,sBAAkB;IAClB;;;OAGG;IACH,yBAAqB;IACrB;;OAEG;IACH,sCAAkC;IAClC;;OAEG;IACH,6BAA0B;IAE1B;;;;;;;;;;OAUG;IACH,kBAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;;OAKG;IACH,sBAJW,MAAM,UACN,MAAM,GACJ,KAAK,CAwBjB;IAED;;;;;;;OAOG;IACH,sBALW,MAAM,SACN,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAKnB;IAED;;;OAGG;IACH,wBAWC;IAED;;;;OAIG;IACH,qBAHW,MAAM,SACN,MAAM,EAOhB;IAHC,qBAA2B;IAE3B,mBAAuB;IAGzB;;;OAGG;IACH,cAEC;IAED;;;OAGG;IACH,SAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,CAIlB;IACD,WAEC;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAInB;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAOnB;IAED;;;;;;;;;;;;OAYG;IACH,aAJW,MAAM,UACN,MAAM,GACJ,MAAM,EAAE,CAgDpB;IAED;;;OAGG;IACH,YAFa,MAAM,CAIlB;IAED;;;;;OAKG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,eAFa,GAAC,CAIb;IAED;;;;OAIG;IACH,cAHW,GAAC,GACC,OAAO,CAUnB;CACF;AA90JD;;GAEG;AACH;CAKC;AA9CD;IACE,0BAGC;CACF;AAqDD;;GAEG;AACH;CAKC;AAlBD;;GAEG;AACH;CAKC;AAjDD;;GAEG;AACH;IACE,qCAUC;IAFC,WAAkB;IAClB,WAAkB;IAGpB;;OAEG;IACH,sBAEC;IAED;;OAEG;IACH,kBAEC;CACF;AAgCD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH;IACE;;;;;;OAMG;IACH,6BAHW,MAAM,GACJ,MAAM,CAalB;IACD;;;;OAIG;IACH,qBAHW,KAAK,SACL,gBAAgB,GAAC,iBAAiB,GAAC,MAAM,EAAE,GAAC,MAAM,EAqB5D;IAdC,oBAA2B;IAG3B,uBAAsD;IAEtD,cAAgB;IAChB,iBAAkC;IAUpC;;;OAGG;IACH,WAFa,KAAK,CAIjB;IAED;;;;OAIG;IACH,SAFa,OAAO,CAenB;IAXC,wBAAoD;IAEpD,2BAAkB;IAElB,kBAAqB;IAGrB,mBAAsB;IAEtB,mBAAmB;IAIrB;;;OAGG;IACH,+BAFa,OAAO,CASnB;IAHC,kBAAyB;IAK3B;;;;;OAKG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;OAKG;IACH,YAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;OAIG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAgBlB;IACD;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,kBAqBC;IAED;;;;;OAKG;IACH,WAFa,OAAO,CAInB;IAED;;;;;OAKG;IACH,aAFa,OAAO,CAInB;IAED;;;;;;;OAOG;IACH,aAJW,MAAM,GAAC,MAAM,GACX,OAAO,CAoBnB;IAED;;;;;;OAMG;IACH,iBAWC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,OACN,MAAM,GACJ,MAAM,CAOlB;IAED;;;OAGG;IAC
H,eAFa,MAAM,CAIlB;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,0BAUC;IAED;;;;OAIG;IACH,kCA2DC;IAED;;;;OAIG;IACH,sCAiFC;IAED;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;;;;;OAQG;IACH,wBALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;;OAQG;IACH,0BALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;OAOG;IACH,gBAWC;CACF;AA9qBD;IACE,yBAGC;IADC,WAAkB;IAEpB,mBAEC;IACD;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,EAAE,CAIpB;IAED;;;OAGG;IACH,UAFa,MAAM,CAIlB;CACF;AACD;IACE,gCAGC;IADC,kBAAgC;IAElC,mBAEC;IAED;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,EAAE,CAIpB;IAED;;;OAGG;IACH,UAFa,MAAM,CAIlB;CACF;AAzFD;;GAEG;AACH;IACE,8BAA4D;IAC5D,oBAEC;IAED;;;OAGG;IACH,kBAFa,OAAO,CAInB;IAED;;;OAGG;IACH,mBAFa,OAAO,CAInB;CACF"}
1
+ {"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"AAqoLA;;;;;;;;;GASG;AACH;IACE;;OAEG;IACH,gCAA4B;IAC5B;;OAEG;IACH,sBAAkB;IAClB;;;OAGG;IACH,yBAAqB;IACrB;;OAEG;IACH,sCAAkC;IAClC;;OAEG;IACH,6BAA0B;IAE1B;;;;;;;;;;OAUG;IACH,kBAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;;OAKG;IACH,sBAJW,MAAM,UACN,MAAM,GACJ,KAAK,CAwBjB;IAED;;;;;;;OAOG;IACH,sBALW,MAAM,SACN,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAKnB;IAED;;;OAGG;IACH,wBAWC;IAED;;;;OAIG;IACH,qBAHW,MAAM,SACN,MAAM,EAOhB;IAHC,qBAA2B;IAE3B,mBAAuB;IAGzB;;;OAGG;IACH,cAEC;IAED;;;OAGG;IACH,SAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,CAIlB;IACD,WAEC;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAInB;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAOnB;IAED;;;;;;;;;;;;OAYG;IACH,aAJW,MAAM,UACN,MAAM,GACJ,MAAM,EAAE,CAgDpB;IAED;;;OAGG;IACH,YAFa,MAAM,CAIlB;IAED;;;;;;;;;OASG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;OAKG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,eAFa,GAAC,CAIb;IAED;;;;OAIG;IACH,cAHW,GAAC,GACC,OAAO,CAUnB;CACF;AA3lKD;;GAEG;AACH;CAKC;AA9CD;IACE,0BAGC;CACF;AAqDD;;GAEG;AACH;CAKC;AAlBD;;GAEG;AACH;CAKC;AAjDD;;GAEG;AACH;IACE,qCAUC;IAFC,WAAkB;IAClB,WAAkB;IAGpB;;OAEG;IACH,sBAEC;IAED;;OAEG;IACH,kBAEC;CACF;AAgCD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH;IACE;;;;;;OAMG;IACH,6BAHW,MAAM,GACJ,MAAM,CAalB;IACD;;;;OAIG;IACH,qBAHW,KAAK,SACL,gBAAgB,GAAC,iBAAiB,GAAC,MAAM,EAAE,GAAC,MAAM,EAsB5D;IAfC,oBAA2B;IAG3B,uBAAsD;IAEtD,cAAgB;IAChB,iBAAkC;IAClC,0BAAsD;IAUxD;;;OAGG;IACH,WAFa,KAAK,CAIjB;IAED;;;;OAIG;IACH,SAFa,OAAO,CAenB;IAXC,wBAAoD;IAEpD,2BAAkB;IAElB,kBAAqB;IAGrB,mBAAsB;IAEtB,mBAAmB;IAIrB;;;OAGG;IACH,+BAFa,OAAO,CASnB;IAHC,kBAAyB;IAK3B;;;;;OAKG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;OAKG;IACH,YAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;;;;;OASG;IACH,mBAEC;IAED;;;;OAIG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAgBlB;IACD;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,kBAqBC;IAED;;;;;OAKG;IACH,WAFa,OAAO,CAInB;IAED;;;;;OAKG;IACH,aAFa,OAAO,CAInB;IAED;;;;;;;OAOG;IACH,aAJW,MAAM,GAAC,MAAM,GACX,OAAO,CAoBnB;IAED;;;;;;OAMG;I
ACH,iBAWC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,OACN,MAAM,GACJ,MAAM,CAOlB;IAED;;;OAGG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,0BAUC;IAED;;;;OAIG;IACH,kCA2DC;IAED;;;;OAIG;IACH,sCAiFC;IAED;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;;;;;OAQG;IACH,wBALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;;OAQG;IACH,0BALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;OAOG;IACH,gBAWC;CACF;AA7rBD;IACE,yBAGC;IADC,WAAkB;IAEpB,mBAEC;IACD;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,EAAE,CAIpB;IAED;;;OAGG;IACH,UAFa,MAAM,CAIlB;CACF;AACD;IACE,gCAGC;IADC,kBAAgC;IAElC,mBAEC;IAED;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,EAAE,CAIpB;IAED;;;OAGG;IACH,UAFa,MAAM,CAIlB;CACF;AAzFD;;GAEG;AACH;IACE,8BAA4D;IAC5D,oBAEC;IAED;;;OAGG;IACH,kBAFa,OAAO,CAInB;IAED;;;OAGG;IACH,mBAFa,OAAO,CAInB;CACF"}
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v0.4.3
5
+ * @version v1.0.1
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -926,6 +926,7 @@ class Matcher {
926
926
  // The group indexes, in [start, end) pairs. Zeroth pair is overall match.
927
927
  this.groups = [];
928
928
  this.namedGroups = re2.namedGroups;
929
+ this.numberOfInstructions = re2.numberOfInstructions();
929
930
  if (input instanceof MatcherInputBase) {
930
931
  this.resetMatcherInput(input);
931
932
  } else if (Array.isArray(input)) {
@@ -1012,6 +1013,20 @@ class Matcher {
1012
1013
  return this.groups[2 * group + 1];
1013
1014
  }
1014
1015
 
1016
+ /**
1017
+ * Returns the program size of this pattern.
1018
+ *
1019
+ * <p>
1020
+ * Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
1021
+ * "cost". Larger numbers are more expensive than smaller numbers.
1022
+ * </p>
1023
+ *
1024
+ * @returns {number} the program size of this pattern
1025
+ */
1026
+ programSize() {
1027
+ return this.numberOfInstructions;
1028
+ }
1029
+
1015
1030
  /**
1016
1031
  * Returns the named group of the most recent match, or {@code null} if the group was not matched.
1017
1032
  * @param {string|number} [group=0]
@@ -1652,7 +1667,7 @@ class Regexp {
1652
1667
  // subexpressions, if any. Never null.
1653
1668
  // subs[0] is used as the freelist.
1654
1669
  this.subs = Regexp.emptySubs();
1655
- this.runes = null; // matched runes, for LITERAL, CHAR_CLASS
1670
+ this.runes = []; // matched runes, for LITERAL, CHAR_CLASS
1656
1671
  this.min = 0; // min for REPEAT
1657
1672
  this.max = 0; // max for REPEAT
1658
1673
  this.cap = 0; // capturing index, for CAPTURE
@@ -1662,7 +1677,7 @@ class Regexp {
1662
1677
  reinit() {
1663
1678
  this.flags = 0;
1664
1679
  this.subs = Regexp.emptySubs();
1665
- this.runes = null;
1680
+ this.runes = [];
1666
1681
  this.cap = 0;
1667
1682
  this.min = 0;
1668
1683
  this.max = 0;
@@ -1963,7 +1978,7 @@ class Inst {
1963
1978
  this.arg = 0; // ALT, ALT_MATCH, CAPTURE, EMPTY_WIDTH
1964
1979
  // length==1 => exact match
1965
1980
  // otherwise a list of [lo,hi] pairs. hi is *inclusive*.
1966
- this.runes = null;
1981
+ this.runes = [];
1967
1982
  }
1968
1983
 
1969
1984
  // MatchRune returns true if the instruction matches (and consumes) r.
@@ -2357,7 +2372,7 @@ class Compiler {
2357
2372
  i.runes = runes;
2358
2373
  flags &= RE2Flags.FOLD_CASE;
2359
2374
  if (runes.length !== 1 || Unicode.simpleFold(runes[0]) === runes[0]) {
2360
- flags &= ~RE2Flags.FOLD_CASE;
2375
+ flags &= -2;
2361
2376
  }
2362
2377
  i.arg = flags;
2363
2378
  f.out = f.i << 1;
@@ -2474,7 +2489,7 @@ class Simplify {
2474
2489
  const nsub = Simplify.simplify(sub);
2475
2490
  if (nre === re && nsub !== sub) {
2476
2491
  nre = Regexp.fromRegexp(re);
2477
- nre.runes = null;
2492
+ nre.runes = [];
2478
2493
  nre.subs = re.subs.slice(0, re.subs.length);
2479
2494
  }
2480
2495
  if (nre !== re) {
@@ -2615,7 +2630,7 @@ class CharGroup {
2615
2630
  const code1 = [0x30, 0x39];
2616
2631
  const code2 = [0x9, 0xa, 0xc, 0xd, 0x20, 0x20];
2617
2632
  const code3 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a];
2618
- const PERL_GROUPS = new Map([['\\d', new CharGroup(+1, code1)], ['\\D', new CharGroup(-1, code1)], ['\\s', new CharGroup(+1, code2)], ['\\S', new CharGroup(-1, code2)], ['\\w', new CharGroup(+1, code3)], ['\\W', new CharGroup(-1, code3)]]);
2633
+ const PERL_GROUPS = new Map([['\\d', new CharGroup(1, code1)], ['\\D', new CharGroup(-1, code1)], ['\\s', new CharGroup(1, code2)], ['\\S', new CharGroup(-1, code2)], ['\\w', new CharGroup(1, code3)], ['\\W', new CharGroup(-1, code3)]]);
2619
2634
  const code4 = [0x30, 0x39, 0x41, 0x5a, 0x61, 0x7a];
2620
2635
  const code5 = [0x41, 0x5a, 0x61, 0x7a];
2621
2636
  const code6 = [0x0, 0x7f];
@@ -2630,7 +2645,7 @@ const code14 = [0x9, 0xd, 0x20, 0x20];
2630
2645
  const code15 = [0x41, 0x5a];
2631
2646
  const code16 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a];
2632
2647
  const code17 = [0x30, 0x39, 0x41, 0x46, 0x61, 0x66];
2633
- const POSIX_GROUPS = new Map([['[:alnum:]', new CharGroup(+1, code4)], ['[:^alnum:]', new CharGroup(-1, code4)], ['[:alpha:]', new CharGroup(+1, code5)], ['[:^alpha:]', new CharGroup(-1, code5)], ['[:ascii:]', new CharGroup(+1, code6)], ['[:^ascii:]', new CharGroup(-1, code6)], ['[:blank:]', new CharGroup(+1, code7)], ['[:^blank:]', new CharGroup(-1, code7)], ['[:cntrl:]', new CharGroup(+1, code8)], ['[:^cntrl:]', new CharGroup(-1, code8)], ['[:digit:]', new CharGroup(+1, code9)], ['[:^digit:]', new CharGroup(-1, code9)], ['[:graph:]', new CharGroup(+1, code10)], ['[:^graph:]', new CharGroup(-1, code10)], ['[:lower:]', new CharGroup(+1, code11)], ['[:^lower:]', new CharGroup(-1, code11)], ['[:print:]', new CharGroup(+1, code12)], ['[:^print:]', new CharGroup(-1, code12)], ['[:punct:]', new CharGroup(+1, code13)], ['[:^punct:]', new CharGroup(-1, code13)], ['[:space:]', new CharGroup(+1, code14)], ['[:^space:]', new CharGroup(-1, code14)], ['[:upper:]', new CharGroup(+1, code15)], ['[:^upper:]', new CharGroup(-1, code15)], ['[:word:]', new CharGroup(+1, code16)], ['[:^word:]', new CharGroup(-1, code16)], ['[:xdigit:]', new CharGroup(+1, code17)], ['[:^xdigit:]', new CharGroup(-1, code17)]]);
2648
+ const POSIX_GROUPS = new Map([['[:alnum:]', new CharGroup(1, code4)], ['[:^alnum:]', new CharGroup(-1, code4)], ['[:alpha:]', new CharGroup(1, code5)], ['[:^alpha:]', new CharGroup(-1, code5)], ['[:ascii:]', new CharGroup(1, code6)], ['[:^ascii:]', new CharGroup(-1, code6)], ['[:blank:]', new CharGroup(1, code7)], ['[:^blank:]', new CharGroup(-1, code7)], ['[:cntrl:]', new CharGroup(1, code8)], ['[:^cntrl:]', new CharGroup(-1, code8)], ['[:digit:]', new CharGroup(1, code9)], ['[:^digit:]', new CharGroup(-1, code9)], ['[:graph:]', new CharGroup(1, code10)], ['[:^graph:]', new CharGroup(-1, code10)], ['[:lower:]', new CharGroup(1, code11)], ['[:^lower:]', new CharGroup(-1, code11)], ['[:print:]', new CharGroup(1, code12)], ['[:^print:]', new CharGroup(-1, code12)], ['[:punct:]', new CharGroup(1, code13)], ['[:^punct:]', new CharGroup(-1, code13)], ['[:space:]', new CharGroup(1, code14)], ['[:^space:]', new CharGroup(-1, code14)], ['[:upper:]', new CharGroup(1, code15)], ['[:^upper:]', new CharGroup(-1, code15)], ['[:word:]', new CharGroup(1, code16)], ['[:^word:]', new CharGroup(-1, code16)], ['[:xdigit:]', new CharGroup(1, code17)], ['[:^xdigit:]', new CharGroup(-1, code17)]]);
2634
2649
 
2635
2650
  /**
2636
2651
  * A "builder"-style helper class for manipulating character classes represented as an array of
@@ -2669,7 +2684,7 @@ class CharClass {
2669
2684
  // qsortIntPair() quicksorts pairs of ints in |array| according to lt().
2670
2685
  // Precondition: |left|, |right|, |this.len| must all be even; |this.len > 1|.
2671
2686
  static qsortIntPair(array, left, right) {
2672
- const pivotIndex = ((left + right) / 2 | 0) & ~1;
2687
+ const pivotIndex = ((left + right) / 2 | 0) & -2;
2673
2688
  const pivotFrom = array[pivotIndex];
2674
2689
  const pivotTo = array[pivotIndex + 1];
2675
2690
  let i = left;
@@ -3054,6 +3069,48 @@ class Parser {
3054
3069
  static ERR_MISSING_REPEAT_ARGUMENT = 'missing argument to repetition operator';
3055
3070
  static ERR_TRAILING_BACKSLASH = 'trailing backslash at end of expression';
3056
3071
  static ERR_DUPLICATE_NAMED_CAPTURE = 'duplicate capture group name';
3072
+ static ERR_UNEXPECTED_PAREN = 'unexpected )';
3073
+ static ERR_NESTING_DEPTH = 'expression nests too deeply';
3074
+ static ERR_LARGE = 'expression too large';
3075
+
3076
+ // MAX_HEIGHT is the maximum height of a regexp parse tree.
3077
+ // It is somewhat arbitrarily chosen, but the idea is to be large enough
3078
+ // that no one will actually hit in real use but at the same time small enough
3079
+ // that recursion on the Regexp tree will not overflow the call stack (this comment is inherited from the Go implementation, which cites its 1GB stack limit).
3080
+ // The maximum amount of stack for a single recursive frame is probably
3081
+ // closer to 1kB, so this could potentially be raised, but it seems unlikely
3082
+ // that people have regexps nested even this deeply.
3083
+ // We ran a test on Google's C++ code base and turned up only
3084
+ // a single use case with depth > 100; it had depth 128.
3085
+ // Using depth 1000 should be plenty of margin.
3086
+ // As an optimization, we don't even bother calculating heights
3087
+ // until we've allocated at least MAX_HEIGHT Regexp structures.
3088
+ static MAX_HEIGHT = 1000;
3089
+
3090
+ // MAX_SIZE is the maximum size of a compiled regexp in Insts.
3091
+ // It too is somewhat arbitrarily chosen, but the idea is to be large enough
3092
+ // to allow significant regexps while at the same time small enough that
3093
+ // the compiled form will not take up too much memory.
3094
+ // 128 MB is enough for 3.3 million Inst structures, which roughly
3095
+ // corresponds to a 3.3 MB regexp.
3096
+ static MAX_SIZE = 3355443; // (128 << 20) / (5 * 8), rounded down (instSize = byte, 2 uint32, slice is 5 64-bit words)
3097
+
3098
+ // MAX_RUNES is the maximum number of runes allowed in a regexp tree
3099
+ // counting the runes in all the nodes.
3100
+ // Ignoring character classes, this.numRunes is always less than the length of the regexp.
3101
+ // Character classes can make it much larger: each \pL adds 1292 runes.
3102
+ // 128 MB is enough for 32M runes, which is over 26k \pL instances.
3103
+ // Note that repetitions do not make copies of the rune slices,
3104
+ // so \pL{1000} is only one rune slice, not 1000.
3105
+ // We could keep a cache of character classes we've seen,
3106
+ // so that all the \pL we see use the same rune list,
3107
+ // but that doesn't remove the problem entirely:
3108
+ // consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].
3109
+ // And because the Rune slice is exposed directly in the Regexp,
3110
+ // there is not an opportunity to change the representation to allow
3111
+ // partial sharing between different character classes.
3112
+ // So the limit is the best we can do.
3113
+ static MAX_RUNES = 33554432; // (128 << 20) / 4 (runeSize, int32 is 4 bytes)
3057
3114
 
3058
3115
  // RangeTables are represented as int[][], a list of triples (start, end,
3059
3116
  // stride).
@@ -3352,7 +3409,7 @@ class Parser {
3352
3409
  case Codepoint.CODES.get('v'):
3353
3410
  return Codepoint.CODES.get('\v');
3354
3411
  default:
3355
- if (!Utils.isalnum(c)) {
3412
+ if (c <= Unicode.MAX_ASCII && !Utils.isalnum(c)) {
3356
3413
  return c;
3357
3414
  }
3358
3415
  break;
@@ -3386,6 +3443,12 @@ class Parser {
3386
3443
  // Stack of parsed expressions.
3387
3444
  this.stack = [];
3388
3445
  this.free = null;
3446
+ // State used by the limit checks (checkLimits, checkSize, checkHeight)
3447
+ this.numRegexp = 0; // number of regexps allocated
3448
+ this.numRunes = 0; // number of runes in char classes
3449
+ this.repeats = 0; // product of all repetitions seen
3450
+ this.height = null; // regexp height, for height limit check
3451
+ this.size = null; // regexp compiled size, for size limit check
3389
3452
  }
3390
3453
 
3391
3454
  // Allocate a Regexp, from the free list if possible.
@@ -3397,15 +3460,159 @@ class Parser {
3397
3460
  re.op = op;
3398
3461
  } else {
3399
3462
  re = new Regexp(op);
3463
+ this.numRegexp += 1;
3400
3464
  }
3401
3465
  return re;
3402
3466
  }
3403
3467
  reuse(re) {
3468
+ if (this.height !== null && Object.prototype.hasOwnProperty.call(this.height, re)) {
3469
+ delete this.height[re];
3470
+ }
3404
3471
  if (re.subs !== null && re.subs.length > 0) {
3405
3472
  re.subs[0] = this.free;
3406
3473
  }
3407
3474
  this.free = re;
3408
3475
  }
3476
+ checkLimits(re) {
3477
+ if (this.numRunes > Parser.MAX_RUNES) {
3478
+ throw new RE2JSSyntaxException(Parser.ERR_LARGE);
3479
+ }
3480
+ this.checkSize(re);
3481
+ this.checkHeight(re);
3482
+ }
3483
+ checkSize(re) {
3484
+ if (this.size === null) {
3485
+ // We haven't started tracking size yet.
3486
+ // Do a relatively cheap check to see if we need to start.
3487
+ // Maintain the product of all the repeats we've seen
3488
+ // and don't track if the total number of regexp nodes
3489
+ // we've seen times the repeat product is in budget.
3490
+ if (this.repeats === 0) {
3491
+ this.repeats = 1;
3492
+ }
3493
+ if (re.op === Regexp.Op.REPEAT) {
3494
+ let n = re.max;
3495
+ if (n === -1) {
3496
+ n = re.min;
3497
+ }
3498
+ if (n <= 0) {
3499
+ n = 1;
3500
+ }
3501
+ if (n > Parser.MAX_SIZE / this.repeats) {
3502
+ this.repeats = Parser.MAX_SIZE;
3503
+ } else {
3504
+ this.repeats *= n;
3505
+ }
3506
+ }
3507
+ if (this.numRegexp < Parser.MAX_SIZE / this.repeats) {
3508
+ return;
3509
+ }
3510
+
3511
+ // We need to start tracking size.
3512
+ // Make the map and belatedly populate it
3513
+ // with info about everything we've constructed so far.
3514
+ this.size = {};
3515
+ for (let reEx of this.stack) {
3516
+ this.checkSize(reEx);
3517
+ }
3518
+ }
3519
+ if (this.calcSize(re, true) > Parser.MAX_SIZE) {
3520
+ throw new RE2JSSyntaxException(Parser.ERR_LARGE);
3521
+ }
3522
+ }
3523
+ calcSize(re, force = false) {
3524
+ if (!force) {
3525
+ if (Object.prototype.hasOwnProperty.call(this.size, re)) {
3526
+ return this.size[re];
3527
+ }
3528
+ }
3529
+ let size = 0;
3530
+ switch (re.op) {
3531
+ case Regexp.Op.LITERAL:
3532
+ {
3533
+ size = re.runes.length;
3534
+ break;
3535
+ }
3536
+ case Regexp.Op.CAPTURE:
3537
+ case Regexp.Op.STAR:
3538
+ {
3539
+ // star can be 1+ or 2+; assume 2 pessimistically
3540
+ size = 2 + this.calcSize(re.subs[0]);
3541
+ break;
3542
+ }
3543
+ case Regexp.Op.PLUS:
3544
+ case Regexp.Op.QUEST:
3545
+ {
3546
+ size = 1 + this.calcSize(re.subs[0]);
3547
+ break;
3548
+ }
3549
+ case Regexp.Op.CONCAT:
3550
+ {
3551
+ for (let sub of re.subs) {
3552
+ size = size + this.calcSize(sub);
3553
+ }
3554
+ break;
3555
+ }
3556
+ case Regexp.Op.ALTERNATE:
3557
+ {
3558
+ for (let sub of re.subs) {
3559
+ size = size + this.calcSize(sub);
3560
+ }
3561
+ if (re.subs.length > 1) {
3562
+ size = size + re.subs.length - 1;
3563
+ }
3564
+ break;
3565
+ }
3566
+ case Regexp.Op.REPEAT:
3567
+ {
3568
+ let sub = this.calcSize(re.subs[0]);
3569
+ if (re.max === -1) {
3570
+ if (re.min === 0) {
3571
+ size = 2 + sub; // x*
3572
+ } else {
3573
+ size = 1 + re.min * sub; // xxx+
3574
+ }
3575
+ break;
3576
+ }
3577
+ // x{2,5} = xx(x(x(x)?)?)?
3578
+ size = re.max * sub + (re.max - re.min);
3579
+ break;
3580
+ }
3581
+ }
3582
+ size = Math.max(1, size);
3583
+ this.size[re] = size;
3584
+ return size;
3585
+ }
3586
+ checkHeight(re) {
3587
+ if (this.numRegexp < Parser.MAX_HEIGHT) {
3588
+ return;
3589
+ }
3590
+ if (this.height === null) {
3591
+ this.height = {};
3592
+ for (let reEx of this.stack) {
3593
+ this.checkHeight(reEx);
3594
+ }
3595
+ }
3596
+ if (this.calcHeight(re, true) > Parser.MAX_HEIGHT) {
3597
+ throw new RE2JSSyntaxException(Parser.ERR_NESTING_DEPTH);
3598
+ }
3599
+ }
3600
+ calcHeight(re, force = false) {
3601
+ if (!force) {
3602
+ if (Object.prototype.hasOwnProperty.call(this.height, re)) {
3603
+ return this.height[re];
3604
+ }
3605
+ }
3606
+ let h = 1;
3607
+ for (let sub of re.subs) {
3608
+ const hsub = this.calcHeight(sub);
3609
+ if (h < 1 + hsub) {
3610
+ h = 1 + hsub;
3611
+ }
3612
+ }
3613
+ this.height[re] = h;
3614
+ return h;
3615
+ }
3409
3616
 
3410
3617
  // Parse stack manipulation.
3411
3618
 
@@ -3426,13 +3633,14 @@ class Parser {
3426
3633
  // push pushes the regexp re onto the parse stack and returns the regexp.
3427
3634
  // Returns null for a CHAR_CLASS that can be merged with the top-of-stack.
3428
3635
  push(re) {
3636
+ this.numRunes += re.runes.length;
3429
3637
  if (re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 2 && re.runes[0] === re.runes[1]) {
3430
- if (this.maybeConcat(re.runes[0], this.flags & ~RE2Flags.FOLD_CASE)) {
3638
+ if (this.maybeConcat(re.runes[0], this.flags & -2)) {
3431
3639
  return null;
3432
3640
  }
3433
3641
  re.op = Regexp.Op.LITERAL;
3434
3642
  re.runes = [re.runes[0]];
3435
- re.flags = this.flags & ~RE2Flags.FOLD_CASE;
3643
+ re.flags = this.flags & -2;
3436
3644
  } else if (re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 4 && re.runes[0] === re.runes[1] && re.runes[2] === re.runes[3] && Unicode.simpleFold(re.runes[0]) === re.runes[2] && Unicode.simpleFold(re.runes[2]) === re.runes[0] || re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 2 && re.runes[0] + 1 === re.runes[1] && Unicode.simpleFold(re.runes[0]) === re.runes[1] && Unicode.simpleFold(re.runes[1]) === re.runes[0]) {
3437
3645
  // Case-insensitive rune like [Aa] or [Δδ].
3438
3646
  if (this.maybeConcat(re.runes[0], this.flags | RE2Flags.FOLD_CASE)) {
@@ -3447,6 +3655,7 @@ class Parser {
3447
3655
  this.maybeConcat(-1, 0);
3448
3656
  }
3449
3657
  this.stack.push(re);
3658
+ this.checkLimits(re);
3450
3659
  return re;
3451
3660
  }
3452
3661
 
@@ -3540,6 +3749,43 @@ class Parser {
3540
3749
  re.flags = flags;
3541
3750
  re.subs = [sub];
3542
3751
  this.stack[n - 1] = re;
3752
+ this.checkLimits(re);
3753
+ if (op === Regexp.Op.REPEAT && (min >= 2 || max >= 2) && !this.repeatIsValid(re, 1000)) {
3754
+ throw new RE2JSSyntaxException(Parser.ERR_INVALID_REPEAT_SIZE, t.from(beforePos));
3755
+ }
3756
+ }
3757
+
3758
+ // repeatIsValid reports whether the repetition re is valid.
3759
+ // Valid means that the combination of the top-level repetition
3760
+ // and any inner repetitions does not exceed n copies of the
3761
+ // innermost thing.
3762
+ // This function rewalks the regexp tree and is called for every repetition,
3763
+ // so we have to worry about inducing quadratic behavior in the parser.
3764
+ // We avoid this by only calling repeatIsValid when min or max >= 2.
3765
+ // In that case the depth of any >= 2 nesting can only get to 9 without
3766
+ // triggering a parse error, so each subtree can only be rewalked 9 times.
3767
+ repeatIsValid(re, n) {
3768
+ if (re.op === Regexp.Op.REPEAT) {
3769
+ let m = re.max;
3770
+ if (m === 0) {
3771
+ return true;
3772
+ }
3773
+ if (m < 0) {
3774
+ m = re.min;
3775
+ }
3776
+ if (m > n) {
3777
+ return false;
3778
+ }
3779
+ if (m > 0) {
3780
+ n = Math.trunc(n / m);
3781
+ }
3782
+ }
3783
+ for (let sub of re.subs) {
3784
+ if (!this.repeatIsValid(sub, n)) {
3785
+ return false;
3786
+ }
3787
+ }
3788
+ return true;
3543
3789
  }
3544
3790
 
3545
3791
  // concat replaces the top of the stack (above the topmost '|' or '(') with
@@ -3577,10 +3823,10 @@ class Parser {
3577
3823
  if (re.op === Regexp.Op.CHAR_CLASS) {
3578
3824
  re.runes = new CharClass(re.runes).cleanClass().toArray();
3579
3825
  if (re.runes.length === 2 && re.runes[0] === 0 && re.runes[1] === Unicode.MAX_RUNE) {
3580
- re.runes = null;
3826
+ re.runes = [];
3581
3827
  re.op = Regexp.Op.ANY_CHAR;
3582
3828
  } else if (re.runes.length === 4 && re.runes[0] === 0 && re.runes[1] === Codepoint.CODES.get('\n') - 1 && re.runes[2] === Codepoint.CODES.get('\n') + 1 && re.runes[3] === Unicode.MAX_RUNE) {
3583
- re.runes = null;
3829
+ re.runes = [];
3584
3830
  re.op = Regexp.Op.ANY_CHAR_NOT_NL;
3585
3831
  }
3586
3832
  }
@@ -3715,6 +3961,7 @@ class Parser {
3715
3961
  prefix.runes = str.slice(0, strlen);
3716
3962
  for (let j = start; j < i; j++) {
3717
3963
  array[s + j] = this.removeLeadingString(array[s + j], strlen);
3964
+ this.checkLimits(array[s + j]);
3718
3965
  }
3719
3966
  // Recurse.
3720
3967
  const suffix = this.collapse(array.slice(s + start, s + i), Regexp.Op.ALTERNATE);
@@ -3764,6 +4011,7 @@ class Parser {
3764
4011
  for (let j = start; j < i; j++) {
3765
4012
  const reuse = j !== start; // prefix came from sub[start]
3766
4013
  array[s + j] = this.removeLeadingRegexp(array[s + j], reuse);
4014
+ this.checkLimits(array[s + j]);
3767
4015
  }
3768
4016
  // recurse
3769
4017
  const suffix = this.collapse(array.slice(s + start, s + i), Regexp.Op.ALTERNATE);
@@ -4147,7 +4395,7 @@ class Parser {
4147
4395
  t.skip(2); // "(?"
4148
4396
 
4149
4397
  let flags = this.flags;
4150
- let sign = +1;
4398
+ let sign = 1;
4151
4399
  let sawFlag = false;
4152
4400
  loop: while (t.more()) {
4153
4401
  {
@@ -4158,7 +4406,7 @@ class Parser {
4158
4406
  sawFlag = true;
4159
4407
  break;
4160
4408
  case Codepoint.CODES.get('m'):
4161
- flags &= ~RE2Flags.ONE_LINE;
4409
+ flags &= -17;
4162
4410
  sawFlag = true;
4163
4411
  break;
4164
4412
  case Codepoint.CODES.get('s'):
@@ -4264,12 +4512,12 @@ class Parser {
4264
4512
  this.alternate();
4265
4513
  const n = this.stack.length;
4266
4514
  if (n < 2) {
4267
- throw new RE2JSSyntaxException(Parser.ERR_INTERNAL_ERROR, 'stack underflow');
4515
+ throw new RE2JSSyntaxException(Parser.ERR_UNEXPECTED_PAREN, this.wholeRegexp);
4268
4516
  }
4269
4517
  const re1 = this.pop();
4270
4518
  const re2 = this.pop();
4271
4519
  if (re2.op !== Regexp.Op.LEFT_PAREN) {
4272
- throw new RE2JSSyntaxException(Parser.ERR_MISSING_PAREN, this.wholeRegexp);
4520
+ throw new RE2JSSyntaxException(Parser.ERR_UNEXPECTED_PAREN, this.wholeRegexp);
4273
4521
  }
4274
4522
  // Restore flags at time of paren.
4275
4523
  this.flags = re2.flags;
@@ -4339,7 +4587,7 @@ class Parser {
4339
4587
  }
4340
4588
  t.skip(1); // '\\'
4341
4589
  // Committed to parse or throw exception.
4342
- let sign = +1;
4590
+ let sign = 1;
4343
4591
  let c = t.pop(); // 'p' or 'P'
4344
4592
  if (c === Codepoint.CODES.get('P')) {
4345
4593
  sign = -1;
@@ -4403,7 +4651,7 @@ class Parser {
4403
4651
  const re = this.newRegexp(Regexp.Op.CHAR_CLASS);
4404
4652
  re.flags = this.flags;
4405
4653
  const cc = new CharClass();
4406
- let sign = +1;
4654
+ let sign = 1;
4407
4655
  if (t.more() && t.lookingAt('^')) {
4408
4656
  sign = -1;
4409
4657
  t.skip(1); // '^'
@@ -4946,6 +5194,13 @@ class RE2 {
4946
5194
  return this.numSubexp;
4947
5195
  }
4948
5196
 
5197
+ /**
5198
+ * Returns the number of instructions in this compiled regular expression program.
5199
+ */
5200
+ numberOfInstructions() {
5201
+ return this.prog.numInst();
5202
+ }
5203
+
4949
5204
  // get() returns a machine to use for matching |this|. It uses |this|'s
4950
5205
  // machine cache if possible, to avoid unnecessary allocation.
4951
5206
  get() {
@@ -5578,7 +5833,7 @@ class RE2JS {
5578
5833
  }
5579
5834
  let re2Flags = RE2Flags.PERL;
5580
5835
  if ((flags & RE2JS.DISABLE_UNICODE_GROUPS) !== 0) {
5581
- re2Flags &= ~RE2Flags.UNICODE_GROUPS;
5836
+ re2Flags &= -129;
5582
5837
  }
5583
5838
  const p = new RE2JS(regex, flags);
5584
5839
  // The compiled RE2 regexp.
@@ -5746,6 +6001,20 @@ class RE2JS {
5746
6001
  return this.patternInput;
5747
6002
  }
5748
6003
 
6004
+ /**
6005
+ * Returns the program size of this pattern.
6006
+ *
6007
+ * <p>
6008
+ * Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
6009
+ * "cost". Larger numbers are more expensive than smaller numbers.
6010
+ * </p>
6011
+ *
6012
+ * @returns {number} the program size of this pattern
6013
+ */
6014
+ programSize() {
6015
+ return this.re2Input.numberOfInstructions();
6016
+ }
6017
+
5749
6018
  /**
5750
6019
  * Returns the number of capturing groups in this matcher's pattern. Group zero denotes the entire
5751
6020
  * pattern and is excluded from this count.