re2js 0.4.3 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -0
- package/build/index.cjs.cjs +290 -21
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +23 -0
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +290 -21
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +290 -21
- package/build/index.umd.js.map +1 -1
- package/package.json +9 -9
package/build/index.esm.d.ts
CHANGED
|
@@ -120,6 +120,17 @@ export class RE2JS {
|
|
|
120
120
|
* @returns {string}
|
|
121
121
|
*/
|
|
122
122
|
toString(): string;
|
|
123
|
+
/**
|
|
124
|
+
* Returns the program size of this pattern.
|
|
125
|
+
*
|
|
126
|
+
* <p>
|
|
127
|
+
* Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
|
|
128
|
+
* "cost". Larger numbers are more expensive than smaller numbers.
|
|
129
|
+
* </p>
|
|
130
|
+
*
|
|
131
|
+
* @returns {number} the program size of this pattern
|
|
132
|
+
*/
|
|
133
|
+
programSize(): number;
|
|
123
134
|
/**
|
|
124
135
|
* Returns the number of capturing groups in this matcher's pattern. Group zero denotes the entire
|
|
125
136
|
* pattern and is excluded from this count.
|
|
@@ -215,6 +226,7 @@ declare class Matcher {
|
|
|
215
226
|
patternGroupCount: any;
|
|
216
227
|
groups: any[];
|
|
217
228
|
namedGroups: any;
|
|
229
|
+
numberOfInstructions: any;
|
|
218
230
|
/**
|
|
219
231
|
* Returns the {@code RE2JS} associated with this {@code Matcher}.
|
|
220
232
|
* @returns {RE2JS}
|
|
@@ -251,6 +263,17 @@ declare class Matcher {
|
|
|
251
263
|
* @returns {string}
|
|
252
264
|
*/
|
|
253
265
|
end(group?: string | number): string;
|
|
266
|
+
/**
|
|
267
|
+
* Returns the program size of this pattern.
|
|
268
|
+
*
|
|
269
|
+
* <p>
|
|
270
|
+
* Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
|
|
271
|
+
* "cost". Larger numbers are more expensive than smaller numbers.
|
|
272
|
+
* </p>
|
|
273
|
+
*
|
|
274
|
+
* @return the program size of this pattern
|
|
275
|
+
*/
|
|
276
|
+
programSize(): any;
|
|
254
277
|
/**
|
|
255
278
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
256
279
|
* @param {string|number} [group=0]
|
package/build/index.esm.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"AAqoLA;;;;;;;;;GASG;AACH;IACE;;OAEG;IACH,gCAA4B;IAC5B;;OAEG;IACH,sBAAkB;IAClB;;;OAGG;IACH,yBAAqB;IACrB;;OAEG;IACH,sCAAkC;IAClC;;OAEG;IACH,6BAA0B;IAE1B;;;;;;;;;;OAUG;IACH,kBAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;;OAKG;IACH,sBAJW,MAAM,UACN,MAAM,GACJ,KAAK,CAwBjB;IAED;;;;;;;OAOG;IACH,sBALW,MAAM,SACN,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAKnB;IAED;;;OAGG;IACH,wBAWC;IAED;;;;OAIG;IACH,qBAHW,MAAM,SACN,MAAM,EAOhB;IAHC,qBAA2B;IAE3B,mBAAuB;IAGzB;;;OAGG;IACH,cAEC;IAED;;;OAGG;IACH,SAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,CAIlB;IACD,WAEC;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAInB;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAOnB;IAED;;;;;;;;;;;;OAYG;IACH,aAJW,MAAM,UACN,MAAM,GACJ,MAAM,EAAE,CAgDpB;IAED;;;OAGG;IACH,YAFa,MAAM,CAIlB;IAED;;;;;;;;;OASG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;OAKG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,eAFa,GAAC,CAIb;IAED;;;;OAIG;IACH,cAHW,GAAC,GACC,OAAO,CAUnB;CACF;AA3lKD;;GAEG;AACH;CAKC;AA9CD;IACE,0BAGC;CACF;AAqDD;;GAEG;AACH;CAKC;AAlBD;;GAEG;AACH;CAKC;AAjDD;;GAEG;AACH;IACE,qCAUC;IAFC,WAAkB;IAClB,WAAkB;IAGpB;;OAEG;IACH,sBAEC;IAED;;OAEG;IACH,kBAEC;CACF;AAgCD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH;IACE;;;;;;OAMG;IACH,6BAHW,MAAM,GACJ,MAAM,CAalB;IACD;;;;OAIG;IACH,qBAHW,KAAK,SACL,gBAAgB,GAAC,iBAAiB,GAAC,MAAM,EAAE,GAAC,MAAM,EAsB5D;IAfC,oBAA2B;IAG3B,uBAAsD;IAEtD,cAAgB;IAChB,iBAAkC;IAClC,0BAAsD;IAUxD;;;OAGG;IACH,WAFa,KAAK,CAIjB;IAED;;;;OAIG;IACH,SAFa,OAAO,CAenB;IAXC,wBAAoD;IAEpD,2BAAkB;IAElB,kBAAqB;IAGrB,mBAAsB;IAEtB,mBAAmB;IAIrB;;;OAGG;IACH,+BAFa,OAAO,CASnB;IAHC,kBAAyB;IAK3B;;;;;OAKG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;OAKG;IACH,YAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;;;;;OASG;IACH,mBAEC;IAED;;;;OAIG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAgBlB;IACD;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,kBAqBC;IAED;;;;;OAKG;IACH,WAFa,OAAO,CAInB;IAED;;;;;OAKG;IACH,aAFa,OAAO,CAInB;IAED;;;;;;;OAOG;IACH,aAJW,MAAM,GAAC,MAAM,GACX,OAAO,CAoBnB;IAED;;;;;;OAMG;IAC
H,iBAWC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,OACN,MAAM,GACJ,MAAM,CAOlB;IAED;;;OAGG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,0BAUC;IAED;;;;OAIG;IACH,kCA2DC;IAED;;;;OAIG;IACH,sCAiFC;IAED;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;;;;;OAQG;IACH,wBALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;;OAQG;IACH,0BALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;OAOG;IACH,gBAWC;CACF;AA7rBD;IACE,yBAGC;IADC,WAAkB;IAEpB,mBAEC;IACD;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,EAAE,CAIpB;IAED;;;OAGG;IACH,UAFa,MAAM,CAIlB;CACF;AACD;IACE,gCAGC;IADC,kBAAgC;IAElC,mBAEC;IAED;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,EAAE,CAIpB;IAED;;;OAGG;IACH,UAFa,MAAM,CAIlB;CACF;AAzFD;;GAEG;AACH;IACE,8BAA4D;IAC5D,oBAEC;IAED;;;OAGG;IACH,kBAFa,OAAO,CAInB;IAED;;;OAGG;IACH,mBAFa,OAAO,CAInB;CACF"}
|
package/build/index.esm.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version
|
|
5
|
+
* @version v1.0.1
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -926,6 +926,7 @@ class Matcher {
|
|
|
926
926
|
// The group indexes, in [start, end) pairs. Zeroth pair is overall match.
|
|
927
927
|
this.groups = [];
|
|
928
928
|
this.namedGroups = re2.namedGroups;
|
|
929
|
+
this.numberOfInstructions = re2.numberOfInstructions();
|
|
929
930
|
if (input instanceof MatcherInputBase) {
|
|
930
931
|
this.resetMatcherInput(input);
|
|
931
932
|
} else if (Array.isArray(input)) {
|
|
@@ -1012,6 +1013,20 @@ class Matcher {
|
|
|
1012
1013
|
return this.groups[2 * group + 1];
|
|
1013
1014
|
}
|
|
1014
1015
|
|
|
1016
|
+
/**
|
|
1017
|
+
* Returns the program size of this pattern.
|
|
1018
|
+
*
|
|
1019
|
+
* <p>
|
|
1020
|
+
* Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
|
|
1021
|
+
* "cost". Larger numbers are more expensive than smaller numbers.
|
|
1022
|
+
* </p>
|
|
1023
|
+
*
|
|
1024
|
+
* @return the program size of this pattern
|
|
1025
|
+
*/
|
|
1026
|
+
programSize() {
|
|
1027
|
+
return this.numberOfInstructions;
|
|
1028
|
+
}
|
|
1029
|
+
|
|
1015
1030
|
/**
|
|
1016
1031
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
1017
1032
|
* @param {string|number} [group=0]
|
|
@@ -1652,7 +1667,7 @@ class Regexp {
|
|
|
1652
1667
|
// subexpressions, if any. Never null.
|
|
1653
1668
|
// subs[0] is used as the freelist.
|
|
1654
1669
|
this.subs = Regexp.emptySubs();
|
|
1655
|
-
this.runes =
|
|
1670
|
+
this.runes = []; // matched runes, for LITERAL, CHAR_CLASS
|
|
1656
1671
|
this.min = 0; // min for REPEAT
|
|
1657
1672
|
this.max = 0; // max for REPEAT
|
|
1658
1673
|
this.cap = 0; // capturing index, for CAPTURE
|
|
@@ -1662,7 +1677,7 @@ class Regexp {
|
|
|
1662
1677
|
reinit() {
|
|
1663
1678
|
this.flags = 0;
|
|
1664
1679
|
this.subs = Regexp.emptySubs();
|
|
1665
|
-
this.runes =
|
|
1680
|
+
this.runes = [];
|
|
1666
1681
|
this.cap = 0;
|
|
1667
1682
|
this.min = 0;
|
|
1668
1683
|
this.max = 0;
|
|
@@ -1963,7 +1978,7 @@ class Inst {
|
|
|
1963
1978
|
this.arg = 0; // ALT, ALT_MATCH, CAPTURE, EMPTY_WIDTH
|
|
1964
1979
|
// length==1 => exact match
|
|
1965
1980
|
// otherwise a list of [lo,hi] pairs. hi is *inclusive*.
|
|
1966
|
-
this.runes =
|
|
1981
|
+
this.runes = [];
|
|
1967
1982
|
}
|
|
1968
1983
|
|
|
1969
1984
|
// MatchRune returns true if the instruction matches (and consumes) r.
|
|
@@ -2357,7 +2372,7 @@ class Compiler {
|
|
|
2357
2372
|
i.runes = runes;
|
|
2358
2373
|
flags &= RE2Flags.FOLD_CASE;
|
|
2359
2374
|
if (runes.length !== 1 || Unicode.simpleFold(runes[0]) === runes[0]) {
|
|
2360
|
-
flags &=
|
|
2375
|
+
flags &= -2;
|
|
2361
2376
|
}
|
|
2362
2377
|
i.arg = flags;
|
|
2363
2378
|
f.out = f.i << 1;
|
|
@@ -2474,7 +2489,7 @@ class Simplify {
|
|
|
2474
2489
|
const nsub = Simplify.simplify(sub);
|
|
2475
2490
|
if (nre === re && nsub !== sub) {
|
|
2476
2491
|
nre = Regexp.fromRegexp(re);
|
|
2477
|
-
nre.runes =
|
|
2492
|
+
nre.runes = [];
|
|
2478
2493
|
nre.subs = re.subs.slice(0, re.subs.length);
|
|
2479
2494
|
}
|
|
2480
2495
|
if (nre !== re) {
|
|
@@ -2615,7 +2630,7 @@ class CharGroup {
|
|
|
2615
2630
|
const code1 = [0x30, 0x39];
|
|
2616
2631
|
const code2 = [0x9, 0xa, 0xc, 0xd, 0x20, 0x20];
|
|
2617
2632
|
const code3 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a];
|
|
2618
|
-
const PERL_GROUPS = new Map([['\\d', new CharGroup(
|
|
2633
|
+
const PERL_GROUPS = new Map([['\\d', new CharGroup(1, code1)], ['\\D', new CharGroup(-1, code1)], ['\\s', new CharGroup(1, code2)], ['\\S', new CharGroup(-1, code2)], ['\\w', new CharGroup(1, code3)], ['\\W', new CharGroup(-1, code3)]]);
|
|
2619
2634
|
const code4 = [0x30, 0x39, 0x41, 0x5a, 0x61, 0x7a];
|
|
2620
2635
|
const code5 = [0x41, 0x5a, 0x61, 0x7a];
|
|
2621
2636
|
const code6 = [0x0, 0x7f];
|
|
@@ -2630,7 +2645,7 @@ const code14 = [0x9, 0xd, 0x20, 0x20];
|
|
|
2630
2645
|
const code15 = [0x41, 0x5a];
|
|
2631
2646
|
const code16 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a];
|
|
2632
2647
|
const code17 = [0x30, 0x39, 0x41, 0x46, 0x61, 0x66];
|
|
2633
|
-
const POSIX_GROUPS = new Map([['[:alnum:]', new CharGroup(
|
|
2648
|
+
const POSIX_GROUPS = new Map([['[:alnum:]', new CharGroup(1, code4)], ['[:^alnum:]', new CharGroup(-1, code4)], ['[:alpha:]', new CharGroup(1, code5)], ['[:^alpha:]', new CharGroup(-1, code5)], ['[:ascii:]', new CharGroup(1, code6)], ['[:^ascii:]', new CharGroup(-1, code6)], ['[:blank:]', new CharGroup(1, code7)], ['[:^blank:]', new CharGroup(-1, code7)], ['[:cntrl:]', new CharGroup(1, code8)], ['[:^cntrl:]', new CharGroup(-1, code8)], ['[:digit:]', new CharGroup(1, code9)], ['[:^digit:]', new CharGroup(-1, code9)], ['[:graph:]', new CharGroup(1, code10)], ['[:^graph:]', new CharGroup(-1, code10)], ['[:lower:]', new CharGroup(1, code11)], ['[:^lower:]', new CharGroup(-1, code11)], ['[:print:]', new CharGroup(1, code12)], ['[:^print:]', new CharGroup(-1, code12)], ['[:punct:]', new CharGroup(1, code13)], ['[:^punct:]', new CharGroup(-1, code13)], ['[:space:]', new CharGroup(1, code14)], ['[:^space:]', new CharGroup(-1, code14)], ['[:upper:]', new CharGroup(1, code15)], ['[:^upper:]', new CharGroup(-1, code15)], ['[:word:]', new CharGroup(1, code16)], ['[:^word:]', new CharGroup(-1, code16)], ['[:xdigit:]', new CharGroup(1, code17)], ['[:^xdigit:]', new CharGroup(-1, code17)]]);
|
|
2634
2649
|
|
|
2635
2650
|
/**
|
|
2636
2651
|
* A "builder"-style helper class for manipulating character classes represented as an array of
|
|
@@ -2669,7 +2684,7 @@ class CharClass {
|
|
|
2669
2684
|
// qsortIntPair() quicksorts pairs of ints in |array| according to lt().
|
|
2670
2685
|
// Precondition: |left|, |right|, |this.len| must all be even; |this.len > 1|.
|
|
2671
2686
|
static qsortIntPair(array, left, right) {
|
|
2672
|
-
const pivotIndex = ((left + right) / 2 | 0) &
|
|
2687
|
+
const pivotIndex = ((left + right) / 2 | 0) & -2;
|
|
2673
2688
|
const pivotFrom = array[pivotIndex];
|
|
2674
2689
|
const pivotTo = array[pivotIndex + 1];
|
|
2675
2690
|
let i = left;
|
|
@@ -3054,6 +3069,48 @@ class Parser {
|
|
|
3054
3069
|
static ERR_MISSING_REPEAT_ARGUMENT = 'missing argument to repetition operator';
|
|
3055
3070
|
static ERR_TRAILING_BACKSLASH = 'trailing backslash at end of expression';
|
|
3056
3071
|
static ERR_DUPLICATE_NAMED_CAPTURE = 'duplicate capture group name';
|
|
3072
|
+
static ERR_UNEXPECTED_PAREN = 'unexpected )';
|
|
3073
|
+
static ERR_NESTING_DEPTH = 'expression nests too deeply';
|
|
3074
|
+
static ERR_LARGE = 'expression too large';
|
|
3075
|
+
|
|
3076
|
+
// maxHeight is the maximum height of a regexp parse tree.
|
|
3077
|
+
// It is somewhat arbitrarily chosen, but the idea is to be large enough
|
|
3078
|
+
// that no one will actually hit in real use but at the same time small enough
|
|
3079
|
+
// that recursion on the Regexp tree will not hit the 1GB Go stack limit.
|
|
3080
|
+
// The maximum amount of stack for a single recursive frame is probably
|
|
3081
|
+
// closer to 1kB, so this could potentially be raised, but it seems unlikely
|
|
3082
|
+
// that people have regexps nested even this deeply.
|
|
3083
|
+
// We ran a test on Google's C++ code base and turned up only
|
|
3084
|
+
// a single use case with depth > 100; it had depth 128.
|
|
3085
|
+
// Using depth 1000 should be plenty of margin.
|
|
3086
|
+
// As an optimization, we don't even bother calculating heights
|
|
3087
|
+
// until we've allocated at least maxHeight Regexp structures.
|
|
3088
|
+
static MAX_HEIGHT = 1000;
|
|
3089
|
+
|
|
3090
|
+
// maxSize is the maximum size of a compiled regexp in Insts.
|
|
3091
|
+
// It too is somewhat arbitrarily chosen, but the idea is to be large enough
|
|
3092
|
+
// to allow significant regexps while at the same time small enough that
|
|
3093
|
+
// the compiled form will not take up too much memory.
|
|
3094
|
+
// 128 MB is enough for 3.3 million Inst structures, which roughly
|
|
3095
|
+
// corresponds to a 3.3 MB regexp.
|
|
3096
|
+
static MAX_SIZE = 3355443; // 128 << 20 / (5 * 8) (instSize = byte, 2 uint32, slice is 5 64-bit words)
|
|
3097
|
+
|
|
3098
|
+
// maxRunes is the maximum number of runes allowed in a regexp tree
|
|
3099
|
+
// counting the runes in all the nodes.
|
|
3100
|
+
// Ignoring character classes, p.numRunes is always less than the length of the regexp.
|
|
3101
|
+
// Character classes can make it much larger: each \pL adds 1292 runes.
|
|
3102
|
+
// 128 MB is enough for 32M runes, which is over 26k \pL instances.
|
|
3103
|
+
// Note that repetitions do not make copies of the rune slices,
|
|
3104
|
+
// so \pL{1000} is only one rune slice, not 1000.
|
|
3105
|
+
// We could keep a cache of character classes we've seen,
|
|
3106
|
+
// so that all the \pL we see use the same rune list,
|
|
3107
|
+
// but that doesn't remove the problem entirely:
|
|
3108
|
+
// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].
|
|
3109
|
+
// And because the Rune slice is exposed directly in the Regexp,
|
|
3110
|
+
// there is not an opportunity to change the representation to allow
|
|
3111
|
+
// partial sharing between different character classes.
|
|
3112
|
+
// So the limit is the best we can do.
|
|
3113
|
+
static MAX_RUNES = 33554432; // 128 << 20 / 4 (runeSize, int32 is 4 bytes)
|
|
3057
3114
|
|
|
3058
3115
|
// RangeTables are represented as int[][], a list of triples (start, end,
|
|
3059
3116
|
// stride).
|
|
@@ -3352,7 +3409,7 @@ class Parser {
|
|
|
3352
3409
|
case Codepoint.CODES.get('v'):
|
|
3353
3410
|
return Codepoint.CODES.get('\v');
|
|
3354
3411
|
default:
|
|
3355
|
-
if (!Utils.isalnum(c)) {
|
|
3412
|
+
if (c <= Unicode.MAX_ASCII && !Utils.isalnum(c)) {
|
|
3356
3413
|
return c;
|
|
3357
3414
|
}
|
|
3358
3415
|
break;
|
|
@@ -3386,6 +3443,12 @@ class Parser {
|
|
|
3386
3443
|
// Stack of parsed expressions.
|
|
3387
3444
|
this.stack = [];
|
|
3388
3445
|
this.free = null;
|
|
3446
|
+
// checks
|
|
3447
|
+
this.numRegexp = 0; // number of regexps allocated
|
|
3448
|
+
this.numRunes = 0; // number of runes in char classes
|
|
3449
|
+
this.repeats = 0; // product of all repetitions seen
|
|
3450
|
+
this.height = null; // regexp height, for height limit check
|
|
3451
|
+
this.size = null; // regexp compiled size, for size limit check
|
|
3389
3452
|
}
|
|
3390
3453
|
|
|
3391
3454
|
// Allocate a Regexp, from the free list if possible.
|
|
@@ -3397,15 +3460,159 @@ class Parser {
|
|
|
3397
3460
|
re.op = op;
|
|
3398
3461
|
} else {
|
|
3399
3462
|
re = new Regexp(op);
|
|
3463
|
+
this.numRegexp += 1;
|
|
3400
3464
|
}
|
|
3401
3465
|
return re;
|
|
3402
3466
|
}
|
|
3403
3467
|
reuse(re) {
|
|
3468
|
+
if (this.height !== null && Object.prototype.hasOwnProperty.call(this.height, re)) {
|
|
3469
|
+
delete this.height[re];
|
|
3470
|
+
}
|
|
3404
3471
|
if (re.subs !== null && re.subs.length > 0) {
|
|
3405
3472
|
re.subs[0] = this.free;
|
|
3406
3473
|
}
|
|
3407
3474
|
this.free = re;
|
|
3408
3475
|
}
|
|
3476
|
+
checkLimits(re) {
|
|
3477
|
+
if (this.numRunes > Parser.MAX_RUNES) {
|
|
3478
|
+
throw new RE2JSSyntaxException(Parser.ERR_LARGE);
|
|
3479
|
+
}
|
|
3480
|
+
this.checkSize(re);
|
|
3481
|
+
this.checkHeight(re);
|
|
3482
|
+
}
|
|
3483
|
+
checkSize(re) {
|
|
3484
|
+
if (this.size === null) {
|
|
3485
|
+
// We haven't started tracking size yet.
|
|
3486
|
+
// Do a relatively cheap check to see if we need to start.
|
|
3487
|
+
// Maintain the product of all the repeats we've seen
|
|
3488
|
+
// and don't track if the total number of regexp nodes
|
|
3489
|
+
// we've seen times the repeat product is in budget.
|
|
3490
|
+
if (this.repeats === 0) {
|
|
3491
|
+
this.repeats = 1;
|
|
3492
|
+
}
|
|
3493
|
+
if (re.op === Regexp.Op.REPEAT) {
|
|
3494
|
+
let n = re.max;
|
|
3495
|
+
if (n === -1) {
|
|
3496
|
+
n = re.min;
|
|
3497
|
+
}
|
|
3498
|
+
if (n <= 0) {
|
|
3499
|
+
n = 1;
|
|
3500
|
+
}
|
|
3501
|
+
if (n > Parser.MAX_SIZE / this.repeats) {
|
|
3502
|
+
this.repeats = Parser.MAX_SIZE;
|
|
3503
|
+
} else {
|
|
3504
|
+
this.repeats *= n;
|
|
3505
|
+
}
|
|
3506
|
+
}
|
|
3507
|
+
if (this.numRegexp < Parser.MAX_SIZE / this.repeats) {
|
|
3508
|
+
return;
|
|
3509
|
+
}
|
|
3510
|
+
|
|
3511
|
+
// We need to start tracking size.
|
|
3512
|
+
// Make the map and belatedly populate it
|
|
3513
|
+
// with info about everything we've constructed so far.
|
|
3514
|
+
this.size = {};
|
|
3515
|
+
for (let reEx of this.stack) {
|
|
3516
|
+
this.checkSize(reEx);
|
|
3517
|
+
}
|
|
3518
|
+
}
|
|
3519
|
+
if (this.calcSize(re, true) > Parser.MAX_SIZE) {
|
|
3520
|
+
throw new RE2JSSyntaxException(Parser.ERR_LARGE);
|
|
3521
|
+
}
|
|
3522
|
+
}
|
|
3523
|
+
calcSize(re, force = false) {
|
|
3524
|
+
if (!force) {
|
|
3525
|
+
if (Object.prototype.hasOwnProperty.call(this.size, re)) {
|
|
3526
|
+
return this.size[re];
|
|
3527
|
+
}
|
|
3528
|
+
}
|
|
3529
|
+
let size = 0;
|
|
3530
|
+
switch (re.op) {
|
|
3531
|
+
case Regexp.Op.LITERAL:
|
|
3532
|
+
{
|
|
3533
|
+
size = re.runes.length;
|
|
3534
|
+
break;
|
|
3535
|
+
}
|
|
3536
|
+
case Regexp.Op.CAPTURE:
|
|
3537
|
+
case Regexp.Op.STAR:
|
|
3538
|
+
{
|
|
3539
|
+
// star can be 1+ or 2+; assume 2 pessimistically
|
|
3540
|
+
size = 2 + this.calcSize(re.subs[0]);
|
|
3541
|
+
break;
|
|
3542
|
+
}
|
|
3543
|
+
case Regexp.Op.PLUS:
|
|
3544
|
+
case Regexp.Op.QUEST:
|
|
3545
|
+
{
|
|
3546
|
+
size = 1 + this.calcSize(re.subs[0]);
|
|
3547
|
+
break;
|
|
3548
|
+
}
|
|
3549
|
+
case Regexp.Op.CONCAT:
|
|
3550
|
+
{
|
|
3551
|
+
for (let sub of re.subs) {
|
|
3552
|
+
size = size + this.calcSize(sub);
|
|
3553
|
+
}
|
|
3554
|
+
break;
|
|
3555
|
+
}
|
|
3556
|
+
case Regexp.Op.ALTERNATE:
|
|
3557
|
+
{
|
|
3558
|
+
for (let sub of re.subs) {
|
|
3559
|
+
size = size + this.calcSize(sub);
|
|
3560
|
+
}
|
|
3561
|
+
if (re.subs.length > 1) {
|
|
3562
|
+
size = size + re.subs.length - 1;
|
|
3563
|
+
}
|
|
3564
|
+
break;
|
|
3565
|
+
}
|
|
3566
|
+
case Regexp.Op.REPEAT:
|
|
3567
|
+
{
|
|
3568
|
+
let sub = this.calcSize(re.subs[0]);
|
|
3569
|
+
if (re.max === -1) {
|
|
3570
|
+
if (re.min === 0) {
|
|
3571
|
+
size = 2 + sub; // x*
|
|
3572
|
+
} else {
|
|
3573
|
+
size = 1 + re.min * sub; // xxx+
|
|
3574
|
+
}
|
|
3575
|
+
break;
|
|
3576
|
+
}
|
|
3577
|
+
// x{2,5} = xx(x(x(x)?)?)?
|
|
3578
|
+
size = re.max * sub + (re.max - re.min);
|
|
3579
|
+
break;
|
|
3580
|
+
}
|
|
3581
|
+
}
|
|
3582
|
+
size = Math.max(1, size);
|
|
3583
|
+
this.size[re] = size;
|
|
3584
|
+
return size;
|
|
3585
|
+
}
|
|
3586
|
+
checkHeight(re) {
|
|
3587
|
+
if (this.numRegexp < Parser.MAX_HEIGHT) {
|
|
3588
|
+
return;
|
|
3589
|
+
}
|
|
3590
|
+
if (this.height === null) {
|
|
3591
|
+
this.height = {};
|
|
3592
|
+
for (let reEx of this.stack) {
|
|
3593
|
+
this.checkHeight(reEx);
|
|
3594
|
+
}
|
|
3595
|
+
}
|
|
3596
|
+
if (this.calcHeight(re, true) > Parser.MAX_HEIGHT) {
|
|
3597
|
+
throw new RE2JSSyntaxException(Parser.ERR_NESTING_DEPTH);
|
|
3598
|
+
}
|
|
3599
|
+
}
|
|
3600
|
+
calcHeight(re, force = false) {
|
|
3601
|
+
if (!force) {
|
|
3602
|
+
if (Object.prototype.hasOwnProperty.call(this.height, re)) {
|
|
3603
|
+
return this.height[re];
|
|
3604
|
+
}
|
|
3605
|
+
}
|
|
3606
|
+
let h = 1;
|
|
3607
|
+
for (let sub of re.subs) {
|
|
3608
|
+
const hsub = this.calcHeight(sub);
|
|
3609
|
+
if (h < 1 + hsub) {
|
|
3610
|
+
h = 1 + hsub;
|
|
3611
|
+
}
|
|
3612
|
+
}
|
|
3613
|
+
this.height[re] = h;
|
|
3614
|
+
return h;
|
|
3615
|
+
}
|
|
3409
3616
|
|
|
3410
3617
|
// Parse stack manipulation.
|
|
3411
3618
|
|
|
@@ -3426,13 +3633,14 @@ class Parser {
|
|
|
3426
3633
|
// push pushes the regexp re onto the parse stack and returns the regexp.
|
|
3427
3634
|
// Returns null for a CHAR_CLASS that can be merged with the top-of-stack.
|
|
3428
3635
|
push(re) {
|
|
3636
|
+
this.numRunes += re.runes.length;
|
|
3429
3637
|
if (re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 2 && re.runes[0] === re.runes[1]) {
|
|
3430
|
-
if (this.maybeConcat(re.runes[0], this.flags &
|
|
3638
|
+
if (this.maybeConcat(re.runes[0], this.flags & -2)) {
|
|
3431
3639
|
return null;
|
|
3432
3640
|
}
|
|
3433
3641
|
re.op = Regexp.Op.LITERAL;
|
|
3434
3642
|
re.runes = [re.runes[0]];
|
|
3435
|
-
re.flags = this.flags &
|
|
3643
|
+
re.flags = this.flags & -2;
|
|
3436
3644
|
} else if (re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 4 && re.runes[0] === re.runes[1] && re.runes[2] === re.runes[3] && Unicode.simpleFold(re.runes[0]) === re.runes[2] && Unicode.simpleFold(re.runes[2]) === re.runes[0] || re.op === Regexp.Op.CHAR_CLASS && re.runes.length === 2 && re.runes[0] + 1 === re.runes[1] && Unicode.simpleFold(re.runes[0]) === re.runes[1] && Unicode.simpleFold(re.runes[1]) === re.runes[0]) {
|
|
3437
3645
|
// Case-insensitive rune like [Aa] or [Δδ].
|
|
3438
3646
|
if (this.maybeConcat(re.runes[0], this.flags | RE2Flags.FOLD_CASE)) {
|
|
@@ -3447,6 +3655,7 @@ class Parser {
|
|
|
3447
3655
|
this.maybeConcat(-1, 0);
|
|
3448
3656
|
}
|
|
3449
3657
|
this.stack.push(re);
|
|
3658
|
+
this.checkLimits(re);
|
|
3450
3659
|
return re;
|
|
3451
3660
|
}
|
|
3452
3661
|
|
|
@@ -3540,6 +3749,43 @@ class Parser {
|
|
|
3540
3749
|
re.flags = flags;
|
|
3541
3750
|
re.subs = [sub];
|
|
3542
3751
|
this.stack[n - 1] = re;
|
|
3752
|
+
this.checkLimits(re);
|
|
3753
|
+
if (op === Regexp.Op.REPEAT && (min >= 2 || max >= 2) && !this.repeatIsValid(re, 1000)) {
|
|
3754
|
+
throw new RE2JSSyntaxException(Parser.ERR_INVALID_REPEAT_SIZE, t.from(beforePos));
|
|
3755
|
+
}
|
|
3756
|
+
}
|
|
3757
|
+
|
|
3758
|
+
// repeatIsValid reports whether the repetition re is valid.
|
|
3759
|
+
// Valid means that the combination of the top-level repetition
|
|
3760
|
+
// and any inner repetitions does not exceed n copies of the
|
|
3761
|
+
// innermost thing.
|
|
3762
|
+
// This function rewalks the regexp tree and is called for every repetition,
|
|
3763
|
+
// so we have to worry about inducing quadratic behavior in the parser.
|
|
3764
|
+
// We avoid this by only calling repeatIsValid when min or max >= 2.
|
|
3765
|
+
// In that case the depth of any >= 2 nesting can only get to 9 without
|
|
3766
|
+
// triggering a parse error, so each subtree can only be rewalked 9 times.
|
|
3767
|
+
repeatIsValid(re, n) {
|
|
3768
|
+
if (re.op === Regexp.Op.REPEAT) {
|
|
3769
|
+
let m = re.max;
|
|
3770
|
+
if (m === 0) {
|
|
3771
|
+
return true;
|
|
3772
|
+
}
|
|
3773
|
+
if (m < 0) {
|
|
3774
|
+
m = re.min;
|
|
3775
|
+
}
|
|
3776
|
+
if (m > n) {
|
|
3777
|
+
return false;
|
|
3778
|
+
}
|
|
3779
|
+
if (m > 0) {
|
|
3780
|
+
n = Math.trunc(n / m);
|
|
3781
|
+
}
|
|
3782
|
+
}
|
|
3783
|
+
for (let sub of re.subs) {
|
|
3784
|
+
if (!this.repeatIsValid(sub, n)) {
|
|
3785
|
+
return false;
|
|
3786
|
+
}
|
|
3787
|
+
}
|
|
3788
|
+
return true;
|
|
3543
3789
|
}
|
|
3544
3790
|
|
|
3545
3791
|
// concat replaces the top of the stack (above the topmost '|' or '(') with
|
|
@@ -3577,10 +3823,10 @@ class Parser {
|
|
|
3577
3823
|
if (re.op === Regexp.Op.CHAR_CLASS) {
|
|
3578
3824
|
re.runes = new CharClass(re.runes).cleanClass().toArray();
|
|
3579
3825
|
if (re.runes.length === 2 && re.runes[0] === 0 && re.runes[1] === Unicode.MAX_RUNE) {
|
|
3580
|
-
re.runes =
|
|
3826
|
+
re.runes = [];
|
|
3581
3827
|
re.op = Regexp.Op.ANY_CHAR;
|
|
3582
3828
|
} else if (re.runes.length === 4 && re.runes[0] === 0 && re.runes[1] === Codepoint.CODES.get('\n') - 1 && re.runes[2] === Codepoint.CODES.get('\n') + 1 && re.runes[3] === Unicode.MAX_RUNE) {
|
|
3583
|
-
re.runes =
|
|
3829
|
+
re.runes = [];
|
|
3584
3830
|
re.op = Regexp.Op.ANY_CHAR_NOT_NL;
|
|
3585
3831
|
}
|
|
3586
3832
|
}
|
|
@@ -3715,6 +3961,7 @@ class Parser {
|
|
|
3715
3961
|
prefix.runes = str.slice(0, strlen);
|
|
3716
3962
|
for (let j = start; j < i; j++) {
|
|
3717
3963
|
array[s + j] = this.removeLeadingString(array[s + j], strlen);
|
|
3964
|
+
this.checkLimits(array[s + j]);
|
|
3718
3965
|
}
|
|
3719
3966
|
// Recurse.
|
|
3720
3967
|
const suffix = this.collapse(array.slice(s + start, s + i), Regexp.Op.ALTERNATE);
|
|
@@ -3764,6 +4011,7 @@ class Parser {
|
|
|
3764
4011
|
for (let j = start; j < i; j++) {
|
|
3765
4012
|
const reuse = j !== start; // prefix came from sub[start]
|
|
3766
4013
|
array[s + j] = this.removeLeadingRegexp(array[s + j], reuse);
|
|
4014
|
+
this.checkLimits(array[s + j]);
|
|
3767
4015
|
}
|
|
3768
4016
|
// recurse
|
|
3769
4017
|
const suffix = this.collapse(array.slice(s + start, s + i), Regexp.Op.ALTERNATE);
|
|
@@ -4147,7 +4395,7 @@ class Parser {
|
|
|
4147
4395
|
t.skip(2); // "(?"
|
|
4148
4396
|
|
|
4149
4397
|
let flags = this.flags;
|
|
4150
|
-
let sign =
|
|
4398
|
+
let sign = 1;
|
|
4151
4399
|
let sawFlag = false;
|
|
4152
4400
|
loop: while (t.more()) {
|
|
4153
4401
|
{
|
|
@@ -4158,7 +4406,7 @@ class Parser {
|
|
|
4158
4406
|
sawFlag = true;
|
|
4159
4407
|
break;
|
|
4160
4408
|
case Codepoint.CODES.get('m'):
|
|
4161
|
-
flags &=
|
|
4409
|
+
flags &= -17;
|
|
4162
4410
|
sawFlag = true;
|
|
4163
4411
|
break;
|
|
4164
4412
|
case Codepoint.CODES.get('s'):
|
|
@@ -4264,12 +4512,12 @@ class Parser {
|
|
|
4264
4512
|
this.alternate();
|
|
4265
4513
|
const n = this.stack.length;
|
|
4266
4514
|
if (n < 2) {
|
|
4267
|
-
throw new RE2JSSyntaxException(Parser.
|
|
4515
|
+
throw new RE2JSSyntaxException(Parser.ERR_UNEXPECTED_PAREN, this.wholeRegexp);
|
|
4268
4516
|
}
|
|
4269
4517
|
const re1 = this.pop();
|
|
4270
4518
|
const re2 = this.pop();
|
|
4271
4519
|
if (re2.op !== Regexp.Op.LEFT_PAREN) {
|
|
4272
|
-
throw new RE2JSSyntaxException(Parser.
|
|
4520
|
+
throw new RE2JSSyntaxException(Parser.ERR_UNEXPECTED_PAREN, this.wholeRegexp);
|
|
4273
4521
|
}
|
|
4274
4522
|
// Restore flags at time of paren.
|
|
4275
4523
|
this.flags = re2.flags;
|
|
@@ -4339,7 +4587,7 @@ class Parser {
|
|
|
4339
4587
|
}
|
|
4340
4588
|
t.skip(1); // '\\'
|
|
4341
4589
|
// Committed to parse or throw exception.
|
|
4342
|
-
let sign =
|
|
4590
|
+
let sign = 1;
|
|
4343
4591
|
let c = t.pop(); // 'p' or 'P'
|
|
4344
4592
|
if (c === Codepoint.CODES.get('P')) {
|
|
4345
4593
|
sign = -1;
|
|
@@ -4403,7 +4651,7 @@ class Parser {
|
|
|
4403
4651
|
const re = this.newRegexp(Regexp.Op.CHAR_CLASS);
|
|
4404
4652
|
re.flags = this.flags;
|
|
4405
4653
|
const cc = new CharClass();
|
|
4406
|
-
let sign =
|
|
4654
|
+
let sign = 1;
|
|
4407
4655
|
if (t.more() && t.lookingAt('^')) {
|
|
4408
4656
|
sign = -1;
|
|
4409
4657
|
t.skip(1); // '^'
|
|
@@ -4946,6 +5194,13 @@ class RE2 {
|
|
|
4946
5194
|
return this.numSubexp;
|
|
4947
5195
|
}
|
|
4948
5196
|
|
|
5197
|
+
/**
|
|
5198
|
+
* Returns the number of instructions in this compiled regular expression program.
|
|
5199
|
+
*/
|
|
5200
|
+
numberOfInstructions() {
|
|
5201
|
+
return this.prog.numInst();
|
|
5202
|
+
}
|
|
5203
|
+
|
|
4949
5204
|
// get() returns a machine to use for matching |this|. It uses |this|'s
|
|
4950
5205
|
// machine cache if possible, to avoid unnecessary allocation.
|
|
4951
5206
|
get() {
|
|
@@ -5578,7 +5833,7 @@ class RE2JS {
|
|
|
5578
5833
|
}
|
|
5579
5834
|
let re2Flags = RE2Flags.PERL;
|
|
5580
5835
|
if ((flags & RE2JS.DISABLE_UNICODE_GROUPS) !== 0) {
|
|
5581
|
-
re2Flags &=
|
|
5836
|
+
re2Flags &= -129;
|
|
5582
5837
|
}
|
|
5583
5838
|
const p = new RE2JS(regex, flags);
|
|
5584
5839
|
// The compiled RE2 regexp.
|
|
@@ -5746,6 +6001,20 @@ class RE2JS {
|
|
|
5746
6001
|
return this.patternInput;
|
|
5747
6002
|
}
|
|
5748
6003
|
|
|
6004
|
+
/**
|
|
6005
|
+
* Returns the program size of this pattern.
|
|
6006
|
+
*
|
|
6007
|
+
* <p>
|
|
6008
|
+
* Similar to the C++ implementation, the program size is a very approximate measure of a regexp's
|
|
6009
|
+
* "cost". Larger numbers are more expensive than smaller numbers.
|
|
6010
|
+
* </p>
|
|
6011
|
+
*
|
|
6012
|
+
* @returns {number} the program size of this pattern
|
|
6013
|
+
*/
|
|
6014
|
+
programSize() {
|
|
6015
|
+
return this.re2Input.numberOfInstructions();
|
|
6016
|
+
}
|
|
6017
|
+
|
|
5749
6018
|
/**
|
|
5750
6019
|
* Returns the number of capturing groups in this matcher's pattern. Group zero denotes the entire
|
|
5751
6020
|
* pattern and is excluded from this count.
|