re2js 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,8 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.2.0
6
- * @author Alexey Vasiliev
5
+ * @version v2.2.2
6
+ * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
9
9
  * @license MIT
@@ -78,6 +78,8 @@ const PublicFlags = {
78
78
  const ASCII_SIZE = 128;
79
79
  const ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
80
80
  const ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
81
+ // The highest legal Basic Multilingual Plane (BMP) value.
82
+ const MAX_BMP = 0xffff;
81
83
  for (let i = 0; i < ASCII_SIZE; i++) {
82
84
  if (i >= 97 && i <= 122) {
83
85
  // a-z
@@ -101,11 +103,13 @@ class Codepoint {
101
103
  static toUpperCase(codepoint) {
102
104
  if (codepoint < ASCII_SIZE) return ASCII_TO_UPPER[codepoint];
103
105
  const s = String.fromCodePoint(codepoint).toUpperCase();
104
- if (s.length > 1) {
106
+ const expectedLen = s.codePointAt(0) > MAX_BMP ? 2 : 1;
107
+ if (s.length > expectedLen) {
105
108
  return codepoint;
106
109
  }
107
110
  const sOrigin = String.fromCodePoint(s.codePointAt(0)).toLowerCase();
108
- if (sOrigin.length > 1 || sOrigin.codePointAt(0) !== codepoint) {
111
+ const originExpectedLen = sOrigin.codePointAt(0) > MAX_BMP ? 2 : 1;
112
+ if (sOrigin.length > originExpectedLen || sOrigin.codePointAt(0) !== codepoint) {
109
113
  return codepoint;
110
114
  }
111
115
  return s.codePointAt(0);
@@ -116,11 +120,13 @@ class Codepoint {
116
120
  static toLowerCase(codepoint) {
117
121
  if (codepoint < ASCII_SIZE) return ASCII_TO_LOWER[codepoint];
118
122
  const s = String.fromCodePoint(codepoint).toLowerCase();
119
- if (s.length > 1) {
123
+ const expectedLen = s.codePointAt(0) > MAX_BMP ? 2 : 1;
124
+ if (s.length > expectedLen) {
120
125
  return codepoint;
121
126
  }
122
127
  const sOrigin = String.fromCodePoint(s.codePointAt(0)).toUpperCase();
123
- if (sOrigin.length > 1 || sOrigin.codePointAt(0) !== codepoint) {
128
+ const originExpectedLen = sOrigin.codePointAt(0) > MAX_BMP ? 2 : 1;
129
+ if (sOrigin.length > originExpectedLen || sOrigin.codePointAt(0) !== codepoint) {
124
130
  return codepoint;
125
131
  }
126
132
  return s.codePointAt(0);
@@ -222,7 +228,7 @@ class UnicodeTables {
222
228
  static _CASE_ORBIT = null;
223
229
  static get CASE_ORBIT() {
224
230
  if (!this._CASE_ORBIT) {
225
- this._CASE_ORBIT = decodeOrbit('rCrDIzDYqpII-LiC8cQlHa+0HGrpI6EzClClOBmOBkOBoOBpOBnOBrOBsOBqOlByPBzPBxPyK5crCz+HCydD1dD4dB5dB6dC8dEgeBheCieDmeDpeHj-HCweD1fDxeB+9HBwfC1FE2eBxfBjeBjdD1eDmpIHycB0fEmdBgda6cBhdD4cB1cdyhBC0hBK+hBDhiBBiiBIqiBIgkHChkHKikHDjkHBkkHImkHYjjBBnkH9gGygBB0gBB+gBBhhBBlkHBihBBqhBBijBBqypB4OhzHB70H6BgzHD-GiHo8HBp8HBq8HBr8HBs8HBt8HBu8HBv8HBg8HBh8HBi8HBj8HBk8HBl8HBm8HBn8HB48HB58HB68HB78HB88HB98HB+8HB-8HBw8HBx8HBy8HBz8HB08HB18HB28HB38HBo9HBp9HBq9HBr9HBs9HBt9HBu9HBv9HBg9HBh9HBi9HBj9HBk9HBl9HBm9HBn9HE89HJz9HClaFs+HJj+HHwcQwdQ8-HJz-HqJpdErCBlG-ohBrypBBokH6lVm4+BBl4+B6nCohhCBphhCBqhhCBrhhCBshhCBthhCBuhhCBvhhCBwhhCBxhhCByhhCBzhhCB0hhCB1hhCB2hhCB3hhCB4hhCB5hhCB6hhCB7hhCB8hhCB9hhCB+hhCB-hhCBgihCBhihCBiihCBjihCBkihCBlihCBmihCBnihCBoihCBpihCBqihCBrihCBsihCBtihCBuihCBvihCBgghCBhghCBighCBjghCBkghCBlghCBmghCBnghCBoghCBpghCBqghCBrghCBsghCBtghCBughCBvghCBwghCBxghCByghCBzghCB0ghCB1ghCB2ghCB3ghCB4ghCB5ghCB6ghCB7ghCB8ghCB9ghCB+ghCB-ghCBghhCBhhhCBihhCBjhhCBkhhCBlhhCBmhhCBnhhChD4mhCB5mhCB6mhCB7mhCB8mhCB9mhCB+mhCB-mhCBgnhCBhnhCBinhCBjnhCBknhCBlnhCBmnhCBnnhCBonhCBpnhCBqnhCBrnhCBsnhCBtnhCBunhCBvnhCBwnhCBxnhCBynhCBznhCB0nhCB1nhCB2nhCB3nhCB4nhCB5nhCB6nhCB7nhCFwlhCBxlhCBylhCBzlhCB0lhCB1lhCB2lhCB3lhCB4lhCB5lhCB6lhCB7lhCB8lhCB9lhCB+lhCB-lhCBgmhCBhmhCBimhCBjmhCBkmhCBlmhCBmmhCBnmhCBomhCBpmhCBqmhCBrmhCBsmhCBtmhCBumhCBvmhCBwmhCBxmhCBymhCBzmhC1D3shCB4shCB5shCB6shCB7shCB8shCB9shCB+shCB-shCBgthCBhthCCjthCBkthCBlthCBmthCBnthCBothCBpthCBqthCBrthCBsthCBtthCButhCBvthCBwthCBxthCCzthCB0thCB1thCB2thCB3thCB4thCB5thCC7thCB8thCCwrhCBxrhCByrhCBzrhCB0rhCB1rhCB2rhCB3rhCB4rhCB5rhCB6rhCC8rhCB9rhCB+rhCB-rhCBgshCBhshCBishCBjshCBkshCBlshCBmshCBnshCBoshCBpshCBqshCCsshCBtshCBushCBvshCBwshCBxshCByshCC0shCB1shCk2BgmjCBhmjCBimjCBjmjCBkmjCBlmjCBmmjCBnmjCBomjCBpmjCBqmjCBrmjCBsmjCBtmjCBumjCBvmjCBwmjCBxmjCBymjCBzmjCB0mjCB1mjCB2mjCB3mjCB4mjCB5mjCB6mjCB7mjCB8mjCB9mjCB+mjCB-mjCBgnjCBhnjCBinjCBjnjCBknjCBlnjCBmnjCBnnjCBonjCBpnjCBqnjCBrnjCBsnjCBtnjCBunjCBvnjCBwnjCBxnjCBynjCOgkjCBhkjCBikjCBjkjCBkkjCBlkjCBmkjCBnkjCBokj
CBpkjCBqkjCBrkjCBskjCBtkjCBukjCBvkjCBwkjCBxkjCBykjCBzkjCB0kjCB1kjCB2kjCB3kjCB4kjCB5kjCB6kjCB7kjCB8kjCB9kjCB+kjCB-kjCBgljCBhljCBiljCBjljCBkljCBlljCBmljCBnljCBoljCBpljCBqljCBrljCBsljCBtljCBuljCBvljCBwljCBxljCByljC+CwrjCBxrjCByrjCBzrjCB0rjCB1rjCB2rjCB3rjCB4rjCB5rjCB6rjCB7rjCB8rjCB9rjCB+rjCB-rjCBgsjCBhsjCBisjCBjsjCBksjCBlsjCLwqjCBxqjCByqjCBzqjCB0qjCB1qjCB2qjCB3qjCB4qjCB5qjCB6qjCB7qjCB8qjCB9qjCB+qjCB-qjCBgrjCBhrjCBirjCBjrjCBkrjCBlrjC74CgmmCBhmmCBimmCBjmmCBkmmCBlmmCBmmmCBnmmCBommCBpmmCBqmmCBrmmCBsmmCBtmmCBummCBvmmCBwmmCBxmmCBymmCBzmmCB0mmCB1mmCB2mmCB3mmCB4mmCB5mmCB6mmCB7mmCB8mmCB9mmCB+mmCB-mmCBglmCBhlmCBilmCBjlmCBklmCBllmCBmlmCBnlmCBolmCBplmCBqlmCBrlmCBslmCBtlmCBulmCBvlmCBwlmCBxlmCBylmCBzlmCB0lmCB1lmCB2lmCB3lmCB4lmCB5lmCB6lmCB7lmCB8lmCB9lmCB+lmCB-lmChrVgz7CBhz7CBiz7CBjz7CBkz7CBlz7CBmz7CBnz7CBoz7CBpz7CBqz7CBrz7CBsz7CBtz7CBuz7CBvz7CBwz7CBxz7CByz7CBzz7CB0z7CB1z7CB2z7CB3z7CB4z7CB5z7CB6z7CB7z7CB8z7CB9z7CB+z7CB-z7CBgy7CBhy7CBiy7CBjy7CBky7CBly7CBmy7CBny7CBoy7CBpy7CBqy7CBry7CBsy7CBty7CBuy7CBvy7CBwy7CBxy7CByy7CBzy7CB0y7CB1y7CB2y7CB3y7CB4y7CB5y7CB6y7CB7y7CB8y7CB9y7CB+y7CB-y7ChB717CB817CB917CB+17CB-17CBg27CBh27CBi27CBj27CBk27CBl27CBm27CBn27CBo27CBp27CBq27CBr27CBs27CBt27CBu27CBv27CBw27CBx27CBy27CBz27CDg17CBh17CBi17CBj17CBk17CBl17CBm17CBn17CBo17CBp17CBq17CBr17CBs17CBt17CBu17CBv17CBw17CBx17CBy17CBz17CB017CB117CB217CB317CB417Ctxeip6DBjp6DBkp6DBlp6DBmp6DBnp6DBop6DBpp6DBqp6DBrp6DBsp6DBtp6DBup6DBvp6DBwp6DBxp6DByp6DBzp6DB0p6DB1p6DB2p6DB3p6DB4p6DB5p6DB6p6DB7p6DB8p6DB9p6DB+p6DB-p6DBgq6DBhq6DBiq6DBjq6DBgo6DBho6DBio6DBjo6DBko6DBlo6DBmo6DBno6DBoo6DBpo6DBqo6DBro6DBso6DBto6DBuo6DBvo6DBwo6DBxo6DByo6DBzo6DB0o6DB1o6DB2o6DB3o6DB4o6DB5o6DB6o6DB7o6DB8o6DB9o6DB+o6DB-o6DBgp6DBhp6D');
231
+ this._CASE_ORBIT = decodeOrbit('rCrDIzDYqpII-LiC8cQlHa+0HGrpI6EzClClOBmOBkOBoOBpOBnOBrOBsOBqOlByPBzPBxPyK5crCz+HCydD1dD4dB5dB6dC8dEgeBheCieDmeDpeHj-HCweD1fDxeB+9HBwfC1FE2eBxfBjeBjdD1eDmpIHycB0fEmdBgda6cBhdD4cB1cdyhBC0hBK+hBDhiBBiiBIqiBIgkHChkHKikHDjkHBkkHImkHYjjBBnkH9gGygBB0gBB+gBBhhBBlkHBihBBqhBBijBBqypB4OhzHB70H6BgzHD-GiHo8HBp8HBq8HBr8HBs8HBt8HBu8HBv8HBg8HBh8HBi8HBj8HBk8HBl8HBm8HBn8HB48HB58HB68HB78HB88HB98HB+8HB-8HBw8HBx8HBy8HBz8HB08HB18HB28HB38HBo9HBp9HBq9HBr9HBs9HBt9HBu9HBv9HBg9HBh9HBi9HBj9HBk9HBl9HBm9HBn9HE89HJz9HClaFs+HJj+HHwcQwdQ8-HJz-HqJpdErCBlG-ohBrypBBokH6lVm4+BBl4+B');
226
232
  }
227
233
  return this._CASE_ORBIT;
228
234
  }
@@ -594,11 +600,16 @@ class Unicode {
594
600
  // to compare it to |r2|.
595
601
  // -1 is interpreted as the end-of-file mark.
596
602
  static equalsIgnoreCase(r1, r2) {
597
- // Runes already match, or one of them is EOF
598
- if (r1 < 0 || r2 < 0 || r1 === r2) {
603
+ // Runes already match
604
+ if (r1 === r2) {
599
605
  return true;
600
606
  }
601
607
 
608
+ // Safely fail if either is EOF (and they didn't explicitly match above)
609
+ if (r1 < 0 || r2 < 0) {
610
+ return false;
611
+ }
612
+
602
613
  // Fast path for the common case where both runes are ASCII characters.
603
614
  // Coerces both runes to lowercase if applicable.
604
615
  if (r1 <= this.MAX_ASCII && r2 <= this.MAX_ASCII) {
@@ -851,7 +862,7 @@ class Utils {
851
862
  // Encoding[(Encoding['UTF_16'] = 0)] = 'UTF_16'
852
863
  // Encoding[(Encoding['UTF_8'] = 1)] = 'UTF_8'
853
864
  const createEnum = (values = [], initNum = 0) => {
854
- const enumObject = {};
865
+ const enumObject = Object.create(null);
855
866
  for (let i = 0; i < values.length; i++) {
856
867
  const val = values[i];
857
868
  const keyVal = initNum + i;
@@ -993,6 +1004,9 @@ class MachineInputBase {
993
1004
  hasString() {
994
1005
  return false;
995
1006
  }
1007
+ hasAnyString() {
1008
+ return false;
1009
+ }
996
1010
 
997
1011
  // Helper for the exact-literal fast-path execution router
998
1012
  prefixLength() {
@@ -1018,6 +1032,13 @@ class MachineUTF8Input extends MachineInputBase {
1018
1032
  return idx !== -1 && idx <= this.end - target.length;
1019
1033
  }
1020
1034
 
1035
+ // Executes a high-speed, single-pass search for multiple literal strings
1036
+ // simultaneously using an Aho-Corasick automaton.
1037
+ hasAnyString(prefilter, pos) {
1038
+ if (!prefilter.ac8) return false;
1039
+ return prefilter.ac8.searchUTF8(this.bytes, this.start + pos, this.end);
1040
+ }
1041
+
1021
1042
  // Returns the rune at the specified index; the units are
1022
1043
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1023
1044
  // indices. Returns the width (in the same units) of the rune in
@@ -1036,17 +1057,23 @@ class MachineUTF8Input extends MachineInputBase {
1036
1057
  return c << 3 | 1;
1037
1058
  } else if (c >= 0xc2 && c <= 0xdf && pos + 1 < this.end) {
1038
1059
  const c1 = this.bytes[pos + 1] & 0xff;
1060
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1039
1061
  const rune = (c & 0x1f) << 6 | c1 & 0x3f;
1040
1062
  return rune << 3 | 2;
1041
1063
  } else if (c >= 0xe0 && c <= 0xef && pos + 2 < this.end) {
1042
1064
  const c1 = this.bytes[pos + 1] & 0xff;
1065
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1043
1066
  const c2 = this.bytes[pos + 2] & 0xff;
1067
+ if ((c2 & 0xc0) !== 0x80) return c << 3 | 1;
1044
1068
  const rune = (c & 0x0f) << 12 | (c1 & 0x3f) << 6 | c2 & 0x3f;
1045
1069
  return rune << 3 | 3;
1046
1070
  } else if (c >= 0xf0 && c <= 0xf4 && pos + 3 < this.end) {
1047
1071
  const c1 = this.bytes[pos + 1] & 0xff;
1072
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1048
1073
  const c2 = this.bytes[pos + 2] & 0xff;
1074
+ if ((c2 & 0xc0) !== 0x80) return c << 3 | 1;
1049
1075
  const c3 = this.bytes[pos + 3] & 0xff;
1076
+ if ((c3 & 0xc0) !== 0x80) return c << 3 | 1;
1050
1077
  const rune = (c & 0x07) << 18 | (c1 & 0x3f) << 12 | (c2 & 0x3f) << 6 | c3 & 0x3f;
1051
1078
  return rune << 3 | 4;
1052
1079
  } else {
@@ -1125,6 +1152,13 @@ class MachineUTF16Input extends MachineInputBase {
1125
1152
  return idx !== -1 && idx <= this.end - prefilter.str.length;
1126
1153
  }
1127
1154
 
1155
+ // Executes a high-speed, single-pass search for multiple literal strings
1156
+ // simultaneously using an Aho-Corasick automaton.
1157
+ hasAnyString(prefilter, pos) {
1158
+ if (!prefilter.ac16) return false;
1159
+ return prefilter.ac16.searchUTF16(this.charSequence, this.start + pos, this.end);
1160
+ }
1161
+
1128
1162
  // Returns the rune at the specified index; the units are
1129
1163
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1130
1164
  // indices. Returns the width (in the same units) of the rune in
@@ -1570,7 +1604,15 @@ class Matcher {
1570
1604
  if (this.hasMatch) {
1571
1605
  start = this.groups[1];
1572
1606
  if (this.groups[0] === this.groups[1]) {
1573
- start++;
1607
+ // Safely calculate structural encoding width to avoid sequence corruption
1608
+ const machineInput = this.matcherInput.isUTF16Encoding() ? MachineInput.fromUTF16(this.matcherInput.asCharSequence(), 0, this.matcherInputLength) : MachineInput.fromUTF8(this.matcherInput.asBytes(), 0, this.matcherInputLength);
1609
+ const r = machineInput.step(start);
1610
+ if (r < 0) {
1611
+ // EOF
1612
+ start++; // Advance past length to force loop exit
1613
+ } else {
1614
+ start += r & 7; // Advance by safely decoded width
1615
+ }
1574
1616
  }
1575
1617
  }
1576
1618
  return this.genMatch(start, RE2Flags.UNANCHORED);
@@ -1709,6 +1751,8 @@ class Matcher {
1709
1751
  const groupName = replacement.substring(i + 1, j);
1710
1752
  res += this.group(groupName);
1711
1753
  last = j + 1;
1754
+ i = j;
1755
+ continue;
1712
1756
  }
1713
1757
  }
1714
1758
  }
@@ -1788,6 +1832,7 @@ class Matcher {
1788
1832
  if (j === replacement.length || replacement.codePointAt(j) !== Codepoint.CODES.get('>')) {
1789
1833
  res += replacement.substring(i - 1, j + 1);
1790
1834
  last = j + 1;
1835
+ i = j;
1791
1836
  continue;
1792
1837
  }
1793
1838
  const groupName = replacement.substring(i + 1, j);
@@ -1797,6 +1842,8 @@ class Matcher {
1797
1842
  res += `$<${groupName}>`;
1798
1843
  }
1799
1844
  last = j + 1;
1845
+ i = j;
1846
+ continue;
1800
1847
  }
1801
1848
  }
1802
1849
  }
@@ -1921,6 +1968,8 @@ class Inst {
1921
1968
  return r === r0;
1922
1969
  }
1923
1970
  const len = this.runes.length;
1971
+ if (len === 0) return false;
1972
+
1924
1973
  // If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
1925
1974
  if (len === 2 || len === 4 || len === 6 || len === 8) {
1926
1975
  for (let j = 0; j < len; j += 2) {
@@ -1934,22 +1983,19 @@ class Inst {
1934
1983
  return false; // Stop here
1935
1984
  }
1936
1985
 
1937
- // Otherwise binary search.
1938
- let lo = 0;
1939
- let hi = this.runes.length / 2 | 0;
1940
- while (lo < hi) {
1941
- const m = lo + hi >> 1; // native cpu instruction for "lo + (((hi - lo) / 2) | 0)"
1942
- const c = this.runes[2 * m];
1943
- if (c <= r) {
1944
- if (r <= this.runes[2 * m + 1]) {
1945
- return true;
1946
- }
1947
- lo = m + 1;
1948
- } else {
1949
- hi = m;
1950
- }
1986
+ // Branchless Binary Search (Lower Bound)
1987
+ // Compiles to optimal conditional move (cmov) machine code, preventing
1988
+ // branch mispredictions on large, chaotic Unicode arrays
1989
+ let base = 0;
1990
+ let n = len >> 1;
1991
+ while (n > 1) {
1992
+ const half = n >> 1;
1993
+ base += this.runes[base + half << 1] <= r ? half : 0;
1994
+ n -= half;
1951
1995
  }
1952
- return false;
1996
+ base += this.runes[base << 1] <= r ? 1 : 0;
1997
+ const m = base - 1;
1998
+ return m >= 0 && r <= this.runes[m << 1 | 1];
1953
1999
  }
1954
2000
 
1955
2001
  // matchRunePos checks whether the instruction matches (and consumes) r.
@@ -1964,6 +2010,7 @@ class Inst {
1964
2010
  return r === r0 ? 0 : -1;
1965
2011
  }
1966
2012
  const len = this.runes.length;
2013
+ if (len === 0) return -1;
1967
2014
  if (len === 2 || len === 4 || len === 6 || len === 8) {
1968
2015
  for (let j = 0; j < len; j += 2) {
1969
2016
  if (r < this.runes[j]) return -1;
@@ -1971,19 +2018,18 @@ class Inst {
1971
2018
  }
1972
2019
  return -1;
1973
2020
  }
1974
- let lo = 0;
1975
- let hi = Math.floor(len / 2);
1976
- while (lo < hi) {
1977
- const m = lo + hi >> 1;
1978
- const c = this.runes[2 * m];
1979
- if (c <= r) {
1980
- if (r <= this.runes[2 * m + 1]) return m;
1981
- lo = m + 1;
1982
- } else {
1983
- hi = m;
1984
- }
2021
+
2022
+ // Branchless Binary Search (Lower Bound)
2023
+ let base = 0;
2024
+ let n = len >> 1;
2025
+ while (n > 1) {
2026
+ const half = n >> 1;
2027
+ base += this.runes[base + half << 1] <= r ? half : 0;
2028
+ n -= half;
1985
2029
  }
1986
- return -1;
2030
+ base += this.runes[base << 1] <= r ? 1 : 0;
2031
+ const m = base - 1;
2032
+ return m >= 0 && r <= this.runes[m << 1 | 1] ? m : -1;
1987
2033
  }
1988
2034
  /**
1989
2035
  *
@@ -2082,6 +2128,7 @@ class Queue {
2082
2128
  //
2083
2129
  // Called by RE2.doExecute.
2084
2130
  class Machine {
2131
+ static THREADS_CHUNK_SIZE = 128;
2085
2132
  static fromRE2(re2) {
2086
2133
  const m = new Machine();
2087
2134
  m.prog = re2.prog;
@@ -2122,15 +2169,15 @@ class Machine {
2122
2169
  resetCap() {
2123
2170
  for (let i = 0; i < this.poolSize; i++) {
2124
2171
  const t = this.pool[i];
2125
- t.cap.fill(0);
2172
+ t.cap.fill(-1);
2126
2173
  }
2127
2174
  }
2128
2175
  initNewCap(ncap) {
2129
2176
  for (let i = 0; i < this.poolSize; i++) {
2130
2177
  const t = this.pool[i];
2131
- t.cap = new Int32Array(ncap);
2178
+ t.cap = new Int32Array(ncap).fill(-1);
2132
2179
  }
2133
- this.matchcap = new Int32Array(ncap);
2180
+ this.matchcap = new Int32Array(ncap).fill(-1);
2134
2181
  }
2135
2182
  submatches() {
2136
2183
  if (this.ncap === 0) {
@@ -2143,14 +2190,21 @@ class Machine {
2143
2190
  // alloc() allocates a new thread with the given instruction.
2144
2191
  // It uses the free pool if possible.
2145
2192
  alloc(inst) {
2146
- let t;
2147
- if (this.poolSize > 0) {
2148
- this.poolSize--;
2149
- t = this.pool[this.poolSize];
2150
- } else {
2151
- t = new Thread();
2152
- t.cap = new Int32Array(this.matchcap.length);
2193
+ if (this.poolSize === 0) {
2194
+ const capLen = this.matchcap.length;
2195
+
2196
+ // Bulk allocate threads in a tight loop so the V8 engine
2197
+ // places them adjacently in the young generation heap
2198
+ for (let i = 0; i < Machine.THREADS_CHUNK_SIZE; i++) {
2199
+ const t = new Thread();
2200
+ t.cap = new Int32Array(capLen);
2201
+ this.pool[this.poolSize++] = t;
2202
+ }
2153
2203
  }
2204
+
2205
+ // Pop a thread from the top of the pool stack
2206
+ this.poolSize--;
2207
+ const t = this.pool[this.poolSize];
2154
2208
  t.inst = inst;
2155
2209
  return t;
2156
2210
  }
@@ -2203,6 +2257,9 @@ class Machine {
2203
2257
  if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) {
2204
2258
  break;
2205
2259
  }
2260
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2261
+ break;
2262
+ }
2206
2263
  if (this.matched) {
2207
2264
  break;
2208
2265
  }
@@ -2280,6 +2337,9 @@ class Machine {
2280
2337
  while (true) {
2281
2338
  if (runq.isEmpty()) {
2282
2339
  if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
2340
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2341
+ break;
2342
+ }
2283
2343
  }
2284
2344
  if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
2285
2345
  // Spawn Lookbehind threads BEFORE the main pattern
@@ -2395,78 +2455,83 @@ class Machine {
2395
2455
  runq.clear();
2396
2456
  }
2397
2457
  add(q, pc, pos, cap, cond, t) {
2398
- if (pc === 0) {
2399
- return t;
2400
- }
2401
- if (q.contains(pc)) {
2402
- return t;
2403
- }
2404
- const d = q.add(pc);
2405
- const inst = this.prog.inst[pc];
2406
- switch (inst.op) {
2407
- case Inst.FAIL:
2408
- break;
2409
- case Inst.ALT:
2410
- case Inst.ALT_MATCH:
2411
- t = this.add(q, inst.out, pos, cap, cond, t);
2412
- t = this.add(q, inst.arg, pos, cap, cond, t);
2413
- break;
2414
- case Inst.EMPTY_WIDTH:
2415
- if ((inst.arg & ~cond) === 0) {
2416
- t = this.add(q, inst.out, pos, cap, cond, t);
2417
- }
2418
- break;
2419
- case Inst.NOP:
2420
- t = this.add(q, inst.out, pos, cap, cond, t);
2421
- break;
2422
- case Inst.CAPTURE:
2423
- if (inst.arg < this.ncap) {
2424
- const opos = cap[inst.arg];
2425
- cap[inst.arg] = pos;
2426
- this.add(q, inst.out, pos, cap, cond, null);
2427
- cap[inst.arg] = opos;
2428
- } else {
2458
+ while (true) {
2459
+ if (pc === 0) {
2460
+ return t;
2461
+ }
2462
+ if (q.contains(pc)) {
2463
+ return t;
2464
+ }
2465
+ const d = q.add(pc);
2466
+ const inst = this.prog.inst[pc];
2467
+ switch (inst.op) {
2468
+ case Inst.FAIL:
2469
+ return t;
2470
+ case Inst.ALT:
2471
+ case Inst.ALT_MATCH:
2429
2472
  t = this.add(q, inst.out, pos, cap, cond, t);
2430
- }
2431
- break;
2432
- case Inst.LB_WRITE:
2433
- this.lbTable[Math.abs(inst.lb)] = pos;
2434
- t = this.add(q, inst.out, pos, cap, cond, t);
2435
- break;
2436
- case Inst.LB_CHECK:
2437
- if (inst.lb > 0) {
2438
- // Positive Lookbehind
2439
- if (this.lbTable[inst.lb] === pos) {
2440
- t = this.add(q, inst.out, pos, cap, cond, t);
2473
+ pc = inst.arg; // Flattened tail recursion
2474
+ continue;
2475
+ case Inst.EMPTY_WIDTH:
2476
+ if ((inst.arg & ~cond) === 0) {
2477
+ pc = inst.out; // Flattened tail recursion
2478
+ continue;
2441
2479
  }
2442
- } else if (this.lbTable[-inst.lb] !== pos) {
2443
- // Negative Lookbehind
2444
- t = this.add(q, inst.out, pos, cap, cond, t);
2445
- }
2446
- break;
2447
- case Inst.MATCH:
2448
- case Inst.RUNE:
2449
- case Inst.RUNE1:
2450
- case Inst.RUNE_ANY:
2451
- case Inst.RUNE_ANY_NOT_NL:
2452
- if (t === null) {
2453
- t = this.alloc(inst);
2454
- } else {
2455
- t.inst = inst;
2456
- }
2457
- if (this.ncap > 0 && t.cap !== cap) {
2458
- // Direct assignment utilizing Typed Array performance
2459
- for (let c = 0; c < this.ncap; c++) {
2460
- t.cap[c] = cap[c];
2480
+ return t;
2481
+ case Inst.NOP:
2482
+ pc = inst.out; // Flattened tail recursion
2483
+ continue;
2484
+ case Inst.CAPTURE:
2485
+ if (inst.arg < this.ncap) {
2486
+ const opos = cap[inst.arg];
2487
+ cap[inst.arg] = pos;
2488
+ this.add(q, inst.out, pos, cap, cond, null);
2489
+ cap[inst.arg] = opos;
2490
+ return t;
2491
+ } else {
2492
+ pc = inst.out; // Flattened tail recursion
2493
+ continue;
2461
2494
  }
2462
- }
2463
- q.denseThreads[d] = t;
2464
- t = null;
2465
- break;
2466
- default:
2467
- throw new Error('unhandled');
2495
+ case Inst.LB_WRITE:
2496
+ this.lbTable[Math.abs(inst.lb)] = pos;
2497
+ pc = inst.out;
2498
+ continue;
2499
+ case Inst.LB_CHECK:
2500
+ if (inst.lb > 0) {
2501
+ // Positive Lookbehind
2502
+ if (this.lbTable[inst.lb] === pos) {
2503
+ pc = inst.out; // Flattened tail recursion
2504
+ continue;
2505
+ }
2506
+ } else if (this.lbTable[-inst.lb] !== pos) {
2507
+ // Negative Lookbehind
2508
+ pc = inst.out; // Flattened tail recursion
2509
+ continue;
2510
+ }
2511
+ return t;
2512
+ case Inst.MATCH:
2513
+ case Inst.RUNE:
2514
+ case Inst.RUNE1:
2515
+ case Inst.RUNE_ANY:
2516
+ case Inst.RUNE_ANY_NOT_NL:
2517
+ if (t === null) {
2518
+ t = this.alloc(inst);
2519
+ } else {
2520
+ t.inst = inst;
2521
+ }
2522
+ if (this.ncap > 0 && t.cap !== cap) {
2523
+ // Direct assignment utilizing Typed Array performance
2524
+ for (let c = 0; c < this.ncap; c++) {
2525
+ t.cap[c] = cap[c];
2526
+ }
2527
+ }
2528
+ q.denseThreads[d] = t;
2529
+ t = null;
2530
+ return t;
2531
+ default:
2532
+ throw new Error('unhandled');
2533
+ }
2468
2534
  }
2469
- return t;
2470
2535
  }
2471
2536
  }
2472
2537
 
@@ -2494,8 +2559,15 @@ class DFAState {
2494
2559
  this.nfaStates = nfaStates; // Int32Array of Instruction PCs
2495
2560
  this.isMatch = isMatch; // Boolean
2496
2561
  this.matchIDs = matchIDs; // Array of integers indicating which Set patterns matched
2497
- this.nextAscii = new Array(Unicode.MAX_ASCII + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2498
- this.nextMap = new Map(); // Cache of Char -> DFAState
2562
+
2563
+ // Latin-1 (Unicode.MAX_LATIN1 + 1) flat arrays for blisteringly fast O(1) lookups
2564
+ // completely covering standard English, European languages, and 1-byte encodings.
2565
+ this.nextLatin1 = new Array(Unicode.MAX_LATIN1 + 1).fill(null); // Flat array for blisteringly fast Latin-1 lookups (unanchored transitions)
2566
+ this.nextLatin1Anchored = new Array(Unicode.MAX_LATIN1 + 1).fill(null); // Flat array for blisteringly fast Latin-1 lookups (anchored transitions)
2567
+ // Two parallel arrays used as a hash map for V8 optimization (N is a small number, so an O(n) scan is faster than Map's O(1) lookup)
2568
+ this.transKeys = [];
2569
+ this.transVals = [];
2570
+ this.lastSeen = 0; // Track when this state was last used for LRU eviction
2499
2571
  }
2500
2572
  }
2501
2573
  class DFA {
@@ -2508,6 +2580,7 @@ class DFA {
2508
2580
  this.stateLimit = 10000; // Prevent memory explosion (ReDoS protection)
2509
2581
  this.cacheClears = 0; // Track thrashing
2510
2582
  this.failed = false; // mark if DFA cannot work with provided prog
2583
+ this.clock = 0; // Global clock for LRU eviction
2511
2584
  }
2512
2585
 
2513
2586
  // Follows epsilon (empty) transitions to find all reachable states without consuming a char
@@ -2567,6 +2640,7 @@ class DFA {
2567
2640
  for (let i = 0; i < bucket.length; i++) {
2568
2641
  const state = bucket[i];
2569
2642
  if (arraysEqual(state.nfaStates, sortedPCs)) {
2643
+ state.lastSeen = ++this.clock;
2570
2644
  return state;
2571
2645
  }
2572
2646
  }
@@ -2579,40 +2653,99 @@ class DFA {
2579
2653
  if (this.failed) return null;
2580
2654
 
2581
2655
  // Safety: prevent memory exhaustion from state explosion
2582
- // We flush the cache and return null, which seamlessly routes execution to the NFA
2656
+ // We prune the cache to keep the newest 50%
2583
2657
  if (this.stateCount >= this.stateLimit) {
2584
- this.stateCache.clear();
2585
- this.stateCount = 0;
2586
- this.startState = null;
2587
2658
  this.cacheClears++;
2588
2659
 
2589
2660
  // If this regex causes continuous cache thrashing, permanently fall back to NFA
2590
2661
  // to avoid spending CPU cycles constantly rebuilding the DFA tree.
2591
2662
  if (this.cacheClears >= DFA.MAX_CACHE_CLEARS) {
2592
2663
  this.failed = true;
2664
+ this.stateCache.clear();
2665
+ this.stateCount = 0;
2666
+ this.startState = null;
2667
+ return null;
2668
+ }
2669
+ this.evictCache();
2670
+
2671
+ // After eviction, the bucket reference might be stale or empty.
2672
+ // We must re-fetch or re-create the bucket.
2673
+ bucket = this.stateCache.get(hash);
2674
+ if (!bucket) {
2675
+ bucket = [];
2676
+ this.stateCache.set(hash, bucket);
2593
2677
  }
2594
- return null;
2595
2678
  }
2596
2679
 
2597
2680
  // State not found, create it and add to bucket
2598
2681
  const state = new DFAState(sortedPCs, closureResult.isMatch, closureResult.matchIDs);
2682
+ state.lastSeen = ++this.clock;
2599
2683
  bucket.push(state);
2600
2684
  this.stateCount++;
2601
2685
  return state;
2602
2686
  }
2687
+ evictCache() {
2688
+ const allStates = [];
2689
+ for (const bucket of this.stateCache.values()) {
2690
+ for (let i = 0; i < bucket.length; i++) {
2691
+ allStates.push(bucket[i]);
2692
+ }
2693
+ }
2694
+
2695
+ // Sort ascending by lastSeen (oldest first)
2696
+ allStates.sort((a, b) => a.lastSeen - b.lastSeen);
2697
+
2698
+ // Keep the newest 50%
2699
+ const keepCount = Math.max(1, Math.floor(this.stateLimit / 2));
2700
+ const startIndex = allStates.length - keepCount;
2701
+ const survivorsArray = allStates.slice(startIndex);
2702
+ const survivors = new Set(survivorsArray);
2703
+ this.stateCache.clear();
2704
+ this.stateCount = 0;
2705
+ for (let i = 0; i < survivorsArray.length; i++) {
2706
+ const state = survivorsArray[i];
2707
+
2708
+ // Sever ties to all states to prevent memory leaks and dangling pointers
2709
+ state.nextLatin1.fill(null);
2710
+ state.nextLatin1Anchored.fill(null);
2711
+ // zero-allocation cleanup
2712
+ state.transKeys.length = 0;
2713
+ state.transVals.length = 0;
2714
+ const hash = hashPCs(state.nfaStates);
2715
+ let bucket = this.stateCache.get(hash);
2716
+ if (!bucket) {
2717
+ bucket = [];
2718
+ this.stateCache.set(hash, bucket);
2719
+ }
2720
+ bucket.push(state);
2721
+ this.stateCount++;
2722
+ }
2723
+
2724
+ // Start state must either be preserved or nullified so it gets re-created
2725
+ if (this.startState && !survivors.has(this.startState)) {
2726
+ this.startState = null;
2727
+ }
2728
+ }
2603
2729
 
2604
2730
  // Compute the next DFA state given a current state and a character
2605
2731
  step(state, charCode, anchor) {
2606
- // OPTIMIZATION: ASCII Fast-Path
2607
- if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2608
- const next = state.nextAscii[charCode];
2609
- if (next !== null) {
2610
- return next;
2732
+ // OPTIMIZATION: Latin-1 Array Fast-Path
2733
+ if (charCode <= Unicode.MAX_LATIN1) {
2734
+ if (anchor === RE2Flags.UNANCHORED) {
2735
+ const next = state.nextLatin1[charCode];
2736
+ if (next !== null) return next;
2737
+ } else {
2738
+ const next = state.nextLatin1Anchored[charCode];
2739
+ if (next !== null) return next;
2611
2740
  }
2612
2741
  } else {
2742
+ // Dense Array Linear Search fallback for Runes > 255
2613
2743
  const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2614
- if (state.nextMap.has(key)) {
2615
- return state.nextMap.get(key);
2744
+ // get [key] -> nextState
2745
+ const keys = state.transKeys;
2746
+ const len = keys.length;
2747
+ for (let i = 0; i < len; i++) {
2748
+ if (keys[i] === key) return state.transVals[i];
2616
2749
  }
2617
2750
  }
2618
2751
  const nextPCs = [];
@@ -2629,11 +2762,17 @@ class DFA {
2629
2762
  const nextState = this.getState(nextPCs);
2630
2763
 
2631
2764
  // Cache the result
2632
- if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2633
- state.nextAscii[charCode] = nextState;
2765
+ if (charCode <= Unicode.MAX_LATIN1) {
2766
+ if (anchor === RE2Flags.UNANCHORED) {
2767
+ state.nextLatin1[charCode] = nextState;
2768
+ } else {
2769
+ state.nextLatin1Anchored[charCode] = nextState;
2770
+ }
2634
2771
  } else {
2635
2772
  const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2636
- state.nextMap.set(key, nextState);
2773
+ // store key -> nextState
2774
+ state.transKeys.push(key);
2775
+ state.transVals.push(nextState);
2637
2776
  }
2638
2777
  return nextState;
2639
2778
  }
@@ -2666,10 +2805,11 @@ class DFA {
2666
2805
  if (width === 0) {
2667
2806
  break;
2668
2807
  }
2669
- currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2808
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_LATIN1 && currentState.nextLatin1[rune] || this.step(currentState, rune, anchor);
2670
2809
 
2671
2810
  // If we hit an unrecoverable DFA error or bailout, signal fallback
2672
2811
  if (currentState === null) return null;
2812
+ currentState.lastSeen = ++this.clock;
2673
2813
  if (currentState.isMatch) {
2674
2814
  if (anchor === RE2Flags.ANCHOR_BOTH) {
2675
2815
  if (i + width === endPos) return true;
@@ -2717,9 +2857,10 @@ class DFA {
2717
2857
  const rune = r >> 3;
2718
2858
  const width = r & 7;
2719
2859
  if (width === 0) break;
2720
- currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2860
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_LATIN1 && currentState.nextLatin1[rune] || this.step(currentState, rune, anchor);
2721
2861
  if (currentState === null) return null; // Bailout to NFA
2722
2862
 
2863
+ currentState.lastSeen = ++this.clock;
2723
2864
  i += width;
2724
2865
  checkMatch(currentState, i);
2725
2866
  if (currentState.nfaStates.length === 0) {
@@ -2757,7 +2898,7 @@ class BitState {
2757
2898
  // Bitwise shift (>>> 5) instead of Math.floor( / 32)
2758
2899
  const visitedSize = prog.numInst() * (end + 1) + VISITED_BITS - 1 >>> 5;
2759
2900
  if (this.visited.length < visitedSize) {
2760
- this.visited = new Uint32Array(Math.floor(MAX_BACKTRACK_VECTOR / VISITED_BITS));
2901
+ this.visited = new Uint32Array(visitedSize);
2761
2902
  } else {
2762
2903
  this.visited.fill(0, 0, visitedSize);
2763
2904
  }
@@ -2843,11 +2984,12 @@ class BitState {
2843
2984
  const outInst = re2.prog.getInst(inst.out);
2844
2985
  if (Inst.isRuneOp(outInst.op)) {
2845
2986
  this.push(re2, inst.arg, currentPos, false);
2846
- currentPc = inst.out;
2987
+ currentPc = inst.arg;
2988
+ currentPos = this.end;
2847
2989
  continue;
2848
2990
  }
2849
2991
  this.push(re2, inst.out, this.end, false);
2850
- currentPc = inst.arg;
2992
+ currentPc = inst.out;
2851
2993
  continue;
2852
2994
  }
2853
2995
  case Inst.RUNE:
@@ -2928,6 +3070,11 @@ class BitState {
2928
3070
  if (currentPos === this.end) return true;
2929
3071
  break;
2930
3072
  }
3073
+ case Inst.LB_WRITE:
3074
+ case Inst.LB_CHECK:
3075
+ {
3076
+ throw new RE2JSInternalException('Backtracker cannot evaluate Lookbehind instructions');
3077
+ }
2931
3078
  default:
2932
3079
  {
2933
3080
  throw new RE2JSInternalException('bad inst');
@@ -3196,7 +3343,9 @@ const makeOnePass = p => {
3196
3343
  }
3197
3344
  runes.sort((a, b) => a - b);
3198
3345
  } else {
3199
- runes.push(...inst.runes);
3346
+ for (let j = 0; j < inst.runes.length; j++) {
3347
+ runes.push(inst.runes[j]);
3348
+ }
3200
3349
  }
3201
3350
  onePassRunes[pc] = runes;
3202
3351
  inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
@@ -3364,6 +3513,10 @@ class OnePass {
3364
3513
  switch (inst.op) {
3365
3514
  case Inst.MATCH:
3366
3515
  {
3516
+ // Verify ANCHOR_BOTH constraint before accepting the match
3517
+ if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) {
3518
+ return null;
3519
+ }
3367
3520
  matched = true;
3368
3521
  if (matchcap.length > 0) {
3369
3522
  matchcap[0] = 0;
@@ -3794,6 +3947,88 @@ class Regexp {
3794
3947
  }
3795
3948
  }
3796
3949
 
3950
// Single-pass Aho-Corasick multi-pattern matcher.
// Builds a trie over the given words, adds failure links, and then answers
// "does ANY of the words occur in this range?" in one left-to-right scan.
class AhoCorasick {
  /**
   * Builds the automaton.
   *
   * Node 0 is the root. For every node `n`:
   *   - `this.next[n]`  maps a unit value (UTF-16 code unit or byte) to the child node id,
   *   - `this.fail[n]`  is the node to fall back to when no edge matches,
   *   - `this.match[n]` is true when a word ends at `n` or at a node reachable
   *                     from `n` through failure links.
   *
   * @param {Iterable<ArrayLike<number>>} wordArrays words given as indexable
   *   sequences of numeric units (plain arrays of char codes, or byte arrays).
   */
  constructor(wordArrays) {
    // Null-prototype objects are used as dictionaries so that inherited
    // properties can never be mistaken for edges by the `in` operator.
    this.next = [Object.create(null)];
    this.fail = [0];
    this.match = [false];

    // Build the trie. An empty word ends at the root and sets match[0].
    for (const word of wordArrays) {
      let node = 0;
      for (let i = 0; i < word.length; i++) {
        const val = word[i];
        const edges = this.next[node];
        if (!(val in edges)) {
          this.next.push(Object.create(null));
          this.fail.push(0);
          this.match.push(false);
          edges[val] = this.next.length - 1;
        }
        node = edges[val];
      }
      this.match[node] = true;
    }

    // Build failure links breadth-first. The queue is consumed through a
    // moving head index rather than Array.prototype.shift(), which would
    // re-index the whole array on every dequeue.
    const rootEdges = this.next[0];
    const queue = [];
    for (const val of Object.keys(rootEdges)) {
      // Depth-1 nodes always fall back to the root (fail is already 0).
      queue.push(rootEdges[val]);
    }
    for (let head = 0; head < queue.length; head++) {
      const curr = queue[head];
      const edges = this.next[curr];
      for (const val of Object.keys(edges)) {
        const child = edges[val];
        // Walk the parent's failure chain until a node with an edge on `val`
        // is found, or the root is reached.
        let failNode = this.fail[curr];
        while (failNode !== 0 && !(val in this.next[failNode])) {
          failNode = this.fail[failNode];
        }
        this.fail[child] = val in this.next[failNode] ? this.next[failNode][val] : 0;
        // The failure target is shallower than `child`, so its flag is final
        // by now; inheriting it lets the search test a single boolean per step.
        this.match[child] = this.match[child] || this.match[this.fail[child]];
        queue.push(child);
      }
    }
  }

  /**
   * Reports whether any word occurs in the UTF-16 code units of
   * `charSeq` in the half-open range [start, end).
   *
   * @param {string} charSeq text to scan (read with `charCodeAt`)
   * @param {number} start index of the first code unit to examine
   * @param {number} end index one past the last code unit to examine
   * @returns {boolean} true as soon as one word is found, false otherwise
   */
  searchUTF16(charSeq, start, end) {
    // An empty word is contained in every range, including an empty one.
    if (this.match[0]) return true;
    let node = 0;
    for (let i = start; i < end; i++) {
      const val = charSeq.charCodeAt(i);
      while (node !== 0 && !(val in this.next[node])) {
        node = this.fail[node];
      }
      if (val in this.next[node]) {
        node = this.next[node][val];
      }
      if (this.match[node]) return true;
    }
    return false;
  }

  /**
   * Reports whether any word occurs in `bytes` in the half-open range
   * [start, end). Same scan as `searchUTF16`, reading units by index; the
   * loop is kept separate so each scan reads its input directly.
   *
   * @param {ArrayLike<number>} bytes indexable sequence of numeric units
   * @param {number} start index of the first unit to examine
   * @param {number} end index one past the last unit to examine
   * @returns {boolean} true as soon as one word is found, false otherwise
   */
  searchUTF8(bytes, start, end) {
    // An empty word is contained in every range, including an empty one.
    if (this.match[0]) return true;
    let node = 0;
    for (let i = start; i < end; i++) {
      const val = bytes[i];
      while (node !== 0 && !(val in this.next[node])) {
        node = this.fail[node];
      }
      if (val in this.next[node]) {
        node = this.next[node][val];
      }
      if (this.match[node]) return true;
    }
    return false;
  }
}
3797
4032
  class Prefilter {
3798
4033
  static Type = {
3799
4034
  NONE: 0,
@@ -3806,6 +4041,8 @@ class Prefilter {
3806
4041
  this.subs = [];
3807
4042
  this.str = '';
3808
4043
  this.bytes = null;
4044
+ this.ac16 = null;
4045
+ this.ac8 = null;
3809
4046
  }
3810
4047
  eval(input, pos) {
3811
4048
  switch (this.type) {
@@ -3819,6 +4056,10 @@ class Prefilter {
3819
4056
  }
3820
4057
  return true;
3821
4058
  case Prefilter.Type.OR:
4059
+ // Exploit Aho-Corasick if it was successfully built
4060
+ if (this.ac16 && this.ac8) {
4061
+ return input.hasAnyString(this, pos);
4062
+ }
3822
4063
  for (let i = 0; i < this.subs.length; i++) {
3823
4064
  if (this.subs[i].eval(input, pos)) return true;
3824
4065
  }
@@ -3909,7 +4150,9 @@ class PrefilterTree {
3909
4150
  const s = PrefilterTree.simplify(sub);
3910
4151
  if (s.type !== Prefilter.Type.NONE) {
3911
4152
  if (s.type === Prefilter.Type.AND) {
3912
- newSubs.push(...s.subs);
4153
+ for (let j = 0; j < s.subs.length; j++) {
4154
+ newSubs.push(s.subs[j]);
4155
+ }
3913
4156
  } else {
3914
4157
  newSubs.push(s);
3915
4158
  }
@@ -3929,7 +4172,9 @@ class PrefilterTree {
3929
4172
  return new Prefilter(Prefilter.Type.NONE);
3930
4173
  }
3931
4174
  if (s.type === Prefilter.Type.OR) {
3932
- newSubs.push(...s.subs);
4175
+ for (let j = 0; j < s.subs.length; j++) {
4176
+ newSubs.push(s.subs[j]);
4177
+ }
3933
4178
  } else {
3934
4179
  newSubs.push(s);
3935
4180
  }
@@ -3951,6 +4196,27 @@ class PrefilterTree {
3951
4196
  }
3952
4197
  }
3953
4198
  pf.subs = uniqueSubs;
4199
+
4200
+ // Build an Aho-Corasick automaton if all children are exact matches
4201
+ let allExact = true;
4202
+ for (const sub of uniqueSubs) {
4203
+ if (sub.type !== Prefilter.Type.EXACT) {
4204
+ allExact = false;
4205
+ break;
4206
+ }
4207
+ }
4208
+ if (allExact && uniqueSubs.length > 1) {
4209
+ const words16 = uniqueSubs.map(s => {
4210
+ const arr = [];
4211
+ for (let i = 0; i < s.str.length; i++) {
4212
+ arr.push(s.str.charCodeAt(i));
4213
+ }
4214
+ return arr;
4215
+ });
4216
+ pf.ac16 = new AhoCorasick(words16);
4217
+ const words8 = uniqueSubs.map(s => s.bytes);
4218
+ pf.ac8 = new AhoCorasick(words8);
4219
+ }
3954
4220
  return pf;
3955
4221
  }
3956
4222
  return pf;
@@ -4477,7 +4743,9 @@ class Simplify {
4477
4743
  // Flatten nested concatenations
4478
4744
  if (nsub.op === Regexp.Op.CONCAT) {
4479
4745
  changed = true;
4480
- newSubs.push(...nsub.subs);
4746
+ for (let j = 0; j < nsub.subs.length; j++) {
4747
+ newSubs.push(nsub.subs[j]);
4748
+ }
4481
4749
  continue;
4482
4750
  }
4483
4751
  } else if (re.op === Regexp.Op.ALTERNATE) {
@@ -4489,7 +4757,9 @@ class Simplify {
4489
4757
  // Flatten nested alternations
4490
4758
  if (nsub.op === Regexp.Op.ALTERNATE) {
4491
4759
  changed = true;
4492
- newSubs.push(...nsub.subs);
4760
+ for (let j = 0; j < nsub.subs.length; j++) {
4761
+ newSubs.push(nsub.subs[j]);
4762
+ }
4493
4763
  continue;
4494
4764
  }
4495
4765
  }
@@ -5499,7 +5769,10 @@ class Parser {
5499
5769
  return t.pop();
5500
5770
  }
5501
5771
  static concatRunes(x, y) {
5502
- return [...x, ...y];
5772
+ for (let i = 0; i < y.length; i++) {
5773
+ x.push(y[i]);
5774
+ }
5775
+ return x;
5503
5776
  }
5504
5777
  constructor(wholeRegexp, flags = 0) {
5505
5778
  this.wholeRegexp = wholeRegexp;
@@ -5535,8 +5808,8 @@ class Parser {
5535
5808
  return re;
5536
5809
  }
5537
5810
  reuse(re) {
5538
- if (this.height !== null && Object.prototype.hasOwnProperty.call(this.height, re)) {
5539
- delete this.height[re];
5811
+ if (this.height !== null && this.height.has(re)) {
5812
+ this.height.delete(re);
5540
5813
  }
5541
5814
  if (re.subs !== null && re.subs.length > 0) {
5542
5815
  re.subs[0] = this.free;
@@ -5568,20 +5841,20 @@ class Parser {
5568
5841
  if (n <= 0) {
5569
5842
  n = 1;
5570
5843
  }
5571
- if (n > Parser.MAX_SIZE / this.repeats) {
5844
+ if (n > Math.floor(Parser.MAX_SIZE / this.repeats)) {
5572
5845
  this.repeats = Parser.MAX_SIZE;
5573
5846
  } else {
5574
5847
  this.repeats *= n;
5575
5848
  }
5576
5849
  }
5577
- if (this.numRegexp < Parser.MAX_SIZE / this.repeats) {
5850
+ if (this.numRegexp < Math.floor(Parser.MAX_SIZE / this.repeats)) {
5578
5851
  return;
5579
5852
  }
5580
5853
 
5581
5854
  // We need to start tracking size.
5582
5855
  // Make the map and belatedly populate it
5583
5856
  // with info about everything we've constructed so far.
5584
- this.size = Object.create(null);
5857
+ this.size = new Map();
5585
5858
  for (let reEx of this.stack) {
5586
5859
  this.checkSize(reEx);
5587
5860
  }
@@ -5592,8 +5865,8 @@ class Parser {
5592
5865
  }
5593
5866
  calcSize(re, force = false) {
5594
5867
  if (!force && this.size !== null) {
5595
- if (Object.prototype.hasOwnProperty.call(this.size, re)) {
5596
- return this.size[re];
5868
+ if (this.size.has(re)) {
5869
+ return this.size.get(re);
5597
5870
  }
5598
5871
  }
5599
5872
  let size = 0;
@@ -5653,9 +5926,9 @@ class Parser {
5653
5926
  }
5654
5927
  size = Math.max(1, size);
5655
5928
  if (this.size === null) {
5656
- this.size = Object.create(null);
5929
+ this.size = new Map();
5657
5930
  }
5658
- this.size[re] = size;
5931
+ this.size.set(re, size);
5659
5932
  return size;
5660
5933
  }
5661
5934
  checkHeight(re) {
@@ -5663,7 +5936,7 @@ class Parser {
5663
5936
  return;
5664
5937
  }
5665
5938
  if (this.height === null) {
5666
- this.height = Object.create(null);
5939
+ this.height = new Map();
5667
5940
  for (let reEx of this.stack) {
5668
5941
  this.checkHeight(reEx);
5669
5942
  }
@@ -5674,8 +5947,8 @@ class Parser {
5674
5947
  }
5675
5948
  calcHeight(re, force = false) {
5676
5949
  if (!force && this.height !== null) {
5677
- if (Object.prototype.hasOwnProperty.call(this.height, re)) {
5678
- return this.height[re];
5950
+ if (this.height.has(re)) {
5951
+ return this.height.get(re);
5679
5952
  }
5680
5953
  }
5681
5954
  let h = 1;
@@ -5686,9 +5959,9 @@ class Parser {
5686
5959
  }
5687
5960
  }
5688
5961
  if (this.height === null) {
5689
- this.height = Object.create(null);
5962
+ this.height = new Map();
5690
5963
  }
5691
- this.height[re] = h;
5964
+ this.height.set(re, h);
5692
5965
  return h;
5693
5966
  }
5694
5967
 
@@ -7810,7 +8083,7 @@ class TranslateRegExpString {
7810
8083
  changed = true;
7811
8084
  continue;
7812
8085
  } else if (ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
7813
- if (i + 3 >= size || data[i + 3] !== '=' && data[i + 3] !== '!') {
8086
+ if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
7814
8087
  result += '(?P<';
7815
8088
  i += 3;
7816
8089
  changed = true;