re2js 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.2.0
5
+ * @version v2.2.1
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -78,6 +78,8 @@ const PublicFlags = {
78
78
  const ASCII_SIZE = 128;
79
79
  const ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
80
80
  const ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
81
+ // The highest legal Basic Multilingual Plane (BMP) value.
82
+ const MAX_BMP = 0xffff;
81
83
  for (let i = 0; i < ASCII_SIZE; i++) {
82
84
  if (i >= 97 && i <= 122) {
83
85
  // a-z
@@ -101,11 +103,13 @@ class Codepoint {
101
103
  static toUpperCase(codepoint) {
102
104
  if (codepoint < ASCII_SIZE) return ASCII_TO_UPPER[codepoint];
103
105
  const s = String.fromCodePoint(codepoint).toUpperCase();
104
- if (s.length > 1) {
106
+ const expectedLen = s.codePointAt(0) > MAX_BMP ? 2 : 1;
107
+ if (s.length > expectedLen) {
105
108
  return codepoint;
106
109
  }
107
110
  const sOrigin = String.fromCodePoint(s.codePointAt(0)).toLowerCase();
108
- if (sOrigin.length > 1 || sOrigin.codePointAt(0) !== codepoint) {
111
+ const originExpectedLen = sOrigin.codePointAt(0) > MAX_BMP ? 2 : 1;
112
+ if (sOrigin.length > originExpectedLen || sOrigin.codePointAt(0) !== codepoint) {
109
113
  return codepoint;
110
114
  }
111
115
  return s.codePointAt(0);
@@ -116,11 +120,13 @@ class Codepoint {
116
120
  static toLowerCase(codepoint) {
117
121
  if (codepoint < ASCII_SIZE) return ASCII_TO_LOWER[codepoint];
118
122
  const s = String.fromCodePoint(codepoint).toLowerCase();
119
- if (s.length > 1) {
123
+ const expectedLen = s.codePointAt(0) > MAX_BMP ? 2 : 1;
124
+ if (s.length > expectedLen) {
120
125
  return codepoint;
121
126
  }
122
127
  const sOrigin = String.fromCodePoint(s.codePointAt(0)).toUpperCase();
123
- if (sOrigin.length > 1 || sOrigin.codePointAt(0) !== codepoint) {
128
+ const originExpectedLen = sOrigin.codePointAt(0) > MAX_BMP ? 2 : 1;
129
+ if (sOrigin.length > originExpectedLen || sOrigin.codePointAt(0) !== codepoint) {
124
130
  return codepoint;
125
131
  }
126
132
  return s.codePointAt(0);
@@ -222,7 +228,7 @@ class UnicodeTables {
222
228
  static _CASE_ORBIT = null;
223
229
  static get CASE_ORBIT() {
224
230
  if (!this._CASE_ORBIT) {
225
- this._CASE_ORBIT = decodeOrbit('rCrDIzDYqpII-LiC8cQlHa+0HGrpI6EzClClOBmOBkOBoOBpOBnOBrOBsOBqOlByPBzPBxPyK5crCz+HCydD1dD4dB5dB6dC8dEgeBheCieDmeDpeHj-HCweD1fDxeB+9HBwfC1FE2eBxfBjeBjdD1eDmpIHycB0fEmdBgda6cBhdD4cB1cdyhBC0hBK+hBDhiBBiiBIqiBIgkHChkHKikHDjkHBkkHImkHYjjBBnkH9gGygBB0gBB+gBBhhBBlkHBihBBqhBBijBBqypB4OhzHB70H6BgzHD-GiHo8HBp8HBq8HBr8HBs8HBt8HBu8HBv8HBg8HBh8HBi8HBj8HBk8HBl8HBm8HBn8HB48HB58HB68HB78HB88HB98HB+8HB-8HBw8HBx8HBy8HBz8HB08HB18HB28HB38HBo9HBp9HBq9HBr9HBs9HBt9HBu9HBv9HBg9HBh9HBi9HBj9HBk9HBl9HBm9HBn9HE89HJz9HClaFs+HJj+HHwcQwdQ8-HJz-HqJpdErCBlG-ohBrypBBokH6lVm4+BBl4+B6nCohhCBphhCBqhhCBrhhCBshhCBthhCBuhhCBvhhCBwhhCBxhhCByhhCBzhhCB0hhCB1hhCB2hhCB3hhCB4hhCB5hhCB6hhCB7hhCB8hhCB9hhCB+hhCB-hhCBgihCBhihCBiihCBjihCBkihCBlihCBmihCBnihCBoihCBpihCBqihCBrihCBsihCBtihCBuihCBvihCBgghCBhghCBighCBjghCBkghCBlghCBmghCBnghCBoghCBpghCBqghCBrghCBsghCBtghCBughCBvghCBwghCBxghCByghCBzghCB0ghCB1ghCB2ghCB3ghCB4ghCB5ghCB6ghCB7ghCB8ghCB9ghCB+ghCB-ghCBghhCBhhhCBihhCBjhhCBkhhCBlhhCBmhhCBnhhChD4mhCB5mhCB6mhCB7mhCB8mhCB9mhCB+mhCB-mhCBgnhCBhnhCBinhCBjnhCBknhCBlnhCBmnhCBnnhCBonhCBpnhCBqnhCBrnhCBsnhCBtnhCBunhCBvnhCBwnhCBxnhCBynhCBznhCB0nhCB1nhCB2nhCB3nhCB4nhCB5nhCB6nhCB7nhCFwlhCBxlhCBylhCBzlhCB0lhCB1lhCB2lhCB3lhCB4lhCB5lhCB6lhCB7lhCB8lhCB9lhCB+lhCB-lhCBgmhCBhmhCBimhCBjmhCBkmhCBlmhCBmmhCBnmhCBomhCBpmhCBqmhCBrmhCBsmhCBtmhCBumhCBvmhCBwmhCBxmhCBymhCBzmhC1D3shCB4shCB5shCB6shCB7shCB8shCB9shCB+shCB-shCBgthCBhthCCjthCBkthCBlthCBmthCBnthCBothCBpthCBqthCBrthCBsthCBtthCButhCBvthCBwthCBxthCCzthCB0thCB1thCB2thCB3thCB4thCB5thCC7thCB8thCCwrhCBxrhCByrhCBzrhCB0rhCB1rhCB2rhCB3rhCB4rhCB5rhCB6rhCC8rhCB9rhCB+rhCB-rhCBgshCBhshCBishCBjshCBkshCBlshCBmshCBnshCBoshCBpshCBqshCCsshCBtshCBushCBvshCBwshCBxshCByshCC0shCB1shCk2BgmjCBhmjCBimjCBjmjCBkmjCBlmjCBmmjCBnmjCBomjCBpmjCBqmjCBrmjCBsmjCBtmjCBumjCBvmjCBwmjCBxmjCBymjCBzmjCB0mjCB1mjCB2mjCB3mjCB4mjCB5mjCB6mjCB7mjCB8mjCB9mjCB+mjCB-mjCBgnjCBhnjCBinjCBjnjCBknjCBlnjCBmnjCBnnjCBonjCBpnjCBqnjCBrnjCBsnjCBtnjCBunjCBvnjCBwnjCBxnjCBynjCOgkjCBhkjCBikjCBjkjCBkkjCBlkjCBmkjCBnkjCBokj
CBpkjCBqkjCBrkjCBskjCBtkjCBukjCBvkjCBwkjCBxkjCBykjCBzkjCB0kjCB1kjCB2kjCB3kjCB4kjCB5kjCB6kjCB7kjCB8kjCB9kjCB+kjCB-kjCBgljCBhljCBiljCBjljCBkljCBlljCBmljCBnljCBoljCBpljCBqljCBrljCBsljCBtljCBuljCBvljCBwljCBxljCByljC+CwrjCBxrjCByrjCBzrjCB0rjCB1rjCB2rjCB3rjCB4rjCB5rjCB6rjCB7rjCB8rjCB9rjCB+rjCB-rjCBgsjCBhsjCBisjCBjsjCBksjCBlsjCLwqjCBxqjCByqjCBzqjCB0qjCB1qjCB2qjCB3qjCB4qjCB5qjCB6qjCB7qjCB8qjCB9qjCB+qjCB-qjCBgrjCBhrjCBirjCBjrjCBkrjCBlrjC74CgmmCBhmmCBimmCBjmmCBkmmCBlmmCBmmmCBnmmCBommCBpmmCBqmmCBrmmCBsmmCBtmmCBummCBvmmCBwmmCBxmmCBymmCBzmmCB0mmCB1mmCB2mmCB3mmCB4mmCB5mmCB6mmCB7mmCB8mmCB9mmCB+mmCB-mmCBglmCBhlmCBilmCBjlmCBklmCBllmCBmlmCBnlmCBolmCBplmCBqlmCBrlmCBslmCBtlmCBulmCBvlmCBwlmCBxlmCBylmCBzlmCB0lmCB1lmCB2lmCB3lmCB4lmCB5lmCB6lmCB7lmCB8lmCB9lmCB+lmCB-lmChrVgz7CBhz7CBiz7CBjz7CBkz7CBlz7CBmz7CBnz7CBoz7CBpz7CBqz7CBrz7CBsz7CBtz7CBuz7CBvz7CBwz7CBxz7CByz7CBzz7CB0z7CB1z7CB2z7CB3z7CB4z7CB5z7CB6z7CB7z7CB8z7CB9z7CB+z7CB-z7CBgy7CBhy7CBiy7CBjy7CBky7CBly7CBmy7CBny7CBoy7CBpy7CBqy7CBry7CBsy7CBty7CBuy7CBvy7CBwy7CBxy7CByy7CBzy7CB0y7CB1y7CB2y7CB3y7CB4y7CB5y7CB6y7CB7y7CB8y7CB9y7CB+y7CB-y7ChB717CB817CB917CB+17CB-17CBg27CBh27CBi27CBj27CBk27CBl27CBm27CBn27CBo27CBp27CBq27CBr27CBs27CBt27CBu27CBv27CBw27CBx27CBy27CBz27CDg17CBh17CBi17CBj17CBk17CBl17CBm17CBn17CBo17CBp17CBq17CBr17CBs17CBt17CBu17CBv17CBw17CBx17CBy17CBz17CB017CB117CB217CB317CB417Ctxeip6DBjp6DBkp6DBlp6DBmp6DBnp6DBop6DBpp6DBqp6DBrp6DBsp6DBtp6DBup6DBvp6DBwp6DBxp6DByp6DBzp6DB0p6DB1p6DB2p6DB3p6DB4p6DB5p6DB6p6DB7p6DB8p6DB9p6DB+p6DB-p6DBgq6DBhq6DBiq6DBjq6DBgo6DBho6DBio6DBjo6DBko6DBlo6DBmo6DBno6DBoo6DBpo6DBqo6DBro6DBso6DBto6DBuo6DBvo6DBwo6DBxo6DByo6DBzo6DB0o6DB1o6DB2o6DB3o6DB4o6DB5o6DB6o6DB7o6DB8o6DB9o6DB+o6DB-o6DBgp6DBhp6D');
231
+ this._CASE_ORBIT = decodeOrbit('rCrDIzDYqpII-LiC8cQlHa+0HGrpI6EzClClOBmOBkOBoOBpOBnOBrOBsOBqOlByPBzPBxPyK5crCz+HCydD1dD4dB5dB6dC8dEgeBheCieDmeDpeHj-HCweD1fDxeB+9HBwfC1FE2eBxfBjeBjdD1eDmpIHycB0fEmdBgda6cBhdD4cB1cdyhBC0hBK+hBDhiBBiiBIqiBIgkHChkHKikHDjkHBkkHImkHYjjBBnkH9gGygBB0gBB+gBBhhBBlkHBihBBqhBBijBBqypB4OhzHB70H6BgzHD-GiHo8HBp8HBq8HBr8HBs8HBt8HBu8HBv8HBg8HBh8HBi8HBj8HBk8HBl8HBm8HBn8HB48HB58HB68HB78HB88HB98HB+8HB-8HBw8HBx8HBy8HBz8HB08HB18HB28HB38HBo9HBp9HBq9HBr9HBs9HBt9HBu9HBv9HBg9HBh9HBi9HBj9HBk9HBl9HBm9HBn9HE89HJz9HClaFs+HJj+HHwcQwdQ8-HJz-HqJpdErCBlG-ohBrypBBokH6lVm4+BBl4+B');
226
232
  }
227
233
  return this._CASE_ORBIT;
228
234
  }
@@ -594,11 +600,16 @@ class Unicode {
594
600
  // to compare it to |r2|.
595
601
  // -1 is interpreted as the end-of-file mark.
596
602
  static equalsIgnoreCase(r1, r2) {
597
- // Runes already match, or one of them is EOF
598
- if (r1 < 0 || r2 < 0 || r1 === r2) {
603
+ // Runes already match
604
+ if (r1 === r2) {
599
605
  return true;
600
606
  }
601
607
 
608
+ // Safely fail if either is EOF (and they didn't explicitly match above)
609
+ if (r1 < 0 || r2 < 0) {
610
+ return false;
611
+ }
612
+
602
613
  // Fast path for the common case where both runes are ASCII characters.
603
614
  // Coerces both runes to lowercase if applicable.
604
615
  if (r1 <= this.MAX_ASCII && r2 <= this.MAX_ASCII) {
@@ -851,7 +862,7 @@ class Utils {
851
862
  // Encoding[(Encoding['UTF_16'] = 0)] = 'UTF_16'
852
863
  // Encoding[(Encoding['UTF_8'] = 1)] = 'UTF_8'
853
864
  const createEnum = (values = [], initNum = 0) => {
854
- const enumObject = {};
865
+ const enumObject = Object.create(null);
855
866
  for (let i = 0; i < values.length; i++) {
856
867
  const val = values[i];
857
868
  const keyVal = initNum + i;
@@ -993,6 +1004,9 @@ class MachineInputBase {
993
1004
  hasString() {
994
1005
  return false;
995
1006
  }
1007
+ hasAnyString() {
1008
+ return false;
1009
+ }
996
1010
 
997
1011
  // Helper for the exact-literal fast-path execution router
998
1012
  prefixLength() {
@@ -1018,6 +1032,13 @@ class MachineUTF8Input extends MachineInputBase {
1018
1032
  return idx !== -1 && idx <= this.end - target.length;
1019
1033
  }
1020
1034
 
1035
+ // Executes a high-speed, single-pass search for multiple literal strings
1036
+ // simultaneously using an Aho-Corasick automaton.
1037
+ hasAnyString(prefilter, pos) {
1038
+ if (!prefilter.ac8) return false;
1039
+ return prefilter.ac8.searchUTF8(this.bytes, this.start + pos, this.end);
1040
+ }
1041
+
1021
1042
  // Returns the rune at the specified index; the units are
1022
1043
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1023
1044
  // indices. Returns the width (in the same units) of the rune in
@@ -1036,17 +1057,23 @@ class MachineUTF8Input extends MachineInputBase {
1036
1057
  return c << 3 | 1;
1037
1058
  } else if (c >= 0xc2 && c <= 0xdf && pos + 1 < this.end) {
1038
1059
  const c1 = this.bytes[pos + 1] & 0xff;
1060
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1039
1061
  const rune = (c & 0x1f) << 6 | c1 & 0x3f;
1040
1062
  return rune << 3 | 2;
1041
1063
  } else if (c >= 0xe0 && c <= 0xef && pos + 2 < this.end) {
1042
1064
  const c1 = this.bytes[pos + 1] & 0xff;
1065
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1043
1066
  const c2 = this.bytes[pos + 2] & 0xff;
1067
+ if ((c2 & 0xc0) !== 0x80) return c << 3 | 1;
1044
1068
  const rune = (c & 0x0f) << 12 | (c1 & 0x3f) << 6 | c2 & 0x3f;
1045
1069
  return rune << 3 | 3;
1046
1070
  } else if (c >= 0xf0 && c <= 0xf4 && pos + 3 < this.end) {
1047
1071
  const c1 = this.bytes[pos + 1] & 0xff;
1072
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1048
1073
  const c2 = this.bytes[pos + 2] & 0xff;
1074
+ if ((c2 & 0xc0) !== 0x80) return c << 3 | 1;
1049
1075
  const c3 = this.bytes[pos + 3] & 0xff;
1076
+ if ((c3 & 0xc0) !== 0x80) return c << 3 | 1;
1050
1077
  const rune = (c & 0x07) << 18 | (c1 & 0x3f) << 12 | (c2 & 0x3f) << 6 | c3 & 0x3f;
1051
1078
  return rune << 3 | 4;
1052
1079
  } else {
@@ -1125,6 +1152,13 @@ class MachineUTF16Input extends MachineInputBase {
1125
1152
  return idx !== -1 && idx <= this.end - prefilter.str.length;
1126
1153
  }
1127
1154
 
1155
+ // Executes a high-speed, single-pass search for multiple literal strings
1156
+ // simultaneously using an Aho-Corasick automaton.
1157
+ hasAnyString(prefilter, pos) {
1158
+ if (!prefilter.ac16) return false;
1159
+ return prefilter.ac16.searchUTF16(this.charSequence, this.start + pos, this.end);
1160
+ }
1161
+
1128
1162
  // Returns the rune at the specified index; the units are
1129
1163
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1130
1164
  // indices. Returns the width (in the same units) of the rune in
@@ -1570,7 +1604,15 @@ class Matcher {
1570
1604
  if (this.hasMatch) {
1571
1605
  start = this.groups[1];
1572
1606
  if (this.groups[0] === this.groups[1]) {
1573
- start++;
1607
+ // Safely calculate structural encoding width to avoid sequence corruption
1608
+ const machineInput = this.matcherInput.isUTF16Encoding() ? MachineInput.fromUTF16(this.matcherInput.asCharSequence(), 0, this.matcherInputLength) : MachineInput.fromUTF8(this.matcherInput.asBytes(), 0, this.matcherInputLength);
1609
+ const r = machineInput.step(start);
1610
+ if (r < 0) {
1611
+ // EOF
1612
+ start++; // Advance past length to force loop exit
1613
+ } else {
1614
+ start += r & 7; // Advance by safely decoded width
1615
+ }
1574
1616
  }
1575
1617
  }
1576
1618
  return this.genMatch(start, RE2Flags.UNANCHORED);
@@ -1709,6 +1751,8 @@ class Matcher {
1709
1751
  const groupName = replacement.substring(i + 1, j);
1710
1752
  res += this.group(groupName);
1711
1753
  last = j + 1;
1754
+ i = j;
1755
+ continue;
1712
1756
  }
1713
1757
  }
1714
1758
  }
@@ -1788,6 +1832,7 @@ class Matcher {
1788
1832
  if (j === replacement.length || replacement.codePointAt(j) !== Codepoint.CODES.get('>')) {
1789
1833
  res += replacement.substring(i - 1, j + 1);
1790
1834
  last = j + 1;
1835
+ i = j;
1791
1836
  continue;
1792
1837
  }
1793
1838
  const groupName = replacement.substring(i + 1, j);
@@ -1797,6 +1842,8 @@ class Matcher {
1797
1842
  res += `$<${groupName}>`;
1798
1843
  }
1799
1844
  last = j + 1;
1845
+ i = j;
1846
+ continue;
1800
1847
  }
1801
1848
  }
1802
1849
  }
@@ -1921,6 +1968,8 @@ class Inst {
1921
1968
  return r === r0;
1922
1969
  }
1923
1970
  const len = this.runes.length;
1971
+ if (len === 0) return false;
1972
+
1924
1973
  // If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
1925
1974
  if (len === 2 || len === 4 || len === 6 || len === 8) {
1926
1975
  for (let j = 0; j < len; j += 2) {
@@ -1934,22 +1983,19 @@ class Inst {
1934
1983
  return false; // Stop here
1935
1984
  }
1936
1985
 
1937
- // Otherwise binary search.
1938
- let lo = 0;
1939
- let hi = this.runes.length / 2 | 0;
1940
- while (lo < hi) {
1941
- const m = lo + hi >> 1; // native cpu instruction for "lo + (((hi - lo) / 2) | 0)"
1942
- const c = this.runes[2 * m];
1943
- if (c <= r) {
1944
- if (r <= this.runes[2 * m + 1]) {
1945
- return true;
1946
- }
1947
- lo = m + 1;
1948
- } else {
1949
- hi = m;
1950
- }
1986
+ // Branchless Binary Search (Lower Bound)
1987
+ // Compiles to optimal conditional move (cmov) machine code, preventing
1988
+ // branch mispredictions on large, chaotic Unicode arrays
1989
+ let base = 0;
1990
+ let n = len >> 1;
1991
+ while (n > 1) {
1992
+ const half = n >> 1;
1993
+ base += this.runes[base + half << 1] <= r ? half : 0;
1994
+ n -= half;
1951
1995
  }
1952
- return false;
1996
+ base += this.runes[base << 1] <= r ? 1 : 0;
1997
+ const m = base - 1;
1998
+ return m >= 0 && r <= this.runes[m << 1 | 1];
1953
1999
  }
1954
2000
 
1955
2001
  // matchRunePos checks whether the instruction matches (and consumes) r.
@@ -1964,6 +2010,7 @@ class Inst {
1964
2010
  return r === r0 ? 0 : -1;
1965
2011
  }
1966
2012
  const len = this.runes.length;
2013
+ if (len === 0) return -1;
1967
2014
  if (len === 2 || len === 4 || len === 6 || len === 8) {
1968
2015
  for (let j = 0; j < len; j += 2) {
1969
2016
  if (r < this.runes[j]) return -1;
@@ -1971,19 +2018,18 @@ class Inst {
1971
2018
  }
1972
2019
  return -1;
1973
2020
  }
1974
- let lo = 0;
1975
- let hi = Math.floor(len / 2);
1976
- while (lo < hi) {
1977
- const m = lo + hi >> 1;
1978
- const c = this.runes[2 * m];
1979
- if (c <= r) {
1980
- if (r <= this.runes[2 * m + 1]) return m;
1981
- lo = m + 1;
1982
- } else {
1983
- hi = m;
1984
- }
2021
+
2022
+ // Branchless Binary Search (Lower Bound)
2023
+ let base = 0;
2024
+ let n = len >> 1;
2025
+ while (n > 1) {
2026
+ const half = n >> 1;
2027
+ base += this.runes[base + half << 1] <= r ? half : 0;
2028
+ n -= half;
1985
2029
  }
1986
- return -1;
2030
+ base += this.runes[base << 1] <= r ? 1 : 0;
2031
+ const m = base - 1;
2032
+ return m >= 0 && r <= this.runes[m << 1 | 1] ? m : -1;
1987
2033
  }
1988
2034
  /**
1989
2035
  *
@@ -2082,6 +2128,7 @@ class Queue {
2082
2128
  //
2083
2129
  // Called by RE2.doExecute.
2084
2130
  class Machine {
2131
+ static THREADS_CHUNK_SIZE = 128;
2085
2132
  static fromRE2(re2) {
2086
2133
  const m = new Machine();
2087
2134
  m.prog = re2.prog;
@@ -2122,15 +2169,15 @@ class Machine {
2122
2169
  resetCap() {
2123
2170
  for (let i = 0; i < this.poolSize; i++) {
2124
2171
  const t = this.pool[i];
2125
- t.cap.fill(0);
2172
+ t.cap.fill(-1);
2126
2173
  }
2127
2174
  }
2128
2175
  initNewCap(ncap) {
2129
2176
  for (let i = 0; i < this.poolSize; i++) {
2130
2177
  const t = this.pool[i];
2131
- t.cap = new Int32Array(ncap);
2178
+ t.cap = new Int32Array(ncap).fill(-1);
2132
2179
  }
2133
- this.matchcap = new Int32Array(ncap);
2180
+ this.matchcap = new Int32Array(ncap).fill(-1);
2134
2181
  }
2135
2182
  submatches() {
2136
2183
  if (this.ncap === 0) {
@@ -2143,14 +2190,21 @@ class Machine {
2143
2190
  // alloc() allocates a new thread with the given instruction.
2144
2191
  // It uses the free pool if possible.
2145
2192
  alloc(inst) {
2146
- let t;
2147
- if (this.poolSize > 0) {
2148
- this.poolSize--;
2149
- t = this.pool[this.poolSize];
2150
- } else {
2151
- t = new Thread();
2152
- t.cap = new Int32Array(this.matchcap.length);
2193
+ if (this.poolSize === 0) {
2194
+ const capLen = this.matchcap.length;
2195
+
2196
+ // Bulk allocate threads in a tight loop so the V8 engine
2197
+ // places them adjacently in the young generation heap
2198
+ for (let i = 0; i < Machine.THREADS_CHUNK_SIZE; i++) {
2199
+ const t = new Thread();
2200
+ t.cap = new Int32Array(capLen);
2201
+ this.pool[this.poolSize++] = t;
2202
+ }
2153
2203
  }
2204
+
2205
+ // Pop a thread from the top of the pool stack
2206
+ this.poolSize--;
2207
+ const t = this.pool[this.poolSize];
2154
2208
  t.inst = inst;
2155
2209
  return t;
2156
2210
  }
@@ -2203,6 +2257,9 @@ class Machine {
2203
2257
  if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) {
2204
2258
  break;
2205
2259
  }
2260
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2261
+ break;
2262
+ }
2206
2263
  if (this.matched) {
2207
2264
  break;
2208
2265
  }
@@ -2280,6 +2337,9 @@ class Machine {
2280
2337
  while (true) {
2281
2338
  if (runq.isEmpty()) {
2282
2339
  if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
2340
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2341
+ break;
2342
+ }
2283
2343
  }
2284
2344
  if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
2285
2345
  // Spawn Lookbehind threads BEFORE the main pattern
@@ -2395,78 +2455,83 @@ class Machine {
2395
2455
  runq.clear();
2396
2456
  }
2397
2457
  add(q, pc, pos, cap, cond, t) {
2398
- if (pc === 0) {
2399
- return t;
2400
- }
2401
- if (q.contains(pc)) {
2402
- return t;
2403
- }
2404
- const d = q.add(pc);
2405
- const inst = this.prog.inst[pc];
2406
- switch (inst.op) {
2407
- case Inst.FAIL:
2408
- break;
2409
- case Inst.ALT:
2410
- case Inst.ALT_MATCH:
2411
- t = this.add(q, inst.out, pos, cap, cond, t);
2412
- t = this.add(q, inst.arg, pos, cap, cond, t);
2413
- break;
2414
- case Inst.EMPTY_WIDTH:
2415
- if ((inst.arg & ~cond) === 0) {
2416
- t = this.add(q, inst.out, pos, cap, cond, t);
2417
- }
2418
- break;
2419
- case Inst.NOP:
2420
- t = this.add(q, inst.out, pos, cap, cond, t);
2421
- break;
2422
- case Inst.CAPTURE:
2423
- if (inst.arg < this.ncap) {
2424
- const opos = cap[inst.arg];
2425
- cap[inst.arg] = pos;
2426
- this.add(q, inst.out, pos, cap, cond, null);
2427
- cap[inst.arg] = opos;
2428
- } else {
2458
+ while (true) {
2459
+ if (pc === 0) {
2460
+ return t;
2461
+ }
2462
+ if (q.contains(pc)) {
2463
+ return t;
2464
+ }
2465
+ const d = q.add(pc);
2466
+ const inst = this.prog.inst[pc];
2467
+ switch (inst.op) {
2468
+ case Inst.FAIL:
2469
+ return t;
2470
+ case Inst.ALT:
2471
+ case Inst.ALT_MATCH:
2429
2472
  t = this.add(q, inst.out, pos, cap, cond, t);
2430
- }
2431
- break;
2432
- case Inst.LB_WRITE:
2433
- this.lbTable[Math.abs(inst.lb)] = pos;
2434
- t = this.add(q, inst.out, pos, cap, cond, t);
2435
- break;
2436
- case Inst.LB_CHECK:
2437
- if (inst.lb > 0) {
2438
- // Positive Lookbehind
2439
- if (this.lbTable[inst.lb] === pos) {
2440
- t = this.add(q, inst.out, pos, cap, cond, t);
2473
+ pc = inst.arg; // Flattened tail recursion
2474
+ continue;
2475
+ case Inst.EMPTY_WIDTH:
2476
+ if ((inst.arg & ~cond) === 0) {
2477
+ pc = inst.out; // Flattened tail recursion
2478
+ continue;
2441
2479
  }
2442
- } else if (this.lbTable[-inst.lb] !== pos) {
2443
- // Negative Lookbehind
2444
- t = this.add(q, inst.out, pos, cap, cond, t);
2445
- }
2446
- break;
2447
- case Inst.MATCH:
2448
- case Inst.RUNE:
2449
- case Inst.RUNE1:
2450
- case Inst.RUNE_ANY:
2451
- case Inst.RUNE_ANY_NOT_NL:
2452
- if (t === null) {
2453
- t = this.alloc(inst);
2454
- } else {
2455
- t.inst = inst;
2456
- }
2457
- if (this.ncap > 0 && t.cap !== cap) {
2458
- // Direct assignment utilizing Typed Array performance
2459
- for (let c = 0; c < this.ncap; c++) {
2460
- t.cap[c] = cap[c];
2480
+ return t;
2481
+ case Inst.NOP:
2482
+ pc = inst.out; // Flattened tail recursion
2483
+ continue;
2484
+ case Inst.CAPTURE:
2485
+ if (inst.arg < this.ncap) {
2486
+ const opos = cap[inst.arg];
2487
+ cap[inst.arg] = pos;
2488
+ this.add(q, inst.out, pos, cap, cond, null);
2489
+ cap[inst.arg] = opos;
2490
+ return t;
2491
+ } else {
2492
+ pc = inst.out; // Flattened tail recursion
2493
+ continue;
2461
2494
  }
2462
- }
2463
- q.denseThreads[d] = t;
2464
- t = null;
2465
- break;
2466
- default:
2467
- throw new Error('unhandled');
2495
+ case Inst.LB_WRITE:
2496
+ this.lbTable[Math.abs(inst.lb)] = pos;
2497
+ pc = inst.out;
2498
+ continue;
2499
+ case Inst.LB_CHECK:
2500
+ if (inst.lb > 0) {
2501
+ // Positive Lookbehind
2502
+ if (this.lbTable[inst.lb] === pos) {
2503
+ pc = inst.out; // Flattened tail recursion
2504
+ continue;
2505
+ }
2506
+ } else if (this.lbTable[-inst.lb] !== pos) {
2507
+ // Negative Lookbehind
2508
+ pc = inst.out; // Flattened tail recursion
2509
+ continue;
2510
+ }
2511
+ return t;
2512
+ case Inst.MATCH:
2513
+ case Inst.RUNE:
2514
+ case Inst.RUNE1:
2515
+ case Inst.RUNE_ANY:
2516
+ case Inst.RUNE_ANY_NOT_NL:
2517
+ if (t === null) {
2518
+ t = this.alloc(inst);
2519
+ } else {
2520
+ t.inst = inst;
2521
+ }
2522
+ if (this.ncap > 0 && t.cap !== cap) {
2523
+ // Direct assignment utilizing Typed Array performance
2524
+ for (let c = 0; c < this.ncap; c++) {
2525
+ t.cap[c] = cap[c];
2526
+ }
2527
+ }
2528
+ q.denseThreads[d] = t;
2529
+ t = null;
2530
+ return t;
2531
+ default:
2532
+ throw new Error('unhandled');
2533
+ }
2468
2534
  }
2469
- return t;
2470
2535
  }
2471
2536
  }
2472
2537
 
@@ -2494,8 +2559,15 @@ class DFAState {
2494
2559
  this.nfaStates = nfaStates; // Int32Array of Instruction PCs
2495
2560
  this.isMatch = isMatch; // Boolean
2496
2561
  this.matchIDs = matchIDs; // Array of integers indicating which Set patterns matched
2497
- this.nextAscii = new Array(Unicode.MAX_ASCII + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2498
- this.nextMap = new Map(); // Cache of Char -> DFAState
2562
+
2563
+ // Latin-1 (Unicode.MAX_LATIN1 + 1) flat arrays for blisteringly fast O(1) lookups
2564
+ // completely covering standard English, European languages, and 1-byte encodings.
2565
+ this.nextLatin1 = new Array(Unicode.MAX_LATIN1 + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2566
+ this.nextLatin1Anchored = new Array(Unicode.MAX_LATIN1 + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2567
+ // Two parallel arrays used as a small hash map for V8 optimization (N is small, so an O(n) scan is faster than a Map's O(1) lookup)
2568
+ this.transKeys = [];
2569
+ this.transVals = [];
2570
+ this.lastSeen = 0; // Track when this state was last used for LRU eviction
2499
2571
  }
2500
2572
  }
2501
2573
  class DFA {
@@ -2508,6 +2580,7 @@ class DFA {
2508
2580
  this.stateLimit = 10000; // Prevent memory explosion (ReDoS protection)
2509
2581
  this.cacheClears = 0; // Track thrashing
2510
2582
  this.failed = false; // mark if DFA cannot work with provided prog
2583
+ this.clock = 0; // Global clock for LRU eviction
2511
2584
  }
2512
2585
 
2513
2586
  // Follows epsilon (empty) transitions to find all reachable states without consuming a char
@@ -2567,6 +2640,7 @@ class DFA {
2567
2640
  for (let i = 0; i < bucket.length; i++) {
2568
2641
  const state = bucket[i];
2569
2642
  if (arraysEqual(state.nfaStates, sortedPCs)) {
2643
+ state.lastSeen = ++this.clock;
2570
2644
  return state;
2571
2645
  }
2572
2646
  }
@@ -2579,40 +2653,99 @@ class DFA {
2579
2653
  if (this.failed) return null;
2580
2654
 
2581
2655
  // Safety: prevent memory exhaustion from state explosion
2582
- // We flush the cache and return null, which seamlessly routes execution to the NFA
2656
+ // We prune the cache to keep the newest 50%
2583
2657
  if (this.stateCount >= this.stateLimit) {
2584
- this.stateCache.clear();
2585
- this.stateCount = 0;
2586
- this.startState = null;
2587
2658
  this.cacheClears++;
2588
2659
 
2589
2660
  // If this regex causes continuous cache thrashing, permanently fall back to NFA
2590
2661
  // to avoid spending CPU cycles constantly rebuilding the DFA tree.
2591
2662
  if (this.cacheClears >= DFA.MAX_CACHE_CLEARS) {
2592
2663
  this.failed = true;
2664
+ this.stateCache.clear();
2665
+ this.stateCount = 0;
2666
+ this.startState = null;
2667
+ return null;
2668
+ }
2669
+ this.evictCache();
2670
+
2671
+ // After eviction, the bucket reference might be stale or empty.
2672
+ // We must re-fetch or re-create the bucket.
2673
+ bucket = this.stateCache.get(hash);
2674
+ if (!bucket) {
2675
+ bucket = [];
2676
+ this.stateCache.set(hash, bucket);
2593
2677
  }
2594
- return null;
2595
2678
  }
2596
2679
 
2597
2680
  // State not found, create it and add to bucket
2598
2681
  const state = new DFAState(sortedPCs, closureResult.isMatch, closureResult.matchIDs);
2682
+ state.lastSeen = ++this.clock;
2599
2683
  bucket.push(state);
2600
2684
  this.stateCount++;
2601
2685
  return state;
2602
2686
  }
2687
+ evictCache() {
2688
+ const allStates = [];
2689
+ for (const bucket of this.stateCache.values()) {
2690
+ for (let i = 0; i < bucket.length; i++) {
2691
+ allStates.push(bucket[i]);
2692
+ }
2693
+ }
2694
+
2695
+ // Sort ascending by lastSeen (oldest first)
2696
+ allStates.sort((a, b) => a.lastSeen - b.lastSeen);
2697
+
2698
+ // Keep the newest 50%
2699
+ const keepCount = Math.max(1, Math.floor(this.stateLimit / 2));
2700
+ const startIndex = allStates.length - keepCount;
2701
+ const survivorsArray = allStates.slice(startIndex);
2702
+ const survivors = new Set(survivorsArray);
2703
+ this.stateCache.clear();
2704
+ this.stateCount = 0;
2705
+ for (let i = 0; i < survivorsArray.length; i++) {
2706
+ const state = survivorsArray[i];
2707
+
2708
+ // Sever ties to all states to prevent memory leaks and dangling pointers
2709
+ state.nextLatin1.fill(null);
2710
+ state.nextLatin1Anchored.fill(null);
2711
+ // zero-allocation cleanup
2712
+ state.transKeys.length = 0;
2713
+ state.transVals.length = 0;
2714
+ const hash = hashPCs(state.nfaStates);
2715
+ let bucket = this.stateCache.get(hash);
2716
+ if (!bucket) {
2717
+ bucket = [];
2718
+ this.stateCache.set(hash, bucket);
2719
+ }
2720
+ bucket.push(state);
2721
+ this.stateCount++;
2722
+ }
2723
+
2724
+ // Start state must either be preserved or nullified so it gets re-created
2725
+ if (this.startState && !survivors.has(this.startState)) {
2726
+ this.startState = null;
2727
+ }
2728
+ }
2603
2729
 
2604
2730
  // Compute the next DFA state given a current state and a character
2605
2731
  step(state, charCode, anchor) {
2606
- // OPTIMIZATION: ASCII Fast-Path
2607
- if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2608
- const next = state.nextAscii[charCode];
2609
- if (next !== null) {
2610
- return next;
2732
+ // OPTIMIZATION: Latin-1 Array Fast-Path
2733
+ if (charCode <= Unicode.MAX_LATIN1) {
2734
+ if (anchor === RE2Flags.UNANCHORED) {
2735
+ const next = state.nextLatin1[charCode];
2736
+ if (next !== null) return next;
2737
+ } else {
2738
+ const next = state.nextLatin1Anchored[charCode];
2739
+ if (next !== null) return next;
2611
2740
  }
2612
2741
  } else {
2742
+ // Dense Array Linear Search fallback for Runes > 255
2613
2743
  const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2614
- if (state.nextMap.has(key)) {
2615
- return state.nextMap.get(key);
2744
+ // get [key] -> nextState
2745
+ const keys = state.transKeys;
2746
+ const len = keys.length;
2747
+ for (let i = 0; i < len; i++) {
2748
+ if (keys[i] === key) return state.transVals[i];
2616
2749
  }
2617
2750
  }
2618
2751
  const nextPCs = [];
@@ -2629,11 +2762,17 @@ class DFA {
2629
2762
  const nextState = this.getState(nextPCs);
2630
2763
 
2631
2764
  // Cache the result
2632
- if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2633
- state.nextAscii[charCode] = nextState;
2765
+ if (charCode <= Unicode.MAX_LATIN1) {
2766
+ if (anchor === RE2Flags.UNANCHORED) {
2767
+ state.nextLatin1[charCode] = nextState;
2768
+ } else {
2769
+ state.nextLatin1Anchored[charCode] = nextState;
2770
+ }
2634
2771
  } else {
2635
2772
  const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2636
- state.nextMap.set(key, nextState);
2773
+ // store key -> nextState
2774
+ state.transKeys.push(key);
2775
+ state.transVals.push(nextState);
2637
2776
  }
2638
2777
  return nextState;
2639
2778
  }
@@ -2666,10 +2805,11 @@ class DFA {
2666
2805
  if (width === 0) {
2667
2806
  break;
2668
2807
  }
2669
- currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2808
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_LATIN1 && currentState.nextLatin1[rune] || this.step(currentState, rune, anchor);
2670
2809
 
2671
2810
  // If we hit an unrecoverable DFA error or bailout, signal fallback
2672
2811
  if (currentState === null) return null;
2812
+ currentState.lastSeen = ++this.clock;
2673
2813
  if (currentState.isMatch) {
2674
2814
  if (anchor === RE2Flags.ANCHOR_BOTH) {
2675
2815
  if (i + width === endPos) return true;
@@ -2717,9 +2857,10 @@ class DFA {
2717
2857
  const rune = r >> 3;
2718
2858
  const width = r & 7;
2719
2859
  if (width === 0) break;
2720
- currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2860
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_LATIN1 && currentState.nextLatin1[rune] || this.step(currentState, rune, anchor);
2721
2861
  if (currentState === null) return null; // Bailout to NFA
2722
2862
 
2863
+ currentState.lastSeen = ++this.clock;
2723
2864
  i += width;
2724
2865
  checkMatch(currentState, i);
2725
2866
  if (currentState.nfaStates.length === 0) {
@@ -2757,7 +2898,7 @@ class BitState {
2757
2898
  // Bitwise shift (>>> 5) instead of Math.floor( / 32)
2758
2899
  const visitedSize = prog.numInst() * (end + 1) + VISITED_BITS - 1 >>> 5;
2759
2900
  if (this.visited.length < visitedSize) {
2760
- this.visited = new Uint32Array(Math.floor(MAX_BACKTRACK_VECTOR / VISITED_BITS));
2901
+ this.visited = new Uint32Array(visitedSize);
2761
2902
  } else {
2762
2903
  this.visited.fill(0, 0, visitedSize);
2763
2904
  }
@@ -2843,11 +2984,12 @@ class BitState {
2843
2984
  const outInst = re2.prog.getInst(inst.out);
2844
2985
  if (Inst.isRuneOp(outInst.op)) {
2845
2986
  this.push(re2, inst.arg, currentPos, false);
2846
- currentPc = inst.out;
2987
+ currentPc = inst.arg;
2988
+ currentPos = this.end;
2847
2989
  continue;
2848
2990
  }
2849
2991
  this.push(re2, inst.out, this.end, false);
2850
- currentPc = inst.arg;
2992
+ currentPc = inst.out;
2851
2993
  continue;
2852
2994
  }
2853
2995
  case Inst.RUNE:
@@ -2928,6 +3070,11 @@ class BitState {
2928
3070
  if (currentPos === this.end) return true;
2929
3071
  break;
2930
3072
  }
3073
+ case Inst.LB_WRITE:
3074
+ case Inst.LB_CHECK:
3075
+ {
3076
+ throw new RE2JSInternalException('Backtracker cannot evaluate Lookbehind instructions');
3077
+ }
2931
3078
  default:
2932
3079
  {
2933
3080
  throw new RE2JSInternalException('bad inst');
@@ -3364,6 +3511,10 @@ class OnePass {
3364
3511
  switch (inst.op) {
3365
3512
  case Inst.MATCH:
3366
3513
  {
3514
+ // Verify ANCHOR_BOTH constraint before accepting the match
3515
+ if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) {
3516
+ return null;
3517
+ }
3367
3518
  matched = true;
3368
3519
  if (matchcap.length > 0) {
3369
3520
  matchcap[0] = 0;
@@ -3794,6 +3945,88 @@ class Regexp {
3794
3945
  }
3795
3946
  }
3796
3947
 
3948
+ // High-speed, single-pass Aho-Corasick string matcher optimized for V8.
3949
+ // Builds a trie with failure links to search for multiple prefixes simultaneously.
3950
+ class AhoCorasick {
3951
+ constructor(wordArrays) {
3952
+ this.next = [Object.create(null)];
3953
+ this.fail = [0];
3954
+ this.match = [false];
3955
+
3956
+ // Build Trie
3957
+ for (const word of wordArrays) {
3958
+ let node = 0;
3959
+ for (let i = 0; i < word.length; i++) {
3960
+ const val = word[i];
3961
+ if (!(val in this.next[node])) {
3962
+ this.next.push(Object.create(null));
3963
+ this.fail.push(0);
3964
+ this.match.push(false);
3965
+ this.next[node][val] = this.next.length - 1;
3966
+ }
3967
+ node = this.next[node][val];
3968
+ }
3969
+ this.match[node] = true;
3970
+ }
3971
+
3972
+ // Build Failure Links (BFS)
3973
+ const queue = [];
3974
+ for (const val in this.next[0]) {
3975
+ if (Object.prototype.hasOwnProperty.call(this.next[0], val)) {
3976
+ const child = this.next[0][val];
3977
+ this.fail[child] = 0;
3978
+ queue.push(child);
3979
+ }
3980
+ }
3981
+ while (queue.length > 0) {
3982
+ const curr = queue.shift();
3983
+ for (const val in this.next[curr]) {
3984
+ if (Object.prototype.hasOwnProperty.call(this.next[curr], val)) {
3985
+ const child = this.next[curr][val];
3986
+ let failNode = this.fail[curr];
3987
+ while (failNode !== 0 && !(val in this.next[failNode])) {
3988
+ failNode = this.fail[failNode];
3989
+ }
3990
+ if (val in this.next[failNode]) {
3991
+ this.fail[child] = this.next[failNode][val];
3992
+ } else {
3993
+ this.fail[child] = 0;
3994
+ }
3995
+ this.match[child] = this.match[child] || this.match[this.fail[child]];
3996
+ queue.push(child);
3997
+ }
3998
+ }
3999
+ }
4000
+ }
4001
+ searchUTF16(charSeq, start, end) {
4002
+ let node = 0;
4003
+ for (let i = start; i < end; i++) {
4004
+ const val = charSeq.charCodeAt(i);
4005
+ while (node !== 0 && !(val in this.next[node])) {
4006
+ node = this.fail[node];
4007
+ }
4008
+ if (val in this.next[node]) {
4009
+ node = this.next[node][val];
4010
+ }
4011
+ if (this.match[node]) return true;
4012
+ }
4013
+ return false;
4014
+ }
4015
+ searchUTF8(bytes, start, end) {
4016
+ let node = 0;
4017
+ for (let i = start; i < end; i++) {
4018
+ const val = bytes[i];
4019
+ while (node !== 0 && !(val in this.next[node])) {
4020
+ node = this.fail[node];
4021
+ }
4022
+ if (val in this.next[node]) {
4023
+ node = this.next[node][val];
4024
+ }
4025
+ if (this.match[node]) return true;
4026
+ }
4027
+ return false;
4028
+ }
4029
+ }
3797
4030
  class Prefilter {
3798
4031
  static Type = {
3799
4032
  NONE: 0,
@@ -3806,6 +4039,8 @@ class Prefilter {
3806
4039
  this.subs = [];
3807
4040
  this.str = '';
3808
4041
  this.bytes = null;
4042
+ this.ac16 = null;
4043
+ this.ac8 = null;
3809
4044
  }
3810
4045
  eval(input, pos) {
3811
4046
  switch (this.type) {
@@ -3819,6 +4054,10 @@ class Prefilter {
3819
4054
  }
3820
4055
  return true;
3821
4056
  case Prefilter.Type.OR:
4057
+ // Exploit Aho-Corasick if it was successfully built
4058
+ if (this.ac16 && this.ac8) {
4059
+ return input.hasAnyString(this, pos);
4060
+ }
3822
4061
  for (let i = 0; i < this.subs.length; i++) {
3823
4062
  if (this.subs[i].eval(input, pos)) return true;
3824
4063
  }
@@ -3909,7 +4148,9 @@ class PrefilterTree {
3909
4148
  const s = PrefilterTree.simplify(sub);
3910
4149
  if (s.type !== Prefilter.Type.NONE) {
3911
4150
  if (s.type === Prefilter.Type.AND) {
3912
- newSubs.push(...s.subs);
4151
+ for (let j = 0; j < s.subs.length; j++) {
4152
+ newSubs.push(s.subs[j]);
4153
+ }
3913
4154
  } else {
3914
4155
  newSubs.push(s);
3915
4156
  }
@@ -3951,6 +4192,27 @@ class PrefilterTree {
3951
4192
  }
3952
4193
  }
3953
4194
  pf.subs = uniqueSubs;
4195
+
4196
+ // Build an Aho-Corasick automaton if all children are exact matches
4197
+ let allExact = true;
4198
+ for (const sub of uniqueSubs) {
4199
+ if (sub.type !== Prefilter.Type.EXACT) {
4200
+ allExact = false;
4201
+ break;
4202
+ }
4203
+ }
4204
+ if (allExact && uniqueSubs.length > 1) {
4205
+ const words16 = uniqueSubs.map(s => {
4206
+ const arr = [];
4207
+ for (let i = 0; i < s.str.length; i++) {
4208
+ arr.push(s.str.charCodeAt(i));
4209
+ }
4210
+ return arr;
4211
+ });
4212
+ pf.ac16 = new AhoCorasick(words16);
4213
+ const words8 = uniqueSubs.map(s => s.bytes);
4214
+ pf.ac8 = new AhoCorasick(words8);
4215
+ }
3954
4216
  return pf;
3955
4217
  }
3956
4218
  return pf;
@@ -4477,7 +4739,9 @@ class Simplify {
4477
4739
  // Flatten nested concatenations
4478
4740
  if (nsub.op === Regexp.Op.CONCAT) {
4479
4741
  changed = true;
4480
- newSubs.push(...nsub.subs);
4742
+ for (let j = 0; j < nsub.subs.length; j++) {
4743
+ newSubs.push(nsub.subs[j]);
4744
+ }
4481
4745
  continue;
4482
4746
  }
4483
4747
  } else if (re.op === Regexp.Op.ALTERNATE) {
@@ -4489,7 +4753,9 @@ class Simplify {
4489
4753
  // Flatten nested alternations
4490
4754
  if (nsub.op === Regexp.Op.ALTERNATE) {
4491
4755
  changed = true;
4492
- newSubs.push(...nsub.subs);
4756
+ for (let j = 0; j < nsub.subs.length; j++) {
4757
+ newSubs.push(nsub.subs[j]);
4758
+ }
4493
4759
  continue;
4494
4760
  }
4495
4761
  }
@@ -5499,7 +5765,10 @@ class Parser {
5499
5765
  return t.pop();
5500
5766
  }
5501
5767
  static concatRunes(x, y) {
5502
- return [...x, ...y];
5768
+ for (let i = 0; i < y.length; i++) {
5769
+ x.push(y[i]);
5770
+ }
5771
+ return x;
5503
5772
  }
5504
5773
  constructor(wholeRegexp, flags = 0) {
5505
5774
  this.wholeRegexp = wholeRegexp;
@@ -5535,8 +5804,8 @@ class Parser {
5535
5804
  return re;
5536
5805
  }
5537
5806
  reuse(re) {
5538
- if (this.height !== null && Object.prototype.hasOwnProperty.call(this.height, re)) {
5539
- delete this.height[re];
5807
+ if (this.height !== null && this.height.has(re)) {
5808
+ this.height.delete(re);
5540
5809
  }
5541
5810
  if (re.subs !== null && re.subs.length > 0) {
5542
5811
  re.subs[0] = this.free;
@@ -5568,20 +5837,20 @@ class Parser {
5568
5837
  if (n <= 0) {
5569
5838
  n = 1;
5570
5839
  }
5571
- if (n > Parser.MAX_SIZE / this.repeats) {
5840
+ if (n > Math.floor(Parser.MAX_SIZE / this.repeats)) {
5572
5841
  this.repeats = Parser.MAX_SIZE;
5573
5842
  } else {
5574
5843
  this.repeats *= n;
5575
5844
  }
5576
5845
  }
5577
- if (this.numRegexp < Parser.MAX_SIZE / this.repeats) {
5846
+ if (this.numRegexp < Math.floor(Parser.MAX_SIZE / this.repeats)) {
5578
5847
  return;
5579
5848
  }
5580
5849
 
5581
5850
  // We need to start tracking size.
5582
5851
  // Make the map and belatedly populate it
5583
5852
  // with info about everything we've constructed so far.
5584
- this.size = Object.create(null);
5853
+ this.size = new Map();
5585
5854
  for (let reEx of this.stack) {
5586
5855
  this.checkSize(reEx);
5587
5856
  }
@@ -5592,8 +5861,8 @@ class Parser {
5592
5861
  }
5593
5862
  calcSize(re, force = false) {
5594
5863
  if (!force && this.size !== null) {
5595
- if (Object.prototype.hasOwnProperty.call(this.size, re)) {
5596
- return this.size[re];
5864
+ if (this.size.has(re)) {
5865
+ return this.size.get(re);
5597
5866
  }
5598
5867
  }
5599
5868
  let size = 0;
@@ -5653,9 +5922,9 @@ class Parser {
5653
5922
  }
5654
5923
  size = Math.max(1, size);
5655
5924
  if (this.size === null) {
5656
- this.size = Object.create(null);
5925
+ this.size = new Map();
5657
5926
  }
5658
- this.size[re] = size;
5927
+ this.size.set(re, size);
5659
5928
  return size;
5660
5929
  }
5661
5930
  checkHeight(re) {
@@ -5663,7 +5932,7 @@ class Parser {
5663
5932
  return;
5664
5933
  }
5665
5934
  if (this.height === null) {
5666
- this.height = Object.create(null);
5935
+ this.height = new Map();
5667
5936
  for (let reEx of this.stack) {
5668
5937
  this.checkHeight(reEx);
5669
5938
  }
@@ -5674,8 +5943,8 @@ class Parser {
5674
5943
  }
5675
5944
  calcHeight(re, force = false) {
5676
5945
  if (!force && this.height !== null) {
5677
- if (Object.prototype.hasOwnProperty.call(this.height, re)) {
5678
- return this.height[re];
5946
+ if (this.height.has(re)) {
5947
+ return this.height.get(re);
5679
5948
  }
5680
5949
  }
5681
5950
  let h = 1;
@@ -5686,9 +5955,9 @@ class Parser {
5686
5955
  }
5687
5956
  }
5688
5957
  if (this.height === null) {
5689
- this.height = Object.create(null);
5958
+ this.height = new Map();
5690
5959
  }
5691
- this.height[re] = h;
5960
+ this.height.set(re, h);
5692
5961
  return h;
5693
5962
  }
5694
5963
 
@@ -7810,7 +8079,7 @@ class TranslateRegExpString {
7810
8079
  changed = true;
7811
8080
  continue;
7812
8081
  } else if (ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
7813
- if (i + 3 >= size || data[i + 3] !== '=' && data[i + 3] !== '!') {
8082
+ if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
7814
8083
  result += '(?P<';
7815
8084
  i += 3;
7816
8085
  changed = true;