re2js 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,8 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.2.0
6
- * @author Alexey Vasiliev
5
+ * @version v2.2.2
6
+ * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
9
9
  * @license MIT
@@ -76,6 +76,8 @@ const PublicFlags = {
76
76
  const ASCII_SIZE = 128;
77
77
  const ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
78
78
  const ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
79
+ // The highest legal Basic Multilingual Plane (BMP) value.
80
+ const MAX_BMP = 0xffff;
79
81
  for (let i = 0; i < ASCII_SIZE; i++) {
80
82
  if (i >= 97 && i <= 122) {
81
83
  // a-z
@@ -99,11 +101,13 @@ class Codepoint {
99
101
  static toUpperCase(codepoint) {
100
102
  if (codepoint < ASCII_SIZE) return ASCII_TO_UPPER[codepoint];
101
103
  const s = String.fromCodePoint(codepoint).toUpperCase();
102
- if (s.length > 1) {
104
+ const expectedLen = s.codePointAt(0) > MAX_BMP ? 2 : 1;
105
+ if (s.length > expectedLen) {
103
106
  return codepoint;
104
107
  }
105
108
  const sOrigin = String.fromCodePoint(s.codePointAt(0)).toLowerCase();
106
- if (sOrigin.length > 1 || sOrigin.codePointAt(0) !== codepoint) {
109
+ const originExpectedLen = sOrigin.codePointAt(0) > MAX_BMP ? 2 : 1;
110
+ if (sOrigin.length > originExpectedLen || sOrigin.codePointAt(0) !== codepoint) {
107
111
  return codepoint;
108
112
  }
109
113
  return s.codePointAt(0);
@@ -114,11 +118,13 @@ class Codepoint {
114
118
  static toLowerCase(codepoint) {
115
119
  if (codepoint < ASCII_SIZE) return ASCII_TO_LOWER[codepoint];
116
120
  const s = String.fromCodePoint(codepoint).toLowerCase();
117
- if (s.length > 1) {
121
+ const expectedLen = s.codePointAt(0) > MAX_BMP ? 2 : 1;
122
+ if (s.length > expectedLen) {
118
123
  return codepoint;
119
124
  }
120
125
  const sOrigin = String.fromCodePoint(s.codePointAt(0)).toUpperCase();
121
- if (sOrigin.length > 1 || sOrigin.codePointAt(0) !== codepoint) {
126
+ const originExpectedLen = sOrigin.codePointAt(0) > MAX_BMP ? 2 : 1;
127
+ if (sOrigin.length > originExpectedLen || sOrigin.codePointAt(0) !== codepoint) {
122
128
  return codepoint;
123
129
  }
124
130
  return s.codePointAt(0);
@@ -220,7 +226,7 @@ class UnicodeTables {
220
226
  static _CASE_ORBIT = null;
221
227
  static get CASE_ORBIT() {
222
228
  if (!this._CASE_ORBIT) {
223
- this._CASE_ORBIT = decodeOrbit('rCrDIzDYqpII-LiC8cQlHa+0HGrpI6EzClClOBmOBkOBoOBpOBnOBrOBsOBqOlByPBzPBxPyK5crCz+HCydD1dD4dB5dB6dC8dEgeBheCieDmeDpeHj-HCweD1fDxeB+9HBwfC1FE2eBxfBjeBjdD1eDmpIHycB0fEmdBgda6cBhdD4cB1cdyhBC0hBK+hBDhiBBiiBIqiBIgkHChkHKikHDjkHBkkHImkHYjjBBnkH9gGygBB0gBB+gBBhhBBlkHBihBBqhBBijBBqypB4OhzHB70H6BgzHD-GiHo8HBp8HBq8HBr8HBs8HBt8HBu8HBv8HBg8HBh8HBi8HBj8HBk8HBl8HBm8HBn8HB48HB58HB68HB78HB88HB98HB+8HB-8HBw8HBx8HBy8HBz8HB08HB18HB28HB38HBo9HBp9HBq9HBr9HBs9HBt9HBu9HBv9HBg9HBh9HBi9HBj9HBk9HBl9HBm9HBn9HE89HJz9HClaFs+HJj+HHwcQwdQ8-HJz-HqJpdErCBlG-ohBrypBBokH6lVm4+BBl4+B6nCohhCBphhCBqhhCBrhhCBshhCBthhCBuhhCBvhhCBwhhCBxhhCByhhCBzhhCB0hhCB1hhCB2hhCB3hhCB4hhCB5hhCB6hhCB7hhCB8hhCB9hhCB+hhCB-hhCBgihCBhihCBiihCBjihCBkihCBlihCBmihCBnihCBoihCBpihCBqihCBrihCBsihCBtihCBuihCBvihCBgghCBhghCBighCBjghCBkghCBlghCBmghCBnghCBoghCBpghCBqghCBrghCBsghCBtghCBughCBvghCBwghCBxghCByghCBzghCB0ghCB1ghCB2ghCB3ghCB4ghCB5ghCB6ghCB7ghCB8ghCB9ghCB+ghCB-ghCBghhCBhhhCBihhCBjhhCBkhhCBlhhCBmhhCBnhhChD4mhCB5mhCB6mhCB7mhCB8mhCB9mhCB+mhCB-mhCBgnhCBhnhCBinhCBjnhCBknhCBlnhCBmnhCBnnhCBonhCBpnhCBqnhCBrnhCBsnhCBtnhCBunhCBvnhCBwnhCBxnhCBynhCBznhCB0nhCB1nhCB2nhCB3nhCB4nhCB5nhCB6nhCB7nhCFwlhCBxlhCBylhCBzlhCB0lhCB1lhCB2lhCB3lhCB4lhCB5lhCB6lhCB7lhCB8lhCB9lhCB+lhCB-lhCBgmhCBhmhCBimhCBjmhCBkmhCBlmhCBmmhCBnmhCBomhCBpmhCBqmhCBrmhCBsmhCBtmhCBumhCBvmhCBwmhCBxmhCBymhCBzmhC1D3shCB4shCB5shCB6shCB7shCB8shCB9shCB+shCB-shCBgthCBhthCCjthCBkthCBlthCBmthCBnthCBothCBpthCBqthCBrthCBsthCBtthCButhCBvthCBwthCBxthCCzthCB0thCB1thCB2thCB3thCB4thCB5thCC7thCB8thCCwrhCBxrhCByrhCBzrhCB0rhCB1rhCB2rhCB3rhCB4rhCB5rhCB6rhCC8rhCB9rhCB+rhCB-rhCBgshCBhshCBishCBjshCBkshCBlshCBmshCBnshCBoshCBpshCBqshCCsshCBtshCBushCBvshCBwshCBxshCByshCC0shCB1shCk2BgmjCBhmjCBimjCBjmjCBkmjCBlmjCBmmjCBnmjCBomjCBpmjCBqmjCBrmjCBsmjCBtmjCBumjCBvmjCBwmjCBxmjCBymjCBzmjCB0mjCB1mjCB2mjCB3mjCB4mjCB5mjCB6mjCB7mjCB8mjCB9mjCB+mjCB-mjCBgnjCBhnjCBinjCBjnjCBknjCBlnjCBmnjCBnnjCBonjCBpnjCBqnjCBrnjCBsnjCBtnjCBunjCBvnjCBwnjCBxnjCBynjCOgkjCBhkjCBikjCBjkjCBkkjCBlkjCBmkjCBnkjCBokj
CBpkjCBqkjCBrkjCBskjCBtkjCBukjCBvkjCBwkjCBxkjCBykjCBzkjCB0kjCB1kjCB2kjCB3kjCB4kjCB5kjCB6kjCB7kjCB8kjCB9kjCB+kjCB-kjCBgljCBhljCBiljCBjljCBkljCBlljCBmljCBnljCBoljCBpljCBqljCBrljCBsljCBtljCBuljCBvljCBwljCBxljCByljC+CwrjCBxrjCByrjCBzrjCB0rjCB1rjCB2rjCB3rjCB4rjCB5rjCB6rjCB7rjCB8rjCB9rjCB+rjCB-rjCBgsjCBhsjCBisjCBjsjCBksjCBlsjCLwqjCBxqjCByqjCBzqjCB0qjCB1qjCB2qjCB3qjCB4qjCB5qjCB6qjCB7qjCB8qjCB9qjCB+qjCB-qjCBgrjCBhrjCBirjCBjrjCBkrjCBlrjC74CgmmCBhmmCBimmCBjmmCBkmmCBlmmCBmmmCBnmmCBommCBpmmCBqmmCBrmmCBsmmCBtmmCBummCBvmmCBwmmCBxmmCBymmCBzmmCB0mmCB1mmCB2mmCB3mmCB4mmCB5mmCB6mmCB7mmCB8mmCB9mmCB+mmCB-mmCBglmCBhlmCBilmCBjlmCBklmCBllmCBmlmCBnlmCBolmCBplmCBqlmCBrlmCBslmCBtlmCBulmCBvlmCBwlmCBxlmCBylmCBzlmCB0lmCB1lmCB2lmCB3lmCB4lmCB5lmCB6lmCB7lmCB8lmCB9lmCB+lmCB-lmChrVgz7CBhz7CBiz7CBjz7CBkz7CBlz7CBmz7CBnz7CBoz7CBpz7CBqz7CBrz7CBsz7CBtz7CBuz7CBvz7CBwz7CBxz7CByz7CBzz7CB0z7CB1z7CB2z7CB3z7CB4z7CB5z7CB6z7CB7z7CB8z7CB9z7CB+z7CB-z7CBgy7CBhy7CBiy7CBjy7CBky7CBly7CBmy7CBny7CBoy7CBpy7CBqy7CBry7CBsy7CBty7CBuy7CBvy7CBwy7CBxy7CByy7CBzy7CB0y7CB1y7CB2y7CB3y7CB4y7CB5y7CB6y7CB7y7CB8y7CB9y7CB+y7CB-y7ChB717CB817CB917CB+17CB-17CBg27CBh27CBi27CBj27CBk27CBl27CBm27CBn27CBo27CBp27CBq27CBr27CBs27CBt27CBu27CBv27CBw27CBx27CBy27CBz27CDg17CBh17CBi17CBj17CBk17CBl17CBm17CBn17CBo17CBp17CBq17CBr17CBs17CBt17CBu17CBv17CBw17CBx17CBy17CBz17CB017CB117CB217CB317CB417Ctxeip6DBjp6DBkp6DBlp6DBmp6DBnp6DBop6DBpp6DBqp6DBrp6DBsp6DBtp6DBup6DBvp6DBwp6DBxp6DByp6DBzp6DB0p6DB1p6DB2p6DB3p6DB4p6DB5p6DB6p6DB7p6DB8p6DB9p6DB+p6DB-p6DBgq6DBhq6DBiq6DBjq6DBgo6DBho6DBio6DBjo6DBko6DBlo6DBmo6DBno6DBoo6DBpo6DBqo6DBro6DBso6DBto6DBuo6DBvo6DBwo6DBxo6DByo6DBzo6DB0o6DB1o6DB2o6DB3o6DB4o6DB5o6DB6o6DB7o6DB8o6DB9o6DB+o6DB-o6DBgp6DBhp6D');
229
+ this._CASE_ORBIT = decodeOrbit('rCrDIzDYqpII-LiC8cQlHa+0HGrpI6EzClClOBmOBkOBoOBpOBnOBrOBsOBqOlByPBzPBxPyK5crCz+HCydD1dD4dB5dB6dC8dEgeBheCieDmeDpeHj-HCweD1fDxeB+9HBwfC1FE2eBxfBjeBjdD1eDmpIHycB0fEmdBgda6cBhdD4cB1cdyhBC0hBK+hBDhiBBiiBIqiBIgkHChkHKikHDjkHBkkHImkHYjjBBnkH9gGygBB0gBB+gBBhhBBlkHBihBBqhBBijBBqypB4OhzHB70H6BgzHD-GiHo8HBp8HBq8HBr8HBs8HBt8HBu8HBv8HBg8HBh8HBi8HBj8HBk8HBl8HBm8HBn8HB48HB58HB68HB78HB88HB98HB+8HB-8HBw8HBx8HBy8HBz8HB08HB18HB28HB38HBo9HBp9HBq9HBr9HBs9HBt9HBu9HBv9HBg9HBh9HBi9HBj9HBk9HBl9HBm9HBn9HE89HJz9HClaFs+HJj+HHwcQwdQ8-HJz-HqJpdErCBlG-ohBrypBBokH6lVm4+BBl4+B');
224
230
  }
225
231
  return this._CASE_ORBIT;
226
232
  }
@@ -592,11 +598,16 @@ class Unicode {
592
598
  // to compare it to |r2|.
593
599
  // -1 is interpreted as the end-of-file mark.
594
600
  static equalsIgnoreCase(r1, r2) {
595
- // Runes already match, or one of them is EOF
596
- if (r1 < 0 || r2 < 0 || r1 === r2) {
601
+ // Runes already match
602
+ if (r1 === r2) {
597
603
  return true;
598
604
  }
599
605
 
606
+ // Safely fail if either is EOF (and they didn't explicitly match above)
607
+ if (r1 < 0 || r2 < 0) {
608
+ return false;
609
+ }
610
+
600
611
  // Fast path for the common case where both runes are ASCII characters.
601
612
  // Coerces both runes to lowercase if applicable.
602
613
  if (r1 <= this.MAX_ASCII && r2 <= this.MAX_ASCII) {
@@ -849,7 +860,7 @@ class Utils {
849
860
  // Encoding[(Encoding['UTF_16'] = 0)] = 'UTF_16'
850
861
  // Encoding[(Encoding['UTF_8'] = 1)] = 'UTF_8'
851
862
  const createEnum = (values = [], initNum = 0) => {
852
- const enumObject = {};
863
+ const enumObject = Object.create(null);
853
864
  for (let i = 0; i < values.length; i++) {
854
865
  const val = values[i];
855
866
  const keyVal = initNum + i;
@@ -991,6 +1002,9 @@ class MachineInputBase {
991
1002
  hasString() {
992
1003
  return false;
993
1004
  }
1005
+ hasAnyString() {
1006
+ return false;
1007
+ }
994
1008
 
995
1009
  // Helper for the exact-literal fast-path execution router
996
1010
  prefixLength() {
@@ -1016,6 +1030,13 @@ class MachineUTF8Input extends MachineInputBase {
1016
1030
  return idx !== -1 && idx <= this.end - target.length;
1017
1031
  }
1018
1032
 
1033
+ // Executes a high-speed, single-pass search for multiple literal strings
1034
+ // simultaneously using an Aho-Corasick automaton.
1035
+ hasAnyString(prefilter, pos) {
1036
+ if (!prefilter.ac8) return false;
1037
+ return prefilter.ac8.searchUTF8(this.bytes, this.start + pos, this.end);
1038
+ }
1039
+
1019
1040
  // Returns the rune at the specified index; the units are
1020
1041
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1021
1042
  // indices. Returns the width (in the same units) of the rune in
@@ -1034,17 +1055,23 @@ class MachineUTF8Input extends MachineInputBase {
1034
1055
  return c << 3 | 1;
1035
1056
  } else if (c >= 0xc2 && c <= 0xdf && pos + 1 < this.end) {
1036
1057
  const c1 = this.bytes[pos + 1] & 0xff;
1058
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1037
1059
  const rune = (c & 0x1f) << 6 | c1 & 0x3f;
1038
1060
  return rune << 3 | 2;
1039
1061
  } else if (c >= 0xe0 && c <= 0xef && pos + 2 < this.end) {
1040
1062
  const c1 = this.bytes[pos + 1] & 0xff;
1063
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1041
1064
  const c2 = this.bytes[pos + 2] & 0xff;
1065
+ if ((c2 & 0xc0) !== 0x80) return c << 3 | 1;
1042
1066
  const rune = (c & 0x0f) << 12 | (c1 & 0x3f) << 6 | c2 & 0x3f;
1043
1067
  return rune << 3 | 3;
1044
1068
  } else if (c >= 0xf0 && c <= 0xf4 && pos + 3 < this.end) {
1045
1069
  const c1 = this.bytes[pos + 1] & 0xff;
1070
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1046
1071
  const c2 = this.bytes[pos + 2] & 0xff;
1072
+ if ((c2 & 0xc0) !== 0x80) return c << 3 | 1;
1047
1073
  const c3 = this.bytes[pos + 3] & 0xff;
1074
+ if ((c3 & 0xc0) !== 0x80) return c << 3 | 1;
1048
1075
  const rune = (c & 0x07) << 18 | (c1 & 0x3f) << 12 | (c2 & 0x3f) << 6 | c3 & 0x3f;
1049
1076
  return rune << 3 | 4;
1050
1077
  } else {
@@ -1123,6 +1150,13 @@ class MachineUTF16Input extends MachineInputBase {
1123
1150
  return idx !== -1 && idx <= this.end - prefilter.str.length;
1124
1151
  }
1125
1152
 
1153
+ // Executes a high-speed, single-pass search for multiple literal strings
1154
+ // simultaneously using an Aho-Corasick automaton.
1155
+ hasAnyString(prefilter, pos) {
1156
+ if (!prefilter.ac16) return false;
1157
+ return prefilter.ac16.searchUTF16(this.charSequence, this.start + pos, this.end);
1158
+ }
1159
+
1126
1160
  // Returns the rune at the specified index; the units are
1127
1161
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1128
1162
  // indices. Returns the width (in the same units) of the rune in
@@ -1568,7 +1602,15 @@ class Matcher {
1568
1602
  if (this.hasMatch) {
1569
1603
  start = this.groups[1];
1570
1604
  if (this.groups[0] === this.groups[1]) {
1571
- start++;
1605
+ // After an empty match, advance by the decoded width of the rune at `start` (not a fixed single unit) to avoid sequence corruption
1606
+ const machineInput = this.matcherInput.isUTF16Encoding() ? MachineInput.fromUTF16(this.matcherInput.asCharSequence(), 0, this.matcherInputLength) : MachineInput.fromUTF8(this.matcherInput.asBytes(), 0, this.matcherInputLength);
1607
+ const r = machineInput.step(start);
1608
+ if (r < 0) {
1609
+ // EOF
1610
+ start++; // Advance past length to force loop exit
1611
+ } else {
1612
+ start += r & 7; // Advance by safely decoded width
1613
+ }
1572
1614
  }
1573
1615
  }
1574
1616
  return this.genMatch(start, RE2Flags.UNANCHORED);
@@ -1707,6 +1749,8 @@ class Matcher {
1707
1749
  const groupName = replacement.substring(i + 1, j);
1708
1750
  res += this.group(groupName);
1709
1751
  last = j + 1;
1752
+ i = j;
1753
+ continue;
1710
1754
  }
1711
1755
  }
1712
1756
  }
@@ -1786,6 +1830,7 @@ class Matcher {
1786
1830
  if (j === replacement.length || replacement.codePointAt(j) !== Codepoint.CODES.get('>')) {
1787
1831
  res += replacement.substring(i - 1, j + 1);
1788
1832
  last = j + 1;
1833
+ i = j;
1789
1834
  continue;
1790
1835
  }
1791
1836
  const groupName = replacement.substring(i + 1, j);
@@ -1795,6 +1840,8 @@ class Matcher {
1795
1840
  res += `$<${groupName}>`;
1796
1841
  }
1797
1842
  last = j + 1;
1843
+ i = j;
1844
+ continue;
1798
1845
  }
1799
1846
  }
1800
1847
  }
@@ -1919,6 +1966,8 @@ class Inst {
1919
1966
  return r === r0;
1920
1967
  }
1921
1968
  const len = this.runes.length;
1969
+ if (len === 0) return false;
1970
+
1922
1971
  // If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
1923
1972
  if (len === 2 || len === 4 || len === 6 || len === 8) {
1924
1973
  for (let j = 0; j < len; j += 2) {
@@ -1932,22 +1981,19 @@ class Inst {
1932
1981
  return false; // Stop here
1933
1982
  }
1934
1983
 
1935
- // Otherwise binary search.
1936
- let lo = 0;
1937
- let hi = this.runes.length / 2 | 0;
1938
- while (lo < hi) {
1939
- const m = lo + hi >> 1; // native cpu instruction for "lo + (((hi - lo) / 2) | 0)"
1940
- const c = this.runes[2 * m];
1941
- if (c <= r) {
1942
- if (r <= this.runes[2 * m + 1]) {
1943
- return true;
1944
- }
1945
- lo = m + 1;
1946
- } else {
1947
- hi = m;
1948
- }
1984
+ // Branchless Binary Search (Lower Bound)
1985
+ // Compiles to optimal conditional move (cmov) machine code, preventing
1986
+ // branch mispredictions on large, chaotic Unicode arrays
1987
+ let base = 0;
1988
+ let n = len >> 1;
1989
+ while (n > 1) {
1990
+ const half = n >> 1;
1991
+ base += this.runes[base + half << 1] <= r ? half : 0;
1992
+ n -= half;
1949
1993
  }
1950
- return false;
1994
+ base += this.runes[base << 1] <= r ? 1 : 0;
1995
+ const m = base - 1;
1996
+ return m >= 0 && r <= this.runes[m << 1 | 1];
1951
1997
  }
1952
1998
 
1953
1999
  // matchRunePos checks whether the instruction matches (and consumes) r.
@@ -1962,6 +2008,7 @@ class Inst {
1962
2008
  return r === r0 ? 0 : -1;
1963
2009
  }
1964
2010
  const len = this.runes.length;
2011
+ if (len === 0) return -1;
1965
2012
  if (len === 2 || len === 4 || len === 6 || len === 8) {
1966
2013
  for (let j = 0; j < len; j += 2) {
1967
2014
  if (r < this.runes[j]) return -1;
@@ -1969,19 +2016,18 @@ class Inst {
1969
2016
  }
1970
2017
  return -1;
1971
2018
  }
1972
- let lo = 0;
1973
- let hi = Math.floor(len / 2);
1974
- while (lo < hi) {
1975
- const m = lo + hi >> 1;
1976
- const c = this.runes[2 * m];
1977
- if (c <= r) {
1978
- if (r <= this.runes[2 * m + 1]) return m;
1979
- lo = m + 1;
1980
- } else {
1981
- hi = m;
1982
- }
2019
+
2020
+ // Branchless Binary Search (Lower Bound)
2021
+ let base = 0;
2022
+ let n = len >> 1;
2023
+ while (n > 1) {
2024
+ const half = n >> 1;
2025
+ base += this.runes[base + half << 1] <= r ? half : 0;
2026
+ n -= half;
1983
2027
  }
1984
- return -1;
2028
+ base += this.runes[base << 1] <= r ? 1 : 0;
2029
+ const m = base - 1;
2030
+ return m >= 0 && r <= this.runes[m << 1 | 1] ? m : -1;
1985
2031
  }
1986
2032
  /**
1987
2033
  *
@@ -2080,6 +2126,7 @@ class Queue {
2080
2126
  //
2081
2127
  // Called by RE2.doExecute.
2082
2128
  class Machine {
2129
+ static THREADS_CHUNK_SIZE = 128;
2083
2130
  static fromRE2(re2) {
2084
2131
  const m = new Machine();
2085
2132
  m.prog = re2.prog;
@@ -2120,15 +2167,15 @@ class Machine {
2120
2167
  resetCap() {
2121
2168
  for (let i = 0; i < this.poolSize; i++) {
2122
2169
  const t = this.pool[i];
2123
- t.cap.fill(0);
2170
+ t.cap.fill(-1);
2124
2171
  }
2125
2172
  }
2126
2173
  initNewCap(ncap) {
2127
2174
  for (let i = 0; i < this.poolSize; i++) {
2128
2175
  const t = this.pool[i];
2129
- t.cap = new Int32Array(ncap);
2176
+ t.cap = new Int32Array(ncap).fill(-1);
2130
2177
  }
2131
- this.matchcap = new Int32Array(ncap);
2178
+ this.matchcap = new Int32Array(ncap).fill(-1);
2132
2179
  }
2133
2180
  submatches() {
2134
2181
  if (this.ncap === 0) {
@@ -2141,14 +2188,21 @@ class Machine {
2141
2188
  // alloc() allocates a new thread with the given instruction.
2142
2189
  // It uses the free pool if possible.
2143
2190
  alloc(inst) {
2144
- let t;
2145
- if (this.poolSize > 0) {
2146
- this.poolSize--;
2147
- t = this.pool[this.poolSize];
2148
- } else {
2149
- t = new Thread();
2150
- t.cap = new Int32Array(this.matchcap.length);
2191
+ if (this.poolSize === 0) {
2192
+ const capLen = this.matchcap.length;
2193
+
2194
+ // Bulk allocate threads in a tight loop so the V8 engine
2195
+ // places them adjacently in the young generation heap
2196
+ for (let i = 0; i < Machine.THREADS_CHUNK_SIZE; i++) {
2197
+ const t = new Thread();
2198
+ t.cap = new Int32Array(capLen);
2199
+ this.pool[this.poolSize++] = t;
2200
+ }
2151
2201
  }
2202
+
2203
+ // Pop a thread from the top of the pool stack
2204
+ this.poolSize--;
2205
+ const t = this.pool[this.poolSize];
2152
2206
  t.inst = inst;
2153
2207
  return t;
2154
2208
  }
@@ -2201,6 +2255,9 @@ class Machine {
2201
2255
  if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) {
2202
2256
  break;
2203
2257
  }
2258
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2259
+ break;
2260
+ }
2204
2261
  if (this.matched) {
2205
2262
  break;
2206
2263
  }
@@ -2278,6 +2335,9 @@ class Machine {
2278
2335
  while (true) {
2279
2336
  if (runq.isEmpty()) {
2280
2337
  if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
2338
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2339
+ break;
2340
+ }
2281
2341
  }
2282
2342
  if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
2283
2343
  // Spawn Lookbehind threads BEFORE the main pattern
@@ -2393,78 +2453,83 @@ class Machine {
2393
2453
  runq.clear();
2394
2454
  }
2395
2455
  add(q, pc, pos, cap, cond, t) {
2396
- if (pc === 0) {
2397
- return t;
2398
- }
2399
- if (q.contains(pc)) {
2400
- return t;
2401
- }
2402
- const d = q.add(pc);
2403
- const inst = this.prog.inst[pc];
2404
- switch (inst.op) {
2405
- case Inst.FAIL:
2406
- break;
2407
- case Inst.ALT:
2408
- case Inst.ALT_MATCH:
2409
- t = this.add(q, inst.out, pos, cap, cond, t);
2410
- t = this.add(q, inst.arg, pos, cap, cond, t);
2411
- break;
2412
- case Inst.EMPTY_WIDTH:
2413
- if ((inst.arg & ~cond) === 0) {
2414
- t = this.add(q, inst.out, pos, cap, cond, t);
2415
- }
2416
- break;
2417
- case Inst.NOP:
2418
- t = this.add(q, inst.out, pos, cap, cond, t);
2419
- break;
2420
- case Inst.CAPTURE:
2421
- if (inst.arg < this.ncap) {
2422
- const opos = cap[inst.arg];
2423
- cap[inst.arg] = pos;
2424
- this.add(q, inst.out, pos, cap, cond, null);
2425
- cap[inst.arg] = opos;
2426
- } else {
2456
+ while (true) {
2457
+ if (pc === 0) {
2458
+ return t;
2459
+ }
2460
+ if (q.contains(pc)) {
2461
+ return t;
2462
+ }
2463
+ const d = q.add(pc);
2464
+ const inst = this.prog.inst[pc];
2465
+ switch (inst.op) {
2466
+ case Inst.FAIL:
2467
+ return t;
2468
+ case Inst.ALT:
2469
+ case Inst.ALT_MATCH:
2427
2470
  t = this.add(q, inst.out, pos, cap, cond, t);
2428
- }
2429
- break;
2430
- case Inst.LB_WRITE:
2431
- this.lbTable[Math.abs(inst.lb)] = pos;
2432
- t = this.add(q, inst.out, pos, cap, cond, t);
2433
- break;
2434
- case Inst.LB_CHECK:
2435
- if (inst.lb > 0) {
2436
- // Positive Lookbehind
2437
- if (this.lbTable[inst.lb] === pos) {
2438
- t = this.add(q, inst.out, pos, cap, cond, t);
2471
+ pc = inst.arg; // Flattened tail recursion
2472
+ continue;
2473
+ case Inst.EMPTY_WIDTH:
2474
+ if ((inst.arg & ~cond) === 0) {
2475
+ pc = inst.out; // Flattened tail recursion
2476
+ continue;
2439
2477
  }
2440
- } else if (this.lbTable[-inst.lb] !== pos) {
2441
- // Negative Lookbehind
2442
- t = this.add(q, inst.out, pos, cap, cond, t);
2443
- }
2444
- break;
2445
- case Inst.MATCH:
2446
- case Inst.RUNE:
2447
- case Inst.RUNE1:
2448
- case Inst.RUNE_ANY:
2449
- case Inst.RUNE_ANY_NOT_NL:
2450
- if (t === null) {
2451
- t = this.alloc(inst);
2452
- } else {
2453
- t.inst = inst;
2454
- }
2455
- if (this.ncap > 0 && t.cap !== cap) {
2456
- // Direct assignment utilizing Typed Array performance
2457
- for (let c = 0; c < this.ncap; c++) {
2458
- t.cap[c] = cap[c];
2478
+ return t;
2479
+ case Inst.NOP:
2480
+ pc = inst.out; // Flattened tail recursion
2481
+ continue;
2482
+ case Inst.CAPTURE:
2483
+ if (inst.arg < this.ncap) {
2484
+ const opos = cap[inst.arg];
2485
+ cap[inst.arg] = pos;
2486
+ this.add(q, inst.out, pos, cap, cond, null);
2487
+ cap[inst.arg] = opos;
2488
+ return t;
2489
+ } else {
2490
+ pc = inst.out; // Flattened tail recursion
2491
+ continue;
2459
2492
  }
2460
- }
2461
- q.denseThreads[d] = t;
2462
- t = null;
2463
- break;
2464
- default:
2465
- throw new Error('unhandled');
2493
+ case Inst.LB_WRITE:
2494
+ this.lbTable[Math.abs(inst.lb)] = pos;
2495
+ pc = inst.out;
2496
+ continue;
2497
+ case Inst.LB_CHECK:
2498
+ if (inst.lb > 0) {
2499
+ // Positive Lookbehind
2500
+ if (this.lbTable[inst.lb] === pos) {
2501
+ pc = inst.out; // Flattened tail recursion
2502
+ continue;
2503
+ }
2504
+ } else if (this.lbTable[-inst.lb] !== pos) {
2505
+ // Negative Lookbehind
2506
+ pc = inst.out; // Flattened tail recursion
2507
+ continue;
2508
+ }
2509
+ return t;
2510
+ case Inst.MATCH:
2511
+ case Inst.RUNE:
2512
+ case Inst.RUNE1:
2513
+ case Inst.RUNE_ANY:
2514
+ case Inst.RUNE_ANY_NOT_NL:
2515
+ if (t === null) {
2516
+ t = this.alloc(inst);
2517
+ } else {
2518
+ t.inst = inst;
2519
+ }
2520
+ if (this.ncap > 0 && t.cap !== cap) {
2521
+ // Direct assignment utilizing Typed Array performance
2522
+ for (let c = 0; c < this.ncap; c++) {
2523
+ t.cap[c] = cap[c];
2524
+ }
2525
+ }
2526
+ q.denseThreads[d] = t;
2527
+ t = null;
2528
+ return t;
2529
+ default:
2530
+ throw new Error('unhandled');
2531
+ }
2466
2532
  }
2467
- return t;
2468
2533
  }
2469
2534
  }
2470
2535
 
@@ -2492,8 +2557,15 @@ class DFAState {
2492
2557
  this.nfaStates = nfaStates; // Int32Array of Instruction PCs
2493
2558
  this.isMatch = isMatch; // Boolean
2494
2559
  this.matchIDs = matchIDs; // Array of integers indicating which Set patterns matched
2495
- this.nextAscii = new Array(Unicode.MAX_ASCII + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2496
- this.nextMap = new Map(); // Cache of Char -> DFAState
2560
+
2561
+ // Latin-1 (Unicode.MAX_LATIN1 + 1) flat arrays for blisteringly fast O(1) lookups
2562
+ // completely covering standard English, European languages, and 1-byte encodings.
2563
+ this.nextLatin1 = new Array(Unicode.MAX_LATIN1 + 1).fill(null); // Flat array for fast Latin-1 lookups (unanchored transitions)
2564
+ this.nextLatin1Anchored = new Array(Unicode.MAX_LATIN1 + 1).fill(null); // Flat array for fast Latin-1 lookups (anchored transitions)
2565
+ // Two parallel arrays used as a small key -> state map for V8 optimization (N is small, so an O(n) scan is faster than a Map lookup)
2566
+ this.transKeys = [];
2567
+ this.transVals = [];
2568
+ this.lastSeen = 0; // Track when this state was last used for LRU eviction
2497
2569
  }
2498
2570
  }
2499
2571
  class DFA {
@@ -2506,6 +2578,7 @@ class DFA {
2506
2578
  this.stateLimit = 10000; // Prevent memory explosion (ReDoS protection)
2507
2579
  this.cacheClears = 0; // Track thrashing
2508
2580
  this.failed = false; // mark if DFA cannot work with provided prog
2581
+ this.clock = 0; // Global clock for LRU eviction
2509
2582
  }
2510
2583
 
2511
2584
  // Follows epsilon (empty) transitions to find all reachable states without consuming a char
@@ -2565,6 +2638,7 @@ class DFA {
2565
2638
  for (let i = 0; i < bucket.length; i++) {
2566
2639
  const state = bucket[i];
2567
2640
  if (arraysEqual(state.nfaStates, sortedPCs)) {
2641
+ state.lastSeen = ++this.clock;
2568
2642
  return state;
2569
2643
  }
2570
2644
  }
@@ -2577,40 +2651,99 @@ class DFA {
2577
2651
  if (this.failed) return null;
2578
2652
 
2579
2653
  // Safety: prevent memory exhaustion from state explosion
2580
- // We flush the cache and return null, which seamlessly routes execution to the NFA
2654
+ // Instead of flushing everything, we evict the least recently seen states and keep the most recently seen ones
2581
2655
  if (this.stateCount >= this.stateLimit) {
2582
- this.stateCache.clear();
2583
- this.stateCount = 0;
2584
- this.startState = null;
2585
2656
  this.cacheClears++;
2586
2657
 
2587
2658
  // If this regex causes continuous cache thrashing, permanently fall back to NFA
2588
2659
  // to avoid spending CPU cycles constantly rebuilding the DFA tree.
2589
2660
  if (this.cacheClears >= DFA.MAX_CACHE_CLEARS) {
2590
2661
  this.failed = true;
2662
+ this.stateCache.clear();
2663
+ this.stateCount = 0;
2664
+ this.startState = null;
2665
+ return null;
2666
+ }
2667
+ this.evictCache();
2668
+
2669
+ // After eviction, the bucket reference might be stale or empty.
2670
+ // We must re-fetch or re-create the bucket.
2671
+ bucket = this.stateCache.get(hash);
2672
+ if (!bucket) {
2673
+ bucket = [];
2674
+ this.stateCache.set(hash, bucket);
2591
2675
  }
2592
- return null;
2593
2676
  }
2594
2677
 
2595
2678
  // State not found, create it and add to bucket
2596
2679
  const state = new DFAState(sortedPCs, closureResult.isMatch, closureResult.matchIDs);
2680
+ state.lastSeen = ++this.clock;
2597
2681
  bucket.push(state);
2598
2682
  this.stateCount++;
2599
2683
  return state;
2600
2684
  }
2685
+ evictCache() {
2686
+ const allStates = [];
2687
+ for (const bucket of this.stateCache.values()) {
2688
+ for (let i = 0; i < bucket.length; i++) {
2689
+ allStates.push(bucket[i]);
2690
+ }
2691
+ }
2692
+
2693
+ // Sort ascending by lastSeen (oldest first)
2694
+ allStates.sort((a, b) => a.lastSeen - b.lastSeen);
2695
+
2696
+ // Keep the most recently seen states, up to half of stateLimit
2697
+ const keepCount = Math.max(1, Math.floor(this.stateLimit / 2));
2698
+ const startIndex = allStates.length - keepCount;
2699
+ const survivorsArray = allStates.slice(startIndex);
2700
+ const survivors = new Set(survivorsArray);
2701
+ this.stateCache.clear();
2702
+ this.stateCount = 0;
2703
+ for (let i = 0; i < survivorsArray.length; i++) {
2704
+ const state = survivorsArray[i];
2705
+
2706
+ // Sever ties to all states to prevent memory leaks and dangling pointers
2707
+ state.nextLatin1.fill(null);
2708
+ state.nextLatin1Anchored.fill(null);
2709
+ // zero-allocation cleanup
2710
+ state.transKeys.length = 0;
2711
+ state.transVals.length = 0;
2712
+ const hash = hashPCs(state.nfaStates);
2713
+ let bucket = this.stateCache.get(hash);
2714
+ if (!bucket) {
2715
+ bucket = [];
2716
+ this.stateCache.set(hash, bucket);
2717
+ }
2718
+ bucket.push(state);
2719
+ this.stateCount++;
2720
+ }
2721
+
2722
+ // Start state must either be preserved or nullified so it gets re-created
2723
+ if (this.startState && !survivors.has(this.startState)) {
2724
+ this.startState = null;
2725
+ }
2726
+ }
2601
2727
 
2602
2728
  // Compute the next DFA state given a current state and a character
2603
2729
  step(state, charCode, anchor) {
2604
- // OPTIMIZATION: ASCII Fast-Path
2605
- if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2606
- const next = state.nextAscii[charCode];
2607
- if (next !== null) {
2608
- return next;
2730
+ // OPTIMIZATION: Latin-1 Array Fast-Path
2731
+ if (charCode <= Unicode.MAX_LATIN1) {
2732
+ if (anchor === RE2Flags.UNANCHORED) {
2733
+ const next = state.nextLatin1[charCode];
2734
+ if (next !== null) return next;
2735
+ } else {
2736
+ const next = state.nextLatin1Anchored[charCode];
2737
+ if (next !== null) return next;
2609
2738
  }
2610
2739
  } else {
2740
+ // Dense Array Linear Search fallback for Runes > 255
2611
2741
  const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2612
- if (state.nextMap.has(key)) {
2613
- return state.nextMap.get(key);
2742
+ // get [key] -> nextState
2743
+ const keys = state.transKeys;
2744
+ const len = keys.length;
2745
+ for (let i = 0; i < len; i++) {
2746
+ if (keys[i] === key) return state.transVals[i];
2614
2747
  }
2615
2748
  }
2616
2749
  const nextPCs = [];
@@ -2627,11 +2760,17 @@ class DFA {
2627
2760
  const nextState = this.getState(nextPCs);
2628
2761
 
2629
2762
  // Cache the result
2630
- if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2631
- state.nextAscii[charCode] = nextState;
2763
+ if (charCode <= Unicode.MAX_LATIN1) {
2764
+ if (anchor === RE2Flags.UNANCHORED) {
2765
+ state.nextLatin1[charCode] = nextState;
2766
+ } else {
2767
+ state.nextLatin1Anchored[charCode] = nextState;
2768
+ }
2632
2769
  } else {
2633
2770
  const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2634
- state.nextMap.set(key, nextState);
2771
+ // store key -> nextState
2772
+ state.transKeys.push(key);
2773
+ state.transVals.push(nextState);
2635
2774
  }
2636
2775
  return nextState;
2637
2776
  }
@@ -2664,10 +2803,11 @@ class DFA {
2664
2803
  if (width === 0) {
2665
2804
  break;
2666
2805
  }
2667
- currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2806
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_LATIN1 && currentState.nextLatin1[rune] || this.step(currentState, rune, anchor);
2668
2807
 
2669
2808
  // If we hit an unrecoverable DFA error or bailout, signal fallback
2670
2809
  if (currentState === null) return null;
2810
+ currentState.lastSeen = ++this.clock;
2671
2811
  if (currentState.isMatch) {
2672
2812
  if (anchor === RE2Flags.ANCHOR_BOTH) {
2673
2813
  if (i + width === endPos) return true;
@@ -2715,9 +2855,10 @@ class DFA {
2715
2855
  const rune = r >> 3;
2716
2856
  const width = r & 7;
2717
2857
  if (width === 0) break;
2718
- currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2858
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_LATIN1 && currentState.nextLatin1[rune] || this.step(currentState, rune, anchor);
2719
2859
  if (currentState === null) return null; // Bailout to NFA
2720
2860
 
2861
+ currentState.lastSeen = ++this.clock;
2721
2862
  i += width;
2722
2863
  checkMatch(currentState, i);
2723
2864
  if (currentState.nfaStates.length === 0) {
@@ -2755,7 +2896,7 @@ class BitState {
2755
2896
  // Bitwise shift (>>> 5) instead of Math.floor( / 32)
2756
2897
  const visitedSize = prog.numInst() * (end + 1) + VISITED_BITS - 1 >>> 5;
2757
2898
  if (this.visited.length < visitedSize) {
2758
- this.visited = new Uint32Array(Math.floor(MAX_BACKTRACK_VECTOR / VISITED_BITS));
2899
+ this.visited = new Uint32Array(visitedSize);
2759
2900
  } else {
2760
2901
  this.visited.fill(0, 0, visitedSize);
2761
2902
  }
@@ -2841,11 +2982,12 @@ class BitState {
2841
2982
  const outInst = re2.prog.getInst(inst.out);
2842
2983
  if (Inst.isRuneOp(outInst.op)) {
2843
2984
  this.push(re2, inst.arg, currentPos, false);
2844
- currentPc = inst.out;
2985
+ currentPc = inst.arg;
2986
+ currentPos = this.end;
2845
2987
  continue;
2846
2988
  }
2847
2989
  this.push(re2, inst.out, this.end, false);
2848
- currentPc = inst.arg;
2990
+ currentPc = inst.out;
2849
2991
  continue;
2850
2992
  }
2851
2993
  case Inst.RUNE:
@@ -2926,6 +3068,11 @@ class BitState {
2926
3068
  if (currentPos === this.end) return true;
2927
3069
  break;
2928
3070
  }
3071
+ case Inst.LB_WRITE:
3072
+ case Inst.LB_CHECK:
3073
+ {
3074
+ throw new RE2JSInternalException('Backtracker cannot evaluate Lookbehind instructions');
3075
+ }
2929
3076
  default:
2930
3077
  {
2931
3078
  throw new RE2JSInternalException('bad inst');
@@ -3194,7 +3341,9 @@ const makeOnePass = p => {
3194
3341
  }
3195
3342
  runes.sort((a, b) => a - b);
3196
3343
  } else {
3197
- runes.push(...inst.runes);
3344
+ for (let j = 0; j < inst.runes.length; j++) {
3345
+ runes.push(inst.runes[j]);
3346
+ }
3198
3347
  }
3199
3348
  onePassRunes[pc] = runes;
3200
3349
  inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
@@ -3362,6 +3511,10 @@ class OnePass {
3362
3511
  switch (inst.op) {
3363
3512
  case Inst.MATCH:
3364
3513
  {
3514
+ // Verify ANCHOR_BOTH constraint before accepting the match
3515
+ if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) {
3516
+ return null;
3517
+ }
3365
3518
  matched = true;
3366
3519
  if (matchcap.length > 0) {
3367
3520
  matchcap[0] = 0;
@@ -3792,6 +3945,88 @@ class Regexp {
3792
3945
  }
3793
3946
  }
3794
3947
 
3948
+ // High-speed, single-pass Aho-Corasick string matcher optimized for V8.
3949
+ // Builds a trie with failure links to search for multiple prefixes simultaneously.
3950
+ class AhoCorasick {
3951
+ constructor(wordArrays) {
3952
+ this.next = [Object.create(null)];
3953
+ this.fail = [0];
3954
+ this.match = [false];
3955
+
3956
+ // Build Trie
3957
+ for (const word of wordArrays) {
3958
+ let node = 0;
3959
+ for (let i = 0; i < word.length; i++) {
3960
+ const val = word[i];
3961
+ if (!(val in this.next[node])) {
3962
+ this.next.push(Object.create(null));
3963
+ this.fail.push(0);
3964
+ this.match.push(false);
3965
+ this.next[node][val] = this.next.length - 1;
3966
+ }
3967
+ node = this.next[node][val];
3968
+ }
3969
+ this.match[node] = true;
3970
+ }
3971
+
3972
+ // Build Failure Links (BFS)
3973
+ const queue = [];
3974
+ for (const val in this.next[0]) {
3975
+ if (Object.prototype.hasOwnProperty.call(this.next[0], val)) {
3976
+ const child = this.next[0][val];
3977
+ this.fail[child] = 0;
3978
+ queue.push(child);
3979
+ }
3980
+ }
3981
+ while (queue.length > 0) {
3982
+ const curr = queue.shift();
3983
+ for (const val in this.next[curr]) {
3984
+ if (Object.prototype.hasOwnProperty.call(this.next[curr], val)) {
3985
+ const child = this.next[curr][val];
3986
+ let failNode = this.fail[curr];
3987
+ while (failNode !== 0 && !(val in this.next[failNode])) {
3988
+ failNode = this.fail[failNode];
3989
+ }
3990
+ if (val in this.next[failNode]) {
3991
+ this.fail[child] = this.next[failNode][val];
3992
+ } else {
3993
+ this.fail[child] = 0;
3994
+ }
3995
+ this.match[child] = this.match[child] || this.match[this.fail[child]];
3996
+ queue.push(child);
3997
+ }
3998
+ }
3999
+ }
4000
+ }
4001
+ searchUTF16(charSeq, start, end) {
4002
+ let node = 0;
4003
+ for (let i = start; i < end; i++) {
4004
+ const val = charSeq.charCodeAt(i);
4005
+ while (node !== 0 && !(val in this.next[node])) {
4006
+ node = this.fail[node];
4007
+ }
4008
+ if (val in this.next[node]) {
4009
+ node = this.next[node][val];
4010
+ }
4011
+ if (this.match[node]) return true;
4012
+ }
4013
+ return false;
4014
+ }
4015
+ searchUTF8(bytes, start, end) {
4016
+ let node = 0;
4017
+ for (let i = start; i < end; i++) {
4018
+ const val = bytes[i];
4019
+ while (node !== 0 && !(val in this.next[node])) {
4020
+ node = this.fail[node];
4021
+ }
4022
+ if (val in this.next[node]) {
4023
+ node = this.next[node][val];
4024
+ }
4025
+ if (this.match[node]) return true;
4026
+ }
4027
+ return false;
4028
+ }
4029
+ }
3795
4030
  class Prefilter {
3796
4031
  static Type = {
3797
4032
  NONE: 0,
@@ -3804,6 +4039,8 @@ class Prefilter {
3804
4039
  this.subs = [];
3805
4040
  this.str = '';
3806
4041
  this.bytes = null;
4042
+ this.ac16 = null;
4043
+ this.ac8 = null;
3807
4044
  }
3808
4045
  eval(input, pos) {
3809
4046
  switch (this.type) {
@@ -3817,6 +4054,10 @@ class Prefilter {
3817
4054
  }
3818
4055
  return true;
3819
4056
  case Prefilter.Type.OR:
4057
+ // Exploit Aho-Corasick if it was successfully built
4058
+ if (this.ac16 && this.ac8) {
4059
+ return input.hasAnyString(this, pos);
4060
+ }
3820
4061
  for (let i = 0; i < this.subs.length; i++) {
3821
4062
  if (this.subs[i].eval(input, pos)) return true;
3822
4063
  }
@@ -3907,7 +4148,9 @@ class PrefilterTree {
3907
4148
  const s = PrefilterTree.simplify(sub);
3908
4149
  if (s.type !== Prefilter.Type.NONE) {
3909
4150
  if (s.type === Prefilter.Type.AND) {
3910
- newSubs.push(...s.subs);
4151
+ for (let j = 0; j < s.subs.length; j++) {
4152
+ newSubs.push(s.subs[j]);
4153
+ }
3911
4154
  } else {
3912
4155
  newSubs.push(s);
3913
4156
  }
@@ -3927,7 +4170,9 @@ class PrefilterTree {
3927
4170
  return new Prefilter(Prefilter.Type.NONE);
3928
4171
  }
3929
4172
  if (s.type === Prefilter.Type.OR) {
3930
- newSubs.push(...s.subs);
4173
+ for (let j = 0; j < s.subs.length; j++) {
4174
+ newSubs.push(s.subs[j]);
4175
+ }
3931
4176
  } else {
3932
4177
  newSubs.push(s);
3933
4178
  }
@@ -3949,6 +4194,27 @@ class PrefilterTree {
3949
4194
  }
3950
4195
  }
3951
4196
  pf.subs = uniqueSubs;
4197
+
4198
+ // Build an Aho-Corasick automaton if all children are exact matches
4199
+ let allExact = true;
4200
+ for (const sub of uniqueSubs) {
4201
+ if (sub.type !== Prefilter.Type.EXACT) {
4202
+ allExact = false;
4203
+ break;
4204
+ }
4205
+ }
4206
+ if (allExact && uniqueSubs.length > 1) {
4207
+ const words16 = uniqueSubs.map(s => {
4208
+ const arr = [];
4209
+ for (let i = 0; i < s.str.length; i++) {
4210
+ arr.push(s.str.charCodeAt(i));
4211
+ }
4212
+ return arr;
4213
+ });
4214
+ pf.ac16 = new AhoCorasick(words16);
4215
+ const words8 = uniqueSubs.map(s => s.bytes);
4216
+ pf.ac8 = new AhoCorasick(words8);
4217
+ }
3952
4218
  return pf;
3953
4219
  }
3954
4220
  return pf;
@@ -4475,7 +4741,9 @@ class Simplify {
4475
4741
  // Flatten nested concatenations
4476
4742
  if (nsub.op === Regexp.Op.CONCAT) {
4477
4743
  changed = true;
4478
- newSubs.push(...nsub.subs);
4744
+ for (let j = 0; j < nsub.subs.length; j++) {
4745
+ newSubs.push(nsub.subs[j]);
4746
+ }
4479
4747
  continue;
4480
4748
  }
4481
4749
  } else if (re.op === Regexp.Op.ALTERNATE) {
@@ -4487,7 +4755,9 @@ class Simplify {
4487
4755
  // Flatten nested alternations
4488
4756
  if (nsub.op === Regexp.Op.ALTERNATE) {
4489
4757
  changed = true;
4490
- newSubs.push(...nsub.subs);
4758
+ for (let j = 0; j < nsub.subs.length; j++) {
4759
+ newSubs.push(nsub.subs[j]);
4760
+ }
4491
4761
  continue;
4492
4762
  }
4493
4763
  }
@@ -5497,7 +5767,10 @@ class Parser {
5497
5767
  return t.pop();
5498
5768
  }
5499
5769
  static concatRunes(x, y) {
5500
- return [...x, ...y];
5770
+ for (let i = 0; i < y.length; i++) {
5771
+ x.push(y[i]);
5772
+ }
5773
+ return x;
5501
5774
  }
5502
5775
  constructor(wholeRegexp, flags = 0) {
5503
5776
  this.wholeRegexp = wholeRegexp;
@@ -5533,8 +5806,8 @@ class Parser {
5533
5806
  return re;
5534
5807
  }
5535
5808
  reuse(re) {
5536
- if (this.height !== null && Object.prototype.hasOwnProperty.call(this.height, re)) {
5537
- delete this.height[re];
5809
+ if (this.height !== null && this.height.has(re)) {
5810
+ this.height.delete(re);
5538
5811
  }
5539
5812
  if (re.subs !== null && re.subs.length > 0) {
5540
5813
  re.subs[0] = this.free;
@@ -5566,20 +5839,20 @@ class Parser {
5566
5839
  if (n <= 0) {
5567
5840
  n = 1;
5568
5841
  }
5569
- if (n > Parser.MAX_SIZE / this.repeats) {
5842
+ if (n > Math.floor(Parser.MAX_SIZE / this.repeats)) {
5570
5843
  this.repeats = Parser.MAX_SIZE;
5571
5844
  } else {
5572
5845
  this.repeats *= n;
5573
5846
  }
5574
5847
  }
5575
- if (this.numRegexp < Parser.MAX_SIZE / this.repeats) {
5848
+ if (this.numRegexp < Math.floor(Parser.MAX_SIZE / this.repeats)) {
5576
5849
  return;
5577
5850
  }
5578
5851
 
5579
5852
  // We need to start tracking size.
5580
5853
  // Make the map and belatedly populate it
5581
5854
  // with info about everything we've constructed so far.
5582
- this.size = Object.create(null);
5855
+ this.size = new Map();
5583
5856
  for (let reEx of this.stack) {
5584
5857
  this.checkSize(reEx);
5585
5858
  }
@@ -5590,8 +5863,8 @@ class Parser {
5590
5863
  }
5591
5864
  calcSize(re, force = false) {
5592
5865
  if (!force && this.size !== null) {
5593
- if (Object.prototype.hasOwnProperty.call(this.size, re)) {
5594
- return this.size[re];
5866
+ if (this.size.has(re)) {
5867
+ return this.size.get(re);
5595
5868
  }
5596
5869
  }
5597
5870
  let size = 0;
@@ -5651,9 +5924,9 @@ class Parser {
5651
5924
  }
5652
5925
  size = Math.max(1, size);
5653
5926
  if (this.size === null) {
5654
- this.size = Object.create(null);
5927
+ this.size = new Map();
5655
5928
  }
5656
- this.size[re] = size;
5929
+ this.size.set(re, size);
5657
5930
  return size;
5658
5931
  }
5659
5932
  checkHeight(re) {
@@ -5661,7 +5934,7 @@ class Parser {
5661
5934
  return;
5662
5935
  }
5663
5936
  if (this.height === null) {
5664
- this.height = Object.create(null);
5937
+ this.height = new Map();
5665
5938
  for (let reEx of this.stack) {
5666
5939
  this.checkHeight(reEx);
5667
5940
  }
@@ -5672,8 +5945,8 @@ class Parser {
5672
5945
  }
5673
5946
  calcHeight(re, force = false) {
5674
5947
  if (!force && this.height !== null) {
5675
- if (Object.prototype.hasOwnProperty.call(this.height, re)) {
5676
- return this.height[re];
5948
+ if (this.height.has(re)) {
5949
+ return this.height.get(re);
5677
5950
  }
5678
5951
  }
5679
5952
  let h = 1;
@@ -5684,9 +5957,9 @@ class Parser {
5684
5957
  }
5685
5958
  }
5686
5959
  if (this.height === null) {
5687
- this.height = Object.create(null);
5960
+ this.height = new Map();
5688
5961
  }
5689
- this.height[re] = h;
5962
+ this.height.set(re, h);
5690
5963
  return h;
5691
5964
  }
5692
5965
 
@@ -7808,7 +8081,7 @@ class TranslateRegExpString {
7808
8081
  changed = true;
7809
8082
  continue;
7810
8083
  } else if (ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
7811
- if (i + 3 >= size || data[i + 3] !== '=' && data[i + 3] !== '!') {
8084
+ if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
7812
8085
  result += '(?P<';
7813
8086
  i += 3;
7814
8087
  changed = true;