re2js 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,8 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.2.0
6
- * @author Alexey Vasiliev
5
+ * @version v2.2.2
6
+ * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
9
9
  * @license MIT
@@ -82,6 +82,8 @@
82
82
  const ASCII_SIZE = 128;
83
83
  const ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
84
84
  const ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
85
+ // The highest legal Basic Multilingual Plane (BMP) value.
86
+ const MAX_BMP = 0xffff;
85
87
  for (let i = 0; i < ASCII_SIZE; i++) {
86
88
  if (i >= 97 && i <= 122) {
87
89
  // a-z
@@ -105,11 +107,13 @@
105
107
  static toUpperCase(codepoint) {
106
108
  if (codepoint < ASCII_SIZE) return ASCII_TO_UPPER[codepoint];
107
109
  const s = String.fromCodePoint(codepoint).toUpperCase();
108
- if (s.length > 1) {
110
+ const expectedLen = s.codePointAt(0) > MAX_BMP ? 2 : 1;
111
+ if (s.length > expectedLen) {
109
112
  return codepoint;
110
113
  }
111
114
  const sOrigin = String.fromCodePoint(s.codePointAt(0)).toLowerCase();
112
- if (sOrigin.length > 1 || sOrigin.codePointAt(0) !== codepoint) {
115
+ const originExpectedLen = sOrigin.codePointAt(0) > MAX_BMP ? 2 : 1;
116
+ if (sOrigin.length > originExpectedLen || sOrigin.codePointAt(0) !== codepoint) {
113
117
  return codepoint;
114
118
  }
115
119
  return s.codePointAt(0);
@@ -120,11 +124,13 @@
120
124
  static toLowerCase(codepoint) {
121
125
  if (codepoint < ASCII_SIZE) return ASCII_TO_LOWER[codepoint];
122
126
  const s = String.fromCodePoint(codepoint).toLowerCase();
123
- if (s.length > 1) {
127
+ const expectedLen = s.codePointAt(0) > MAX_BMP ? 2 : 1;
128
+ if (s.length > expectedLen) {
124
129
  return codepoint;
125
130
  }
126
131
  const sOrigin = String.fromCodePoint(s.codePointAt(0)).toUpperCase();
127
- if (sOrigin.length > 1 || sOrigin.codePointAt(0) !== codepoint) {
132
+ const originExpectedLen = sOrigin.codePointAt(0) > MAX_BMP ? 2 : 1;
133
+ if (sOrigin.length > originExpectedLen || sOrigin.codePointAt(0) !== codepoint) {
128
134
  return codepoint;
129
135
  }
130
136
  return s.codePointAt(0);
@@ -226,7 +232,7 @@
226
232
  static _CASE_ORBIT = null;
227
233
  static get CASE_ORBIT() {
228
234
  if (!this._CASE_ORBIT) {
229
- this._CASE_ORBIT = decodeOrbit('rCrDIzDYqpII-LiC8cQlHa+0HGrpI6EzClClOBmOBkOBoOBpOBnOBrOBsOBqOlByPBzPBxPyK5crCz+HCydD1dD4dB5dB6dC8dEgeBheCieDmeDpeHj-HCweD1fDxeB+9HBwfC1FE2eBxfBjeBjdD1eDmpIHycB0fEmdBgda6cBhdD4cB1cdyhBC0hBK+hBDhiBBiiBIqiBIgkHChkHKikHDjkHBkkHImkHYjjBBnkH9gGygBB0gBB+gBBhhBBlkHBihBBqhBBijBBqypB4OhzHB70H6BgzHD-GiHo8HBp8HBq8HBr8HBs8HBt8HBu8HBv8HBg8HBh8HBi8HBj8HBk8HBl8HBm8HBn8HB48HB58HB68HB78HB88HB98HB+8HB-8HBw8HBx8HBy8HBz8HB08HB18HB28HB38HBo9HBp9HBq9HBr9HBs9HBt9HBu9HBv9HBg9HBh9HBi9HBj9HBk9HBl9HBm9HBn9HE89HJz9HClaFs+HJj+HHwcQwdQ8-HJz-HqJpdErCBlG-ohBrypBBokH6lVm4+BBl4+B6nCohhCBphhCBqhhCBrhhCBshhCBthhCBuhhCBvhhCBwhhCBxhhCByhhCBzhhCB0hhCB1hhCB2hhCB3hhCB4hhCB5hhCB6hhCB7hhCB8hhCB9hhCB+hhCB-hhCBgihCBhihCBiihCBjihCBkihCBlihCBmihCBnihCBoihCBpihCBqihCBrihCBsihCBtihCBuihCBvihCBgghCBhghCBighCBjghCBkghCBlghCBmghCBnghCBoghCBpghCBqghCBrghCBsghCBtghCBughCBvghCBwghCBxghCByghCBzghCB0ghCB1ghCB2ghCB3ghCB4ghCB5ghCB6ghCB7ghCB8ghCB9ghCB+ghCB-ghCBghhCBhhhCBihhCBjhhCBkhhCBlhhCBmhhCBnhhChD4mhCB5mhCB6mhCB7mhCB8mhCB9mhCB+mhCB-mhCBgnhCBhnhCBinhCBjnhCBknhCBlnhCBmnhCBnnhCBonhCBpnhCBqnhCBrnhCBsnhCBtnhCBunhCBvnhCBwnhCBxnhCBynhCBznhCB0nhCB1nhCB2nhCB3nhCB4nhCB5nhCB6nhCB7nhCFwlhCBxlhCBylhCBzlhCB0lhCB1lhCB2lhCB3lhCB4lhCB5lhCB6lhCB7lhCB8lhCB9lhCB+lhCB-lhCBgmhCBhmhCBimhCBjmhCBkmhCBlmhCBmmhCBnmhCBomhCBpmhCBqmhCBrmhCBsmhCBtmhCBumhCBvmhCBwmhCBxmhCBymhCBzmhC1D3shCB4shCB5shCB6shCB7shCB8shCB9shCB+shCB-shCBgthCBhthCCjthCBkthCBlthCBmthCBnthCBothCBpthCBqthCBrthCBsthCBtthCButhCBvthCBwthCBxthCCzthCB0thCB1thCB2thCB3thCB4thCB5thCC7thCB8thCCwrhCBxrhCByrhCBzrhCB0rhCB1rhCB2rhCB3rhCB4rhCB5rhCB6rhCC8rhCB9rhCB+rhCB-rhCBgshCBhshCBishCBjshCBkshCBlshCBmshCBnshCBoshCBpshCBqshCCsshCBtshCBushCBvshCBwshCBxshCByshCC0shCB1shCk2BgmjCBhmjCBimjCBjmjCBkmjCBlmjCBmmjCBnmjCBomjCBpmjCBqmjCBrmjCBsmjCBtmjCBumjCBvmjCBwmjCBxmjCBymjCBzmjCB0mjCB1mjCB2mjCB3mjCB4mjCB5mjCB6mjCB7mjCB8mjCB9mjCB+mjCB-mjCBgnjCBhnjCBinjCBjnjCBknjCBlnjCBmnjCBnnjCBonjCBpnjCBqnjCBrnjCBsnjCBtnjCBunjCBvnjCBwnjCBxnjCBynjCOgkjCBhkjCBikjCBjkjCBkkjCBlkjCBmkjCBnkjCBokj
CBpkjCBqkjCBrkjCBskjCBtkjCBukjCBvkjCBwkjCBxkjCBykjCBzkjCB0kjCB1kjCB2kjCB3kjCB4kjCB5kjCB6kjCB7kjCB8kjCB9kjCB+kjCB-kjCBgljCBhljCBiljCBjljCBkljCBlljCBmljCBnljCBoljCBpljCBqljCBrljCBsljCBtljCBuljCBvljCBwljCBxljCByljC+CwrjCBxrjCByrjCBzrjCB0rjCB1rjCB2rjCB3rjCB4rjCB5rjCB6rjCB7rjCB8rjCB9rjCB+rjCB-rjCBgsjCBhsjCBisjCBjsjCBksjCBlsjCLwqjCBxqjCByqjCBzqjCB0qjCB1qjCB2qjCB3qjCB4qjCB5qjCB6qjCB7qjCB8qjCB9qjCB+qjCB-qjCBgrjCBhrjCBirjCBjrjCBkrjCBlrjC74CgmmCBhmmCBimmCBjmmCBkmmCBlmmCBmmmCBnmmCBommCBpmmCBqmmCBrmmCBsmmCBtmmCBummCBvmmCBwmmCBxmmCBymmCBzmmCB0mmCB1mmCB2mmCB3mmCB4mmCB5mmCB6mmCB7mmCB8mmCB9mmCB+mmCB-mmCBglmCBhlmCBilmCBjlmCBklmCBllmCBmlmCBnlmCBolmCBplmCBqlmCBrlmCBslmCBtlmCBulmCBvlmCBwlmCBxlmCBylmCBzlmCB0lmCB1lmCB2lmCB3lmCB4lmCB5lmCB6lmCB7lmCB8lmCB9lmCB+lmCB-lmChrVgz7CBhz7CBiz7CBjz7CBkz7CBlz7CBmz7CBnz7CBoz7CBpz7CBqz7CBrz7CBsz7CBtz7CBuz7CBvz7CBwz7CBxz7CByz7CBzz7CB0z7CB1z7CB2z7CB3z7CB4z7CB5z7CB6z7CB7z7CB8z7CB9z7CB+z7CB-z7CBgy7CBhy7CBiy7CBjy7CBky7CBly7CBmy7CBny7CBoy7CBpy7CBqy7CBry7CBsy7CBty7CBuy7CBvy7CBwy7CBxy7CByy7CBzy7CB0y7CB1y7CB2y7CB3y7CB4y7CB5y7CB6y7CB7y7CB8y7CB9y7CB+y7CB-y7ChB717CB817CB917CB+17CB-17CBg27CBh27CBi27CBj27CBk27CBl27CBm27CBn27CBo27CBp27CBq27CBr27CBs27CBt27CBu27CBv27CBw27CBx27CBy27CBz27CDg17CBh17CBi17CBj17CBk17CBl17CBm17CBn17CBo17CBp17CBq17CBr17CBs17CBt17CBu17CBv17CBw17CBx17CBy17CBz17CB017CB117CB217CB317CB417Ctxeip6DBjp6DBkp6DBlp6DBmp6DBnp6DBop6DBpp6DBqp6DBrp6DBsp6DBtp6DBup6DBvp6DBwp6DBxp6DByp6DBzp6DB0p6DB1p6DB2p6DB3p6DB4p6DB5p6DB6p6DB7p6DB8p6DB9p6DB+p6DB-p6DBgq6DBhq6DBiq6DBjq6DBgo6DBho6DBio6DBjo6DBko6DBlo6DBmo6DBno6DBoo6DBpo6DBqo6DBro6DBso6DBto6DBuo6DBvo6DBwo6DBxo6DByo6DBzo6DB0o6DB1o6DB2o6DB3o6DB4o6DB5o6DB6o6DB7o6DB8o6DB9o6DB+o6DB-o6DBgp6DBhp6D');
235
+ this._CASE_ORBIT = decodeOrbit('rCrDIzDYqpII-LiC8cQlHa+0HGrpI6EzClClOBmOBkOBoOBpOBnOBrOBsOBqOlByPBzPBxPyK5crCz+HCydD1dD4dB5dB6dC8dEgeBheCieDmeDpeHj-HCweD1fDxeB+9HBwfC1FE2eBxfBjeBjdD1eDmpIHycB0fEmdBgda6cBhdD4cB1cdyhBC0hBK+hBDhiBBiiBIqiBIgkHChkHKikHDjkHBkkHImkHYjjBBnkH9gGygBB0gBB+gBBhhBBlkHBihBBqhBBijBBqypB4OhzHB70H6BgzHD-GiHo8HBp8HBq8HBr8HBs8HBt8HBu8HBv8HBg8HBh8HBi8HBj8HBk8HBl8HBm8HBn8HB48HB58HB68HB78HB88HB98HB+8HB-8HBw8HBx8HBy8HBz8HB08HB18HB28HB38HBo9HBp9HBq9HBr9HBs9HBt9HBu9HBv9HBg9HBh9HBi9HBj9HBk9HBl9HBm9HBn9HE89HJz9HClaFs+HJj+HHwcQwdQ8-HJz-HqJpdErCBlG-ohBrypBBokH6lVm4+BBl4+B');
230
236
  }
231
237
  return this._CASE_ORBIT;
232
238
  }
@@ -598,11 +604,16 @@
598
604
  // to compare it to |r2|.
599
605
  // -1 is interpreted as the end-of-file mark.
600
606
  static equalsIgnoreCase(r1, r2) {
601
- // Runes already match, or one of them is EOF
602
- if (r1 < 0 || r2 < 0 || r1 === r2) {
607
+ // Runes already match
608
+ if (r1 === r2) {
603
609
  return true;
604
610
  }
605
611
 
612
+ // Safely fail if either is EOF (and they didn't explicitly match above)
613
+ if (r1 < 0 || r2 < 0) {
614
+ return false;
615
+ }
616
+
606
617
  // Fast path for the common case where both runes are ASCII characters.
607
618
  // Coerces both runes to lowercase if applicable.
608
619
  if (r1 <= this.MAX_ASCII && r2 <= this.MAX_ASCII) {
@@ -855,7 +866,7 @@
855
866
  // Encoding[(Encoding['UTF_16'] = 0)] = 'UTF_16'
856
867
  // Encoding[(Encoding['UTF_8'] = 1)] = 'UTF_8'
857
868
  const createEnum = (values = [], initNum = 0) => {
858
- const enumObject = {};
869
+ const enumObject = Object.create(null);
859
870
  for (let i = 0; i < values.length; i++) {
860
871
  const val = values[i];
861
872
  const keyVal = initNum + i;
@@ -997,6 +1008,9 @@
997
1008
  hasString() {
998
1009
  return false;
999
1010
  }
1011
+ hasAnyString() {
1012
+ return false;
1013
+ }
1000
1014
 
1001
1015
  // Helper for the exact-literal fast-path execution router
1002
1016
  prefixLength() {
@@ -1022,6 +1036,13 @@
1022
1036
  return idx !== -1 && idx <= this.end - target.length;
1023
1037
  }
1024
1038
 
1039
+ // Executes a high-speed, single-pass search for multiple literal strings
1040
+ // simultaneously using an Aho-Corasick automaton.
1041
+ hasAnyString(prefilter, pos) {
1042
+ if (!prefilter.ac8) return false;
1043
+ return prefilter.ac8.searchUTF8(this.bytes, this.start + pos, this.end);
1044
+ }
1045
+
1025
1046
  // Returns the rune at the specified index; the units are
1026
1047
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1027
1048
  // indices. Returns the width (in the same units) of the rune in
@@ -1040,17 +1061,23 @@
1040
1061
  return c << 3 | 1;
1041
1062
  } else if (c >= 0xc2 && c <= 0xdf && pos + 1 < this.end) {
1042
1063
  const c1 = this.bytes[pos + 1] & 0xff;
1064
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1043
1065
  const rune = (c & 0x1f) << 6 | c1 & 0x3f;
1044
1066
  return rune << 3 | 2;
1045
1067
  } else if (c >= 0xe0 && c <= 0xef && pos + 2 < this.end) {
1046
1068
  const c1 = this.bytes[pos + 1] & 0xff;
1069
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1047
1070
  const c2 = this.bytes[pos + 2] & 0xff;
1071
+ if ((c2 & 0xc0) !== 0x80) return c << 3 | 1;
1048
1072
  const rune = (c & 0x0f) << 12 | (c1 & 0x3f) << 6 | c2 & 0x3f;
1049
1073
  return rune << 3 | 3;
1050
1074
  } else if (c >= 0xf0 && c <= 0xf4 && pos + 3 < this.end) {
1051
1075
  const c1 = this.bytes[pos + 1] & 0xff;
1076
+ if ((c1 & 0xc0) !== 0x80) return c << 3 | 1;
1052
1077
  const c2 = this.bytes[pos + 2] & 0xff;
1078
+ if ((c2 & 0xc0) !== 0x80) return c << 3 | 1;
1053
1079
  const c3 = this.bytes[pos + 3] & 0xff;
1080
+ if ((c3 & 0xc0) !== 0x80) return c << 3 | 1;
1054
1081
  const rune = (c & 0x07) << 18 | (c1 & 0x3f) << 12 | (c2 & 0x3f) << 6 | c3 & 0x3f;
1055
1082
  return rune << 3 | 4;
1056
1083
  } else {
@@ -1129,6 +1156,13 @@
1129
1156
  return idx !== -1 && idx <= this.end - prefilter.str.length;
1130
1157
  }
1131
1158
 
1159
+ // Executes a high-speed, single-pass search for multiple literal strings
1160
+ // simultaneously using an Aho-Corasick automaton.
1161
+ hasAnyString(prefilter, pos) {
1162
+ if (!prefilter.ac16) return false;
1163
+ return prefilter.ac16.searchUTF16(this.charSequence, this.start + pos, this.end);
1164
+ }
1165
+
1132
1166
  // Returns the rune at the specified index; the units are
1133
1167
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1134
1168
  // indices. Returns the width (in the same units) of the rune in
@@ -1574,7 +1608,15 @@
1574
1608
  if (this.hasMatch) {
1575
1609
  start = this.groups[1];
1576
1610
  if (this.groups[0] === this.groups[1]) {
1577
- start++;
1611
+ // Safely calculate structural encoding width to avoid sequence corruption
1612
+ const machineInput = this.matcherInput.isUTF16Encoding() ? MachineInput.fromUTF16(this.matcherInput.asCharSequence(), 0, this.matcherInputLength) : MachineInput.fromUTF8(this.matcherInput.asBytes(), 0, this.matcherInputLength);
1613
+ const r = machineInput.step(start);
1614
+ if (r < 0) {
1615
+ // EOF
1616
+ start++; // Advance past length to force loop exit
1617
+ } else {
1618
+ start += r & 7; // Advance by safely decoded width
1619
+ }
1578
1620
  }
1579
1621
  }
1580
1622
  return this.genMatch(start, RE2Flags.UNANCHORED);
@@ -1713,6 +1755,8 @@
1713
1755
  const groupName = replacement.substring(i + 1, j);
1714
1756
  res += this.group(groupName);
1715
1757
  last = j + 1;
1758
+ i = j;
1759
+ continue;
1716
1760
  }
1717
1761
  }
1718
1762
  }
@@ -1792,6 +1836,7 @@
1792
1836
  if (j === replacement.length || replacement.codePointAt(j) !== Codepoint.CODES.get('>')) {
1793
1837
  res += replacement.substring(i - 1, j + 1);
1794
1838
  last = j + 1;
1839
+ i = j;
1795
1840
  continue;
1796
1841
  }
1797
1842
  const groupName = replacement.substring(i + 1, j);
@@ -1801,6 +1846,8 @@
1801
1846
  res += `$<${groupName}>`;
1802
1847
  }
1803
1848
  last = j + 1;
1849
+ i = j;
1850
+ continue;
1804
1851
  }
1805
1852
  }
1806
1853
  }
@@ -1925,6 +1972,8 @@
1925
1972
  return r === r0;
1926
1973
  }
1927
1974
  const len = this.runes.length;
1975
+ if (len === 0) return false;
1976
+
1928
1977
  // If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
1929
1978
  if (len === 2 || len === 4 || len === 6 || len === 8) {
1930
1979
  for (let j = 0; j < len; j += 2) {
@@ -1938,22 +1987,19 @@
1938
1987
  return false; // Stop here
1939
1988
  }
1940
1989
 
1941
- // Otherwise binary search.
1942
- let lo = 0;
1943
- let hi = this.runes.length / 2 | 0;
1944
- while (lo < hi) {
1945
- const m = lo + hi >> 1; // native cpu instruction for "lo + (((hi - lo) / 2) | 0)"
1946
- const c = this.runes[2 * m];
1947
- if (c <= r) {
1948
- if (r <= this.runes[2 * m + 1]) {
1949
- return true;
1950
- }
1951
- lo = m + 1;
1952
- } else {
1953
- hi = m;
1954
- }
1990
+ // Branchless Binary Search (Lower Bound)
1991
+ // Compiles to optimal conditional move (cmov) machine code, preventing
1992
+ // branch mispredictions on large, chaotic Unicode arrays
1993
+ let base = 0;
1994
+ let n = len >> 1;
1995
+ while (n > 1) {
1996
+ const half = n >> 1;
1997
+ base += this.runes[base + half << 1] <= r ? half : 0;
1998
+ n -= half;
1955
1999
  }
1956
- return false;
2000
+ base += this.runes[base << 1] <= r ? 1 : 0;
2001
+ const m = base - 1;
2002
+ return m >= 0 && r <= this.runes[m << 1 | 1];
1957
2003
  }
1958
2004
 
1959
2005
  // matchRunePos checks whether the instruction matches (and consumes) r.
@@ -1968,6 +2014,7 @@
1968
2014
  return r === r0 ? 0 : -1;
1969
2015
  }
1970
2016
  const len = this.runes.length;
2017
+ if (len === 0) return -1;
1971
2018
  if (len === 2 || len === 4 || len === 6 || len === 8) {
1972
2019
  for (let j = 0; j < len; j += 2) {
1973
2020
  if (r < this.runes[j]) return -1;
@@ -1975,19 +2022,18 @@
1975
2022
  }
1976
2023
  return -1;
1977
2024
  }
1978
- let lo = 0;
1979
- let hi = Math.floor(len / 2);
1980
- while (lo < hi) {
1981
- const m = lo + hi >> 1;
1982
- const c = this.runes[2 * m];
1983
- if (c <= r) {
1984
- if (r <= this.runes[2 * m + 1]) return m;
1985
- lo = m + 1;
1986
- } else {
1987
- hi = m;
1988
- }
2025
+
2026
+ // Branchless Binary Search (Lower Bound)
2027
+ let base = 0;
2028
+ let n = len >> 1;
2029
+ while (n > 1) {
2030
+ const half = n >> 1;
2031
+ base += this.runes[base + half << 1] <= r ? half : 0;
2032
+ n -= half;
1989
2033
  }
1990
- return -1;
2034
+ base += this.runes[base << 1] <= r ? 1 : 0;
2035
+ const m = base - 1;
2036
+ return m >= 0 && r <= this.runes[m << 1 | 1] ? m : -1;
1991
2037
  }
1992
2038
  /**
1993
2039
  *
@@ -2086,6 +2132,7 @@
2086
2132
  //
2087
2133
  // Called by RE2.doExecute.
2088
2134
  class Machine {
2135
+ static THREADS_CHUNK_SIZE = 128;
2089
2136
  static fromRE2(re2) {
2090
2137
  const m = new Machine();
2091
2138
  m.prog = re2.prog;
@@ -2126,15 +2173,15 @@
2126
2173
  resetCap() {
2127
2174
  for (let i = 0; i < this.poolSize; i++) {
2128
2175
  const t = this.pool[i];
2129
- t.cap.fill(0);
2176
+ t.cap.fill(-1);
2130
2177
  }
2131
2178
  }
2132
2179
  initNewCap(ncap) {
2133
2180
  for (let i = 0; i < this.poolSize; i++) {
2134
2181
  const t = this.pool[i];
2135
- t.cap = new Int32Array(ncap);
2182
+ t.cap = new Int32Array(ncap).fill(-1);
2136
2183
  }
2137
- this.matchcap = new Int32Array(ncap);
2184
+ this.matchcap = new Int32Array(ncap).fill(-1);
2138
2185
  }
2139
2186
  submatches() {
2140
2187
  if (this.ncap === 0) {
@@ -2147,14 +2194,21 @@
2147
2194
  // alloc() allocates a new thread with the given instruction.
2148
2195
  // It uses the free pool if possible.
2149
2196
  alloc(inst) {
2150
- let t;
2151
- if (this.poolSize > 0) {
2152
- this.poolSize--;
2153
- t = this.pool[this.poolSize];
2154
- } else {
2155
- t = new Thread();
2156
- t.cap = new Int32Array(this.matchcap.length);
2197
+ if (this.poolSize === 0) {
2198
+ const capLen = this.matchcap.length;
2199
+
2200
+ // Bulk allocate threads in a tight loop so the V8 engine
2201
+ // places them adjacently in the young generation heap
2202
+ for (let i = 0; i < Machine.THREADS_CHUNK_SIZE; i++) {
2203
+ const t = new Thread();
2204
+ t.cap = new Int32Array(capLen);
2205
+ this.pool[this.poolSize++] = t;
2206
+ }
2157
2207
  }
2208
+
2209
+ // Pop a thread from the top of the pool stack
2210
+ this.poolSize--;
2211
+ const t = this.pool[this.poolSize];
2158
2212
  t.inst = inst;
2159
2213
  return t;
2160
2214
  }
@@ -2207,6 +2261,9 @@
2207
2261
  if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) {
2208
2262
  break;
2209
2263
  }
2264
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2265
+ break;
2266
+ }
2210
2267
  if (this.matched) {
2211
2268
  break;
2212
2269
  }
@@ -2284,6 +2341,9 @@
2284
2341
  while (true) {
2285
2342
  if (runq.isEmpty()) {
2286
2343
  if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
2344
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2345
+ break;
2346
+ }
2287
2347
  }
2288
2348
  if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
2289
2349
  // Spawn Lookbehind threads BEFORE the main pattern
@@ -2399,78 +2459,83 @@
2399
2459
  runq.clear();
2400
2460
  }
2401
2461
  add(q, pc, pos, cap, cond, t) {
2402
- if (pc === 0) {
2403
- return t;
2404
- }
2405
- if (q.contains(pc)) {
2406
- return t;
2407
- }
2408
- const d = q.add(pc);
2409
- const inst = this.prog.inst[pc];
2410
- switch (inst.op) {
2411
- case Inst.FAIL:
2412
- break;
2413
- case Inst.ALT:
2414
- case Inst.ALT_MATCH:
2415
- t = this.add(q, inst.out, pos, cap, cond, t);
2416
- t = this.add(q, inst.arg, pos, cap, cond, t);
2417
- break;
2418
- case Inst.EMPTY_WIDTH:
2419
- if ((inst.arg & ~cond) === 0) {
2420
- t = this.add(q, inst.out, pos, cap, cond, t);
2421
- }
2422
- break;
2423
- case Inst.NOP:
2424
- t = this.add(q, inst.out, pos, cap, cond, t);
2425
- break;
2426
- case Inst.CAPTURE:
2427
- if (inst.arg < this.ncap) {
2428
- const opos = cap[inst.arg];
2429
- cap[inst.arg] = pos;
2430
- this.add(q, inst.out, pos, cap, cond, null);
2431
- cap[inst.arg] = opos;
2432
- } else {
2462
+ while (true) {
2463
+ if (pc === 0) {
2464
+ return t;
2465
+ }
2466
+ if (q.contains(pc)) {
2467
+ return t;
2468
+ }
2469
+ const d = q.add(pc);
2470
+ const inst = this.prog.inst[pc];
2471
+ switch (inst.op) {
2472
+ case Inst.FAIL:
2473
+ return t;
2474
+ case Inst.ALT:
2475
+ case Inst.ALT_MATCH:
2433
2476
  t = this.add(q, inst.out, pos, cap, cond, t);
2434
- }
2435
- break;
2436
- case Inst.LB_WRITE:
2437
- this.lbTable[Math.abs(inst.lb)] = pos;
2438
- t = this.add(q, inst.out, pos, cap, cond, t);
2439
- break;
2440
- case Inst.LB_CHECK:
2441
- if (inst.lb > 0) {
2442
- // Positive Lookbehind
2443
- if (this.lbTable[inst.lb] === pos) {
2444
- t = this.add(q, inst.out, pos, cap, cond, t);
2477
+ pc = inst.arg; // Flattened tail recursion
2478
+ continue;
2479
+ case Inst.EMPTY_WIDTH:
2480
+ if ((inst.arg & ~cond) === 0) {
2481
+ pc = inst.out; // Flattened tail recursion
2482
+ continue;
2445
2483
  }
2446
- } else if (this.lbTable[-inst.lb] !== pos) {
2447
- // Negative Lookbehind
2448
- t = this.add(q, inst.out, pos, cap, cond, t);
2449
- }
2450
- break;
2451
- case Inst.MATCH:
2452
- case Inst.RUNE:
2453
- case Inst.RUNE1:
2454
- case Inst.RUNE_ANY:
2455
- case Inst.RUNE_ANY_NOT_NL:
2456
- if (t === null) {
2457
- t = this.alloc(inst);
2458
- } else {
2459
- t.inst = inst;
2460
- }
2461
- if (this.ncap > 0 && t.cap !== cap) {
2462
- // Direct assignment utilizing Typed Array performance
2463
- for (let c = 0; c < this.ncap; c++) {
2464
- t.cap[c] = cap[c];
2484
+ return t;
2485
+ case Inst.NOP:
2486
+ pc = inst.out; // Flattened tail recursion
2487
+ continue;
2488
+ case Inst.CAPTURE:
2489
+ if (inst.arg < this.ncap) {
2490
+ const opos = cap[inst.arg];
2491
+ cap[inst.arg] = pos;
2492
+ this.add(q, inst.out, pos, cap, cond, null);
2493
+ cap[inst.arg] = opos;
2494
+ return t;
2495
+ } else {
2496
+ pc = inst.out; // Flattened tail recursion
2497
+ continue;
2465
2498
  }
2466
- }
2467
- q.denseThreads[d] = t;
2468
- t = null;
2469
- break;
2470
- default:
2471
- throw new Error('unhandled');
2499
+ case Inst.LB_WRITE:
2500
+ this.lbTable[Math.abs(inst.lb)] = pos;
2501
+ pc = inst.out;
2502
+ continue;
2503
+ case Inst.LB_CHECK:
2504
+ if (inst.lb > 0) {
2505
+ // Positive Lookbehind
2506
+ if (this.lbTable[inst.lb] === pos) {
2507
+ pc = inst.out; // Flattened tail recursion
2508
+ continue;
2509
+ }
2510
+ } else if (this.lbTable[-inst.lb] !== pos) {
2511
+ // Negative Lookbehind
2512
+ pc = inst.out; // Flattened tail recursion
2513
+ continue;
2514
+ }
2515
+ return t;
2516
+ case Inst.MATCH:
2517
+ case Inst.RUNE:
2518
+ case Inst.RUNE1:
2519
+ case Inst.RUNE_ANY:
2520
+ case Inst.RUNE_ANY_NOT_NL:
2521
+ if (t === null) {
2522
+ t = this.alloc(inst);
2523
+ } else {
2524
+ t.inst = inst;
2525
+ }
2526
+ if (this.ncap > 0 && t.cap !== cap) {
2527
+ // Direct assignment utilizing Typed Array performance
2528
+ for (let c = 0; c < this.ncap; c++) {
2529
+ t.cap[c] = cap[c];
2530
+ }
2531
+ }
2532
+ q.denseThreads[d] = t;
2533
+ t = null;
2534
+ return t;
2535
+ default:
2536
+ throw new Error('unhandled');
2537
+ }
2472
2538
  }
2473
- return t;
2474
2539
  }
2475
2540
  }
2476
2541
 
@@ -2498,8 +2563,15 @@
2498
2563
  this.nfaStates = nfaStates; // Int32Array of Instruction PCs
2499
2564
  this.isMatch = isMatch; // Boolean
2500
2565
  this.matchIDs = matchIDs; // Array of integers indicating which Set patterns matched
2501
- this.nextAscii = new Array(Unicode.MAX_ASCII + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2502
- this.nextMap = new Map(); // Cache of Char -> DFAState
2566
+
2567
+ // Latin-1 (Unicode.MAX_LATIN1 + 1) flat arrays for blisteringly fast O(1) lookups
2568
+ // completely covering standard English, European languages, and 1-byte encodings.
2569
+ this.nextLatin1 = new Array(Unicode.MAX_LATIN1 + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2570
+ this.nextLatin1Anchored = new Array(Unicode.MAX_LATIN1 + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2571
+ // Two parallel arrays used as a small hash map for V8 optimization (N is small, so an O(n) scan is faster than a Map's O(1) lookup)
2572
+ this.transKeys = [];
2573
+ this.transVals = [];
2574
+ this.lastSeen = 0; // Track when this state was last used for LRU eviction
2503
2575
  }
2504
2576
  }
2505
2577
  class DFA {
@@ -2512,6 +2584,7 @@
2512
2584
  this.stateLimit = 10000; // Prevent memory explosion (ReDoS protection)
2513
2585
  this.cacheClears = 0; // Track thrashing
2514
2586
  this.failed = false; // mark if DFA cannot work with provided prog
2587
+ this.clock = 0; // Global clock for LRU eviction
2515
2588
  }
2516
2589
 
2517
2590
  // Follows epsilon (empty) transitions to find all reachable states without consuming a char
@@ -2571,6 +2644,7 @@
2571
2644
  for (let i = 0; i < bucket.length; i++) {
2572
2645
  const state = bucket[i];
2573
2646
  if (arraysEqual(state.nfaStates, sortedPCs)) {
2647
+ state.lastSeen = ++this.clock;
2574
2648
  return state;
2575
2649
  }
2576
2650
  }
@@ -2583,40 +2657,99 @@
2583
2657
  if (this.failed) return null;
2584
2658
 
2585
2659
  // Safety: prevent memory exhaustion from state explosion
2586
- // We flush the cache and return null, which seamlessly routes execution to the NFA
2660
+ // We prune the cache to keep the newest 50%
2587
2661
  if (this.stateCount >= this.stateLimit) {
2588
- this.stateCache.clear();
2589
- this.stateCount = 0;
2590
- this.startState = null;
2591
2662
  this.cacheClears++;
2592
2663
 
2593
2664
  // If this regex causes continuous cache thrashing, permanently fall back to NFA
2594
2665
  // to avoid spending CPU cycles constantly rebuilding the DFA tree.
2595
2666
  if (this.cacheClears >= DFA.MAX_CACHE_CLEARS) {
2596
2667
  this.failed = true;
2668
+ this.stateCache.clear();
2669
+ this.stateCount = 0;
2670
+ this.startState = null;
2671
+ return null;
2672
+ }
2673
+ this.evictCache();
2674
+
2675
+ // After eviction, the bucket reference might be stale or empty.
2676
+ // We must re-fetch or re-create the bucket.
2677
+ bucket = this.stateCache.get(hash);
2678
+ if (!bucket) {
2679
+ bucket = [];
2680
+ this.stateCache.set(hash, bucket);
2597
2681
  }
2598
- return null;
2599
2682
  }
2600
2683
 
2601
2684
  // State not found, create it and add to bucket
2602
2685
  const state = new DFAState(sortedPCs, closureResult.isMatch, closureResult.matchIDs);
2686
+ state.lastSeen = ++this.clock;
2603
2687
  bucket.push(state);
2604
2688
  this.stateCount++;
2605
2689
  return state;
2606
2690
  }
2691
+ evictCache() {
2692
+ const allStates = [];
2693
+ for (const bucket of this.stateCache.values()) {
2694
+ for (let i = 0; i < bucket.length; i++) {
2695
+ allStates.push(bucket[i]);
2696
+ }
2697
+ }
2698
+
2699
+ // Sort ascending by lastSeen (oldest first)
2700
+ allStates.sort((a, b) => a.lastSeen - b.lastSeen);
2701
+
2702
+ // Keep the newest 50%
2703
+ const keepCount = Math.max(1, Math.floor(this.stateLimit / 2));
2704
+ const startIndex = allStates.length - keepCount;
2705
+ const survivorsArray = allStates.slice(startIndex);
2706
+ const survivors = new Set(survivorsArray);
2707
+ this.stateCache.clear();
2708
+ this.stateCount = 0;
2709
+ for (let i = 0; i < survivorsArray.length; i++) {
2710
+ const state = survivorsArray[i];
2711
+
2712
+ // Sever ties to all states to prevent memory leaks and dangling pointers
2713
+ state.nextLatin1.fill(null);
2714
+ state.nextLatin1Anchored.fill(null);
2715
+ // zero-allocation cleanup
2716
+ state.transKeys.length = 0;
2717
+ state.transVals.length = 0;
2718
+ const hash = hashPCs(state.nfaStates);
2719
+ let bucket = this.stateCache.get(hash);
2720
+ if (!bucket) {
2721
+ bucket = [];
2722
+ this.stateCache.set(hash, bucket);
2723
+ }
2724
+ bucket.push(state);
2725
+ this.stateCount++;
2726
+ }
2727
+
2728
+ // Start state must either be preserved or nullified so it gets re-created
2729
+ if (this.startState && !survivors.has(this.startState)) {
2730
+ this.startState = null;
2731
+ }
2732
+ }
2607
2733
 
2608
2734
  // Compute the next DFA state given a current state and a character
2609
2735
  step(state, charCode, anchor) {
2610
- // OPTIMIZATION: ASCII Fast-Path
2611
- if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2612
- const next = state.nextAscii[charCode];
2613
- if (next !== null) {
2614
- return next;
2736
+ // OPTIMIZATION: Latin-1 Array Fast-Path
2737
+ if (charCode <= Unicode.MAX_LATIN1) {
2738
+ if (anchor === RE2Flags.UNANCHORED) {
2739
+ const next = state.nextLatin1[charCode];
2740
+ if (next !== null) return next;
2741
+ } else {
2742
+ const next = state.nextLatin1Anchored[charCode];
2743
+ if (next !== null) return next;
2615
2744
  }
2616
2745
  } else {
2746
+ // Dense Array Linear Search fallback for Runes > 255
2617
2747
  const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2618
- if (state.nextMap.has(key)) {
2619
- return state.nextMap.get(key);
2748
+ // get [key] -> nextState
2749
+ const keys = state.transKeys;
2750
+ const len = keys.length;
2751
+ for (let i = 0; i < len; i++) {
2752
+ if (keys[i] === key) return state.transVals[i];
2620
2753
  }
2621
2754
  }
2622
2755
  const nextPCs = [];
@@ -2633,11 +2766,17 @@
2633
2766
  const nextState = this.getState(nextPCs);
2634
2767
 
2635
2768
  // Cache the result
2636
- if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2637
- state.nextAscii[charCode] = nextState;
2769
+ if (charCode <= Unicode.MAX_LATIN1) {
2770
+ if (anchor === RE2Flags.UNANCHORED) {
2771
+ state.nextLatin1[charCode] = nextState;
2772
+ } else {
2773
+ state.nextLatin1Anchored[charCode] = nextState;
2774
+ }
2638
2775
  } else {
2639
2776
  const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2640
- state.nextMap.set(key, nextState);
2777
+ // store key -> nextState
2778
+ state.transKeys.push(key);
2779
+ state.transVals.push(nextState);
2641
2780
  }
2642
2781
  return nextState;
2643
2782
  }
@@ -2670,10 +2809,11 @@
2670
2809
  if (width === 0) {
2671
2810
  break;
2672
2811
  }
2673
- currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2812
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_LATIN1 && currentState.nextLatin1[rune] || this.step(currentState, rune, anchor);
2674
2813
 
2675
2814
  // If we hit an unrecoverable DFA error or bailout, signal fallback
2676
2815
  if (currentState === null) return null;
2816
+ currentState.lastSeen = ++this.clock;
2677
2817
  if (currentState.isMatch) {
2678
2818
  if (anchor === RE2Flags.ANCHOR_BOTH) {
2679
2819
  if (i + width === endPos) return true;
@@ -2721,9 +2861,10 @@
2721
2861
  const rune = r >> 3;
2722
2862
  const width = r & 7;
2723
2863
  if (width === 0) break;
2724
- currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2864
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_LATIN1 && currentState.nextLatin1[rune] || this.step(currentState, rune, anchor);
2725
2865
  if (currentState === null) return null; // Bailout to NFA
2726
2866
 
2867
+ currentState.lastSeen = ++this.clock;
2727
2868
  i += width;
2728
2869
  checkMatch(currentState, i);
2729
2870
  if (currentState.nfaStates.length === 0) {
@@ -2761,7 +2902,7 @@
2761
2902
  // Bitwise shift (>>> 5) instead of Math.floor( / 32)
2762
2903
  const visitedSize = prog.numInst() * (end + 1) + VISITED_BITS - 1 >>> 5;
2763
2904
  if (this.visited.length < visitedSize) {
2764
- this.visited = new Uint32Array(Math.floor(MAX_BACKTRACK_VECTOR / VISITED_BITS));
2905
+ this.visited = new Uint32Array(visitedSize);
2765
2906
  } else {
2766
2907
  this.visited.fill(0, 0, visitedSize);
2767
2908
  }
@@ -2847,11 +2988,12 @@
2847
2988
  const outInst = re2.prog.getInst(inst.out);
2848
2989
  if (Inst.isRuneOp(outInst.op)) {
2849
2990
  this.push(re2, inst.arg, currentPos, false);
2850
- currentPc = inst.out;
2991
+ currentPc = inst.arg;
2992
+ currentPos = this.end;
2851
2993
  continue;
2852
2994
  }
2853
2995
  this.push(re2, inst.out, this.end, false);
2854
- currentPc = inst.arg;
2996
+ currentPc = inst.out;
2855
2997
  continue;
2856
2998
  }
2857
2999
  case Inst.RUNE:
@@ -2932,6 +3074,11 @@
2932
3074
  if (currentPos === this.end) return true;
2933
3075
  break;
2934
3076
  }
3077
+ case Inst.LB_WRITE:
3078
+ case Inst.LB_CHECK:
3079
+ {
3080
+ throw new RE2JSInternalException('Backtracker cannot evaluate Lookbehind instructions');
3081
+ }
2935
3082
  default:
2936
3083
  {
2937
3084
  throw new RE2JSInternalException('bad inst');
@@ -3200,7 +3347,9 @@
3200
3347
  }
3201
3348
  runes.sort((a, b) => a - b);
3202
3349
  } else {
3203
- runes.push(...inst.runes);
3350
+ for (let j = 0; j < inst.runes.length; j++) {
3351
+ runes.push(inst.runes[j]);
3352
+ }
3204
3353
  }
3205
3354
  onePassRunes[pc] = runes;
3206
3355
  inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
@@ -3368,6 +3517,10 @@
3368
3517
  switch (inst.op) {
3369
3518
  case Inst.MATCH:
3370
3519
  {
3520
+ // Verify ANCHOR_BOTH constraint before accepting the match
3521
+ if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) {
3522
+ return null;
3523
+ }
3371
3524
  matched = true;
3372
3525
  if (matchcap.length > 0) {
3373
3526
  matchcap[0] = 0;
@@ -3798,6 +3951,88 @@
3798
3951
  }
3799
3952
  }
3800
3953
 
3954
/**
 * Aho-Corasick multi-pattern matcher.
 *
 * Builds a trie over the given patterns, adds failure links, and then answers
 * "does ANY pattern occur in this range?" in a single left-to-right pass.
 *
 * State layout (parallel arrays indexed by node id, node 0 is the root):
 *   - next[node]  : null-prototype object mapping a unit value -> child node id
 *   - fail[node]  : node id of the longest proper suffix that is also a trie path
 *   - match[node] : true when some pattern ends at this node or at any node
 *                   reachable from it through failure links
 */
class AhoCorasick {
  /**
   * @param {Iterable<ArrayLike<number>>} wordArrays - patterns, each given as a
   *   sequence of numeric units (UTF-16 code units or bytes).
   */
  constructor(wordArrays) {
    this.next = [Object.create(null)];
    this.fail = [0];
    this.match = [false];

    // Build the trie.
    for (const word of wordArrays) {
      let node = 0;
      for (let i = 0; i < word.length; i++) {
        const val = word[i];
        let child = this.next[node][val];
        if (child === undefined) {
          child = this.next.length;
          this.next.push(Object.create(null));
          this.fail.push(0);
          this.match.push(false);
          this.next[node][val] = child;
        }
        node = child;
      }
      this.match[node] = true;
    }

    // Build failure links breadth-first. The queue is consumed through a head
    // cursor instead of Array#shift so that dequeuing never re-indexes the array.
    const queue = [];
    const rootEdges = this.next[0];
    for (const key of Object.keys(rootEdges)) {
      // Depth-1 nodes always fail back to the root (already 0 from creation).
      queue.push(rootEdges[key]);
    }
    for (let head = 0; head < queue.length; head++) {
      const curr = queue[head];
      const edges = this.next[curr];
      // Edge maps have a null prototype, so every enumerated key is an own key.
      for (const key of Object.keys(edges)) {
        const child = edges[key];
        let failNode = this.fail[curr];
        while (failNode !== 0 && !(key in this.next[failNode])) {
          failNode = this.fail[failNode];
        }
        const target = this.next[failNode][key];
        this.fail[child] = target === undefined ? 0 : target;
        // The failure target is shallower than child, so its flag is final here.
        this.match[child] = this.match[child] || this.match[this.fail[child]];
        queue.push(child);
      }
    }
  }

  /**
   * Advances the automaton by one input unit.
   *
   * @param {number} node - current node id.
   * @param {number} val - next input unit.
   * @returns {number} node id after consuming val (0 when nothing matches).
   */
  step(node, val) {
    let state = node;
    let target = this.next[state][val];
    while (target === undefined && state !== 0) {
      state = this.fail[state];
      target = this.next[state][val];
    }
    return target === undefined ? 0 : target;
  }

  /**
   * Scans UTF-16 code units of charSeq in [start, end).
   *
   * @param {string} charSeq - text to scan.
   * @param {number} start - first index (inclusive).
   * @param {number} end - last index (exclusive).
   * @returns {boolean} true when any pattern occurs inside the range.
   */
  searchUTF16(charSeq, start, end) {
    // An empty pattern occurs at every position, including in an empty range.
    if (this.match[0]) return true;
    let node = 0;
    for (let i = start; i < end; i++) {
      node = this.step(node, charSeq.charCodeAt(i));
      if (this.match[node]) return true;
    }
    return false;
  }

  /**
   * Scans the elements of bytes in [start, end).
   *
   * @param {ArrayLike<number>} bytes - indexable sequence of numeric units.
   * @param {number} start - first index (inclusive).
   * @param {number} end - last index (exclusive).
   * @returns {boolean} true when any pattern occurs inside the range.
   */
  searchUTF8(bytes, start, end) {
    // An empty pattern occurs at every position, including in an empty range.
    if (this.match[0]) return true;
    let node = 0;
    for (let i = start; i < end; i++) {
      node = this.step(node, bytes[i]);
      if (this.match[node]) return true;
    }
    return false;
  }
}
3801
4036
  class Prefilter {
3802
4037
  static Type = {
3803
4038
  NONE: 0,
@@ -3810,6 +4045,8 @@
3810
4045
  this.subs = [];
3811
4046
  this.str = '';
3812
4047
  this.bytes = null;
4048
+ this.ac16 = null;
4049
+ this.ac8 = null;
3813
4050
  }
3814
4051
  eval(input, pos) {
3815
4052
  switch (this.type) {
@@ -3823,6 +4060,10 @@
3823
4060
  }
3824
4061
  return true;
3825
4062
  case Prefilter.Type.OR:
4063
+ // Exploit Aho-Corasick if it was successfully built
4064
+ if (this.ac16 && this.ac8) {
4065
+ return input.hasAnyString(this, pos);
4066
+ }
3826
4067
  for (let i = 0; i < this.subs.length; i++) {
3827
4068
  if (this.subs[i].eval(input, pos)) return true;
3828
4069
  }
@@ -3913,7 +4154,9 @@
3913
4154
  const s = PrefilterTree.simplify(sub);
3914
4155
  if (s.type !== Prefilter.Type.NONE) {
3915
4156
  if (s.type === Prefilter.Type.AND) {
3916
- newSubs.push(...s.subs);
4157
+ for (let j = 0; j < s.subs.length; j++) {
4158
+ newSubs.push(s.subs[j]);
4159
+ }
3917
4160
  } else {
3918
4161
  newSubs.push(s);
3919
4162
  }
@@ -3933,7 +4176,9 @@
3933
4176
  return new Prefilter(Prefilter.Type.NONE);
3934
4177
  }
3935
4178
  if (s.type === Prefilter.Type.OR) {
3936
- newSubs.push(...s.subs);
4179
+ for (let j = 0; j < s.subs.length; j++) {
4180
+ newSubs.push(s.subs[j]);
4181
+ }
3937
4182
  } else {
3938
4183
  newSubs.push(s);
3939
4184
  }
@@ -3955,6 +4200,27 @@
3955
4200
  }
3956
4201
  }
3957
4202
  pf.subs = uniqueSubs;
4203
+
4204
+ // Build an Aho-Corasick automaton if all children are exact matches
4205
+ let allExact = true;
4206
+ for (const sub of uniqueSubs) {
4207
+ if (sub.type !== Prefilter.Type.EXACT) {
4208
+ allExact = false;
4209
+ break;
4210
+ }
4211
+ }
4212
+ if (allExact && uniqueSubs.length > 1) {
4213
+ const words16 = uniqueSubs.map(s => {
4214
+ const arr = [];
4215
+ for (let i = 0; i < s.str.length; i++) {
4216
+ arr.push(s.str.charCodeAt(i));
4217
+ }
4218
+ return arr;
4219
+ });
4220
+ pf.ac16 = new AhoCorasick(words16);
4221
+ const words8 = uniqueSubs.map(s => s.bytes);
4222
+ pf.ac8 = new AhoCorasick(words8);
4223
+ }
3958
4224
  return pf;
3959
4225
  }
3960
4226
  return pf;
@@ -4481,7 +4747,9 @@
4481
4747
  // Flatten nested concatenations
4482
4748
  if (nsub.op === Regexp.Op.CONCAT) {
4483
4749
  changed = true;
4484
- newSubs.push(...nsub.subs);
4750
+ for (let j = 0; j < nsub.subs.length; j++) {
4751
+ newSubs.push(nsub.subs[j]);
4752
+ }
4485
4753
  continue;
4486
4754
  }
4487
4755
  } else if (re.op === Regexp.Op.ALTERNATE) {
@@ -4493,7 +4761,9 @@
4493
4761
  // Flatten nested alternations
4494
4762
  if (nsub.op === Regexp.Op.ALTERNATE) {
4495
4763
  changed = true;
4496
- newSubs.push(...nsub.subs);
4764
+ for (let j = 0; j < nsub.subs.length; j++) {
4765
+ newSubs.push(nsub.subs[j]);
4766
+ }
4497
4767
  continue;
4498
4768
  }
4499
4769
  }
@@ -5503,7 +5773,10 @@
5503
5773
  return t.pop();
5504
5774
  }
5505
5775
  static concatRunes(x, y) {
5506
- return [...x, ...y];
5776
+ for (let i = 0; i < y.length; i++) {
5777
+ x.push(y[i]);
5778
+ }
5779
+ return x;
5507
5780
  }
5508
5781
  constructor(wholeRegexp, flags = 0) {
5509
5782
  this.wholeRegexp = wholeRegexp;
@@ -5539,8 +5812,8 @@
5539
5812
  return re;
5540
5813
  }
5541
5814
  reuse(re) {
5542
- if (this.height !== null && Object.prototype.hasOwnProperty.call(this.height, re)) {
5543
- delete this.height[re];
5815
+ if (this.height !== null && this.height.has(re)) {
5816
+ this.height.delete(re);
5544
5817
  }
5545
5818
  if (re.subs !== null && re.subs.length > 0) {
5546
5819
  re.subs[0] = this.free;
@@ -5572,20 +5845,20 @@
5572
5845
  if (n <= 0) {
5573
5846
  n = 1;
5574
5847
  }
5575
- if (n > Parser.MAX_SIZE / this.repeats) {
5848
+ if (n > Math.floor(Parser.MAX_SIZE / this.repeats)) {
5576
5849
  this.repeats = Parser.MAX_SIZE;
5577
5850
  } else {
5578
5851
  this.repeats *= n;
5579
5852
  }
5580
5853
  }
5581
- if (this.numRegexp < Parser.MAX_SIZE / this.repeats) {
5854
+ if (this.numRegexp < Math.floor(Parser.MAX_SIZE / this.repeats)) {
5582
5855
  return;
5583
5856
  }
5584
5857
 
5585
5858
  // We need to start tracking size.
5586
5859
  // Make the map and belatedly populate it
5587
5860
  // with info about everything we've constructed so far.
5588
- this.size = Object.create(null);
5861
+ this.size = new Map();
5589
5862
  for (let reEx of this.stack) {
5590
5863
  this.checkSize(reEx);
5591
5864
  }
@@ -5596,8 +5869,8 @@
5596
5869
  }
5597
5870
  calcSize(re, force = false) {
5598
5871
  if (!force && this.size !== null) {
5599
- if (Object.prototype.hasOwnProperty.call(this.size, re)) {
5600
- return this.size[re];
5872
+ if (this.size.has(re)) {
5873
+ return this.size.get(re);
5601
5874
  }
5602
5875
  }
5603
5876
  let size = 0;
@@ -5657,9 +5930,9 @@
5657
5930
  }
5658
5931
  size = Math.max(1, size);
5659
5932
  if (this.size === null) {
5660
- this.size = Object.create(null);
5933
+ this.size = new Map();
5661
5934
  }
5662
- this.size[re] = size;
5935
+ this.size.set(re, size);
5663
5936
  return size;
5664
5937
  }
5665
5938
  checkHeight(re) {
@@ -5667,7 +5940,7 @@
5667
5940
  return;
5668
5941
  }
5669
5942
  if (this.height === null) {
5670
- this.height = Object.create(null);
5943
+ this.height = new Map();
5671
5944
  for (let reEx of this.stack) {
5672
5945
  this.checkHeight(reEx);
5673
5946
  }
@@ -5678,8 +5951,8 @@
5678
5951
  }
5679
5952
  calcHeight(re, force = false) {
5680
5953
  if (!force && this.height !== null) {
5681
- if (Object.prototype.hasOwnProperty.call(this.height, re)) {
5682
- return this.height[re];
5954
+ if (this.height.has(re)) {
5955
+ return this.height.get(re);
5683
5956
  }
5684
5957
  }
5685
5958
  let h = 1;
@@ -5690,9 +5963,9 @@
5690
5963
  }
5691
5964
  }
5692
5965
  if (this.height === null) {
5693
- this.height = Object.create(null);
5966
+ this.height = new Map();
5694
5967
  }
5695
- this.height[re] = h;
5968
+ this.height.set(re, h);
5696
5969
  return h;
5697
5970
  }
5698
5971
 
@@ -7814,7 +8087,7 @@
7814
8087
  changed = true;
7815
8088
  continue;
7816
8089
  } else if (ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
7817
- if (i + 3 >= size || data[i + 3] !== '=' && data[i + 3] !== '!') {
8090
+ if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
7818
8091
  result += '(?P<';
7819
8092
  i += 3;
7820
8093
  changed = true;