re2js 1.2.3 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v1.2.3
5
+ * @version v1.3.1
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -843,6 +843,181 @@ class MatcherInput {
843
843
  }
844
844
  }
845
845
 
846
/**
 * Common base class for the machine input views defined in this file (the UTF-8 and
 * UTF-16 inputs extend it). A machine input hands the text to the Machine one rune at a
 * time and provides one-character lookahead.
 */
class MachineInputBase {
  /**
   * Sentinel returned by step() implementations when no more input is available:
   * rune -1 in the high bits and width 0 in the low three bits.
   *
   * @returns {number} always -8 (i.e. -1 << 3)
   */
  static EOF() {
    return -(1 << 3);
  }

  /**
   * Returns the end position in the same units as step().
   * Reads the `end` field; this class never assigns it, subclasses do.
   *
   * @returns {number}
   */
  endPos() {
    const { end } = this;
    return end;
  }

  /**
   * Can we look ahead without losing info? This base implementation always says yes.
   *
   * @returns {boolean}
   */
  canCheckPrefix() {
    return true;
  }
}
865
+
866
// An implementation of MachineInput for UTF-8 byte arrays.
// |pos| and |width| are byte indices, relative to |start|.
class MachineUTF8Input extends MachineInputBase {
  /**
   * @param {ArrayLike<number>} bytes - UTF-8 encoded bytes (only the low 8 bits of each entry are used)
   * @param {number} [start=0] - absolute index of the first readable byte
   * @param {number} [end=bytes.length] - absolute index just past the last readable byte
   */
  constructor(bytes, start = 0, end = bytes.length) {
    super();
    this.bytes = bytes;
    this.start = start;
    this.end = end;
  }

  /**
   * Decodes the rune that begins at byte offset |i| (relative to |start|).
   *
   * Returns the width in bytes in the lower 3 bits and the rune (Unicode code point) in
   * the high bits. Never negative, except for EOF which is represented as -1 << 3 | 0.
   * EOF is also returned when the sequence announced by the lead byte would run past |end|.
   * Any lead byte that is not a 1-, 2- or 3-byte lead is decoded as a 4-byte sequence;
   * continuation bytes are not validated.
   *
   * @param {number} i
   * @returns {number}
   */
  step(i) {
    let idx = i + this.start;
    if (idx >= this.end) {
      return MachineInputBase.EOF();
    }
    const lead = this.bytes[idx++] & 255;
    if ((lead & 128) === 0) {
      // 0xxxxxxx: plain ASCII
      return (lead << 3) | 1;
    }

    let width;
    let rune;
    if ((lead & 224) === 192) {
      // 110xxxxx
      width = 2;
      rune = lead & 31;
    } else if ((lead & 240) === 224) {
      // 1110xxxx
      width = 3;
      rune = lead & 15;
    } else {
      width = 4;
      rune = lead & 7;
    }

    // idx already points past the lead byte; width - 1 continuation bytes must fit before |end|.
    if (idx + width - 1 > this.end) {
      return MachineInputBase.EOF();
    }
    for (let k = 1; k < width; k++) {
      rune = (rune << 6) | (this.bytes[idx++] & 63);
    }
    return (rune << 3) | width;
  }

  /**
   * Returns the index relative to |pos| at which |re2.prefixUTF8| is found
   * in this input stream, or a negative value if not found.
   *
   * @param {{prefixUTF8: ArrayLike<number>}} re2
   * @param {number} pos
   * @returns {number}
   */
  index(re2, pos) {
    const from = pos + this.start;
    const found = this.indexOf(this.bytes, re2.prefixUTF8, from);
    return found < 0 ? found : found - from;
  }

  /**
   * Returns a bitmask of EMPTY_* flags for position |pos|, computed by
   * Utils.emptyOpContext from the rune before |pos| and the rune at |pos|
   * (-1 stands for "no rune on that side").
   *
   * @param {number} pos - byte offset relative to |start|
   * @returns {number}
   */
  context(pos) {
    const abs = pos + this.start;
    let r1 = -1;
    if (abs > this.start && abs <= this.end) {
      // Mask like step() does so that signed byte containers are classified correctly.
      r1 = this.bytes[abs - 1] & 255;
      if (r1 >= 128) {
        // Previous byte belongs to a multi-byte sequence: walk back over continuation
        // bytes (10xxxxxx), looking at most 4 bytes back and never before |start|.
        const lim = abs - 4 < this.start ? this.start : abs - 4;
        let seqStart = abs - 2;
        while (seqStart >= lim && (this.bytes[seqStart] & 192) === 128) {
          seqStart--;
        }
        if (seqStart < this.start) {
          seqStart = this.start;
        }
        // step() expects an offset relative to |start| and adds |start| itself.
        r1 = this.step(seqStart - this.start) >> 3;
      }
    }
    // Same here: hand step() the relative offset, not the absolute index.
    const r2 = abs < this.end ? this.step(pos) >> 3 : -1;
    return Utils.emptyOpContext(r1, r2);
  }

  /**
   * Returns the index of the first occurrence of array |target| within
   * array |source| at or after |fromIndex|, or -1 if not found.
   * An empty |target| is reported as not found.
   *
   * @param {ArrayLike<number>} source
   * @param {ArrayLike<number>} target
   * @param {number} [fromIndex=0]
   * @returns {number}
   */
  indexOf(source, target, fromIndex = 0) {
    const targetLength = target.length;
    if (targetLength === 0) {
      return -1;
    }
    const lastStart = source.length - targetLength;
    for (let i = fromIndex; i <= lastStart; i++) {
      let j = 0;
      while (j < targetLength && source[i + j] === target[j]) {
        j++;
      }
      if (j === targetLength) {
        return i;
      }
    }
    return -1;
  }
}
970
+
971
// MachineInput implementation backed by a JS string.
// |pos| and |width| are in JS "char" (UTF-16 code unit) units.
class MachineUTF16Input extends MachineInputBase {
  /**
   * @param {string} charSequence - the text to read
   * @param {number} [start=0] - index of the first readable char
   * @param {number} [end=charSequence.length] - index just past the last readable char
   */
  constructor(charSequence, start = 0, end = charSequence.length) {
    super();
    this.charSequence = charSequence;
    this.start = start;
    this.end = end;
  }

  /**
   * Reads the rune at offset |pos| (relative to |start|).
   *
   * The result packs the rune (Unicode code point) into the high bits and its width, as
   * reported by Utils.charCount, into the lower 3 bits. Never negative, except for EOF
   * which is represented as -1 << 3 | 0.
   *
   * @param {number} pos
   * @returns {number}
   */
  step(pos) {
    const at = pos + this.start;
    if (!(at < this.end)) {
      return MachineInputBase.EOF();
    }
    const rune = this.charSequence.codePointAt(at);
    const width = Utils.charCount(rune);
    return (rune << 3) | width;
  }

  /**
   * Returns the index relative to |pos| at which |re2.prefix| is found
   * in this input stream, or a negative value if not found.
   *
   * @param {{prefix: string}} re2
   * @param {number} pos
   * @returns {number}
   */
  index(re2, pos) {
    const from = pos + this.start;
    const found = this.charSequence.indexOf(re2.prefix, from);
    if (found < 0) {
      return found;
    }
    return found - from;
  }

  /**
   * Returns a bitmask of EMPTY_* flags for position |pos|, computed by
   * Utils.emptyOpContext from the code point read at the preceding char index and the
   * code point at |pos| (-1 when that side lies outside the string).
   * Note that the bounds used here are those of the whole string, not |end|.
   *
   * @param {number} pos
   * @returns {number}
   */
  context(pos) {
    const at = pos + this.start;
    const text = this.charSequence;

    let before = -1;
    if (at > 0 && at <= text.length) {
      before = text.codePointAt(at - 1);
    }

    let after = -1;
    if (at < text.length) {
      after = text.codePointAt(at);
    }

    return Utils.emptyOpContext(before, after);
  }
}
1012
/**
 * Factory namespace for machine inputs: picks the input class that matches the
 * representation of the text (UTF-8 bytes or a UTF-16 JS string).
 */
class MachineInput {
  /**
   * Wraps a UTF-8 byte array.
   *
   * @param {ArrayLike<number>} bytes
   * @param {number} [start=0]
   * @param {number} [end=bytes.length]
   * @returns {MachineUTF8Input}
   */
  static fromUTF8(bytes, start = 0, end = bytes.length) {
    const utf8Input = new MachineUTF8Input(bytes, start, end);
    return utf8Input;
  }

  /**
   * Wraps a JS string.
   *
   * @param {string} charSequence
   * @param {number} [start=0]
   * @param {number} [end=charSequence.length]
   * @returns {MachineUTF16Input}
   */
  static fromUTF16(charSequence, start = 0, end = charSequence.length) {
    const utf16Input = new MachineUTF16Input(charSequence, start, end);
    return utf16Input;
  }
}
1020
+
846
1021
  class RE2JSException extends Error {
847
1022
  /** @param {string} message */
848
1023
  constructor(message) {
@@ -1479,181 +1654,6 @@ class Matcher {
1479
1654
  }
1480
1655
  }
1481
1656
 
1482
- /**
1483
- * MachineInput abstracts different representations of the input text supplied to the Machine. It
1484
- * provides one-character lookahead.
1485
- */
1486
- class MachineInputBase {
1487
- static EOF() {
1488
- return -1 << 3;
1489
- }
1490
-
1491
- // can we look ahead without losing info?
1492
- canCheckPrefix() {
1493
- return true;
1494
- }
1495
-
1496
- // Returns the end position in the same units as step().
1497
- endPos() {
1498
- return this.end;
1499
- }
1500
- }
1501
-
1502
- // An implementation of MachineInput for UTF-8 byte arrays.
1503
- // |pos| and |width| are byte indices.
1504
- class MachineUTF8Input extends MachineInputBase {
1505
- constructor(bytes, start = 0, end = bytes.length) {
1506
- super();
1507
- this.bytes = bytes;
1508
- this.start = start;
1509
- this.end = end;
1510
- }
1511
-
1512
- // Returns the rune at the specified index; the units are
1513
- // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1514
- // indices. Returns the width (in the same units) of the rune in
1515
- // the lower 3 bits, and the rune (Unicode code point) in the high
1516
- // bits. Never negative, except for EOF which is represented as -1
1517
- // << 3 | 0.
1518
- step(i) {
1519
- i += this.start;
1520
- if (i >= this.end) {
1521
- return MachineInputBase.EOF();
1522
- }
1523
- let x = this.bytes[i++] & 255;
1524
- if ((x & 128) === 0) {
1525
- return x << 3 | 1;
1526
- } else if ((x & 224) === 192) {
1527
- x = x & 31;
1528
- if (i >= this.end) {
1529
- return MachineInputBase.EOF();
1530
- }
1531
- x = x << 6 | this.bytes[i++] & 63;
1532
- return x << 3 | 2;
1533
- } else if ((x & 240) === 224) {
1534
- x = x & 15;
1535
- if (i + 1 >= this.end) {
1536
- return MachineInputBase.EOF();
1537
- }
1538
- x = x << 6 | this.bytes[i++] & 63;
1539
- x = x << 6 | this.bytes[i++] & 63;
1540
- return x << 3 | 3;
1541
- } else {
1542
- x = x & 7;
1543
- if (i + 2 >= this.end) {
1544
- return MachineInputBase.EOF();
1545
- }
1546
- x = x << 6 | this.bytes[i++] & 63;
1547
- x = x << 6 | this.bytes[i++] & 63;
1548
- x = x << 6 | this.bytes[i++] & 63;
1549
- return x << 3 | 4;
1550
- }
1551
- }
1552
-
1553
- // Returns the index relative to |pos| at which |re2.prefix| is found
1554
- // in this input stream, or a negative value if not found.
1555
- index(re2, pos) {
1556
- pos += this.start;
1557
- const i = this.indexOf(this.bytes, re2.prefixUTF8, pos);
1558
- return i < 0 ? i : i - pos;
1559
- }
1560
-
1561
- // Returns a bitmask of EMPTY_* flags.
1562
- context(pos) {
1563
- pos += this.start;
1564
- let r1 = -1;
1565
- if (pos > this.start && pos <= this.end) {
1566
- let start = pos - 1;
1567
- r1 = this.bytes[start--];
1568
- if (r1 >= 128) {
1569
- let lim = pos - 4;
1570
- if (lim < this.start) {
1571
- lim = this.start;
1572
- }
1573
- while (start >= lim && (this.bytes[start] & 192) === 128) {
1574
- start--;
1575
- }
1576
- if (start < this.start) {
1577
- start = this.start;
1578
- }
1579
- r1 = this.step(start) >> 3;
1580
- }
1581
- }
1582
- const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
1583
- return Utils.emptyOpContext(r1, r2);
1584
- }
1585
-
1586
- // Returns the index of the first occurrence of array |target| within
1587
- // array |source| after |fromIndex|, or -1 if not found.
1588
- indexOf(source, target, fromIndex = 0) {
1589
- let targetLength = target.length;
1590
- if (targetLength === 0) {
1591
- return -1;
1592
- }
1593
- let sourceLength = source.length;
1594
- for (let i = fromIndex; i <= sourceLength - targetLength; i++) {
1595
- for (let j = 0; j < targetLength; j++) {
1596
- if (source[i + j] !== target[j]) {
1597
- break;
1598
- } else if (j === targetLength - 1) {
1599
- return i;
1600
- }
1601
- }
1602
- }
1603
- return -1;
1604
- }
1605
- }
1606
-
1607
- // |pos| and |width| are in JS "char" units.
1608
- class MachineUTF16Input extends MachineInputBase {
1609
- constructor(charSequence, start = 0, end = charSequence.length) {
1610
- super();
1611
- this.charSequence = charSequence;
1612
- this.start = start;
1613
- this.end = end;
1614
- }
1615
-
1616
- // Returns the rune at the specified index; the units are
1617
- // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1618
- // indices. Returns the width (in the same units) of the rune in
1619
- // the lower 3 bits, and the rune (Unicode code point) in the high
1620
- // bits. Never negative, except for EOF which is represented as -1
1621
- // << 3 | 0.
1622
- step(pos) {
1623
- pos += this.start;
1624
- if (pos < this.end) {
1625
- const rune = this.charSequence.codePointAt(pos);
1626
- return rune << 3 | Utils.charCount(rune);
1627
- } else {
1628
- return MachineInputBase.EOF();
1629
- }
1630
- }
1631
-
1632
- // Returns the index relative to |pos| at which |re2.prefix| is found
1633
- // in this input stream, or a negative value if not found.
1634
- index(re2, pos) {
1635
- pos += this.start;
1636
- const i = this.charSequence.indexOf(re2.prefix, pos);
1637
- return i < 0 ? i : i - pos;
1638
- }
1639
-
1640
- // Returns a bitmask of EMPTY_* flags.
1641
- context(pos) {
1642
- pos += this.start;
1643
- const r1 = pos > 0 && pos <= this.charSequence.length ? this.charSequence.codePointAt(pos - 1) : -1;
1644
- const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
1645
- return Utils.emptyOpContext(r1, r2);
1646
- }
1647
- }
1648
- class MachineInput {
1649
- static fromUTF8(bytes, start = 0, end = bytes.length) {
1650
- return new MachineUTF8Input(bytes, start, end);
1651
- }
1652
- static fromUTF16(charSequence, start = 0, end = charSequence.length) {
1653
- return new MachineUTF16Input(charSequence, start, end);
1654
- }
1655
- }
1656
-
1657
1657
  /**
1658
1658
  * A single instruction in the regular expression virtual machine.
1659
1659
  *
@@ -2112,6 +2112,204 @@ class Machine {
2112
2112
  }
2113
2113
  }
2114
2114
 
2115
// FNV-1a style 32-bit hash over a sequence of integers: each element is XOR-ed into the
// running hash, which is then multiplied by the FNV prime with 32-bit wrap-around.
// Returns a signed 32-bit integer; an empty sequence hashes to the offset basis.
const hashPCs = pcs => {
  const FNV_OFFSET_BASIS = -2128831035; // 0x811c9dc5 as a signed 32-bit value
  const FNV_PRIME = 16777619; // 0x01000193
  let hash = FNV_OFFSET_BASIS;
  for (const pc of pcs) {
    hash = Math.imul(hash ^ pc, FNV_PRIME);
  }
  return hash;
};
2125
+
2126
// Element-wise strict comparison of two array-likes, used to resolve hash collisions.
// Allocates nothing; arrays of different length are never equal.
const arraysEqual = (a, b) => {
  const size = a.length;
  if (size !== b.length) {
    return false;
  }
  let idx = 0;
  while (idx < size && a[idx] === b[idx]) {
    idx++;
  }
  return idx === size;
};
2134
/**
 * One state of the lazily built DFA: a set of NFA instruction PCs plus the cached
 * transitions that lead out of it.
 */
class DFAState {
  /**
   * @param {Int32Array} nfaStates - instruction PCs that make up this state
   * @param {boolean} isMatch - whether this state is a matching state
   */
  constructor(nfaStates, isMatch) {
    const asciiSlots = Unicode.MAX_ASCII + 1;

    this.nfaStates = nfaStates;
    this.isMatch = isMatch;
    // Flat table indexed by ASCII char code; null marks a transition not computed yet.
    this.nextAscii = Array.from({ length: asciiSlots }, () => null);
    // Transition cache for every other key: number -> DFAState (or null).
    this.nextMap = new Map();
  }
}
2142
/**
 * Lazily built DFA on top of the compiled NFA program.
 *
 * Each DFA state is a sorted set of NFA instruction PCs; states and transitions are created
 * on demand while input is consumed and are cached for later calls. Whenever the DFA cannot
 * handle a situation (an EMPTY_WIDTH instruction is reachable, or the state limit is hit)
 * its methods return `null`, which callers treat as "use another engine".
 */
class DFA {
  /**
   * @param {object} prog - compiled program; `prog.start` and `prog.getInst(pc)` are used
   * @param {number} [stateLimit=10000] - maximum number of cached states before the cache
   *   is flushed and the current match attempt is abandoned
   */
  constructor(prog, stateLimit = 10000) {
    this.prog = prog;
    this.stateCache = new Map(); // hash(number) -> DFAState[]
    this.stateCount = 0; // number of states currently cached
    this.startState = null; // built on first use by match()
    this.stateLimit = stateLimit; // bounds memory used by the state cache
  }

  /**
   * Follows epsilon (empty) transitions from |pcs| to collect every PC reachable without
   * consuming a character.
   *
   * @param {Iterable<number>} pcs - seed instruction PCs
   * @returns {{pcs: Int32Array, isMatch: boolean}|null} the closure as an ascending
   *   Int32Array plus whether it contains a MATCH instruction, or null when an
   *   EMPTY_WIDTH instruction is reachable
   */
  computeClosure(pcs) {
    const closure = new Set();
    const stack = [...pcs];
    let isMatch = false;
    while (stack.length > 0) {
      const pc = stack.pop();
      if (closure.has(pc)) {
        continue;
      }
      closure.add(pc);
      const inst = this.prog.getInst(pc);
      switch (inst.op) {
        case Inst.MATCH:
          isMatch = true;
          break;
        case Inst.ALT:
        case Inst.ALT_MATCH:
          stack.push(inst.out);
          stack.push(inst.arg);
          break;
        case Inst.NOP:
        case Inst.CAPTURE:
          stack.push(inst.out);
          break;
        case Inst.EMPTY_WIDTH:
          // Empty-width assertions are not modelled by the DFA: bail out.
          return null;
        default:
          // Any other instruction stays in the set and is examined by step().
          break;
      }
    }
    // Typed arrays sort numerically by default, giving a canonical order for hashing.
    const sortedPCs = Int32Array.from(closure).sort();
    return {
      pcs: sortedPCs,
      isMatch
    };
  }

  /**
   * Gets or creates the DFA state for the epsilon closure of |pcs|.
   *
   * @param {Iterable<number>} pcs - seed instruction PCs
   * @returns {DFAState|null} the cached or newly created state; null when the closure
   *   bails out or when the state limit has been reached (the cache is flushed in that case)
   */
  getState(pcs) {
    const closureResult = this.computeClosure(pcs);
    if (!closureResult) {
      return null;
    }
    const sortedPCs = closureResult.pcs;
    const hash = hashPCs(sortedPCs);

    let bucket = this.stateCache.get(hash);
    if (bucket) {
      // Several PC sets may share a hash; compare contents to find the real one.
      for (const known of bucket) {
        if (arraysEqual(known.nfaStates, sortedPCs)) {
          return known;
        }
      }
    } else {
      bucket = [];
      this.stateCache.set(hash, bucket);
    }

    // Too many states: drop everything (including the start state) and give up on this run.
    if (this.stateCount >= this.stateLimit) {
      this.stateCache.clear();
      this.stateCount = 0;
      this.startState = null;
      return null;
    }

    const state = new DFAState(sortedPCs, closureResult.isMatch);
    bucket.push(state);
    this.stateCount++;
    return state;
  }

  /**
   * Computes (or fetches from cache) the state reached from |state| on |charCode|.
   *
   * For unanchored searches the program start is added to every successor set so that a
   * match may begin at any position. Unanchored ASCII transitions live in the flat
   * `nextAscii` table; all others live in `nextMap`, with anchored keys shifted past
   * Unicode.MAX_RUNE so they cannot collide with unanchored ones.
   *
   * @param {DFAState} state
   * @param {number} charCode - rune to consume
   * @param {number} anchor - one of the RE2Flags anchor values
   * @returns {DFAState|null} next state, or null when getState() bails out
   */
  step(state, charCode, anchor) {
    const unanchored = anchor === RE2Flags.UNANCHORED;
    const useAsciiTable = unanchored && charCode <= Unicode.MAX_ASCII;
    const mapKey = charCode + (unanchored ? 0 : Unicode.MAX_RUNE + 1);

    if (useAsciiTable) {
      const cached = state.nextAscii[charCode];
      if (cached !== null) {
        return cached;
      }
    } else if (state.nextMap.has(mapKey)) {
      return state.nextMap.get(mapKey);
    }

    const nextPCs = [];
    for (let i = 0; i < state.nfaStates.length; i++) {
      const inst = this.prog.getInst(state.nfaStates[i]);
      if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
        nextPCs.push(inst.out);
      }
    }
    if (unanchored) {
      nextPCs.push(this.prog.start);
    }
    const nextState = this.getState(nextPCs);

    // Cache the result in the same place it was looked up.
    if (useAsciiTable) {
      state.nextAscii[charCode] = nextState;
    } else {
      state.nextMap.set(mapKey, nextState);
    }
    return nextState;
  }

  /**
   * Runs the DFA over |input| starting at |pos|.
   *
   * @param {object} input - machine input providing `endPos()` and `step(i)`
   * @param {number} pos - start position in the input's units
   * @param {number} anchor - one of the RE2Flags anchor values
   * @returns {boolean|null} true on match, false on no match, null when the DFA bailed
   *   out and the caller should fall back to another engine
   */
  match(input, pos, anchor) {
    const unanchored = anchor === RE2Flags.UNANCHORED;
    const mustReachEnd = anchor === RE2Flags.ANCHOR_BOTH;

    if ((anchor === RE2Flags.ANCHOR_START || mustReachEnd) && pos !== 0) {
      return false;
    }
    if (!this.startState) {
      this.startState = this.getState([this.prog.start]);
      if (!this.startState) {
        return null;
      }
    }

    const endPos = input.endPos();
    let currentState = this.startState;
    // With ANCHOR_BOTH a matching state only counts once the whole input is consumed.
    if (currentState.isMatch && (!mustReachEnd || pos === endPos)) {
      return true;
    }

    let i = pos;
    while (i < endPos) {
      const r = input.step(i);
      const rune = r >> 3;
      const width = r & 7;

      // A zero width means EOF; stop rather than loop forever.
      if (width === 0) {
        break;
      }
      currentState = this.step(currentState, rune, anchor);
      if (currentState === null) {
        return null;
      }
      if (currentState.isMatch && (!mustReachEnd || i + width === endPos)) {
        return true;
      }
      // An empty state set can never recover when the match is anchored.
      if (!unanchored && currentState.nfaStates.length === 0) {
        return false;
      }
      i += width;
    }
    return false;
  }
}
2312
+
2115
2313
  /**
2116
2314
  * Regular expression abstract syntax tree. Produced by parser, used by compiler. NB, this
2117
2315
  * corresponds to {@code syntax.regexp} in the Go implementation; Go's {@code regexp} is called
@@ -5244,6 +5442,22 @@ class RE2 {
5244
5442
  this.prefixComplete = false; // true if prefix is the entire regexp
5245
5443
  this.prefixRune = 0; // first rune in prefix
5246
5444
  this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
5445
+ this.dfa = new DFA(prog); // Initialize the Lazy DFA
5446
+ }
5447
+ executeEngine(input, pos, anchor, ncap) {
5448
+ // If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
5449
+ // We must use the NFA.
5450
+ if (ncap > 0) {
5451
+ return this.doExecuteNFA(input, pos, anchor, ncap);
5452
+ }
5453
+ const dfaResult = this.dfa.match(input, pos, anchor);
5454
+ if (dfaResult !== null) {
5455
+ // DFA succeeded (returned true or false)
5456
+ return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
5457
+ }
5458
+
5459
+ // Fallback to NFA
5460
+ return this.doExecuteNFA(input, pos, anchor, ncap);
5247
5461
  }
5248
5462
 
5249
5463
  /**
@@ -5328,10 +5542,10 @@ class RE2 {
5328
5542
  return this.expr;
5329
5543
  }
5330
5544
 
5331
- // doExecute() finds the leftmost match in the input and returns
5545
+ // doExecuteNFA() finds the leftmost match in the input and returns
5332
5546
  // the position of its subexpressions.
5333
5547
  // Derived from exec.go.
5334
- doExecute(input, pos, anchor, ncap) {
5548
+ doExecuteNFA(input, pos, anchor, ncap) {
5335
5549
  let m = this.get();
5336
5550
  // The Treiber stack cannot reuse nodes, unless the node to be reused has only ever been at
5337
5551
  // the bottom of the stack (i.e., next == null).
@@ -5349,7 +5563,7 @@ class RE2 {
5349
5563
  return cap;
5350
5564
  }
5351
5565
  match(s) {
5352
- return this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 0) !== null;
5566
+ return this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 0) !== null;
5353
5567
  }
5354
5568
 
5355
5569
  /**
@@ -5377,7 +5591,7 @@ class RE2 {
5377
5591
  return [false, null];
5378
5592
  }
5379
5593
  const machineInput = input.isUTF16Encoding() ? MachineInput.fromUTF16(input.asCharSequence(), 0, end) : MachineInput.fromUTF8(input.asBytes(), 0, end);
5380
- const groupMatch = this.doExecute(machineInput, start, anchor, 2 * ngroup);
5594
+ const groupMatch = this.executeEngine(machineInput, start, anchor, 2 * ngroup);
5381
5595
  if (groupMatch === null) {
5382
5596
  return [false, null];
5383
5597
  }
@@ -5389,7 +5603,7 @@ class RE2 {
5389
5603
  */
5390
5604
  // This is visible for testing.
5391
5605
  matchUTF8(b) {
5392
- return this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 0) !== null;
5606
+ return this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 0) !== null;
5393
5607
  }
5394
5608
 
5395
5609
  /**
@@ -5426,7 +5640,7 @@ class RE2 {
5426
5640
  const input = MachineInput.fromUTF16(src);
5427
5641
  let numReplaces = 0;
5428
5642
  while (searchPos <= src.length) {
5429
- const a = this.doExecute(input, searchPos, RE2Flags.UNANCHORED, 2);
5643
+ const a = this.executeEngine(input, searchPos, RE2Flags.UNANCHORED, 2);
5430
5644
  if (a === null || a.length === 0) {
5431
5645
  break;
5432
5646
  }
@@ -5484,7 +5698,7 @@ class RE2 {
5484
5698
  let i = 0;
5485
5699
  let prevMatchEnd = -1;
5486
5700
  while (i < n && pos <= end) {
5487
- const matches = this.doExecute(input, pos, RE2Flags.UNANCHORED, this.prog.numCap);
5701
+ const matches = this.executeEngine(input, pos, RE2Flags.UNANCHORED, this.prog.numCap);
5488
5702
  if (matches === null || matches.length === 0) {
5489
5703
  break;
5490
5704
  }
@@ -5555,7 +5769,7 @@ class RE2 {
5555
5769
  */
5556
5770
  // This is visible for testing.
5557
5771
  findUTF8(b) {
5558
- const a = this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5772
+ const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5559
5773
  if (a === null) {
5560
5774
  return null;
5561
5775
  }
@@ -5570,7 +5784,7 @@ class RE2 {
5570
5784
  */
5571
5785
  // This is visible for testing.
5572
5786
  findUTF8Index(b) {
5573
- const a = this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5787
+ const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5574
5788
  if (a === null) {
5575
5789
  return null;
5576
5790
  }
@@ -5587,7 +5801,7 @@ class RE2 {
5587
5801
  */
5588
5802
  // This is visible for testing.
5589
5803
  find(s) {
5590
- const a = this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5804
+ const a = this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5591
5805
  if (a === null) {
5592
5806
  return '';
5593
5807
  }
@@ -5603,7 +5817,7 @@ class RE2 {
5603
5817
  */
5604
5818
  // This is visible for testing.
5605
5819
  findIndex(s) {
5606
- return this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5820
+ return this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5607
5821
  }
5608
5822
 
5609
5823
  /**
@@ -5615,7 +5829,7 @@ class RE2 {
5615
5829
  */
5616
5830
  // This is visible for testing.
5617
5831
  findUTF8Submatch(b) {
5618
- const a = this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5832
+ const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5619
5833
  if (a === null) {
5620
5834
  return null;
5621
5835
  }
@@ -5637,7 +5851,7 @@ class RE2 {
5637
5851
  */
5638
5852
  // This is visible for testing.
5639
5853
  findUTF8SubmatchIndex(b) {
5640
- return this.pad(this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5854
+ return this.pad(this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5641
5855
  }
5642
5856
 
5643
5857
  /**
@@ -5649,7 +5863,7 @@ class RE2 {
5649
5863
  */
5650
5864
  // This is visible for testing.
5651
5865
  findSubmatch(s) {
5652
- const a = this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5866
+ const a = this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5653
5867
  if (a === null) {
5654
5868
  return null;
5655
5869
  }
@@ -5671,7 +5885,7 @@ class RE2 {
5671
5885
  */
5672
5886
  // This is visible for testing.
5673
5887
  findSubmatchIndex(s) {
5674
- return this.pad(this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5888
+ return this.pad(this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5675
5889
  }
5676
5890
 
5677
5891
  /**
@@ -6123,6 +6337,39 @@ class RE2JS {
6123
6337
  return new Matcher(this, input);
6124
6338
  }
6125
6339
 
6340
+ /**
6341
+ * Tests whether the regular expression matches any part of the input string.
6342
+ * Performance Note: This method is highly optimized. Because it only returns
6343
+ * a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
6344
+ * and guarantees execution on the high-speed DFA engine whenever possible.
6345
+ *
6346
+ * @param {string|number[]} input - The input string or UTF-8 byte array to test against.
6347
+ * @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
6348
+ */
6349
+ test(input) {
6350
+ if (Array.isArray(input)) {
6351
+ // Reuse the existing UTF-8 fast-path method
6352
+ return this.re2Input.matchUTF8(input);
6353
+ }
6354
+
6355
+ // Reuse the existing UTF-16 fast-path method
6356
+ return this.re2Input.match(input);
6357
+ }
6358
+
6359
+ /**
6360
+ * Tests whether the regular expression matches the ENTIRE input string.
6361
+ * * **Performance Note:** This operates identically to `.matches()`, but is significantly
6362
+ * faster because it does not request capture group data. By requesting 0 capture groups,
6363
+ * it securely routes execution through the DFA fast-path.
6364
+ *
6365
+ * @param {string|number[]} input - The input string or UTF-8 byte array to test against.
6366
+ * @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
6367
+ */
6368
+ testExact(input) {
6369
+ const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
6370
+ return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
6371
+ }
6372
+
6126
6373
  /**
6127
6374
  * Splits input around instances of the regular expression. It returns an array giving the strings
6128
6375
  * that occur before, between, and after instances of the regular expression.