re2js 1.2.3 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v1.2.3
5
+ * @version v1.3.1
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -841,6 +841,181 @@ class MatcherInput {
841
841
  }
842
842
  }
843
843
 
/**
 * MachineInput abstracts different representations of the input text supplied to the Machine.
 * It provides one-character lookahead.
 *
 * Subclasses are expected to set `start` and `end` and to implement `step`, `index` and
 * `context`.
 */
class MachineInputBase {
  /**
   * Sentinel returned by step() when there is nothing left to read: rune -1 in the high
   * bits and a width of 0 in the low 3 bits.
   *
   * @returns {number}
   */
  static EOF() {
    return -1 << 3;
  }

  // can we look ahead without losing info?
  canCheckPrefix() {
    return true;
  }

  // Returns the end position (the `end` value handed to the constructor).
  endPos() {
    return this.end;
  }
}

// An implementation of MachineInput for UTF-8 byte arrays.
// |pos| and |width| are byte indices.
class MachineUTF8Input extends MachineInputBase {
  /**
   * @param {ArrayLike<number>} bytes - UTF-8 encoded bytes; every read is masked with
   *   `& 255`, so signed byte values are tolerated.
   * @param {number} [start=0] - offset added to every position passed to step/index/context.
   * @param {number} [end=bytes.length] - absolute index one past the last readable byte.
   */
  constructor(bytes, start = 0, end = bytes.length) {
    super();
    this.bytes = bytes;
    this.start = start;
    this.end = end;
  }

  /**
   * Decodes the rune that begins at position |i| (relative to `start`).
   *
   * The result packs the width in bytes into the low 3 bits and the code point into the
   * high bits. It is never negative, except for EOF (`-1 << 3 | 0`), which is also returned
   * when a multi-byte sequence would run past `end`.
   *
   * No validation of lead or continuation bytes is performed: any lead byte that is not of
   * the form 0xxxxxxx, 110xxxxx or 1110xxxx is decoded as a 4-byte sequence.
   *
   * @param {number} i
   * @returns {number}
   */
  step(i) {
    i += this.start;
    if (i >= this.end) {
      return MachineInputBase.EOF();
    }
    let x = this.bytes[i++] & 255;
    if ((x & 128) === 0) {
      // 0xxxxxxx: ASCII
      return (x << 3) | 1;
    }
    if ((x & 224) === 192) {
      // 110xxxxx 10xxxxxx
      x &= 31;
      if (i >= this.end) {
        return MachineInputBase.EOF();
      }
      x = (x << 6) | (this.bytes[i++] & 63);
      return (x << 3) | 2;
    }
    if ((x & 240) === 224) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      x &= 15;
      if (i + 1 >= this.end) {
        return MachineInputBase.EOF();
      }
      x = (x << 6) | (this.bytes[i++] & 63);
      x = (x << 6) | (this.bytes[i++] & 63);
      return (x << 3) | 3;
    }
    // everything else is treated as 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    x &= 7;
    if (i + 2 >= this.end) {
      return MachineInputBase.EOF();
    }
    x = (x << 6) | (this.bytes[i++] & 63);
    x = (x << 6) | (this.bytes[i++] & 63);
    x = (x << 6) | (this.bytes[i++] & 63);
    return (x << 3) | 4;
  }

  /**
   * Returns the index relative to |pos| at which |re2.prefixUTF8| is found in this input,
   * or a negative value if not found. Only occurrences that lie entirely before `end`
   * are reported.
   *
   * @param {{prefixUTF8: ArrayLike<number>}} re2
   * @param {number} pos
   * @returns {number}
   */
  index(re2, pos) {
    pos += this.start;
    const i = this.indexOf(this.bytes, re2.prefixUTF8, pos, this.end);
    return i < 0 ? i : i - pos;
  }

  /**
   * Returns a bitmask of EMPTY_* flags for position |pos| (relative to `start`), computed by
   * Utils.emptyOpContext from the rune before and the rune at that position (-1 when there
   * is none inside [start, end)).
   *
   * @param {number} pos
   * @returns {number}
   */
  context(pos) {
    pos += this.start;
    let r1 = -1;
    if (pos > this.start && pos <= this.end) {
      let start = pos - 1;
      // Mask like step() does, so a signed byte is not mistaken for "no previous rune".
      r1 = this.bytes[start--] & 255;
      if (r1 >= 128) {
        // Non-ASCII: walk back over continuation bytes (10xxxxxx), at most 4 bytes in
        // total and never before `this.start`, to find where the previous rune begins.
        const lim = Math.max(pos - 4, this.start);
        while (start >= lim && (this.bytes[start] & 192) === 128) {
          start--;
        }
        if (start < this.start) {
          start = this.start;
        }
        // step() expects a position relative to this.start; `start` is absolute here.
        r1 = this.step(start - this.start) >> 3;
      }
    }
    // Same conversion for the lookahead rune: `pos` is absolute at this point.
    const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
    return Utils.emptyOpContext(r1, r2);
  }

  /**
   * Returns the index of the first occurrence of array |target| within array |source| that
   * starts at or after |fromIndex| and ends at or before |toIndex|, or -1 if there is none.
   * An empty |target| is never found.
   *
   * @param {ArrayLike<number>} source
   * @param {ArrayLike<number>} target
   * @param {number} [fromIndex=0]
   * @param {number} [toIndex=source.length] - exclusive upper bound of the search window.
   * @returns {number}
   */
  indexOf(source, target, fromIndex = 0, toIndex = source.length) {
    const targetLength = target.length;
    if (targetLength === 0) {
      return -1;
    }
    const lastStart = Math.min(toIndex, source.length) - targetLength;
    search: for (let i = fromIndex; i <= lastStart; i++) {
      for (let j = 0; j < targetLength; j++) {
        if (source[i + j] !== target[j]) {
          continue search;
        }
      }
      return i;
    }
    return -1;
  }
}

// |pos| and |width| are in JS "char" (UTF-16 code unit) units.
class MachineUTF16Input extends MachineInputBase {
  /**
   * @param {string} charSequence - the text to scan.
   * @param {number} [start=0] - offset added to every position passed to step/index/context.
   * @param {number} [end=charSequence.length] - absolute index one past the last readable unit.
   */
  constructor(charSequence, start = 0, end = charSequence.length) {
    super();
    this.charSequence = charSequence;
    this.start = start;
    this.end = end;
  }

  /**
   * Returns the code point at |pos| (relative to `start`) in the high bits and its width
   * in UTF-16 code units (from Utils.charCount) in the low 3 bits, or EOF at/after `end`.
   *
   * @param {number} pos
   * @returns {number}
   */
  step(pos) {
    pos += this.start;
    if (pos >= this.end) {
      return MachineInputBase.EOF();
    }
    const rune = this.charSequence.codePointAt(pos);
    return (rune << 3) | Utils.charCount(rune);
  }

  /**
   * Returns the index relative to |pos| at which |re2.prefix| is found in this input, or a
   * negative value if not found. Only occurrences that lie entirely before `end` are
   * reported.
   *
   * @param {{prefix: string}} re2
   * @param {number} pos
   * @returns {number}
   */
  index(re2, pos) {
    pos += this.start;
    const i = this.charSequence.indexOf(re2.prefix, pos);
    if (i < 0) {
      return i;
    }
    return i + re2.prefix.length > this.end ? -1 : i - pos;
  }

  /**
   * Returns a bitmask of EMPTY_* flags for position |pos| (relative to `start`).
   *
   * NOTE(review): the neighbours are looked up against the whole charSequence rather than
   * the [start, end) window, and the previous unit is read with codePointAt(pos - 1), which
   * yields the lone low surrogate after an astral character. Both are kept as-is here;
   * confirm against callers before tightening.
   *
   * @param {number} pos
   * @returns {number}
   */
  context(pos) {
    pos += this.start;
    const text = this.charSequence;
    const r1 = pos > 0 && pos <= text.length ? text.codePointAt(pos - 1) : -1;
    const r2 = pos < text.length ? text.codePointAt(pos) : -1;
    return Utils.emptyOpContext(r1, r2);
  }
}

// Factory for the concrete MachineInput implementations.
class MachineInput {
  static fromUTF8(bytes, start = 0, end = bytes.length) {
    return new MachineUTF8Input(bytes, start, end);
  }

  static fromUTF16(charSequence, start = 0, end = charSequence.length) {
    return new MachineUTF16Input(charSequence, start, end);
  }
}
1018
+
844
1019
  class RE2JSException extends Error {
845
1020
  /** @param {string} message */
846
1021
  constructor(message) {
@@ -1477,181 +1652,6 @@ class Matcher {
1477
1652
  }
1478
1653
  }
1479
1654
 
1480
- /**
1481
- * MachineInput abstracts different representations of the input text supplied to the Machine. It
1482
- * provides one-character lookahead.
1483
- */
1484
- class MachineInputBase {
1485
- static EOF() {
1486
- return -1 << 3;
1487
- }
1488
-
1489
- // can we look ahead without losing info?
1490
- canCheckPrefix() {
1491
- return true;
1492
- }
1493
-
1494
- // Returns the end position in the same units as step().
1495
- endPos() {
1496
- return this.end;
1497
- }
1498
- }
1499
-
1500
- // An implementation of MachineInput for UTF-8 byte arrays.
1501
- // |pos| and |width| are byte indices.
1502
- class MachineUTF8Input extends MachineInputBase {
1503
- constructor(bytes, start = 0, end = bytes.length) {
1504
- super();
1505
- this.bytes = bytes;
1506
- this.start = start;
1507
- this.end = end;
1508
- }
1509
-
1510
- // Returns the rune at the specified index; the units are
1511
- // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1512
- // indices. Returns the width (in the same units) of the rune in
1513
- // the lower 3 bits, and the rune (Unicode code point) in the high
1514
- // bits. Never negative, except for EOF which is represented as -1
1515
- // << 3 | 0.
1516
- step(i) {
1517
- i += this.start;
1518
- if (i >= this.end) {
1519
- return MachineInputBase.EOF();
1520
- }
1521
- let x = this.bytes[i++] & 255;
1522
- if ((x & 128) === 0) {
1523
- return x << 3 | 1;
1524
- } else if ((x & 224) === 192) {
1525
- x = x & 31;
1526
- if (i >= this.end) {
1527
- return MachineInputBase.EOF();
1528
- }
1529
- x = x << 6 | this.bytes[i++] & 63;
1530
- return x << 3 | 2;
1531
- } else if ((x & 240) === 224) {
1532
- x = x & 15;
1533
- if (i + 1 >= this.end) {
1534
- return MachineInputBase.EOF();
1535
- }
1536
- x = x << 6 | this.bytes[i++] & 63;
1537
- x = x << 6 | this.bytes[i++] & 63;
1538
- return x << 3 | 3;
1539
- } else {
1540
- x = x & 7;
1541
- if (i + 2 >= this.end) {
1542
- return MachineInputBase.EOF();
1543
- }
1544
- x = x << 6 | this.bytes[i++] & 63;
1545
- x = x << 6 | this.bytes[i++] & 63;
1546
- x = x << 6 | this.bytes[i++] & 63;
1547
- return x << 3 | 4;
1548
- }
1549
- }
1550
-
1551
- // Returns the index relative to |pos| at which |re2.prefix| is found
1552
- // in this input stream, or a negative value if not found.
1553
- index(re2, pos) {
1554
- pos += this.start;
1555
- const i = this.indexOf(this.bytes, re2.prefixUTF8, pos);
1556
- return i < 0 ? i : i - pos;
1557
- }
1558
-
1559
- // Returns a bitmask of EMPTY_* flags.
1560
- context(pos) {
1561
- pos += this.start;
1562
- let r1 = -1;
1563
- if (pos > this.start && pos <= this.end) {
1564
- let start = pos - 1;
1565
- r1 = this.bytes[start--];
1566
- if (r1 >= 128) {
1567
- let lim = pos - 4;
1568
- if (lim < this.start) {
1569
- lim = this.start;
1570
- }
1571
- while (start >= lim && (this.bytes[start] & 192) === 128) {
1572
- start--;
1573
- }
1574
- if (start < this.start) {
1575
- start = this.start;
1576
- }
1577
- r1 = this.step(start) >> 3;
1578
- }
1579
- }
1580
- const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
1581
- return Utils.emptyOpContext(r1, r2);
1582
- }
1583
-
1584
- // Returns the index of the first occurrence of array |target| within
1585
- // array |source| after |fromIndex|, or -1 if not found.
1586
- indexOf(source, target, fromIndex = 0) {
1587
- let targetLength = target.length;
1588
- if (targetLength === 0) {
1589
- return -1;
1590
- }
1591
- let sourceLength = source.length;
1592
- for (let i = fromIndex; i <= sourceLength - targetLength; i++) {
1593
- for (let j = 0; j < targetLength; j++) {
1594
- if (source[i + j] !== target[j]) {
1595
- break;
1596
- } else if (j === targetLength - 1) {
1597
- return i;
1598
- }
1599
- }
1600
- }
1601
- return -1;
1602
- }
1603
- }
1604
-
1605
- // |pos| and |width| are in JS "char" units.
1606
- class MachineUTF16Input extends MachineInputBase {
1607
- constructor(charSequence, start = 0, end = charSequence.length) {
1608
- super();
1609
- this.charSequence = charSequence;
1610
- this.start = start;
1611
- this.end = end;
1612
- }
1613
-
1614
- // Returns the rune at the specified index; the units are
1615
- // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1616
- // indices. Returns the width (in the same units) of the rune in
1617
- // the lower 3 bits, and the rune (Unicode code point) in the high
1618
- // bits. Never negative, except for EOF which is represented as -1
1619
- // << 3 | 0.
1620
- step(pos) {
1621
- pos += this.start;
1622
- if (pos < this.end) {
1623
- const rune = this.charSequence.codePointAt(pos);
1624
- return rune << 3 | Utils.charCount(rune);
1625
- } else {
1626
- return MachineInputBase.EOF();
1627
- }
1628
- }
1629
-
1630
- // Returns the index relative to |pos| at which |re2.prefix| is found
1631
- // in this input stream, or a negative value if not found.
1632
- index(re2, pos) {
1633
- pos += this.start;
1634
- const i = this.charSequence.indexOf(re2.prefix, pos);
1635
- return i < 0 ? i : i - pos;
1636
- }
1637
-
1638
- // Returns a bitmask of EMPTY_* flags.
1639
- context(pos) {
1640
- pos += this.start;
1641
- const r1 = pos > 0 && pos <= this.charSequence.length ? this.charSequence.codePointAt(pos - 1) : -1;
1642
- const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
1643
- return Utils.emptyOpContext(r1, r2);
1644
- }
1645
- }
1646
- class MachineInput {
1647
- static fromUTF8(bytes, start = 0, end = bytes.length) {
1648
- return new MachineUTF8Input(bytes, start, end);
1649
- }
1650
- static fromUTF16(charSequence, start = 0, end = charSequence.length) {
1651
- return new MachineUTF16Input(charSequence, start, end);
1652
- }
1653
- }
1654
-
1655
1655
  /**
1656
1656
  * A single instruction in the regular expression virtual machine.
1657
1657
  *
@@ -2110,6 +2110,204 @@ class Machine {
2110
2110
  }
2111
2111
  }
2112
2112
 
/**
 * Hashes a sequence of integers with the FNV-1a mixing step (xor, then multiply by the FNV
 * prime), applied once per element rather than once per byte.
 *
 * @param {ArrayLike<number>} pcs - values to hash.
 * @returns {number} signed 32-bit hash; the offset basis itself for an empty input.
 */
const hashPCs = pcs => {
  const OFFSET_BASIS = 0x811c9dc5 | 0; // as a signed 32-bit integer
  const PRIME = 0x01000193;
  let hash = OFFSET_BASIS;
  let idx = 0;
  while (idx < pcs.length) {
    // Math.imul keeps the product within 32 bits.
    hash = Math.imul(hash ^ pcs[idx], PRIME);
    idx += 1;
  }
  return hash;
};
2123
+
/**
 * Element-wise strict equality of two array-likes, used to tell apart states whose
 * hashes collide. Allocates nothing.
 *
 * @param {ArrayLike<*>} a
 * @param {ArrayLike<*>} b
 * @returns {boolean} true when both have the same length and identical elements in order.
 */
const arraysEqual = (a, b) => {
  if (a.length !== b.length) {
    return false;
  }
  let idx = 0;
  while (idx < a.length && a[idx] === b[idx]) {
    idx += 1;
  }
  // The scan only reaches the end when no pair differed.
  return idx >= a.length;
};
/**
 * One DFA state: a set of NFA instruction PCs plus caches of its outgoing transitions.
 */
class DFAState {
  /**
   * @param {Int32Array} nfaStates - instruction PCs that make up this state.
   * @param {boolean} isMatch - whether the state is accepting.
   */
  constructor(nfaStates, isMatch) {
    const asciiSlots = Unicode.MAX_ASCII + 1;
    this.nfaStates = nfaStates;
    this.isMatch = isMatch;
    // Dense table indexed directly by char code; null marks "not computed yet".
    this.nextAscii = Array.from({ length: asciiSlots }, () => null);
    // Transition cache for every key that does not go through the ASCII table.
    this.nextMap = new Map();
  }
}
/**
 * Lazily constructed DFA over the instructions of `prog`.
 *
 * States are built on demand while matching and cached by the set of NFA PCs they
 * contain. Every method that cannot give an answer returns null; the caller is expected
 * to fall back to the NFA in that case.
 */
class DFA {
  /**
   * @param {object} prog - compiled program; only `prog.start` and `prog.getInst(pc)` are used.
   */
  constructor(prog) {
    this.prog = prog;
    this.stateCache = new Map(); // hash(number) -> DFAState[]
    this.stateCount = 0; // Tracks total states for memory limits
    this.startState = null;
    this.stateLimit = 10000; // Upper bound on cached states before the cache is flushed
    // Set once the closure of prog.start is known to need the NFA, so later calls
    // can bail out without recomputing it.
    this.startUnsupported = false;
  }

  /**
   * Follows empty transitions (ALT, ALT_MATCH, NOP, CAPTURE) from |pcs| and collects every
   * PC reached without consuming a character.
   *
   * @param {Iterable<number>} pcs - seed PCs.
   * @returns {{pcs: Int32Array, isMatch: boolean}|null} the reached PCs in ascending order
   *   and whether a MATCH instruction is among them, or null as soon as an EMPTY_WIDTH
   *   instruction is encountered (those are left to the NFA).
   */
  computeClosure(pcs) {
    const closure = new Set();
    const stack = [...pcs];
    let isMatch = false;
    while (stack.length > 0) {
      const pc = stack.pop();
      if (closure.has(pc)) {
        continue;
      }
      closure.add(pc);
      const inst = this.prog.getInst(pc);
      switch (inst.op) {
        case Inst.MATCH:
          isMatch = true;
          break;
        case Inst.ALT:
        case Inst.ALT_MATCH:
          stack.push(inst.out);
          stack.push(inst.arg);
          break;
        case Inst.NOP:
        case Inst.CAPTURE:
          stack.push(inst.out);
          break;
        case Inst.EMPTY_WIDTH:
          // Empty-width assertions are not modelled here; signal a bailout.
          return null;
        default:
          // Every other instruction stays in the set as-is.
          break;
      }
    }
    // Typed-array sort() is numeric, which gives each PC set one canonical ordering.
    return {
      pcs: Int32Array.from(closure).sort(),
      isMatch,
    };
  }

  /**
   * Returns the cached DFA state for the closure of |pcs|, creating it if necessary.
   *
   * @param {Iterable<number>} pcs
   * @returns {DFAState|null} null when the closure bails out or the state limit was hit.
   */
  getState(pcs) {
    const closureResult = this.computeClosure(pcs);
    return closureResult ? this.internState(closureResult) : null;
  }

  /**
   * Looks up or creates the state for an already computed closure.
   *
   * When a new state would be needed but `stateCount` has reached `stateLimit`, the whole
   * cache (including the start state) is dropped and null is returned.
   *
   * @param {{pcs: Int32Array, isMatch: boolean}} closureResult
   * @returns {DFAState|null}
   */
  internState(closureResult) {
    const sortedPCs = closureResult.pcs;
    const hash = hashPCs(sortedPCs);

    const bucket = this.stateCache.get(hash);
    if (bucket) {
      // Resolve potential hash collisions
      for (const candidate of bucket) {
        if (arraysEqual(candidate.nfaStates, sortedPCs)) {
          return candidate;
        }
      }
    }

    // Checked before touching the cache so that nothing is inserted just to be flushed.
    if (this.stateCount >= this.stateLimit) {
      this.stateCache.clear();
      this.stateCount = 0;
      this.startState = null;
      return null;
    }

    const state = new DFAState(sortedPCs, closureResult.isMatch);
    if (bucket) {
      bucket.push(state);
    } else {
      this.stateCache.set(hash, [state]);
    }
    this.stateCount++;
    return state;
  }

  /**
   * Computes (or fetches from the per-state caches) the state reached from |state| on
   * |charCode|.
   *
   * For an unanchored search `prog.start` is added to every successor set, so a match may
   * begin at any position. Transitions taken under an anchored search are cached under a
   * key shifted by `Unicode.MAX_RUNE + 1` so they never mix with unanchored ones.
   *
   * @param {DFAState} state
   * @param {number} charCode
   * @param {number} anchor - one of the RE2Flags anchor values.
   * @returns {DFAState|null} null when the successor could not be built.
   */
  step(state, charCode, anchor) {
    const unanchored = anchor === RE2Flags.UNANCHORED;
    // The flat table is only valid for real ASCII indices.
    const useAsciiTable = unanchored && charCode >= 0 && charCode <= Unicode.MAX_ASCII;
    const mapKey = charCode + (unanchored ? 0 : Unicode.MAX_RUNE + 1);

    if (useAsciiTable) {
      const cached = state.nextAscii[charCode];
      if (cached !== null) {
        return cached;
      }
    } else if (state.nextMap.has(mapKey)) {
      return state.nextMap.get(mapKey);
    }

    const nextPCs = [];
    for (const pc of state.nfaStates) {
      const inst = this.prog.getInst(pc);
      if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
        nextPCs.push(inst.out);
      }
    }
    if (unanchored) {
      nextPCs.push(this.prog.start);
    }
    const nextState = this.getState(nextPCs);

    // Cache the result
    if (useAsciiTable) {
      state.nextAscii[charCode] = nextState;
    } else {
      state.nextMap.set(mapKey, nextState);
    }
    return nextState;
  }

  /**
   * Runs the DFA over |input| starting at |pos|.
   *
   * @param {object} input - provides `endPos()` and `step(i)`; `step` returns the rune in
   *   the high bits and its width in the low 3 bits.
   * @param {number} pos - position to start scanning from.
   * @param {number} anchor - one of the RE2Flags anchor values.
   * @returns {boolean|null} true/false for a definite answer, null when the caller must
   *   fall back to the NFA.
   */
  match(input, pos, anchor) {
    const unanchored = anchor === RE2Flags.UNANCHORED;
    const mustReachEnd = anchor === RE2Flags.ANCHOR_BOTH;
    if ((anchor === RE2Flags.ANCHOR_START || mustReachEnd) && pos !== 0) {
      return false;
    }
    if (this.startUnsupported) {
      return null; // Known bailout: skip recomputing the start closure.
    }
    if (!this.startState) {
      const startClosure = this.computeClosure([this.prog.start]);
      if (!startClosure) {
        this.startUnsupported = true;
        return null; // Fallback to NFA
      }
      this.startState = this.internState(startClosure);
      if (!this.startState) {
        return null; // State limit reached; not a permanent condition.
      }
    }

    const endPos = input.endPos();
    let currentState = this.startState;
    if (currentState.isMatch && (!mustReachEnd || pos === endPos)) {
      return true;
    }

    let i = pos;
    while (i < endPos) {
      const r = input.step(i);
      const width = r & 7;
      // A zero width means EOF; stop instead of looping forever.
      if (width === 0) {
        break;
      }
      currentState = this.step(currentState, r >> 3, anchor);
      // Bailout while building the successor: signal fallback.
      if (currentState === null) {
        return null;
      }
      if (currentState.isMatch && (!mustReachEnd || i + width === endPos)) {
        return true;
      }
      // An anchored search cannot recover from an empty state set.
      if (!unanchored && currentState.nfaStates.length === 0) {
        return false;
      }
      i += width;
    }
    return false;
  }
}
2310
+
2113
2311
  /**
2114
2312
  * Regular expression abstract syntax tree. Produced by parser, used by compiler. NB, this
2115
2313
  * corresponds to {@code syntax.regexp} in the Go implementation; Go's {@code regexp} is called
@@ -5242,6 +5440,22 @@ class RE2 {
5242
5440
  this.prefixComplete = false; // true if prefix is the entire regexp
5243
5441
  this.prefixRune = 0; // first rune in prefix
5244
5442
  this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
5443
+ this.dfa = new DFA(prog); // Initialize the Lazy DFA
5444
+ }
5445
+ executeEngine(input, pos, anchor, ncap) {
5446
+ // If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
5447
+ // We must use the NFA.
5448
+ if (ncap > 0) {
5449
+ return this.doExecuteNFA(input, pos, anchor, ncap);
5450
+ }
5451
+ const dfaResult = this.dfa.match(input, pos, anchor);
5452
+ if (dfaResult !== null) {
5453
+ // DFA succeeded (returned true or false)
5454
+ return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
5455
+ }
5456
+
5457
+ // Fallback to NFA
5458
+ return this.doExecuteNFA(input, pos, anchor, ncap);
5245
5459
  }
5246
5460
 
5247
5461
  /**
@@ -5326,10 +5540,10 @@ class RE2 {
5326
5540
  return this.expr;
5327
5541
  }
5328
5542
 
5329
- // doExecute() finds the leftmost match in the input and returns
5543
+ // doExecuteNFA() finds the leftmost match in the input and returns
5330
5544
  // the position of its subexpressions.
5331
5545
  // Derived from exec.go.
5332
- doExecute(input, pos, anchor, ncap) {
5546
+ doExecuteNFA(input, pos, anchor, ncap) {
5333
5547
  let m = this.get();
5334
5548
  // The Treiber stack cannot reuse nodes, unless the node to be reused has only ever been at
5335
5549
  // the bottom of the stack (i.e., next == null).
@@ -5347,7 +5561,7 @@ class RE2 {
5347
5561
  return cap;
5348
5562
  }
5349
5563
  match(s) {
5350
- return this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 0) !== null;
5564
+ return this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 0) !== null;
5351
5565
  }
5352
5566
 
5353
5567
  /**
@@ -5375,7 +5589,7 @@ class RE2 {
5375
5589
  return [false, null];
5376
5590
  }
5377
5591
  const machineInput = input.isUTF16Encoding() ? MachineInput.fromUTF16(input.asCharSequence(), 0, end) : MachineInput.fromUTF8(input.asBytes(), 0, end);
5378
- const groupMatch = this.doExecute(machineInput, start, anchor, 2 * ngroup);
5592
+ const groupMatch = this.executeEngine(machineInput, start, anchor, 2 * ngroup);
5379
5593
  if (groupMatch === null) {
5380
5594
  return [false, null];
5381
5595
  }
@@ -5387,7 +5601,7 @@ class RE2 {
5387
5601
  */
5388
5602
  // This is visible for testing.
5389
5603
  matchUTF8(b) {
5390
- return this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 0) !== null;
5604
+ return this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 0) !== null;
5391
5605
  }
5392
5606
 
5393
5607
  /**
@@ -5424,7 +5638,7 @@ class RE2 {
5424
5638
  const input = MachineInput.fromUTF16(src);
5425
5639
  let numReplaces = 0;
5426
5640
  while (searchPos <= src.length) {
5427
- const a = this.doExecute(input, searchPos, RE2Flags.UNANCHORED, 2);
5641
+ const a = this.executeEngine(input, searchPos, RE2Flags.UNANCHORED, 2);
5428
5642
  if (a === null || a.length === 0) {
5429
5643
  break;
5430
5644
  }
@@ -5482,7 +5696,7 @@ class RE2 {
5482
5696
  let i = 0;
5483
5697
  let prevMatchEnd = -1;
5484
5698
  while (i < n && pos <= end) {
5485
- const matches = this.doExecute(input, pos, RE2Flags.UNANCHORED, this.prog.numCap);
5699
+ const matches = this.executeEngine(input, pos, RE2Flags.UNANCHORED, this.prog.numCap);
5486
5700
  if (matches === null || matches.length === 0) {
5487
5701
  break;
5488
5702
  }
@@ -5553,7 +5767,7 @@ class RE2 {
5553
5767
  */
5554
5768
  // This is visible for testing.
5555
5769
  findUTF8(b) {
5556
- const a = this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5770
+ const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5557
5771
  if (a === null) {
5558
5772
  return null;
5559
5773
  }
@@ -5568,7 +5782,7 @@ class RE2 {
5568
5782
  */
5569
5783
  // This is visible for testing.
5570
5784
  findUTF8Index(b) {
5571
- const a = this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5785
+ const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5572
5786
  if (a === null) {
5573
5787
  return null;
5574
5788
  }
@@ -5585,7 +5799,7 @@ class RE2 {
5585
5799
  */
5586
5800
  // This is visible for testing.
5587
5801
  find(s) {
5588
- const a = this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5802
+ const a = this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5589
5803
  if (a === null) {
5590
5804
  return '';
5591
5805
  }
@@ -5601,7 +5815,7 @@ class RE2 {
5601
5815
  */
5602
5816
  // This is visible for testing.
5603
5817
  findIndex(s) {
5604
- return this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5818
+ return this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5605
5819
  }
5606
5820
 
5607
5821
  /**
@@ -5613,7 +5827,7 @@ class RE2 {
5613
5827
  */
5614
5828
  // This is visible for testing.
5615
5829
  findUTF8Submatch(b) {
5616
- const a = this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5830
+ const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5617
5831
  if (a === null) {
5618
5832
  return null;
5619
5833
  }
@@ -5635,7 +5849,7 @@ class RE2 {
5635
5849
  */
5636
5850
  // This is visible for testing.
5637
5851
  findUTF8SubmatchIndex(b) {
5638
- return this.pad(this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5852
+ return this.pad(this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5639
5853
  }
5640
5854
 
5641
5855
  /**
@@ -5647,7 +5861,7 @@ class RE2 {
5647
5861
  */
5648
5862
  // This is visible for testing.
5649
5863
  findSubmatch(s) {
5650
- const a = this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5864
+ const a = this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5651
5865
  if (a === null) {
5652
5866
  return null;
5653
5867
  }
@@ -5669,7 +5883,7 @@ class RE2 {
5669
5883
  */
5670
5884
  // This is visible for testing.
5671
5885
  findSubmatchIndex(s) {
5672
- return this.pad(this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5886
+ return this.pad(this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5673
5887
  }
5674
5888
 
5675
5889
  /**
@@ -6121,6 +6335,39 @@ class RE2JS {
6121
6335
  return new Matcher(this, input);
6122
6336
  }
6123
6337
 
6338
+ /**
6339
+ * Tests whether the regular expression matches any part of the input string.
6340
+ * Performance Note: This method is highly optimized. Because it only returns
6341
+ * a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
6342
+ * and guarantees execution on the high-speed DFA engine whenever possible.
6343
+ *
6344
+ * @param {string|number[]} input - The input string or UTF-8 byte array to test against.
6345
+ * @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
6346
+ */
6347
+ test(input) {
6348
+ if (Array.isArray(input)) {
6349
+ // Reuse the existing UTF-8 fast-path method
6350
+ return this.re2Input.matchUTF8(input);
6351
+ }
6352
+
6353
+ // Reuse the existing UTF-16 fast-path method
6354
+ return this.re2Input.match(input);
6355
+ }
6356
+
6357
+ /**
6358
+ * Tests whether the regular expression matches the ENTIRE input string.
6359
+ * * **Performance Note:** This operates identically to `.matches()`, but is significantly
6360
+ * faster because it does not request capture group data. By requesting 0 capture groups,
6361
+ * it securely routes execution through the DFA fast-path.
6362
+ *
6363
+ * @param {string|number[]} input - The input string or UTF-8 byte array to test against.
6364
+ * @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
6365
+ */
6366
+ testExact(input) {
6367
+ const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
6368
+ return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
6369
+ }
6370
+
6124
6371
  /**
6125
6372
  * Splits input around instances of the regular expression. It returns an array giving the strings
6126
6373
  * that occur before, between, and after instances of the regular expression.