re2js 1.2.3 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v1.2.3
5
+ * @version v1.3.1
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -847,6 +847,181 @@
847
847
  }
848
848
  }
849
849
 
850
/**
 * Base class for the input adapters handed to the Machine. An adapter hides how the subject
 * text is stored and exposes it with one character of lookahead.
 */
class MachineInputBase {
  // Sentinel returned by step() when there is no more input: rune -1 packed with width 0.
  static EOF() {
    return -(1 << 3);
  }

  // Reports whether a prefix lookahead is possible without losing information.
  // This base implementation always answers yes.
  canCheckPrefix() {
    return true;
  }

  // Returns the end position, i.e. the |end| field assigned by the subclass constructor.
  endPos() {
    const { end } = this;
    return end;
  }
}
869
+
870
// An implementation of MachineInput for UTF-8 byte arrays.
// |pos| and |width| are byte indices. Positions accepted by step(), index() and
// context() are relative to |start|; each method converts to an absolute index
// into |bytes| internally.
class MachineUTF8Input extends MachineInputBase {
  /**
   * @param {number[]} bytes - UTF-8 encoded input
   * @param {number} [start=0] - absolute index of the first byte to consider
   * @param {number} [end=bytes.length] - absolute index one past the last byte to consider
   */
  constructor(bytes, start = 0, end = bytes.length) {
    super();
    this.bytes = bytes;
    this.start = start;
    this.end = end;
  }

  // Decodes the rune that begins at relative index |i|.
  // Returns the width in bytes in the lower 3 bits and the rune (Unicode code
  // point) in the high bits. Never negative, except for EOF which is
  // represented as -1 << 3 | 0. EOF is also returned when the sequence would
  // run past |end|.
  step(i) {
    i += this.start;
    if (i >= this.end) {
      return MachineInputBase.EOF();
    }
    // Mask so that signed byte values decode the same as unsigned ones.
    let x = this.bytes[i++] & 255;
    if ((x & 128) === 0) {
      // 0xxxxxxx: single byte
      return (x << 3) | 1;
    } else if ((x & 224) === 192) {
      // 110xxxxx: two bytes
      x = x & 31;
      if (i >= this.end) {
        return MachineInputBase.EOF();
      }
      x = (x << 6) | (this.bytes[i++] & 63);
      return (x << 3) | 2;
    } else if ((x & 240) === 224) {
      // 1110xxxx: three bytes
      x = x & 15;
      if (i + 1 >= this.end) {
        return MachineInputBase.EOF();
      }
      x = (x << 6) | (this.bytes[i++] & 63);
      x = (x << 6) | (this.bytes[i++] & 63);
      return (x << 3) | 3;
    } else {
      // Every remaining lead byte is decoded as a four-byte sequence.
      x = x & 7;
      if (i + 2 >= this.end) {
        return MachineInputBase.EOF();
      }
      x = (x << 6) | (this.bytes[i++] & 63);
      x = (x << 6) | (this.bytes[i++] & 63);
      x = (x << 6) | (this.bytes[i++] & 63);
      return (x << 3) | 4;
    }
  }

  // Returns the index relative to |pos| at which |re2.prefixUTF8| is found
  // in this input stream, or a negative value if not found.
  index(re2, pos) {
    pos += this.start;
    const i = this.indexOf(this.bytes, re2.prefixUTF8, pos);
    return i < 0 ? i : i - pos;
  }

  // Returns a bitmask of EMPTY_* flags for the boundary at relative index |pos|,
  // computed by Utils.emptyOpContext from the rune before and the rune after.
  // A missing neighbour is passed as -1.
  context(pos) {
    pos += this.start;
    let r1 = -1;
    if (pos > this.start && pos <= this.end) {
      let start = pos - 1;
      // Mask like step() does, so the multi-byte test below also works for
      // signed byte values.
      r1 = this.bytes[start--] & 255;
      if (r1 >= 128) {
        // The previous byte belongs to a multi-byte rune: walk back over
        // continuation bytes (10xxxxxx), at most 4 bytes and never before |start|.
        let lim = pos - 4;
        if (lim < this.start) {
          lim = this.start;
        }
        while (start >= lim && (this.bytes[start] & 192) === 128) {
          start--;
        }
        if (start < this.start) {
          start = this.start;
        }
        // step() expects an index relative to |this.start|; |start| is absolute here.
        r1 = this.step(start - this.start) >> 3;
      }
    }
    // Same conversion for the following rune: |pos| is absolute at this point.
    const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
    return Utils.emptyOpContext(r1, r2);
  }

  // Returns the index of the first occurrence of array |target| within
  // array |source| at or after |fromIndex|, or -1 if not found.
  // An empty |target| is reported as not found.
  indexOf(source, target, fromIndex = 0) {
    const targetLength = target.length;
    if (targetLength === 0) {
      return -1;
    }
    const sourceLength = source.length;
    for (let i = fromIndex; i <= sourceLength - targetLength; i++) {
      for (let j = 0; j < targetLength; j++) {
        if (source[i + j] !== target[j]) {
          break;
        } else if (j === targetLength - 1) {
          return i;
        }
      }
    }
    return -1;
  }
}
974
+
975
// MachineInput implementation backed by a JS string.
// |pos| and |width| are in JS "char" units.
class MachineUTF16Input extends MachineInputBase {
  constructor(charSequence, start = 0, end = charSequence.length) {
    super();
    Object.assign(this, { charSequence, start, end });
  }

  // Decodes the rune at |pos| (offset by |start|). The result packs the width,
  // as reported by Utils.charCount, into the lower 3 bits and the code point
  // into the high bits. Never negative, except for EOF which is represented as
  // -1 << 3 | 0.
  step(pos) {
    const at = pos + this.start;
    if (!(at < this.end)) {
      return MachineInputBase.EOF();
    }
    const codePoint = this.charSequence.codePointAt(at);
    const width = Utils.charCount(codePoint);
    return (codePoint << 3) | width;
  }

  // Returns the index relative to |pos| at which |re2.prefix| is found
  // in this input stream, or a negative value if not found.
  index(re2, pos) {
    const from = pos + this.start;
    const found = this.charSequence.indexOf(re2.prefix, from);
    if (found < 0) {
      return found;
    }
    return found - from;
  }

  // Returns a bitmask of EMPTY_* flags, computed by Utils.emptyOpContext from the
  // code point read just before |pos| and the one read at |pos| (-1 where absent).
  context(pos) {
    const text = this.charSequence;
    const at = pos + this.start;
    const hasPrevious = at > 0 && at <= text.length;
    const hasNext = at < text.length;
    const r1 = hasPrevious ? text.codePointAt(at - 1) : -1;
    const r2 = hasNext ? text.codePointAt(at) : -1;
    return Utils.emptyOpContext(r1, r2);
  }
}
1016
// Factory for the two MachineInput flavours.
class MachineInput {
  // Wraps a UTF-8 byte array, optionally restricted to [start, end).
  static fromUTF8(bytes, start = 0, end = bytes.length) {
    const utf8Input = new MachineUTF8Input(bytes, start, end);
    return utf8Input;
  }

  // Wraps a JS string, optionally restricted to [start, end).
  static fromUTF16(charSequence, start = 0, end = charSequence.length) {
    const utf16Input = new MachineUTF16Input(charSequence, start, end);
    return utf16Input;
  }
}
1024
+
850
1025
  class RE2JSException extends Error {
851
1026
  /** @param {string} message */
852
1027
  constructor(message) {
@@ -1483,181 +1658,6 @@
1483
1658
  }
1484
1659
  }
1485
1660
 
1486
- /**
1487
- * MachineInput abstracts different representations of the input text supplied to the Machine. It
1488
- * provides one-character lookahead.
1489
- */
1490
- class MachineInputBase {
1491
- static EOF() {
1492
- return -1 << 3;
1493
- }
1494
-
1495
- // can we look ahead without losing info?
1496
- canCheckPrefix() {
1497
- return true;
1498
- }
1499
-
1500
- // Returns the end position in the same units as step().
1501
- endPos() {
1502
- return this.end;
1503
- }
1504
- }
1505
-
1506
- // An implementation of MachineInput for UTF-8 byte arrays.
1507
- // |pos| and |width| are byte indices.
1508
- class MachineUTF8Input extends MachineInputBase {
1509
- constructor(bytes, start = 0, end = bytes.length) {
1510
- super();
1511
- this.bytes = bytes;
1512
- this.start = start;
1513
- this.end = end;
1514
- }
1515
-
1516
- // Returns the rune at the specified index; the units are
1517
- // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1518
- // indices. Returns the width (in the same units) of the rune in
1519
- // the lower 3 bits, and the rune (Unicode code point) in the high
1520
- // bits. Never negative, except for EOF which is represented as -1
1521
- // << 3 | 0.
1522
- step(i) {
1523
- i += this.start;
1524
- if (i >= this.end) {
1525
- return MachineInputBase.EOF();
1526
- }
1527
- let x = this.bytes[i++] & 255;
1528
- if ((x & 128) === 0) {
1529
- return x << 3 | 1;
1530
- } else if ((x & 224) === 192) {
1531
- x = x & 31;
1532
- if (i >= this.end) {
1533
- return MachineInputBase.EOF();
1534
- }
1535
- x = x << 6 | this.bytes[i++] & 63;
1536
- return x << 3 | 2;
1537
- } else if ((x & 240) === 224) {
1538
- x = x & 15;
1539
- if (i + 1 >= this.end) {
1540
- return MachineInputBase.EOF();
1541
- }
1542
- x = x << 6 | this.bytes[i++] & 63;
1543
- x = x << 6 | this.bytes[i++] & 63;
1544
- return x << 3 | 3;
1545
- } else {
1546
- x = x & 7;
1547
- if (i + 2 >= this.end) {
1548
- return MachineInputBase.EOF();
1549
- }
1550
- x = x << 6 | this.bytes[i++] & 63;
1551
- x = x << 6 | this.bytes[i++] & 63;
1552
- x = x << 6 | this.bytes[i++] & 63;
1553
- return x << 3 | 4;
1554
- }
1555
- }
1556
-
1557
- // Returns the index relative to |pos| at which |re2.prefix| is found
1558
- // in this input stream, or a negative value if not found.
1559
- index(re2, pos) {
1560
- pos += this.start;
1561
- const i = this.indexOf(this.bytes, re2.prefixUTF8, pos);
1562
- return i < 0 ? i : i - pos;
1563
- }
1564
-
1565
- // Returns a bitmask of EMPTY_* flags.
1566
- context(pos) {
1567
- pos += this.start;
1568
- let r1 = -1;
1569
- if (pos > this.start && pos <= this.end) {
1570
- let start = pos - 1;
1571
- r1 = this.bytes[start--];
1572
- if (r1 >= 128) {
1573
- let lim = pos - 4;
1574
- if (lim < this.start) {
1575
- lim = this.start;
1576
- }
1577
- while (start >= lim && (this.bytes[start] & 192) === 128) {
1578
- start--;
1579
- }
1580
- if (start < this.start) {
1581
- start = this.start;
1582
- }
1583
- r1 = this.step(start) >> 3;
1584
- }
1585
- }
1586
- const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
1587
- return Utils.emptyOpContext(r1, r2);
1588
- }
1589
-
1590
- // Returns the index of the first occurrence of array |target| within
1591
- // array |source| after |fromIndex|, or -1 if not found.
1592
- indexOf(source, target, fromIndex = 0) {
1593
- let targetLength = target.length;
1594
- if (targetLength === 0) {
1595
- return -1;
1596
- }
1597
- let sourceLength = source.length;
1598
- for (let i = fromIndex; i <= sourceLength - targetLength; i++) {
1599
- for (let j = 0; j < targetLength; j++) {
1600
- if (source[i + j] !== target[j]) {
1601
- break;
1602
- } else if (j === targetLength - 1) {
1603
- return i;
1604
- }
1605
- }
1606
- }
1607
- return -1;
1608
- }
1609
- }
1610
-
1611
- // |pos| and |width| are in JS "char" units.
1612
- class MachineUTF16Input extends MachineInputBase {
1613
- constructor(charSequence, start = 0, end = charSequence.length) {
1614
- super();
1615
- this.charSequence = charSequence;
1616
- this.start = start;
1617
- this.end = end;
1618
- }
1619
-
1620
- // Returns the rune at the specified index; the units are
1621
- // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
1622
- // indices. Returns the width (in the same units) of the rune in
1623
- // the lower 3 bits, and the rune (Unicode code point) in the high
1624
- // bits. Never negative, except for EOF which is represented as -1
1625
- // << 3 | 0.
1626
- step(pos) {
1627
- pos += this.start;
1628
- if (pos < this.end) {
1629
- const rune = this.charSequence.codePointAt(pos);
1630
- return rune << 3 | Utils.charCount(rune);
1631
- } else {
1632
- return MachineInputBase.EOF();
1633
- }
1634
- }
1635
-
1636
- // Returns the index relative to |pos| at which |re2.prefix| is found
1637
- // in this input stream, or a negative value if not found.
1638
- index(re2, pos) {
1639
- pos += this.start;
1640
- const i = this.charSequence.indexOf(re2.prefix, pos);
1641
- return i < 0 ? i : i - pos;
1642
- }
1643
-
1644
- // Returns a bitmask of EMPTY_* flags.
1645
- context(pos) {
1646
- pos += this.start;
1647
- const r1 = pos > 0 && pos <= this.charSequence.length ? this.charSequence.codePointAt(pos - 1) : -1;
1648
- const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
1649
- return Utils.emptyOpContext(r1, r2);
1650
- }
1651
- }
1652
- class MachineInput {
1653
- static fromUTF8(bytes, start = 0, end = bytes.length) {
1654
- return new MachineUTF8Input(bytes, start, end);
1655
- }
1656
- static fromUTF16(charSequence, start = 0, end = charSequence.length) {
1657
- return new MachineUTF16Input(charSequence, start, end);
1658
- }
1659
- }
1660
-
1661
1661
  /**
1662
1662
  * A single instruction in the regular expression virtual machine.
1663
1663
  *
@@ -2116,6 +2116,204 @@
2116
2116
  }
2117
2117
  }
2118
2118
 
2119
// 32-bit FNV-1a style hash over an array of integers: each element is XORed
// into the running hash, which is then multiplied by the FNV prime using
// 32-bit integer multiplication. The result is a signed 32-bit integer.
const hashPCs = pcs => {
  const FNV_OFFSET_BASIS = 0x811c9dc5 | 0; // -2128831035 as a signed 32-bit value
  const FNV_PRIME = 0x01000193; // 16777619
  let hash = FNV_OFFSET_BASIS;
  for (let idx = 0; idx < pcs.length; idx++) {
    hash = Math.imul(hash ^ pcs[idx], FNV_PRIME);
  }
  return hash;
};
2129
+
2130
// Element-wise strict equality of two array-likes, used to resolve hash collisions.
// Lengths must match; no intermediate arrays are created.
const arraysEqual = (a, b) => {
  const size = a.length;
  if (size !== b.length) {
    return false;
  }
  let idx = 0;
  while (idx < size && a[idx] === b[idx]) {
    idx++;
  }
  // Reaching the end means no mismatch was found.
  return idx === size;
};
2138
// One state of the lazily built DFA: a set of NFA instruction PCs plus cached
// outgoing transitions.
class DFAState {
  constructor(nfaStates, isMatch) {
    // Instruction PCs that make up this state
    this.nfaStates = nfaStates;
    // Whether this state is a match state
    this.isMatch = isMatch;
    // Transition table indexed directly by ASCII code; null means "not computed yet"
    this.nextAscii = Array.from({ length: Unicode.MAX_ASCII + 1 }, () => null);
    // Transition cache for every other key
    this.nextMap = new Map();
  }
}
2146
// Lazily constructed DFA over a compiled program. It only answers yes/no
// questions; every method that returns null is asking the caller to fall back
// to the NFA.
class DFA {
  constructor(prog) {
    this.prog = prog;
    this.stateCache = new Map(); // hash(number) -> DFAState[]
    this.stateCount = 0; // number of states created since the last flush
    this.startState = null; // built on first use by match()
    this.stateLimit = 10000; // cap on cached states
  }

  // Expands |pcs| with everything reachable without consuming a character.
  // Returns { pcs: sorted Int32Array, isMatch }, or null as soon as an
  // EMPTY_WIDTH instruction is reached (those are left to the NFA).
  computeClosure(pcs) {
    const visited = new Set();
    const pending = [...pcs];
    let sawMatch = false;
    while (pending.length > 0) {
      const pc = pending.pop();
      if (visited.has(pc)) {
        continue;
      }
      visited.add(pc);
      const inst = this.prog.getInst(pc);
      const op = inst.op;
      if (op === Inst.MATCH) {
        sawMatch = true;
      } else if (op === Inst.ALT || op === Inst.ALT_MATCH) {
        pending.push(inst.out);
        pending.push(inst.arg);
      } else if (op === Inst.NOP || op === Inst.CAPTURE) {
        pending.push(inst.out);
      } else if (op === Inst.EMPTY_WIDTH) {
        return null;
      }
    }
    // Typed-array sort is numeric, giving a canonical order for hashing and comparison.
    const canonical = new Int32Array([...visited]).sort();
    return { pcs: canonical, isMatch: sawMatch };
  }

  // Returns the cached DFAState for the closure of |pcs|, creating it if needed.
  // Returns null when the closure bails out or when the state limit is reached;
  // in the latter case the whole cache, the counter and the start state are reset.
  getState(pcs) {
    const closure = this.computeClosure(pcs);
    if (!closure) {
      return null;
    }
    const key = closure.pcs;
    const hash = hashPCs(key);

    let bucket = this.stateCache.get(hash);
    if (!bucket) {
      bucket = [];
      this.stateCache.set(hash, bucket);
    } else {
      // Several states may share a hash; compare the PC lists to find the right one.
      for (const candidate of bucket) {
        if (arraysEqual(candidate.nfaStates, key)) {
          return candidate;
        }
      }
    }

    if (this.stateCount >= this.stateLimit) {
      this.stateCache.clear();
      this.stateCount = 0;
      this.startState = null;
      return null;
    }

    const created = new DFAState(key, closure.isMatch);
    bucket.push(created);
    this.stateCount++;
    return created;
  }

  // Returns the state reached from |state| on |charCode|, or null for NFA fallback.
  // Unanchored ASCII transitions are cached in the flat table; everything else
  // goes through the map, with anchored keys shifted past Unicode.MAX_RUNE so
  // they cannot collide with unanchored ones.
  step(state, charCode, anchor) {
    const unanchored = anchor === RE2Flags.UNANCHORED;
    const useAsciiTable = unanchored && charCode <= Unicode.MAX_ASCII;
    const mapKey = charCode + (unanchored ? 0 : Unicode.MAX_RUNE + 1);

    if (useAsciiTable) {
      const cached = state.nextAscii[charCode];
      if (cached !== null) {
        return cached;
      }
    } else if (state.nextMap.has(mapKey)) {
      return state.nextMap.get(mapKey);
    }

    const seeds = [];
    for (const pc of state.nfaStates) {
      const inst = this.prog.getInst(pc);
      if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
        seeds.push(inst.out);
      }
    }
    // An unanchored search may begin a fresh attempt at every position.
    if (unanchored) {
      seeds.push(this.prog.start);
    }
    const successor = this.getState(seeds);

    if (useAsciiTable) {
      state.nextAscii[charCode] = successor;
    } else {
      state.nextMap.set(mapKey, successor);
    }
    return successor;
  }

  // Runs the DFA over |input| starting at |pos|.
  // Returns true/false for a definite answer, or null when the NFA must be used.
  match(input, pos, anchor) {
    const anchoredAtStart = anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH;
    if (anchoredAtStart && pos !== 0) {
      return false;
    }
    if (!this.startState) {
      this.startState = this.getState([this.prog.start]);
      if (!this.startState) {
        return null;
      }
    }

    const endPos = input.endPos();
    const mustReachEnd = anchor === RE2Flags.ANCHOR_BOTH;
    let current = this.startState;
    if (current.isMatch && (!mustReachEnd || pos === endPos)) {
      return true;
    }

    let i = pos;
    while (i < endPos) {
      const packed = input.step(i);
      const width = packed & 7;
      // Width 0 is EOF; stop rather than loop forever.
      if (width === 0) {
        break;
      }
      current = this.step(current, packed >> 3, anchor);
      if (current === null) {
        return null;
      }
      const next = i + width;
      if (current.isMatch && (!mustReachEnd || next === endPos)) {
        return true;
      }
      // An empty state can never match again; only an unanchored search keeps going.
      if (current.nfaStates.length === 0 && anchor !== RE2Flags.UNANCHORED) {
        return false;
      }
      i = next;
    }
    return false;
  }
}
2316
+
2119
2317
  /**
2120
2318
  * Regular expression abstract syntax tree. Produced by parser, used by compiler. NB, this
2121
2319
  * corresponds to {@code syntax.regexp} in the Go implementation; Go's {@code regexp} is called
@@ -5248,6 +5446,22 @@
5248
5446
  this.prefixComplete = false; // true if prefix is the entire regexp
5249
5447
  this.prefixRune = 0; // first rune in prefix
5250
5448
  this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
5449
+ this.dfa = new DFA(prog); // Initialize the Lazy DFA
5450
+ }
5451
+ executeEngine(input, pos, anchor, ncap) {
5452
+ // If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
5453
+ // We must use the NFA.
5454
+ if (ncap > 0) {
5455
+ return this.doExecuteNFA(input, pos, anchor, ncap);
5456
+ }
5457
+ const dfaResult = this.dfa.match(input, pos, anchor);
5458
+ if (dfaResult !== null) {
5459
+ // DFA succeeded (returned true or false)
5460
+ return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
5461
+ }
5462
+
5463
+ // Fallback to NFA
5464
+ return this.doExecuteNFA(input, pos, anchor, ncap);
5251
5465
  }
5252
5466
 
5253
5467
  /**
@@ -5332,10 +5546,10 @@
5332
5546
  return this.expr;
5333
5547
  }
5334
5548
 
5335
- // doExecute() finds the leftmost match in the input and returns
5549
+ // doExecuteNFA() finds the leftmost match in the input and returns
5336
5550
  // the position of its subexpressions.
5337
5551
  // Derived from exec.go.
5338
- doExecute(input, pos, anchor, ncap) {
5552
+ doExecuteNFA(input, pos, anchor, ncap) {
5339
5553
  let m = this.get();
5340
5554
  // The Treiber stack cannot reuse nodes, unless the node to be reused has only ever been at
5341
5555
  // the bottom of the stack (i.e., next == null).
@@ -5353,7 +5567,7 @@
5353
5567
  return cap;
5354
5568
  }
5355
5569
  match(s) {
5356
- return this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 0) !== null;
5570
+ return this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 0) !== null;
5357
5571
  }
5358
5572
 
5359
5573
  /**
@@ -5381,7 +5595,7 @@
5381
5595
  return [false, null];
5382
5596
  }
5383
5597
  const machineInput = input.isUTF16Encoding() ? MachineInput.fromUTF16(input.asCharSequence(), 0, end) : MachineInput.fromUTF8(input.asBytes(), 0, end);
5384
- const groupMatch = this.doExecute(machineInput, start, anchor, 2 * ngroup);
5598
+ const groupMatch = this.executeEngine(machineInput, start, anchor, 2 * ngroup);
5385
5599
  if (groupMatch === null) {
5386
5600
  return [false, null];
5387
5601
  }
@@ -5393,7 +5607,7 @@
5393
5607
  */
5394
5608
  // This is visible for testing.
5395
5609
  matchUTF8(b) {
5396
- return this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 0) !== null;
5610
+ return this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 0) !== null;
5397
5611
  }
5398
5612
 
5399
5613
  /**
@@ -5430,7 +5644,7 @@
5430
5644
  const input = MachineInput.fromUTF16(src);
5431
5645
  let numReplaces = 0;
5432
5646
  while (searchPos <= src.length) {
5433
- const a = this.doExecute(input, searchPos, RE2Flags.UNANCHORED, 2);
5647
+ const a = this.executeEngine(input, searchPos, RE2Flags.UNANCHORED, 2);
5434
5648
  if (a === null || a.length === 0) {
5435
5649
  break;
5436
5650
  }
@@ -5488,7 +5702,7 @@
5488
5702
  let i = 0;
5489
5703
  let prevMatchEnd = -1;
5490
5704
  while (i < n && pos <= end) {
5491
- const matches = this.doExecute(input, pos, RE2Flags.UNANCHORED, this.prog.numCap);
5705
+ const matches = this.executeEngine(input, pos, RE2Flags.UNANCHORED, this.prog.numCap);
5492
5706
  if (matches === null || matches.length === 0) {
5493
5707
  break;
5494
5708
  }
@@ -5559,7 +5773,7 @@
5559
5773
  */
5560
5774
  // This is visible for testing.
5561
5775
  findUTF8(b) {
5562
- const a = this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5776
+ const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5563
5777
  if (a === null) {
5564
5778
  return null;
5565
5779
  }
@@ -5574,7 +5788,7 @@
5574
5788
  */
5575
5789
  // This is visible for testing.
5576
5790
  findUTF8Index(b) {
5577
- const a = this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5791
+ const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
5578
5792
  if (a === null) {
5579
5793
  return null;
5580
5794
  }
@@ -5591,7 +5805,7 @@
5591
5805
  */
5592
5806
  // This is visible for testing.
5593
5807
  find(s) {
5594
- const a = this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5808
+ const a = this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5595
5809
  if (a === null) {
5596
5810
  return '';
5597
5811
  }
@@ -5607,7 +5821,7 @@
5607
5821
  */
5608
5822
  // This is visible for testing.
5609
5823
  findIndex(s) {
5610
- return this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5824
+ return this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
5611
5825
  }
5612
5826
 
5613
5827
  /**
@@ -5619,7 +5833,7 @@
5619
5833
  */
5620
5834
  // This is visible for testing.
5621
5835
  findUTF8Submatch(b) {
5622
- const a = this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5836
+ const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5623
5837
  if (a === null) {
5624
5838
  return null;
5625
5839
  }
@@ -5641,7 +5855,7 @@
5641
5855
  */
5642
5856
  // This is visible for testing.
5643
5857
  findUTF8SubmatchIndex(b) {
5644
- return this.pad(this.doExecute(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5858
+ return this.pad(this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5645
5859
  }
5646
5860
 
5647
5861
  /**
@@ -5653,7 +5867,7 @@
5653
5867
  */
5654
5868
  // This is visible for testing.
5655
5869
  findSubmatch(s) {
5656
- const a = this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5870
+ const a = this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap);
5657
5871
  if (a === null) {
5658
5872
  return null;
5659
5873
  }
@@ -5675,7 +5889,7 @@
5675
5889
  */
5676
5890
  // This is visible for testing.
5677
5891
  findSubmatchIndex(s) {
5678
- return this.pad(this.doExecute(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5892
+ return this.pad(this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap));
5679
5893
  }
5680
5894
 
5681
5895
  /**
@@ -6127,6 +6341,39 @@
6127
6341
  return new Matcher(this, input);
6128
6342
  }
6129
6343
 
6344
+ /**
6345
+ * Tests whether the regular expression matches any part of the input string.
6346
+ * Performance Note: This method is highly optimized. Because it only returns
6347
+ * a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
6348
+ * and guarantees execution on the high-speed DFA engine whenever possible.
6349
+ *
6350
+ * @param {string|number[]} input - The input string or UTF-8 byte array to test against.
6351
+ * @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
6352
+ */
6353
+ test(input) {
6354
+ if (Array.isArray(input)) {
6355
+ // Reuse the existing UTF-8 fast-path method
6356
+ return this.re2Input.matchUTF8(input);
6357
+ }
6358
+
6359
+ // Reuse the existing UTF-16 fast-path method
6360
+ return this.re2Input.match(input);
6361
+ }
6362
+
6363
+ /**
6364
+ * Tests whether the regular expression matches the ENTIRE input string.
6365
+ * * **Performance Note:** This operates identically to `.matches()`, but is significantly
6366
+ * faster because it does not request capture group data. By requesting 0 capture groups,
6367
+ * it securely routes execution through the DFA fast-path.
6368
+ *
6369
+ * @param {string|number[]} input - The input string or UTF-8 byte array to test against.
6370
+ * @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
6371
+ */
6372
+ testExact(input) {
6373
+ const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
6374
+ return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
6375
+ }
6376
+
6130
6377
  /**
6131
6378
  * Splits input around instances of the regular expression. It returns an array giving the strings
6132
6379
  * that occur before, between, and after instances of the regular expression.