re2js 2.0.2 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.0.2
5
+ * @version v2.1.1
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -54,6 +54,23 @@ class RE2Flags {
54
54
  /**
55
55
  * Various constants and helpers for Unicode codepoints.
56
56
  */
57
+ const ASCII_SIZE = 128;
58
+ const ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
59
+ const ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
60
+ for (let i = 0; i < ASCII_SIZE; i++) {
61
+ if (i >= 97 && i <= 122) {
62
+ // a-z
63
+ ASCII_TO_UPPER[i] = i - 32;
64
+ } else {
65
+ ASCII_TO_UPPER[i] = i;
66
+ }
67
+ if (i >= 65 && i <= 90) {
68
+ // A-Z
69
+ ASCII_TO_LOWER[i] = i + 32;
70
+ } else {
71
+ ASCII_TO_LOWER[i] = i;
72
+ }
73
+ }
57
74
  class Codepoint {
58
75
  // codePointAt(0)
59
76
  static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
@@ -61,6 +78,7 @@ class Codepoint {
61
78
  // convert unicode codepoint to upper case codepoint
62
79
  // returns the same codepoint if it cannot be converted (or the codepoint has no upper-case variant)
63
80
  static toUpperCase(codepoint) {
81
+ if (codepoint < ASCII_SIZE) return ASCII_TO_UPPER[codepoint];
64
82
  const s = String.fromCodePoint(codepoint).toUpperCase();
65
83
  if (s.length > 1) {
66
84
  return codepoint;
@@ -75,6 +93,7 @@ class Codepoint {
75
93
  // convert unicode codepoint to lower case codepoint
76
94
  // returns the same codepoint if it cannot be converted (or the codepoint has no lower-case variant)
77
95
  static toLowerCase(codepoint) {
96
+ if (codepoint < ASCII_SIZE) return ASCII_TO_LOWER[codepoint];
78
97
  const s = String.fromCodePoint(codepoint).toLowerCase();
79
98
  if (s.length > 1) {
80
99
  return codepoint;
@@ -197,9 +216,11 @@ class UnicodeTables {
197
216
  C: () => new UnicodeRangeTable(decodeRanges('AfBgDgBBOrWrWBHHBCBICCVuMuMnBBBzBBBE4B4BBGBcDBHQBXhGhGxBBB8BBBmDNB8BBByBBBQddBCCMEBgBHBsCiFiFJBBDBBXIICCBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBPMMBEB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKMMBDBbEByBPBDBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBjoIBvLBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCB-FCBHBBHBBHBBECBIIIBLBDBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIB-BGGBLBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMBxhBPBXJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBF-6DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBrCHBxDUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQ
B7BEBCvOBrB1GBsBDBC-OBIFFDQQmGBBRoBBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEvq3VBxHvw-FB', false)),
198
217
  Cc: () => new UnicodeRangeTable(decodeRanges('AfgDgB', true)),
199
218
  Cf: () => new UnicodeRangeTable(decodeRanges('tFzqBzqBBEBXhGhGyBhMhMBxCxCs5D9-B9-BBDBbEByBEBCJBw03B6H6HBBBimEQQj7IPBhjiBDBwmFHBn0rYffB+CB', false)),
219
+ Cn: () => new UnicodeRangeTable(decodeRanges('4bBBHDBICCVuMuMnBBBzBBBE4B4BBGBcDBHKBvI9B9BBmDmDBMB8BBByBBBQddBCCMEBgBDDBDBuHJJBDDBXXICCBBBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBbFB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKmDmDNBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBDBvzIBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCC-FCBHBBHBBHBBECBIIIBIBGBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIBlCJBCBBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMB3iBJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBJ76DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBjGUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQB7BEBCvOBrB1GBsBDBC-OBIFFDQQmGBB
RoBBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEwi3VBCdBhD-DBxHvw-BB---BBB---BBB', false)),
200
220
  Co: () => new UnicodeRangeTable(decodeRanges('gg4B-nGh4hc9--BD9--B', true)),
201
221
  Cs: () => new UnicodeRangeTable(decodeRanges('gg2B--B', true)),
202
222
  L: () => new UnicodeRangeTable(decodeRanges('hCZBHZBwBLLFGGBVBCeBCpOBFLBPEBICCiEEBCBBDDBCHHCCBCCCBSBCyCBCqEBJlFBClBBDHHBnBBoCaBFDBuBqBBkBBBCiDBCQQBIIBLLBBBDRRCdBe4CBMZZBfBKBBFGGBUBFKKEYYBXBIKBGXBCFBSpBB7B1BBETTIJBQPBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNGB7BBBCCCBDBCXBCCCBIBCBBKDDBDBCWWBCBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNSSBkBBCGGDqBBCsKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPBR1CBDFBErTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBkBFFkC4CBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBzC+C+CBtBBSHB3BdBOBBLrBBbjBBqBCBLjBBDKBGqBBDCBqBDBCFBCBBEGGB+FBhC1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGB1DOORMBmDFFDJBCEEBDBHGCBCBCKBDDBGEBF1B1BB8zC8zCBjHBHDBEBBNlBBCGGD3BBIRRBVBKGBCGBCGBCGBCGBCGBCGBCGBxC2O2OBrBrBBDBGBBF1CBHCBC5CBCDBGqBBC9CBSfBxBPBhQ-tGBhCs0VBkCtBBDsIBEPBLBBVuBBReBDlCByBIBDmDBDiCBDBBCCCBGBWPBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBdRRBDBCJBLEBCoBBYCBCHBVWBEEEBwBBCEEBDDBDBDCCZCBDKBICBNFBDFBDFBKGBCGBCqBBCNBHyDBej9KBNWBFwBBloItLBDpDBnBGBNEBGCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBlBZBHZBM4CBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBxB9EBTjBBFjBBFnBBJzBBNKBCOBCGBCBBCKBCOBCGBCBBEzBBN2JBKVBLHBZFBCpBBCIBmCFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4ByBBOyBBOjBBnBbBKWB7HpBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB1D-BBgBHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBBRDBgBUBGSBxNsBB0G-BBhEqCBGjCjCBLBhCBBCddB2-FBJ1mBBqBJBo3IDBCGBCBBCiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBn7F0CBCmCBCBBDDDBD
DBCBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BeBHFBmI9BBzEsBBLGBRiKiKBcBTrBBlPbBlHdBDwPwPBFBCDBCBBCOBCkGB8BjCBI1lB1lBBCBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
223
+ LC: () => new UnicodeRangeTable(decodeRanges('hCZBHZB7BLLBVBCeBCiGBCDBFvGBCaBhGDBDBBECBCHHCCBCCCBSBCyCBCqEBJlFBClBBKoBB44ClBBCGGDqBBDCBhV1CBDFBjkCKBGqBBDCBhCrBBgCMBChBBmD1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGBmIFFDJBCEEBDBHGCBCBCFBFDDBCBGEBF1B1BB8zC8zCB6DBDmDBHDBEBBNlBBCGGzoetBBTbBnEtCBCWBEDBC9BBDBBCCCBGBZBBE2Z2ZBpBBGIBIvCBh6TGBNEBqgBZBHZBmlBvCBhDjBBFjBB1DKBCOBCGBCBBCKBCOBCGBCBBk2ByBBOyBB+CVBLVB74C-BBhrV-BBhsZ0CBCmCBCBBDDDBDDBCBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BJBCTBHFB2uCjCB', false)),
203
224
  Ll: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBC2BCBQCBuBCDECBBBDCCDEEBFFDEEBBBDDDCCCDCCBCCDEECDDBDDBBBHGDCOCBSCBDDCEEC4BCBFBDDDBCCFICBjCBCaBiGCCEEEBBBTccBhBBCBBECBCWCBDBCGDB0B0BBuBBCgBCK0BCDMCBgDCxBoBBo6CqBBDCB5XFBjkCIBC2D2DBqBBgCMBChBBnD0ECBHBCgDCBHBJFBLHBJHBJFBLHBJHBJNBDHBJHBJHBJEBCBBHEEBBBCBBJDBDBBJHBLCBCBBzIEEBEEcKFDBBJDBF2B2Bs1CvBBCEEBGCFCCBCCBEBGiDCBIICFFNlBBCGG0oesBCUaCoEMCBBBC+BCBGBCCCDICFCCDCCBBBCSCGGGCMCFCCDEECICbEE2ZqBBGIBIvCBh6TGBNEBqhBZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfB7sZZBbGBCRBbZBbDBCCCBFBCKBbZBbZBbZBbZBbZBbZBbZBbZBbbBdYBCFBbYBCFBbYBCFBbYBCFBbYBCFBC15B15BBIBCTBHFB4vChBB', false)),
204
225
  Lm: () => new UnicodeRangeTable(decodeRanges('wVRBFLBPEBICCmEGG-OnHnHlFBBuIBBFgBgBKEEhFoFoF1mBgEgE2R72B72BsDkTkTxOFBvF+BBOjBjBBjBByVOORMBg-CBByHgGgG2OsBsBBDBGiDiDB+C+CBBB34bjnBjnBBEBvIzDzDdBB6DIBxCYYqDCBEBB2OXXqEtDtDWBBoDDBKngVngVuBBBh-BFBCpBBCIB0sBhBhBxuXDB9PCBpBBBnRMBhCBBCtgQtgQBCBCGBCBByhM9BBqGGBudgjBgjB', false)),
205
226
  Lo: () => new UnicodeRangeTable(decodeRanges('qFQQhIFFBCBxG8Z8ZBZBFDBuBfBCJBkBBBCiDBCZZBLLBBBDRRCdBe4CBMZZBfBWVBrBYBIKBGXBCFBSoBB8B1BBETTIJBROBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNFB8BBBCCCBDBCXBCCCBIBCBBKDDBDBYDBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNyDyDBnKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPByDrTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBpBkCkCBhBBC0BBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBxFuBBSHB3BdBOBBLrBBbjBBqBCBLdByDDBCFBCBBE7hB7hBBCB4-C3BBZWBKGBCGBCGBCGBCGBCGBCGBCGBoR2B2BF1CBJCCB4CBFGGBpBBC9CBSfBxBPBhQ-tGBhC0wUBC2jBBkCnBBJrIBFPBLBBjCyByBBkCBqFoDoDEGBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBuBEBDIBLEBCoBBYCBCHBVPBCFBEEEBwBBCEEBDDBDBDCCZBBEKBIPPBEBDFBDFBKGBCGByEiBBej9KBNWBFwBBloItLBDpDBkCCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBqDJBCsBBDeBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBhEtCBjDnBBJzBB9CzBBN2JBKVBLHB5EFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4FjBBnBDBCxJxJBoBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB0GHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBB0BUBGSB0NnBB2MqCBGwFwFB2-FBJ1mBBqBJB43IiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBxzI2P2PBrBBiBiKiKBcBTrBBlPaBmHdBDwPwPBFBCDBCBBCOBCkGB8pBDBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
@@ -408,8 +429,11 @@ class UnicodeTables {
408
429
  Zanabazar_Square: () => new UnicodeRangeTable(decodeRanges('gwmCnC', true))
409
430
  });
410
431
  static FOLD_CATEGORIES = new LazyMap({
411
- L: () => new UnicodeRangeTable(decodeRanges('laA', true)),
412
- Ll: () => new UnicodeRangeTable(decodeRanges('hCZBmDWBCGBiBuBCEECDOCDuBCBECEBBCCCBCCBBBDDBCBBCCBEBBCBBCECBCCDCCBCCBBBCCCBEEIBBCBBCBBCOCDQCDBBCCCBBBC4BCIBBCBBDCCBCBCGC3HrBrBCEEJHHCCBCCCBCCBPBCIBkBJJCUCGDDCBBDyBBxBgBCK2BCBMCD+CCDlBBq6ClBBCGGzW1CB0kCHHBpBBDCBhK0ECKgDCKHBJFBLHBJHBJFBMGCJHBZHBJHBJHBJEBMEBMDBNEBMEBqJEEBHHxC9zC9zCBuBBxBCCBBBDGCBCBCDDJCBCgDCJCCFuqeuqeCqBCUaCoEMCE8BCLECBICFCCDCCEUCBDBCEBCOCBCBCCCBEEGGCZs5Vs5VBYBmmBnBBpEjBB9EKBCOBCGBCBBr3ByBB+EVB75CfBhsVfBh1ehBB', false)),
432
+ C: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
433
+ Cn: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
434
+ L: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
435
+ LC: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
436
+ Ll: () => new UnicodeRangeTable(decodeRanges('hCZBmDWBCGBiBuBCEECDOCDuBCBECEBBCCCBCCBBBDDBCBBCCBEBBCBBCECBCCDCCBCCBBBCCCBEEIBBCBBCBBCOCDQCDBBCCCBBBC4BCIBBCBBDCCBCBCGC3HrBrBCEEJHHCCBCCCBCCBPBCIBkBJJCUCGDDCBBDyBBxBgBCK2BCBMCD+CCDlBBq6ClBBCGGzW1CB0kCHHBpBBDCBhK0ECKgDCKHBJFBLHBJHBJFBMGCJHBZHBJHBJHBJEBMEBMDBNEBMEBqJEEBHHxC9zC9zCBuBBxBCCBBBDGCBCBCDDJCBCgDCJCCFuqeuqeCqBCUaCoEMCE8BCLECBICFCCDCCEUCBDBCEBCOCBCBCCCBEECKCZs5Vs5VBYBmmBnBBpEjBB9EKBCOBCGBCBBr3ByBB+EVB75CfBhsVfBh1ehBB', false)),
413
437
  Lt: () => new UnicodeRangeTable(decodeRanges('kOCCBCCBCClBCCtsHHBJHBJHBMQQwBAB', false)),
414
438
  Lu: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBCuBCEECDOCDsBCDECBBBDCCDEEGDDECBDDDCCCDFFDEECDDECCGBBCBBCBBCOCBSCDBBCEECkBCEQCJDDBCCFICBEBCBBCCCBEEBCCBCBCEBDCCBDDIDDCBBEFBGLLBnFnFsBCCEEEBBBvBDBCdBCBBECBCWCBDBCGD1BvBBCgBCK0BCDMCBgDCyBlBBq6CqBBDCB5XFBjkCIBCvHvHERRzD0ECGGGC8CCBHBJFBLHBJHBJFBMGCJHBJNBzBBBNSSBPPBEEpL2B2Bs1CvBBCEEBGCHDDLiDCJCCFNNBkBBCGG0oesBCUaCoEMCE8BCLCCDICFFFCBBDSCMOCFCCDEEGECb9a9advCBi8UZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfBj1ehBB', false)),
415
439
  M: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
@@ -418,7 +442,9 @@ class UnicodeTables {
418
442
  static FOLD_SCRIPT = new LazyMap({
419
443
  Common: () => new UnicodeRangeTable(decodeRanges('8cgBgB', false)),
420
444
  Greek: () => new UnicodeRangeTable(decodeRanges('1FwUwU', false)),
421
- Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false))
445
+ Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
446
+ Latin: () => new UnicodeRangeTable(decodeRanges('y+pBCC', false)),
447
+ Unknown: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false))
422
448
  });
423
449
  }
424
450
 
@@ -672,7 +698,7 @@ class Utils {
672
698
 
673
699
  // Returns the array of runes (Unicode code points) in the specified JavaScript (UTF-16) string.
674
700
  static stringToRunes(str) {
675
- return String(str).split('').map(s => s.codePointAt(0));
701
+ return Array.from(String(str)).map(s => s.codePointAt(0));
676
702
  }
677
703
 
678
704
  // Returns the JavaScript (UTF-16) string containing the single rune |r|.
@@ -943,6 +969,14 @@ class MachineInputBase {
943
969
  endPos() {
944
970
  return this.end;
945
971
  }
972
+ hasString() {
973
+ return false;
974
+ }
975
+
976
+ // Helper for the exact-literal fast-path execution router
977
+ prefixLength() {
978
+ return 0;
979
+ }
946
980
  }
947
981
 
948
982
  // An implementation of MachineInput for UTF-8 byte arrays.
@@ -954,6 +988,14 @@ class MachineUTF8Input extends MachineInputBase {
954
988
  this.start = start;
955
989
  this.end = end;
956
990
  }
991
+ hasString(prefilter, pos) {
992
+ const target = prefilter.bytes;
993
+ if (target.length === 0) return true;
994
+
995
+ // Reuse the high-speed indexOf method already implemented below
996
+ const idx = this.indexOf(this.bytes, target, this.start + pos);
997
+ return idx !== -1 && idx <= this.end - target.length;
998
+ }
957
999
 
958
1000
  // Returns the rune at the specified index; the units are
959
1001
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
@@ -1030,10 +1072,10 @@ class MachineUTF8Input extends MachineInputBase {
1030
1072
  indexOf(source, target, fromIndex = 0) {
1031
1073
  let targetLength = target.length;
1032
1074
  if (targetLength === 0) {
1033
- return -1;
1075
+ return fromIndex <= this.end ? fromIndex : -1;
1034
1076
  }
1035
- let sourceLength = source.length;
1036
- for (let i = fromIndex; i <= sourceLength - targetLength; i++) {
1077
+ let limit = this.end - targetLength;
1078
+ for (let i = fromIndex; i <= limit; i++) {
1037
1079
  for (let j = 0; j < targetLength; j++) {
1038
1080
  if (source[i + j] !== target[j]) {
1039
1081
  break;
@@ -1044,6 +1086,9 @@ class MachineUTF8Input extends MachineInputBase {
1044
1086
  }
1045
1087
  return -1;
1046
1088
  }
1089
+ prefixLength(re2) {
1090
+ return re2.prefixUTF8.length;
1091
+ }
1047
1092
  }
1048
1093
 
1049
1094
  // |pos| and |width| are in JS "char" units.
@@ -1054,6 +1099,10 @@ class MachineUTF16Input extends MachineInputBase {
1054
1099
  this.start = start;
1055
1100
  this.end = end;
1056
1101
  }
1102
+ hasString(prefilter, pos) {
1103
+ const idx = this.charSequence.indexOf(prefilter.str, this.start + pos);
1104
+ return idx !== -1 && idx <= this.end - prefilter.str.length;
1105
+ }
1057
1106
 
1058
1107
  // Returns the rune at the specified index; the units are
1059
1108
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
@@ -1099,6 +1148,9 @@ class MachineUTF16Input extends MachineInputBase {
1099
1148
  const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
1100
1149
  return Utils.emptyOpContext(r1, r2);
1101
1150
  }
1151
+ prefixLength(re2) {
1152
+ return re2.prefix.length;
1153
+ }
1102
1154
  }
1103
1155
  class MachineInput {
1104
1156
  static fromUTF8(bytes, start = 0, end = bytes.length) {
@@ -1189,6 +1241,17 @@ class RE2JSFlagsException extends RE2JSException {
1189
1241
  }
1190
1242
  }
1191
1243
 
1244
+ /**
1245
+ * An exception thrown for internal engine errors, such as corrupted bytecodes.
1246
+ */
1247
+ class RE2JSInternalException extends RE2JSException {
1248
+ /** @param {string} message */
1249
+ constructor(message) {
1250
+ super(message);
1251
+ this.name = 'RE2JSInternalException';
1252
+ }
1253
+ }
1254
+
1192
1255
  /**
1193
1256
  * A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
1194
1257
  *
@@ -1391,6 +1454,23 @@ class Matcher {
1391
1454
  }
1392
1455
  return this.substring(start, end);
1393
1456
  }
1457
+
1458
+ /**
1459
+ * Returns a dictionary map of all named capturing groups and their matched values.
1460
+ * If a group was not matched, its value will be `null`.
1461
+ * @returns {Record<string, string|null>}
1462
+ */
1463
+ getNamedGroups() {
1464
+ if (!this.hasMatch) {
1465
+ throw new RE2JSGroupException('perhaps no match attempted');
1466
+ }
1467
+ const result = Object.create(null);
1468
+ for (const name of Object.keys(this.namedGroups)) {
1469
+ result[name] = this.group(name);
1470
+ }
1471
+ return result;
1472
+ }
1473
+
1394
1474
  /**
1395
1475
  * Returns the number of subgroups in this pattern.
1396
1476
  *
@@ -1815,16 +1895,20 @@ class Inst {
1815
1895
  }
1816
1896
  return r === r0;
1817
1897
  }
1818
- // Peek at the first few pairs.
1819
- // Should handle ASCII well.
1820
- for (let j = 0; j < this.runes.length && j <= 8; j += 2) {
1821
- if (r < this.runes[j]) {
1822
- return false;
1823
- }
1824
- if (r <= this.runes[j + 1]) {
1825
- return true;
1898
+ const len = this.runes.length;
1899
+ // If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
1900
+ if (len === 2 || len === 4 || len === 6 || len === 8) {
1901
+ for (let j = 0; j < len; j += 2) {
1902
+ if (r < this.runes[j]) {
1903
+ return false;
1904
+ }
1905
+ if (r <= this.runes[j + 1]) {
1906
+ return true;
1907
+ }
1826
1908
  }
1909
+ return false; // Stop here
1827
1910
  }
1911
+
1828
1912
  // Otherwise binary search.
1829
1913
  let lo = 0;
1830
1914
  let hi = this.runes.length / 2 | 0;
@@ -1842,6 +1926,40 @@ class Inst {
1842
1926
  }
1843
1927
  return false;
1844
1928
  }
1929
+
1930
+ // matchRunePos checks whether the instruction matches (and consumes) r.
1931
+ // If so, it returns the index of the matching rune pair.
1932
+ // If not, it returns -1.
1933
+ matchRunePos(r) {
1934
+ if (this.runes.length === 1) {
1935
+ const r0 = this.runes[0];
1936
+ if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
1937
+ return Unicode.equalsIgnoreCase(r0, r) ? 0 : -1;
1938
+ }
1939
+ return r === r0 ? 0 : -1;
1940
+ }
1941
+ const len = this.runes.length;
1942
+ if (len === 2 || len === 4 || len === 6 || len === 8) {
1943
+ for (let j = 0; j < len; j += 2) {
1944
+ if (r < this.runes[j]) return -1;
1945
+ if (r <= this.runes[j + 1]) return Math.floor(j / 2);
1946
+ }
1947
+ return -1;
1948
+ }
1949
+ let lo = 0;
1950
+ let hi = Math.floor(len / 2);
1951
+ while (lo < hi) {
1952
+ const m = lo + hi >> 1;
1953
+ const c = this.runes[2 * m];
1954
+ if (c <= r) {
1955
+ if (r <= this.runes[2 * m + 1]) return m;
1956
+ lo = m + 1;
1957
+ } else {
1958
+ hi = m;
1959
+ }
1960
+ }
1961
+ return -1;
1962
+ }
1845
1963
  /**
1846
1964
  *
1847
1965
  * @returns {string}
@@ -1857,7 +1975,7 @@ class Inst {
1857
1975
  case Inst.EMPTY_WIDTH:
1858
1976
  return `empty ${this.arg} -> ${this.out}`;
1859
1977
  case Inst.MATCH:
1860
- return 'match';
1978
+ return `match${this.arg !== 0 ? ` ${this.arg}` : ''}`;
1861
1979
  case Inst.FAIL:
1862
1980
  return 'fail';
1863
1981
  case Inst.NOP:
@@ -1883,7 +2001,7 @@ class Inst {
1883
2001
  class Thread {
1884
2002
  constructor() {
1885
2003
  this.inst = null;
1886
- this.cap = [];
2004
+ this.cap = null; // Initialized to Int32Array later
1887
2005
  }
1888
2006
  }
1889
2007
 
@@ -1911,9 +2029,11 @@ class Queue {
1911
2029
  return j;
1912
2030
  }
1913
2031
  clear() {
1914
- this.sparse = [];
1915
- this.densePcs = [];
1916
- this.denseThreads = [];
2032
+ // Prevent memory leaks by nulling out used object references
2033
+ for (let i = 0; i < this.size; i++) {
2034
+ this.denseThreads[i] = null;
2035
+ }
2036
+ // The sparse set logic safely ignores stale integers in Typed Arrays.
1917
2037
  this.size = 0;
1918
2038
  }
1919
2039
  toString() {
@@ -1942,7 +2062,8 @@ class Machine {
1942
2062
  m.pool = [];
1943
2063
  m.poolSize = 0;
1944
2064
  m.matched = false;
1945
- m.matchcap = Array(m.prog.numCap < 2 ? 2 : m.prog.numCap).fill(0);
2065
+ // Use Int32Array instead of standard JS array
2066
+ m.matchcap = new Int32Array(m.prog.numCap < 2 ? 2 : m.prog.numCap);
1946
2067
  m.ncap = 0;
1947
2068
  return m;
1948
2069
  }
@@ -1956,27 +2077,30 @@ class Machine {
1956
2077
  if (ncap > this.matchcap.length) {
1957
2078
  this.initNewCap(ncap);
1958
2079
  } else {
1959
- this.resetCap(ncap);
2080
+ this.resetCap();
1960
2081
  }
1961
2082
  }
1962
- resetCap(ncap) {
2083
+
2084
+ // Wipes existing typed array memory without reallocating
2085
+ resetCap() {
1963
2086
  for (let i = 0; i < this.poolSize; i++) {
1964
2087
  const t = this.pool[i];
1965
- t.cap = Array(ncap).fill(0);
2088
+ t.cap.fill(0);
1966
2089
  }
1967
2090
  }
1968
2091
  initNewCap(ncap) {
1969
2092
  for (let i = 0; i < this.poolSize; i++) {
1970
2093
  const t = this.pool[i];
1971
- t.cap = Array(ncap).fill(0);
2094
+ t.cap = new Int32Array(ncap);
1972
2095
  }
1973
- this.matchcap = Array(ncap).fill(0);
2096
+ this.matchcap = new Int32Array(ncap);
1974
2097
  }
1975
2098
  submatches() {
1976
2099
  if (this.ncap === 0) {
1977
2100
  return Utils.emptyInts();
1978
2101
  }
1979
- return this.matchcap.slice(0, this.ncap);
2102
+ // Use subarray() to create a zero-allocation view before converting
2103
+ return Array.from(this.matchcap.subarray(0, this.ncap));
1980
2104
  }
1981
2105
 
1982
2106
  // alloc() allocates a new thread with the given instruction.
@@ -1988,6 +2112,7 @@ class Machine {
1988
2112
  t = this.pool[this.poolSize];
1989
2113
  } else {
1990
2114
  t = new Thread();
2115
+ t.cap = new Int32Array(this.matchcap.length);
1991
2116
  }
1992
2117
  t.inst = inst;
1993
2118
  return t;
@@ -2017,7 +2142,7 @@ class Machine {
2017
2142
  return false;
2018
2143
  }
2019
2144
  this.matched = false;
2020
- this.matchcap = Array(this.prog.numCap).fill(-1);
2145
+ this.matchcap.fill(-1);
2021
2146
  let runq = this.q0;
2022
2147
  let nextq = this.q1;
2023
2148
  let r = input.step(pos);
@@ -2088,6 +2213,85 @@ class Machine {
2088
2213
  this.freeQueue(nextq);
2089
2214
  return this.matched;
2090
2215
  }
2216
+ matchSet(input, pos, anchor) {
2217
+ const startCond = this.re2.cond;
2218
+ if (startCond === Utils.EMPTY_ALL) return [];
2219
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2220
+ return [];
2221
+ }
2222
+ let runq = this.q0;
2223
+ let nextq = this.q1;
2224
+ let r = input.step(pos);
2225
+ let rune = r >> 3;
2226
+ let width = r & 7;
2227
+ let rune1 = -1;
2228
+ let width1 = 0;
2229
+ if (r !== MachineInputBase.EOF()) {
2230
+ r = input.step(pos + width);
2231
+ rune1 = r >> 3;
2232
+ width1 = r & 7;
2233
+ }
2234
+ let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
2235
+ const matches = new Set();
2236
+ while (true) {
2237
+ if (runq.isEmpty()) {
2238
+ if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
2239
+ }
2240
+ if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
2241
+ this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
2242
+ }
2243
+ const nextPos = pos + width;
2244
+ flag = input.context(nextPos);
2245
+ for (let j = 0; j < runq.size; j++) {
2246
+ let t = runq.denseThreads[j];
2247
+ if (t === null) continue;
2248
+ const i = t.inst;
2249
+ let add = false;
2250
+ switch (i.op) {
2251
+ case Inst.MATCH:
2252
+ if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) break;
2253
+ matches.add(i.arg); // Record the matched Set ID
2254
+ break;
2255
+ case Inst.RUNE:
2256
+ add = i.matchRune(rune);
2257
+ break;
2258
+ case Inst.RUNE1:
2259
+ add = rune === i.runes[0];
2260
+ break;
2261
+ case Inst.RUNE_ANY:
2262
+ add = true;
2263
+ break;
2264
+ case Inst.RUNE_ANY_NOT_NL:
2265
+ add = rune !== Codepoint.CODES.get('\n');
2266
+ break;
2267
+ default:
2268
+ throw new RE2JSInternalException('bad inst');
2269
+ }
2270
+ if (add) {
2271
+ t = this.add(nextq, i.out, nextPos, t.cap, flag, t);
2272
+ }
2273
+ if (t !== null) {
2274
+ this.freeThread(t);
2275
+ runq.denseThreads[j] = null;
2276
+ }
2277
+ }
2278
+ runq.clear();
2279
+ if (width === 0) break;
2280
+ pos += width;
2281
+ rune = rune1;
2282
+ width = width1;
2283
+ if (rune !== -1) {
2284
+ r = input.step(pos + width);
2285
+ rune1 = r >> 3;
2286
+ width1 = r & 7;
2287
+ }
2288
+ const tmpq = runq;
2289
+ runq = nextq;
2290
+ nextq = tmpq;
2291
+ }
2292
+ this.freeQueue(nextq);
2293
+ return Array.from(matches).sort((a, b) => a - b);
2294
+ }
2091
2295
  step(runq, nextq, pos, nextPos, c, nextCond, anchor, atEnd) {
2092
2296
  const longest = this.re2.longest;
2093
2297
  for (let j = 0; j < runq.size; j++) {
@@ -2108,7 +2312,9 @@ class Machine {
2108
2312
  }
2109
2313
  if (this.ncap > 0 && (!longest || !this.matched || this.matchcap[1] < pos)) {
2110
2314
  t.cap[1] = pos;
2111
- this.matchcap = t.cap.slice(0, this.ncap);
2315
+ // subarray() creates a view without allocating a new array; set() then copies
2316
+ // the captured positions into the preallocated matchcap buffer.
2317
+ this.matchcap.set(t.cap.subarray(0, this.ncap));
2112
2318
  }
2113
2319
  if (!longest) {
2114
2320
  this.freeQueue(runq, j + 1);
@@ -2128,7 +2334,7 @@ class Machine {
2128
2334
  add = c !== Codepoint.CODES.get('\n');
2129
2335
  break;
2130
2336
  default:
2131
- throw new Error('bad inst');
2337
+ throw new RE2JSInternalException('bad inst');
2132
2338
  }
2133
2339
  if (add) {
2134
2340
  t = this.add(nextq, i.out, nextPos, t.cap, nextCond, t);
@@ -2186,6 +2392,7 @@ class Machine {
2186
2392
  t.inst = inst;
2187
2393
  }
2188
2394
  if (this.ncap > 0 && t.cap !== cap) {
2395
+ // Direct assignment utilizing Typed Array performance
2189
2396
  for (let c = 0; c < this.ncap; c++) {
2190
2397
  t.cap[c] = cap[c];
2191
2398
  }
@@ -2220,20 +2427,23 @@ const arraysEqual = (a, b) => {
2220
2427
  return true;
2221
2428
  };
2222
2429
  class DFAState {
2223
- constructor(nfaStates, isMatch) {
2430
+ constructor(nfaStates, isMatch, matchIDs = []) {
2224
2431
  this.nfaStates = nfaStates; // Int32Array of Instruction PCs
2225
2432
  this.isMatch = isMatch; // Boolean
2433
+ this.matchIDs = matchIDs; // Array of integers indicating which Set patterns matched
2226
2434
  this.nextAscii = new Array(Unicode.MAX_ASCII + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2227
2435
  this.nextMap = new Map(); // Cache of Char -> DFAState
2228
2436
  }
2229
2437
  }
2230
2438
  class DFA {
2439
+ static MAX_CACHE_CLEARS = 5;
2231
2440
  constructor(prog) {
2232
2441
  this.prog = prog;
2233
2442
  this.stateCache = new Map(); // hash(number) -> DFAState[]
2234
2443
  this.stateCount = 0; // Tracks total states for memory limits
2235
2444
  this.startState = null;
2236
2445
  this.stateLimit = 10000; // Prevent memory explosion (ReDoS protection)
2446
+ this.cacheClears = 0; // Track thrashing
2237
2447
  this.failed = false; // mark if DFA cannot work with provided prog
2238
2448
  }
2239
2449
 
@@ -2242,6 +2452,7 @@ class DFA {
2242
2452
  const closure = new Set();
2243
2453
  const stack = [...pcs];
2244
2454
  let isMatch = false;
2455
+ const matchIDs = [];
2245
2456
  while (stack.length > 0) {
2246
2457
  const pc = stack.pop();
2247
2458
  if (closure.has(pc)) continue;
@@ -2250,6 +2461,7 @@ class DFA {
2250
2461
  switch (inst.op) {
2251
2462
  case Inst.MATCH:
2252
2463
  isMatch = true;
2464
+ if (!matchIDs.includes(inst.arg)) matchIDs.push(inst.arg);
2253
2465
  break;
2254
2466
  case Inst.ALT:
2255
2467
  case Inst.ALT_MATCH:
@@ -2267,9 +2479,11 @@ class DFA {
2267
2479
  }
2268
2480
  }
2269
2481
  const sortedPCs = Int32Array.from(closure).sort();
2482
+ matchIDs.sort((a, b) => a - b);
2270
2483
  return {
2271
2484
  pcs: sortedPCs,
2272
- isMatch
2485
+ isMatch,
2486
+ matchIDs
2273
2487
  };
2274
2488
  }
2275
2489
 
@@ -2295,6 +2509,8 @@ class DFA {
2295
2509
  bucket = [];
2296
2510
  this.stateCache.set(hash, bucket);
2297
2511
  }
2512
+
2513
+ // DFA already failed once - exit
2298
2514
  if (this.failed) return null;
2299
2515
 
2300
2516
  // Safety: prevent memory exhaustion from state explosion
@@ -2303,12 +2519,18 @@ class DFA {
2303
2519
  this.stateCache.clear();
2304
2520
  this.stateCount = 0;
2305
2521
  this.startState = null;
2306
- this.failed = true;
2522
+ this.cacheClears++;
2523
+
2524
+ // If this regex causes continuous cache thrashing, permanently fall back to NFA
2525
+ // to avoid spending CPU cycles constantly rebuilding the DFA tree.
2526
+ if (this.cacheClears >= DFA.MAX_CACHE_CLEARS) {
2527
+ this.failed = true;
2528
+ }
2307
2529
  return null;
2308
2530
  }
2309
2531
 
2310
2532
  // State not found, create it and add to bucket
2311
- const state = new DFAState(sortedPCs, closureResult.isMatch);
2533
+ const state = new DFAState(sortedPCs, closureResult.isMatch, closureResult.matchIDs);
2312
2534
  bucket.push(state);
2313
2535
  this.stateCount++;
2314
2536
  return state;
@@ -2328,76 +2550,808 @@ class DFA {
2328
2550
  return state.nextMap.get(key);
2329
2551
  }
2330
2552
  }
2331
- const nextPCs = [];
2332
- for (let i = 0; i < state.nfaStates.length; i++) {
2333
- const pc = state.nfaStates[i];
2334
- const inst = this.prog.getInst(pc);
2335
- if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
2336
- nextPCs.push(inst.out);
2553
+ const nextPCs = [];
2554
+ for (let i = 0; i < state.nfaStates.length; i++) {
2555
+ const pc = state.nfaStates[i];
2556
+ const inst = this.prog.getInst(pc);
2557
+ if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
2558
+ nextPCs.push(inst.out);
2559
+ }
2560
+ }
2561
+ if (anchor === RE2Flags.UNANCHORED) {
2562
+ nextPCs.push(this.prog.start);
2563
+ }
2564
+ const nextState = this.getState(nextPCs);
2565
+
2566
+ // Cache the result
2567
+ if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2568
+ state.nextAscii[charCode] = nextState;
2569
+ } else {
2570
+ const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2571
+ state.nextMap.set(key, nextState);
2572
+ }
2573
+ return nextState;
2574
+ }
2575
+
2576
+ // The hot loop: Execute the Lazy DFA
2577
+ match(input, pos, anchor) {
2578
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2579
+ return false;
2580
+ }
2581
+ if (!this.startState) {
2582
+ this.startState = this.getState([this.prog.start]);
2583
+ if (!this.startState) return null; // Fallback to NFA
2584
+ }
2585
+ let endPos = input.endPos();
2586
+ let currentState = this.startState;
2587
+ if (currentState.isMatch) {
2588
+ if (anchor === RE2Flags.ANCHOR_BOTH) {
2589
+ if (pos === endPos) return true;
2590
+ } else {
2591
+ return true;
2592
+ }
2593
+ }
2594
+ let i = pos;
2595
+ while (i < endPos) {
2596
+ const r = input.step(i);
2597
+ const rune = r >> 3;
2598
+ const width = r & 7;
2599
+
2600
+ // prevent infinite loop on EOF
2601
+ if (width === 0) {
2602
+ break;
2603
+ }
2604
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2605
+
2606
+ // If we hit an unrecoverable DFA error or bailout, signal fallback
2607
+ if (currentState === null) return null;
2608
+ if (currentState.isMatch) {
2609
+ if (anchor === RE2Flags.ANCHOR_BOTH) {
2610
+ if (i + width === endPos) return true;
2611
+ } else {
2612
+ return true;
2613
+ }
2614
+ }
2615
+
2616
+ // If we hit a dead end, and anchored, fail early
2617
+ if (currentState.nfaStates.length === 0) {
2618
+ if (anchor !== RE2Flags.UNANCHORED) return false;
2619
+ }
2620
+ i += width;
2621
+ }
2622
+ return false;
2623
+ }
2624
+
2625
+ // The hot loop for evaluating Multi-Pattern Sets
2626
+ matchSet(input, pos, anchor) {
2627
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2628
+ return [];
2629
+ }
2630
+ if (!this.startState) {
2631
+ this.startState = this.getState([this.prog.start]);
2632
+ if (!this.startState) return null; // Fallback to NFA
2633
+ }
2634
+ let endPos = input.endPos();
2635
+ let currentState = this.startState;
2636
+ const matches = new Set();
2637
+ const checkMatch = (state, currentPos) => {
2638
+ if (state.isMatch) {
2639
+ if (anchor === RE2Flags.ANCHOR_BOTH) {
2640
+ if (currentPos === endPos) {
2641
+ state.matchIDs.forEach(id => matches.add(id));
2642
+ }
2643
+ } else {
2644
+ state.matchIDs.forEach(id => matches.add(id));
2645
+ }
2646
+ }
2647
+ };
2648
+ checkMatch(currentState, pos);
2649
+ let i = pos;
2650
+ while (i < endPos) {
2651
+ const r = input.step(i);
2652
+ const rune = r >> 3;
2653
+ const width = r & 7;
2654
+ if (width === 0) break;
2655
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2656
+ if (currentState === null) return null; // Bailout to NFA
2657
+
2658
+ i += width;
2659
+ checkMatch(currentState, i);
2660
+ if (currentState.nfaStates.length === 0) {
2661
+ if (anchor !== RE2Flags.UNANCHORED) break;
2662
+ }
2663
+ }
2664
+ return Array.from(matches).sort((a, b) => a - b);
2665
+ }
2666
+ }
2667
+
2668
const VISITED_BITS = 32;
const MAX_BACKTRACK_PROG = 500;
const INITIAL_JOB_CAPACITY = 256; // Starting size for the job stack arrays
const MAX_BACKTRACK_VECTOR = 256 * 1024; // Size in bits (32 KB) of the pooled visited bit-mask

/**
 * Scratch state for the bit-state backtracker.
 *
 * Holds the capture registers, an explicit job stack (three parallel typed
 * arrays) and a bitmap with one bit per (pc, pos) pair so that no pair is
 * explored twice. Instances are meant to be recycled via `reset`.
 */
class BitState {
  constructor() {
    this.end = 0;
    this.cap = new Int32Array(0);
    this.matchcap = new Int32Array(0);
    this.ncap = 0;

    // Parallel arrays acting as the backtrack job stack
    this.jobPc = new Int32Array(INITIAL_JOB_CAPACITY);
    this.jobArg = new Uint8Array(INITIAL_JOB_CAPACITY);
    this.jobPos = new Int32Array(INITIAL_JOB_CAPACITY);
    this.jobLen = 0;
    this.visited = new Uint32Array(0);
  }

  /**
   * Prepares the state for a new search.
   *
   * @param {Object} prog - program; only `numInst()` is used here
   * @param {number} end - end position of the input
   * @param {number} ncap - number of capture slots the caller wants
   */
  reset(prog, end, ncap) {
    this.end = end;
    this.jobLen = 0;
    this.ncap = ncap;

    // One bit per (pc, pos) pair, rounded up to whole 32-bit words.
    const visitedSize = (prog.numInst() * (end + 1) + VISITED_BITS - 1) >>> 5;
    if (this.visited.length < visitedSize) {
      // Allocate the pooled default size, but never less than what this
      // search needs; a short bitmap would silently drop visited marks.
      const pooledSize = Math.floor(MAX_BACKTRACK_VECTOR / VISITED_BITS);
      this.visited = new Uint32Array(Math.max(visitedSize, pooledSize));
    } else {
      this.visited.fill(0, 0, visitedSize);
    }
    if (this.cap.length < ncap) {
      // Must explicitly fill with -1 as Int32Array defaults to 0
      this.cap = new Int32Array(ncap).fill(-1);
    } else {
      this.cap.fill(-1, 0, ncap);
    }
    if (this.matchcap.length < ncap) {
      this.matchcap = new Int32Array(ncap).fill(-1);
    } else {
      this.matchcap.fill(-1, 0, ncap);
    }
  }

  /**
   * Marks (pc, pos) as visited.
   *
   * @returns {boolean} true on the first visit, false if it was seen before
   */
  shouldVisit(pc, pos) {
    const n = pc * (this.end + 1) + pos;
    const idx = n >>> 5; // Equivalent to Math.floor(n / 32)
    const mask = 1 << (n & 31); // Equivalent to n % 32

    if ((this.visited[idx] & mask) !== 0) {
      return false;
    }
    this.visited[idx] |= mask;
    return true;
  }

  /**
   * Pushes a job unless it targets a FAIL instruction or, for jobs without
   * `arg`, a (pc, pos) pair that was already visited. Grows the job stack by
   * doubling when it is full.
   */
  push(re2, pc, pos, arg) {
    if (re2.prog.getInst(pc).op !== Inst.FAIL && (arg || this.shouldVisit(pc, pos))) {
      if (this.jobLen >= this.jobPc.length) {
        const newSize = this.jobPc.length * 2;
        const newPc = new Int32Array(newSize);
        newPc.set(this.jobPc);
        this.jobPc = newPc;
        const newArg = new Uint8Array(newSize);
        newArg.set(this.jobArg);
        this.jobArg = newArg;
        const newPos = new Int32Array(newSize);
        newPos.set(this.jobPos);
        this.jobPos = newPos;
      }
      this.jobPc[this.jobLen] = pc;
      this.jobArg[this.jobLen] = arg ? 1 : 0;
      this.jobPos[this.jobLen] = pos;
      this.jobLen++;
    }
  }

  /**
   * Explores the program from (pc, pos) using the explicit job stack.
   *
   * @param {Object} re2 - provides `prog` and the `longest` flag
   * @param {Object} input - machine input (`step`, `context`)
   * @param {number} pc - instruction to start at
   * @param {number} pos - input position to start at
   * @param {number} anchor - one of the RE2Flags anchor constants
   * @returns {boolean} whether a match was found; with captures requested the
   *   winning registers are left in `matchcap`
   */
  tryBacktrack(re2, input, pc, pos, anchor) {
    const longest = re2.longest;
    this.push(re2, pc, pos, false);
    while (this.jobLen > 0) {
      this.jobLen--;
      let currentPc = this.jobPc[this.jobLen];
      let arg = this.jobArg[this.jobLen] === 1;
      let currentPos = this.jobPos[this.jobLen];
      // The popped job was already checked by push(); only follow-up
      // instructions reached via `continue` need the visited test.
      let skipShouldVisit = true;
      while (true) {
        if (!skipShouldVisit) {
          if (!this.shouldVisit(currentPc, currentPos)) {
            break;
          }
        }
        skipShouldVisit = false;
        const inst = re2.prog.getInst(currentPc);
        switch (inst.op) {
          case Inst.FAIL: {
            throw new RE2JSInternalException('unexpected InstFail');
          }
          case Inst.ALT: {
            if (arg) {
              // Second visit: the `out` branch is exhausted, take `arg`.
              arg = false;
              currentPc = inst.arg;
              continue;
            } else {
              // First visit: remember to come back, then follow `out`.
              this.push(re2, currentPc, currentPos, true);
              currentPc = inst.out;
              continue;
            }
          }
          case Inst.ALT_MATCH: {
            const outInst = re2.prog.getInst(inst.out);
            if (Inst.isRuneOp(outInst.op)) {
              this.push(re2, inst.arg, currentPos, false);
              currentPc = inst.out;
              continue;
            }
            this.push(re2, inst.out, this.end, false);
            currentPc = inst.arg;
            continue;
          }
          case Inst.RUNE: {
            const r = input.step(currentPos);
            if (r === MachineInputBase.EOF()) break;
            if (!inst.matchRune(r >> 3)) break;
            currentPos += r & 7;
            currentPc = inst.out;
            continue;
          }
          case Inst.RUNE1: {
            const r = input.step(currentPos);
            if (r === MachineInputBase.EOF()) break;
            if (r >> 3 !== inst.runes[0]) break;
            currentPos += r & 7;
            currentPc = inst.out;
            continue;
          }
          case Inst.RUNE_ANY_NOT_NL: {
            const r = input.step(currentPos);
            if (r === MachineInputBase.EOF()) break;
            if (r >> 3 === 10) break; // 10 is '\n'
            currentPos += r & 7;
            currentPc = inst.out;
            continue;
          }
          case Inst.RUNE_ANY: {
            const r = input.step(currentPos);
            if (r === MachineInputBase.EOF()) break;
            currentPos += r & 7;
            currentPc = inst.out;
            continue;
          }
          case Inst.CAPTURE: {
            if (arg) {
              // Undo job: currentPos carries the previous register value.
              this.cap[inst.arg] = currentPos;
              break;
            } else {
              if (inst.arg < this.ncap) {
                // Schedule restoring the old value before overwriting it.
                this.push(re2, currentPc, this.cap[inst.arg], true);
                this.cap[inst.arg] = currentPos;
              }
              currentPc = inst.out;
              continue;
            }
          }
          case Inst.EMPTY_WIDTH: {
            const flag = input.context(currentPos);
            if ((inst.arg & ~flag) !== 0) break;
            currentPc = inst.out;
            continue;
          }
          case Inst.NOP: {
            currentPc = inst.out;
            continue;
          }
          case Inst.MATCH: {
            if (anchor === RE2Flags.ANCHOR_BOTH && currentPos !== this.end) {
              break;
            }
            if (this.ncap === 0) return true;
            if (this.ncap > 1) {
              this.cap[1] = currentPos;
            }
            const old = this.matchcap[1];
            if (old === -1 || longest && currentPos > 0 && currentPos > old) {
              this.matchcap.set(this.cap);
            }
            if (!longest) return true;
            if (currentPos === this.end) return true;
            break;
          }
          default: {
            throw new RE2JSInternalException('bad inst');
          }
        }
        break;
      }
    }
    // Use the requested capture count, not the backing array length: a
    // recycled matchcap may be longer than ncap and still hold the registers
    // of an earlier search.
    return longest && this.ncap > 1 && this.matchcap[1] >= 0;
  }
}
2877
// Recycled BitState instances, reused across executions.
const bitStatePool = [];

/**
 * Entry points for the bit-state backtracking matcher.
 */
class Backtracker {
  /**
   * @returns {boolean} whether `prog` is small enough for the backtracker
   */
  static shouldBacktrack(prog) {
    return prog.numInst() <= MAX_BACKTRACK_PROG;
  }

  /**
   * @returns {number} the longest input the backtracker accepts for `prog`,
   *   or 0 when the program is too large for it
   */
  static maxBitStateLen(prog) {
    return Backtracker.shouldBacktrack(prog) ? Math.floor(MAX_BACKTRACK_VECTOR / prog.numInst()) : 0;
  }

  /**
   * Runs the backtracker.
   *
   * @param {Object} re2 - provides `cond`, `prog`, `prefix` and `longest`
   * @param {Object} input - machine input
   * @param {number} pos - position to start searching at
   * @param {number} anchor - one of the RE2Flags anchor constants
   * @param {number} ncap - number of capture slots to report
   * @returns {number[]|null} capture positions on a match (empty when ncap is
   *   0), null when there is no match
   */
  static execute(re2, input, pos, anchor, ncap) {
    const startCond = re2.cond;
    if (startCond === Utils.EMPTY_ALL) {
      return null;
    }
    const anchored = anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH;
    const needsBeginText = (startCond & Utils.EMPTY_BEGIN_TEXT) !== 0;
    // Both an anchored search and a begin-of-text pattern can only start at 0.
    if ((anchored || needsBeginText) && pos !== 0) {
      return null;
    }

    const state = bitStatePool.pop() ?? new BitState();
    const end = input.endPos();
    state.reset(re2.prog, end, ncap);

    let found = false;
    if (needsBeginText || anchored) {
      // Single attempt at the given position.
      if (state.ncap > 0) {
        state.cap[0] = pos;
      }
      found = Boolean(state.tryBacktrack(re2, input, re2.prog.start, pos, anchor));
    } else {
      // Unanchored: try every start position, skipping ahead with the
      // literal prefix when the pattern has one.
      let cursor = pos;
      let width = -1;
      while (cursor <= end && width !== 0) {
        if (re2.prefix.length > 0) {
          const advance = input.index(re2, cursor);
          if (advance < 0) {
            break;
          }
          cursor += advance;
        }
        if (state.ncap > 0) {
          state.cap[0] = cursor;
        }
        if (state.tryBacktrack(re2, input, re2.prog.start, cursor, anchor)) {
          found = true;
          break;
        }
        const r = input.step(cursor);
        width = r === MachineInputBase.EOF() ? 0 : r & 7;
        cursor += width;
      }
    }

    let result = null;
    if (found) {
      // Copy only the first ncap slots: the pooled array can be longer and
      // hold leftovers from an earlier run.
      result = ncap === 0 ? [] : Array.from(state.matchcap.subarray(0, ncap));
    }
    bitStatePool.push(state);
    return result;
  }
}
2942
+
2943
/**
 * Fixed-capacity sparse set that can also be drained in insertion order.
 *
 * `dense` lists the members in insertion order and `sparse[u]` stores the
 * index of `u` inside `dense`; membership holds only when both agree.
 */
class QueueOnePass {
  /**
   * @param {number} size - capacity; only values below `size` can be stored
   */
  constructor(size) {
    this.sparse = new Uint32Array(size);
    this.dense = new Uint32Array(size);
    this.size = 0;
    this.nextIndex = 0;
  }

  /** @returns {boolean} true when every inserted value has been consumed */
  empty() {
    return this.nextIndex >= this.size;
  }

  /** @returns {number} the next value in insertion order */
  next() {
    const value = this.dense[this.nextIndex];
    this.nextIndex += 1;
    return value;
  }

  /** Forgets all members without touching the backing arrays. */
  clear() {
    this.size = 0;
    this.nextIndex = 0;
  }

  /** @returns {boolean} whether `u` is currently a member */
  contains(u) {
    if (!(u < this.sparse.length)) {
      return false;
    }
    const slot = this.sparse[u];
    return slot < this.size && this.dense[slot] === u;
  }

  /** Adds `u` unless it is already present. */
  insert(u) {
    if (this.contains(u)) {
      return;
    }
    this.insertNew(u);
  }

  /** Adds `u` without a membership test; out-of-range values are ignored. */
  insertNew(u) {
    if (u >= this.sparse.length) {
      return;
    }
    const slot = this.size;
    this.sparse[u] = slot;
    this.dense[slot] = u;
    this.size = slot + 1;
  }
}
2973
/**
 * Merges two sorted lists of rune ranges (flat [lo, hi, lo, hi, ...] arrays)
 * into one, recording for every merged range which pc it came from.
 *
 * @param {number[]} leftRunes - ranges reachable through `leftPC`
 * @param {number[]} rightRunes - ranges reachable through `rightPC`
 * @param {number} leftPC - target for ranges taken from the left list
 * @param {number} rightPC - target for ranges taken from the right list
 * @returns {{merged: number[], next: number[]}|null} the merged ranges and
 *   their targets, or null when a range starts at or before the end of the
 *   previously merged one
 */
const mergeRuneSets = (leftRunes, rightRunes, leftPC, rightPC) => {
  const leftLen = leftRunes.length;
  const rightLen = rightRunes.length;
  const merged = [];
  const next = [];
  let li = 0;
  let ri = 0;

  while (li < leftLen || ri < rightLen) {
    // Take from the left list unless it is exhausted or the right list has
    // the strictly smaller range start.
    const takeLeft = ri >= rightLen || (li < leftLen && !(rightRunes[ri] < leftRunes[li]));
    const source = takeLeft ? leftRunes : rightRunes;
    const at = takeLeft ? li : ri;

    if (merged.length > 0 && source[at] <= merged[merged.length - 1]) {
      return null;
    }
    merged.push(source[at], source[at + 1]);
    next.push(takeLeft ? leftPC : rightPC);

    if (takeLeft) {
      li += 2;
    } else {
      ri += 2;
    }
  }
  return { merged, next };
};
3010
/**
 * Private, mutable copy of a program for the one-pass analysis, so that the
 * rewrites never touch the instructions of the original program.
 */
class OnePassProg {
  /**
   * @param {Object} prog - source program (`start`, `numCap`, `inst`)
   */
  constructor(prog) {
    this.start = prog.start;
    this.numCap = prog.numCap;
    this.inst = Array.from(prog.inst, (source) => {
      const clone = new Inst(source.op);
      clone.out = source.out;
      clone.arg = source.arg;
      // Own copy of the rune list; instructions without one get [].
      clone.runes = source.runes ? source.runes.slice() : [];
      clone.next = null;
      return clone;
    });
  }
}
3026
/**
 * Copies `prog` into a OnePassProg and rewrites a common ALT-to-ALT
 * construct so that some programs that would otherwise fail the one-pass
 * check can pass it.
 *
 * @param {Object} prog - source program
 * @returns {OnePassProg} the rewritten copy
 */
const onePassCopy = prog => {
  const copy = new OnePassProg(prog);
  const isAlt = inst => inst.op === Inst.ALT || inst.op === Inst.ALT_MATCH;

  for (const [pc, inst] of copy.inst.entries()) {
    if (!isAlt(inst)) continue;

    // Find the branch that leads to another ALT; `arg` is tried first.
    let altField = 'arg';
    let otherField = 'out';
    if (!isAlt(copy.inst[inst.arg])) {
      if (!isAlt(copy.inst[inst.out])) continue;
      altField = 'out';
      otherField = 'arg';
    }
    const inner = copy.inst[inst[altField]];

    // Only handled when the remaining branch is not an ALT as well.
    if (isAlt(copy.inst[inst[otherField]])) continue;

    // Which branch of the inner ALT points back at this instruction?
    // `out` is checked first and is also the default when neither does.
    const backField = inner.out !== pc && inner.arg === pc ? 'arg' : 'out';
    const forwardField = backField === 'out' ? 'arg' : 'out';

    if (inner[backField] === pc) {
      inner[backField] = inst[otherField];
    }
    if (inst[otherField] === inner[backField]) {
      inst[altField] = inner[forwardField];
    }
  }
  return copy;
};
3060
/**
 * Checks whether `p` is one-pass and, if so, annotates its instructions in
 * place with the rune ranges (`runes`) and per-range targets (`next`) that
 * the one-pass matcher dispatches on.
 *
 * @param {OnePassProg} p - program copy produced by onePassCopy
 * @returns {OnePassProg|null} `p` when it is one-pass, null otherwise (also
 *   for programs of 1000 or more instructions)
 */
const makeOnePass = p => {
  const size = p.inst.length;
  if (size >= 1000) return null;

  const instQueue = new QueueOnePass(size);
  const visitQueue = new QueueOnePass(size);
  const onePassRunes = new Array(size);
  // m[pc] is true when a match is reachable from pc without consuming input
  const m = new Array(size).fill(false);

  // r0 together with every rune in its case-folding orbit, as sorted ranges.
  const foldedRanges = r0 => {
    const ranges = [r0, r0];
    for (let r1 = Unicode.simpleFold(r0); r1 !== r0; r1 = Unicode.simpleFold(r1)) {
      ranges.push(r1, r1);
    }
    return ranges.sort((a, b) => a - b);
  };
  // One target per range plus one spare entry, all pointing at `target`.
  const uniformNext = (ranges, target) => new Uint32Array(Math.floor(ranges.length / 2) + 1).fill(target);

  const check = pc => {
    const inst = p.inst[pc];
    if (visitQueue.contains(pc)) return true;
    visitQueue.insert(pc);

    let ok = true;
    switch (inst.op) {
      case Inst.ALT:
      case Inst.ALT_MATCH: {
        ok = check(inst.out) && check(inst.arg);
        // Both branches reaching a match would be ambiguous.
        if (m[inst.out] && m[inst.arg]) return false;
        // Keep the matching branch, if there is one, in `out`.
        if (m[inst.arg]) {
          [inst.out, inst.arg] = [inst.arg, inst.out];
        }
        if (m[inst.out]) {
          m[pc] = true;
          inst.op = Inst.ALT_MATCH;
        }
        const mergeRes = mergeRuneSets(onePassRunes[inst.out] || [], onePassRunes[inst.arg] || [], inst.out, inst.arg);
        if (!mergeRes) return false;
        onePassRunes[pc] = mergeRes.merged;
        inst.next = new Uint32Array(mergeRes.next);
        break;
      }
      case Inst.CAPTURE:
      case Inst.EMPTY_WIDTH:
      case Inst.NOP: {
        // Zero-width instructions inherit everything from their successor.
        ok = check(inst.out);
        m[pc] = m[inst.out];
        onePassRunes[pc] = onePassRunes[inst.out] ? onePassRunes[inst.out].slice() : [];
        inst.next = uniformNext(onePassRunes[pc], inst.out);
        break;
      }
      case Inst.MATCH:
      case Inst.FAIL: {
        m[pc] = inst.op === Inst.MATCH;
        break;
      }
      case Inst.RUNE: {
        m[pc] = false;
        if (inst.next && inst.next.length > 0) break; // already processed
        instQueue.insert(inst.out);
        if (!inst.runes || inst.runes.length === 0) {
          onePassRunes[pc] = [];
          inst.next = new Uint32Array([inst.out]);
          break;
        }
        const foldSingle = inst.runes.length === 1 && (inst.arg & RE2Flags.FOLD_CASE) !== 0;
        const ranges = foldSingle ? foldedRanges(inst.runes[0]) : [...inst.runes];
        onePassRunes[pc] = ranges;
        inst.next = uniformNext(ranges, inst.out);
        inst.op = Inst.RUNE;
        break;
      }
      case Inst.RUNE1: {
        m[pc] = false;
        if (inst.next && inst.next.length > 0) break; // already processed
        instQueue.insert(inst.out);
        const only = inst.runes[0];
        const ranges = (inst.arg & RE2Flags.FOLD_CASE) !== 0 ? foldedRanges(only) : [only, only];
        onePassRunes[pc] = ranges;
        inst.next = uniformNext(ranges, inst.out);
        inst.op = Inst.RUNE;
        break;
      }
      case Inst.RUNE_ANY: {
        m[pc] = false;
        if (inst.next && inst.next.length > 0) break; // already processed
        instQueue.insert(inst.out);
        onePassRunes[pc] = [0, Unicode.MAX_RUNE];
        inst.next = new Uint32Array([inst.out]);
        break;
      }
      case Inst.RUNE_ANY_NOT_NL: {
        m[pc] = false;
        if (inst.next && inst.next.length > 0) break; // already processed
        instQueue.insert(inst.out);
        onePassRunes[pc] = [0, 9, 11, Unicode.MAX_RUNE]; // \n is 10
        inst.next = uniformNext(onePassRunes[pc], inst.out);
        break;
      }
    }
    return ok;
  };

  // Work list: start at the entry point; rune instructions enqueue their
  // successor, and every dequeued pc is checked with a fresh visit set.
  instQueue.clear();
  instQueue.insert(p.start);
  while (!instQueue.empty()) {
    visitQueue.clear();
    if (!check(instQueue.next())) return null;
  }

  p.inst.forEach((inst, i) => {
    if (onePassRunes[i]) inst.runes = onePassRunes[i];
  });
  return p;
};
3195
/**
 * Drops the one-pass annotations that are not needed after the analysis.
 *
 * Which instructions are cleaned is decided by the opcode in `original`:
 * ALT, ALT_MATCH and RUNE keep their annotations; CAPTURE, EMPTY_WIDTH, NOP,
 * MATCH and FAIL lose `next`; RUNE1, RUNE_ANY and RUNE_ANY_NOT_NL lose `next`
 * and get their original opcode and rune list back.
 *
 * @param {OnePassProg} p - analysed program, modified in place
 * @param {Object} original - the program `p` was copied from
 */
const cleanupOnePass = (p, original) => {
  for (const [ix, source] of original.inst.entries()) {
    const op = source.op;
    const clearsNext = op === Inst.CAPTURE || op === Inst.EMPTY_WIDTH || op === Inst.NOP || op === Inst.MATCH || op === Inst.FAIL;
    const restoresRune = op === Inst.RUNE1 || op === Inst.RUNE_ANY || op === Inst.RUNE_ANY_NOT_NL;
    const keepsAll = op === Inst.ALT || op === Inst.ALT_MATCH || op === Inst.RUNE;

    if (keepsAll) {
      continue;
    }
    if (clearsNext) {
      p.inst[ix].next = null;
    } else if (restoresRune) {
      const target = p.inst[ix];
      target.next = null;
      target.op = op;
      target.runes = source.runes ? source.runes.slice() : [];
    }
  }
};
3220
/**
 * One-pass matcher: compiles eligible programs into an annotated copy and
 * executes them by following exactly one instruction path, without
 * backtracking.
 */
class OnePass {
  /**
   * Builds the one-pass form of `prog`.
   *
   * @param {Object} prog - compiled program
   * @returns {OnePassProg|null} the annotated copy, or null when the program
   *   does not qualify
   */
  static compile(prog) {
    if (prog.start === 0) return null;

    // onepass regexps must be strictly anchored
    const startInst = prog.inst[prog.start];
    if (startInst.op !== Inst.EMPTY_WIDTH || (startInst.arg & Utils.EMPTY_BEGIN_TEXT) === 0) {
      return null;
    }

    const isAlt = inst => inst.op === Inst.ALT || inst.op === Inst.ALT_MATCH;
    let hasAlt = false;
    for (const inst of prog.inst) {
      if (isAlt(inst)) {
        hasAlt = true;
        break;
      }
    }

    // Reject programs whose match is not guarded by an end-of-text check.
    for (const inst of prog.inst) {
      const leadsToMatch = prog.inst[inst.out].op === Inst.MATCH;
      if (isAlt(inst)) {
        if (leadsToMatch || prog.inst[inst.arg].op === Inst.MATCH) return null;
      } else if (leadsToMatch) {
        if (inst.op === Inst.EMPTY_WIDTH) {
          if ((inst.arg & Utils.EMPTY_END_TEXT) !== Utils.EMPTY_END_TEXT) return null;
        } else if (hasAlt) {
          return null;
        }
      }
    }

    const analysed = makeOnePass(onePassCopy(prog));
    if (analysed !== null) {
      cleanupOnePass(analysed, prog);
    }
    return analysed;
  }

  /**
   * Picks the successor of an ALT / ALT_MATCH instruction for rune `r`.
   *
   * @returns {number} the target pc; 0 stands for failure
   */
  static next(inst, r) {
    const slot = inst.matchRunePos(r);
    if (slot >= 0) {
      return inst.next[slot];
    }
    return inst.op === Inst.ALT_MATCH ? inst.out : 0; // 0 = fail
  }

  /**
   * Executes the one-pass program stored in `re2.onepass`.
   *
   * NOTE(review): `anchor` is accepted but never read in this method;
   * presumably the caller enforces anchoring - confirm.
   *
   * @param {Object} re2 - provides `onepass`
   * @param {Object} input - machine input (`step`, `context`)
   * @param {number} pos - position to start matching at
   * @param {number} anchor - unused here
   * @param {number} ncap - number of capture slots to report
   * @returns {number[]|null} capture positions on a match (empty when ncap is
   *   0), null otherwise
   */
  static execute(re2, input, pos, anchor, ncap) {
    const onepass = re2.onepass;
    if (!onepass) return null;

    const matchcap = new Int32Array(ncap).fill(-1);

    // Current rune plus one rune of lookahead (-1 / 0 when there is none).
    const first = input.step(pos);
    let rune = first >> 3;
    let width = first & 7;
    let rune1 = -1;
    let width1 = 0;
    if (first !== MachineInputBase.EOF()) {
      const ahead = input.step(pos + width);
      if (ahead !== MachineInputBase.EOF()) {
        rune1 = ahead >> 3;
        width1 = ahead & 7;
      }
    }

    let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
    let pc = onepass.start;

    while (true) {
      const inst = onepass.inst[pc];
      pc = inst.out;
      switch (inst.op) {
        // Zero-width instructions: stay on the current rune.
        case Inst.NOP:
          continue;
        case Inst.EMPTY_WIDTH:
          if ((inst.arg & ~flag) !== 0) return null;
          continue;
        case Inst.CAPTURE:
          if (inst.arg < matchcap.length) {
            matchcap[inst.arg] = pos;
          }
          continue;
        case Inst.ALT:
        case Inst.ALT_MATCH:
          pc = OnePass.next(inst, rune);
          continue;

        // Terminal instructions.
        case Inst.FAIL:
          return null;
        case Inst.MATCH: {
          if (matchcap.length > 0) {
            matchcap[0] = 0;
            matchcap[1] = pos;
          }
          return ncap === 0 ? [] : Array.from(matchcap);
        }

        // Rune instructions: verify, then fall out of the switch to advance.
        case Inst.RUNE:
          if (!inst.matchRune(rune)) return null;
          break;
        case Inst.RUNE1:
          if (rune !== inst.runes[0]) return null;
          break;
        case Inst.RUNE_ANY:
          break;
        case Inst.RUNE_ANY_NOT_NL:
          if (rune === 10) return null;
          break;
        default:
          throw new RE2JSInternalException('bad inst');
      }

      // Nothing left to consume: the program needed a rune that is not there.
      if (width === 0) break;

      flag = Utils.emptyOpContext(rune, rune1);
      pos += width;
      rune = rune1;
      width = width1;
      if (rune !== -1) {
        const ahead = input.step(pos + width);
        if (ahead !== MachineInputBase.EOF()) {
          rune1 = ahead >> 3;
          width1 = ahead & 7;
        } else {
          rune1 = -1;
          width1 = 0;
        }
      }
    }
    // The loop is only left without a match; MATCH returns directly above.
    return null;
  }
}
2403
3357
 
@@ -2482,7 +3436,7 @@ class Regexp {
2482
3436
  this.max = 0; // max for REPEAT
2483
3437
  this.cap = 0; // capturing index, for CAPTURE
2484
3438
  this.name = null; // capturing name, for CAPTURE
2485
- this.namedGroups = {}; // map of group name -> capturing index
3439
+ this.namedGroups = Object.create(null); // map of group name -> capturing index
2486
3440
  }
2487
3441
  reinit() {
2488
3442
  this.flags = 0;
@@ -2492,7 +3446,7 @@ class Regexp {
2492
3446
  this.min = 0;
2493
3447
  this.max = 0;
2494
3448
  this.name = null;
2495
- this.namedGroups = {};
3449
+ this.namedGroups = Object.create(null);
2496
3450
  }
2497
3451
  toString() {
2498
3452
  return this.appendTo();
@@ -2752,6 +3706,188 @@ class Regexp {
2752
3706
  }
2753
3707
  }
2754
3708
 
3709
/**
 * Node of a prefilter tree: a cheap literal-based condition that is
 * evaluated against the input.
 */
class Prefilter {
  static Type = {
    NONE: 0,
    EXACT: 1,
    AND: 2,
    OR: 3
  };

  /**
   * @param {number} type - one of Prefilter.Type
   */
  constructor(type) {
    this.type = type;
    this.subs = []; // children of AND / OR nodes
    this.str = ''; // literal of an EXACT node
    this.bytes = null; // UTF-8 bytes of `str`, set for EXACT nodes
  }

  /**
   * Evaluates this node.
   *
   * @param {Object} input - machine input exposing `hasString(prefilter, pos)`
   * @param {number} pos - position handed through to `hasString`
   * @returns {boolean} false only when the condition is definitely not met;
   *   NONE and unknown types impose no condition and yield true
   */
  eval(input, pos) {
    const { EXACT, AND, OR } = Prefilter.Type;
    if (this.type === EXACT) {
      return input.hasString(this, pos);
    }
    if (this.type === AND) {
      return this.subs.every(sub => sub.eval(input, pos));
    }
    if (this.type === OR) {
      return this.subs.some(sub => sub.eval(input, pos));
    }
    return true;
  }
}
3743
/**
 * Builds prefilter trees from parsed regular expressions.
 */
class PrefilterTree {
  /**
   * @param {Regexp} re - parsed regular expression
   * @returns {Prefilter} simplified prefilter for `re`
   */
  static build(re) {
    const pf = PrefilterTree.fromRegexp(re);
    return PrefilterTree.simplify(pf);
  }

  /**
   * Translates a Regexp node into an unsimplified prefilter.
   *
   * Literals become EXACT nodes, concatenations AND nodes and alternations
   * OR nodes. Everything that does not require a specific literal maps to
   * NONE.
   *
   * @param {Regexp} re - parsed regular expression (may be missing)
   * @returns {Prefilter}
   */
  static fromRegexp(re) {
    if (!re) return new Prefilter(Prefilter.Type.NONE);
    switch (re.op) {
      case Regexp.Op.LITERAL: {
        if (re.runes.length === 0 || (re.flags & RE2Flags.FOLD_CASE) !== 0) {
          // Skip case-folded literals for simplicity
          return new Prefilter(Prefilter.Type.NONE);
        }
        const pf = new Prefilter(Prefilter.Type.EXACT);
        pf.str = Array.from(re.runes, rune => String.fromCodePoint(rune)).join('');
        pf.bytes = Utils.stringToUtf8ByteArray(pf.str);
        return pf;
      }
      case Regexp.Op.CAPTURE:
      case Regexp.Op.PLUS: {
        // The operand occurs at least once, so its requirements carry over.
        return PrefilterTree.fromRegexp(re.subs[0]);
      }
      case Regexp.Op.REPEAT: {
        // Only a repetition with min >= 1 guarantees the operand occurs.
        if (re.min >= 1) {
          return PrefilterTree.fromRegexp(re.subs[0]);
        }
        return new Prefilter(Prefilter.Type.NONE);
      }
      case Regexp.Op.CONCAT: {
        const pf = new Prefilter(Prefilter.Type.AND);
        for (const sub of re.subs) {
          pf.subs.push(PrefilterTree.fromRegexp(sub));
        }
        return pf;
      }
      case Regexp.Op.ALTERNATE: {
        const pf = new Prefilter(Prefilter.Type.OR);
        for (const sub of re.subs) {
          pf.subs.push(PrefilterTree.fromRegexp(sub));
        }
        return pf;
      }
      // NO_MATCH, EMPTY_MATCH, BEGIN_LINE, END_LINE, BEGIN_TEXT, END_TEXT,
      // WORD_BOUNDARY, NO_WORD_BOUNDARY, CHAR_CLASS, ANY_CHAR_NOT_NL, ANY_CHAR
      // and every other operator impose no literal requirement.
      default:
        return new Prefilter(Prefilter.Type.NONE);
    }
  }

  /**
   * Simplifies a prefilter tree: nested AND/OR nodes are flattened, NONE
   * children are dropped from AND nodes, an OR with a NONE child becomes
   * NONE, duplicate EXACT branches of an OR are removed, and a node left
   * with a single child is replaced by that child.
   *
   * AND/OR nodes that survive are updated in place (`subs` is replaced).
   *
   * @param {Prefilter} pf - tree to simplify
   * @returns {Prefilter} the simplified tree
   */
  static simplify(pf) {
    if (pf.type === Prefilter.Type.EXACT || pf.type === Prefilter.Type.NONE) {
      return pf;
    }
    if (pf.type === Prefilter.Type.AND) {
      const newSubs = [];
      for (const sub of pf.subs) {
        const s = PrefilterTree.simplify(sub);
        if (s.type !== Prefilter.Type.NONE) {
          if (s.type === Prefilter.Type.AND) {
            newSubs.push(...s.subs);
          } else {
            newSubs.push(s);
          }
        }
      }
      if (newSubs.length === 0) return new Prefilter(Prefilter.Type.NONE);
      if (newSubs.length === 1) return newSubs[0];
      pf.subs = newSubs;
      return pf;
    }
    if (pf.type === Prefilter.Type.OR) {
      const newSubs = [];
      for (const sub of pf.subs) {
        const s = PrefilterTree.simplify(sub);
        if (s.type === Prefilter.Type.NONE) {
          // If any branch of an OR has no requirements, the whole OR has no requirements
          return new Prefilter(Prefilter.Type.NONE);
        }
        if (s.type === Prefilter.Type.OR) {
          newSubs.push(...s.subs);
        } else {
          newSubs.push(s);
        }
      }
      if (newSubs.length === 0) return new Prefilter(Prefilter.Type.NONE);

      // De-duplicate EXACT branches
      const seen = new Set();
      const uniqueSubs = [];
      for (const sub of newSubs) {
        if (sub.type === Prefilter.Type.EXACT) {
          if (!seen.has(sub.str)) {
            seen.add(sub.str);
            uniqueSubs.push(sub);
          }
        } else {
          uniqueSubs.push(sub);
        }
      }
      // Checked after de-duplication so that e.g. `foo|foo` collapses to the
      // single EXACT node instead of an OR with one child.
      if (uniqueSubs.length === 1) return uniqueSubs[0];
      pf.subs = uniqueSubs;
      return pf;
    }
    return pf;
  }
}
3869
+
3870
/**
 * A list of instruction pointers waiting to be patched.
 *
 * Both ends of the list are tracked so that two lists can be joined in O(1)
 * during compilation.
 *
 * The values are encoded integers rather than plain instruction indexes:
 * - the instruction index is `l >> 1`
 * - `(l & 1) === 0` means the `.out` field is to be patched
 * - `(l & 1) === 1` means the `.arg` field is to be patched
 * - `0` denotes an empty list
 *
 * @see https://swtch.com/~rsc/regexp/regexp1.html
 */
class PatchList {
  /**
   * @param {number} head - Encoded pointer to the first entry of the list.
   * @param {number} tail - Encoded pointer to the last entry of the list.
   */
  constructor(head = 0, tail = 0) {
    Object.assign(this, { head, tail });
  }
}
3890
+
2755
3891
  /**
2756
3892
  * A Prog is a compiled regular expression program.
2757
3893
  */
@@ -2853,39 +3989,30 @@ class Prog {
2853
3989
  return i.arg;
2854
3990
  }
2855
3991
  patch(l, val) {
2856
- while (l !== 0) {
2857
- const i = this.inst[l >> 1];
2858
- if ((l & 1) === 0) {
2859
- l = i.out;
3992
+ let head = l.head;
3993
+ while (head !== 0) {
3994
+ const i = this.inst[head >> 1];
3995
+ if ((head & 1) === 0) {
3996
+ head = i.out;
2860
3997
  i.out = val;
2861
3998
  } else {
2862
- l = i.arg;
3999
+ head = i.arg;
2863
4000
  i.arg = val;
2864
4001
  }
2865
4002
  }
2866
4003
  }
2867
4004
  append(l1, l2) {
2868
- if (l1 === 0) {
2869
- return l2;
2870
- }
2871
- if (l2 === 0) {
2872
- return l1;
2873
- }
2874
- let last = l1;
2875
- for (;;) {
2876
- const next = this.next(last);
2877
- if (next === 0) {
2878
- break;
2879
- }
2880
- last = next;
2881
- }
2882
- const i = this.inst[last >> 1];
2883
- if ((last & 1) === 0) {
2884
- i.out = l2;
4005
+ if (l1.head === 0) return l2;
4006
+ if (l2.head === 0) return l1;
4007
+
4008
+ // We know exactly where the tail is
4009
+ const i = this.inst[l1.tail >> 1];
4010
+ if ((l1.tail & 1) === 0) {
4011
+ i.out = l2.head;
2885
4012
  } else {
2886
- i.arg = l2;
4013
+ i.arg = l2.head;
2887
4014
  }
2888
- return l1;
4015
+ return new PatchList(l1.head, l2.tail);
2889
4016
  }
2890
4017
  /**
2891
4018
  *
@@ -2914,7 +4041,7 @@ class Prog {
2914
4041
  * @class
2915
4042
  */
2916
4043
  class Frag {
2917
- constructor(i = 0, out = 0, nullable = false) {
4044
+ constructor(i = 0, out = new PatchList(), nullable = false) {
2918
4045
  this.i = i; // an instruction address (pc).
2919
4046
  this.out = out; // a patch list; see explanation in Prog.js
2920
4047
  this.nullable = nullable; // whether the fragment can match the empty string
@@ -2939,6 +4066,33 @@ class Compiler {
2939
4066
  c.prog.start = f.i;
2940
4067
  return c.prog;
2941
4068
  }
4069
+ static compileSet(regexps) {
4070
+ const c = new Compiler();
4071
+ if (regexps.length === 0) {
4072
+ c.prog.start = c.newInst(Inst.FAIL).i;
4073
+ return c.prog;
4074
+ }
4075
+ let starts = [];
4076
+ for (let i = 0; i < regexps.length; i++) {
4077
+ const f = c.compile(regexps[i]);
4078
+ const m = c.newInst(Inst.MATCH);
4079
+ c.prog.getInst(m.i).arg = i; // Store the regex index
4080
+ c.prog.patch(f.out, m.i);
4081
+ starts.push(f.i);
4082
+ }
4083
+
4084
+ // Link starts together via ALT
4085
+ let start = starts[0];
4086
+ for (let i = 1; i < starts.length; i++) {
4087
+ const f = c.newInst(Inst.ALT);
4088
+ const inst = c.prog.getInst(f.i);
4089
+ inst.out = start;
4090
+ inst.arg = starts[i];
4091
+ start = f.i;
4092
+ }
4093
+ c.prog.start = start;
4094
+ return c.prog;
4095
+ }
2942
4096
  constructor() {
2943
4097
  this.prog = new Prog();
2944
4098
  this.newInst(Inst.FAIL);
@@ -2951,7 +4105,7 @@ class Compiler {
2951
4105
  // Returns a no-op fragment. Sometimes unavoidable.
2952
4106
  nop() {
2953
4107
  const f = this.newInst(Inst.NOP);
2954
- f.out = f.i << 1;
4108
+ f.out = new PatchList(f.i << 1, f.i << 1);
2955
4109
  return f;
2956
4110
  }
2957
4111
  fail() {
@@ -2962,7 +4116,7 @@ class Compiler {
2962
4116
  // Given a fragment a, returns a fragment with capturing parens around a.
2963
4117
  cap(arg) {
2964
4118
  const f = this.newInst(Inst.CAPTURE);
2965
- f.out = f.i << 1;
4119
+ f.out = new PatchList(f.i << 1, f.i << 1);
2966
4120
  this.prog.getInst(f.i).arg = arg;
2967
4121
  if (this.prog.numCap < arg + 1) {
2968
4122
  this.prog.numCap = arg + 1;
@@ -3010,10 +4164,10 @@ class Compiler {
3010
4164
  const i = this.prog.getInst(f.i);
3011
4165
  if (nongreedy) {
3012
4166
  i.arg = f1.i;
3013
- f.out = f.i << 1;
4167
+ f.out = new PatchList(f.i << 1, f.i << 1);
3014
4168
  } else {
3015
4169
  i.out = f1.i;
3016
- f.out = f.i << 1 | 1;
4170
+ f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
3017
4171
  }
3018
4172
  this.prog.patch(f1.out, f.i);
3019
4173
  return f;
@@ -3025,10 +4179,10 @@ class Compiler {
3025
4179
  const i = this.prog.getInst(f.i);
3026
4180
  if (nongreedy) {
3027
4181
  i.arg = f1.i;
3028
- f.out = f.i << 1;
4182
+ f.out = new PatchList(f.i << 1, f.i << 1);
3029
4183
  } else {
3030
4184
  i.out = f1.i;
3031
- f.out = f.i << 1 | 1;
4185
+ f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
3032
4186
  }
3033
4187
  f.out = this.prog.append(f.out, f1.out);
3034
4188
  return f;
@@ -3051,7 +4205,7 @@ class Compiler {
3051
4205
  empty(op) {
3052
4206
  const f = this.newInst(Inst.EMPTY_WIDTH);
3053
4207
  this.prog.getInst(f.i).arg = op;
3054
- f.out = f.i << 1;
4208
+ f.out = new PatchList(f.i << 1, f.i << 1);
3055
4209
  return f;
3056
4210
  }
3057
4211
 
@@ -3066,7 +4220,7 @@ class Compiler {
3066
4220
  flags &= -2;
3067
4221
  }
3068
4222
  i.arg = flags;
3069
- f.out = f.i << 1;
4223
+ f.out = new PatchList(f.i << 1, f.i << 1);
3070
4224
  if ((flags & RE2Flags.FOLD_CASE) === 0 && runes.length === 1 || runes.length === 2 && runes[0] === runes[1]) {
3071
4225
  i.op = Inst.RUNE1;
3072
4226
  } else if (runes.length === 2 && runes[0] === 0 && runes[1] === Unicode.MAX_RUNE) {
@@ -3171,23 +4325,92 @@ class Simplify {
3171
4325
  }
3172
4326
  switch (re.op) {
3173
4327
  case Regexp.Op.CAPTURE:
4328
+ {
4329
+ const sub = Simplify.simplify(re.subs[0]);
4330
+ if (sub !== re.subs[0]) {
4331
+ const nre = Regexp.fromRegexp(re);
4332
+ nre.runes = [];
4333
+ nre.subs = [sub];
4334
+ return nre;
4335
+ }
4336
+ return re;
4337
+ }
3174
4338
  case Regexp.Op.CONCAT:
3175
4339
  case Regexp.Op.ALTERNATE:
3176
4340
  {
3177
- let nre = re;
4341
+ const newSubs = [];
4342
+ let changed = false;
3178
4343
  for (let i = 0; i < re.subs.length; i++) {
3179
4344
  const sub = re.subs[i];
3180
4345
  const nsub = Simplify.simplify(sub);
3181
- if (nre === re && nsub !== sub) {
3182
- nre = Regexp.fromRegexp(re);
3183
- nre.runes = [];
3184
- nre.subs = re.subs.slice(0, re.subs.length);
4346
+ if (nsub !== sub) {
4347
+ changed = true;
4348
+ }
4349
+ if (re.op === Regexp.Op.CONCAT) {
4350
+ // If any part of a CONCAT is mathematically impossible,
4351
+ // the entire CONCAT sequence becomes impossible.
4352
+ if (nsub.op === Regexp.Op.NO_MATCH) {
4353
+ return new Regexp(Regexp.Op.NO_MATCH);
4354
+ }
4355
+ // Drop empty 0-width match nodes entirely from sequences
4356
+ if (nsub.op === Regexp.Op.EMPTY_MATCH) {
4357
+ changed = true;
4358
+ continue;
4359
+ }
4360
+ // Flatten nested concatenations
4361
+ if (nsub.op === Regexp.Op.CONCAT) {
4362
+ changed = true;
4363
+ newSubs.push(...nsub.subs);
4364
+ continue;
4365
+ }
4366
+ } else if (re.op === Regexp.Op.ALTERNATE) {
4367
+ // Drop impossible branches from alternations
4368
+ if (nsub.op === Regexp.Op.NO_MATCH) {
4369
+ changed = true;
4370
+ continue;
4371
+ }
4372
+ // Flatten nested alternations
4373
+ if (nsub.op === Regexp.Op.ALTERNATE) {
4374
+ changed = true;
4375
+ newSubs.push(...nsub.subs);
4376
+ continue;
4377
+ }
3185
4378
  }
3186
- if (nre !== re) {
3187
- nre.subs[i] = nsub;
4379
+ newSubs.push(nsub);
4380
+ }
4381
+ if (changed) {
4382
+ // If we filtered out all nodes, return the mathematically correct fallback
4383
+ if (newSubs.length === 0) {
4384
+ return new Regexp(re.op === Regexp.Op.CONCAT ? Regexp.Op.EMPTY_MATCH : Regexp.Op.NO_MATCH);
4385
+ }
4386
+ // If only 1 node remains, we don't need a CONCAT/ALT container at all
4387
+ if (newSubs.length === 1) {
4388
+ return newSubs[0];
3188
4389
  }
4390
+ const nre = Regexp.fromRegexp(re);
4391
+ nre.runes = [];
4392
+ nre.subs = newSubs;
4393
+ return nre;
4394
+ }
4395
+ return re;
4396
+ }
4397
+ case Regexp.Op.CHAR_CLASS:
4398
+ {
4399
+ if (re.runes === null) return re;
4400
+
4401
+ // Empty character classes match nothing.
4402
+ if (re.runes.length === 0) {
4403
+ return new Regexp(Regexp.Op.NO_MATCH);
3189
4404
  }
3190
- return nre;
4405
+ // Full character classes match everything.
4406
+ if (re.runes.length === 2 && re.runes[0] === 0 && re.runes[1] === Unicode.MAX_RUNE) {
4407
+ return new Regexp(Regexp.Op.ANY_CHAR);
4408
+ }
4409
+ // Standard catch-all except newline
4410
+ if (re.runes.length === 4 && re.runes[0] === 0 && re.runes[1] === Codepoint.CODES.get('\n') - 1 && re.runes[2] === Codepoint.CODES.get('\n') + 1 && re.runes[3] === Unicode.MAX_RUNE) {
4411
+ return new Regexp(Regexp.Op.ANY_CHAR_NOT_NL);
4412
+ }
4413
+ return re;
3191
4414
  }
3192
4415
  case Regexp.Op.STAR:
3193
4416
  case Regexp.Op.PLUS:
@@ -3224,7 +4447,9 @@ class Simplify {
3224
4447
  }
3225
4448
  subs.push(Simplify.simplify1(Regexp.Op.PLUS, re.flags, sub, null));
3226
4449
  nre.subs = subs.slice(0);
3227
- return nre;
4450
+
4451
+ // Ensure newly created CONCAT is properly flattened
4452
+ return Simplify.simplify(nre);
3228
4453
  }
3229
4454
  // Special case x{0} handled above.
3230
4455
 
@@ -3262,7 +4487,8 @@ class Simplify {
3262
4487
  if (prefixSubs !== null) {
3263
4488
  const prefix = new Regexp(Regexp.Op.CONCAT);
3264
4489
  prefix.subs = prefixSubs.slice(0);
3265
- return prefix;
4490
+ // Ensure newly created CONCAT is properly flattened
4491
+ return Simplify.simplify(prefix);
3266
4492
  }
3267
4493
 
3268
4494
  // Some degenerate case like min > max or min < max < 0.
@@ -3295,6 +4521,13 @@ class Simplify {
3295
4521
  return sub;
3296
4522
  }
3297
4523
 
4524
+ // Handle impossible targets gracefully.
4525
+ // e.g. Trying to match "NO_MATCH" 0 or 1 times (QUEST/STAR) evaluates to EMPTY_MATCH.
4526
+ if (sub.op === Regexp.Op.NO_MATCH) {
4527
+ if (op === Regexp.Op.PLUS) return sub; // 1+ times is impossible
4528
+ return new Regexp(Regexp.Op.EMPTY_MATCH);
4529
+ }
4530
+
3298
4531
  // The operators are idempotent if the flags match.
3299
4532
  if (op === sub.op && (flags & RE2Flags.NON_GREEDY) === (sub.flags & RE2Flags.NON_GREEDY)) {
3300
4533
  return sub;
@@ -3302,10 +4535,10 @@ class Simplify {
3302
4535
  if (re !== null && re.op === op && (re.flags & RE2Flags.NON_GREEDY) === (flags & RE2Flags.NON_GREEDY) && sub === re.subs[0]) {
3303
4536
  return re;
3304
4537
  }
3305
- re = new Regexp(op);
3306
- re.flags = flags;
3307
- re.subs = [sub];
3308
- return re;
4538
+ const nre = new Regexp(op);
4539
+ nre.flags = flags;
4540
+ nre.subs = [sub];
4541
+ return nre;
3309
4542
  }
3310
4543
  }
3311
4544
 
@@ -3651,16 +4884,6 @@ class CharClass {
3651
4884
  }
3652
4885
  }
3653
4886
 
3654
- class Pair {
3655
- static of(first, second) {
3656
- return new Pair(first, second);
3657
- }
3658
- constructor(first, second) {
3659
- this.first = first;
3660
- this.second = second;
3661
- }
3662
- }
3663
-
3664
4887
  // StringIterator: a stream of runes with an opaque cursor, permitting
3665
4888
  // rewinding. The units of the cursor are not specified beyond the
3666
4889
  // fact that ASCII characters are single width. (Cursor positions
@@ -3807,18 +5030,59 @@ class Parser {
3807
5030
  // stride).
3808
5031
  static ANY_TABLE = new UnicodeRangeTable(new Uint32Array([0, Unicode.MAX_RUNE, 1]));
3809
5032
 
5033
+ // Ascii tables
5034
+ static ASCII_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1]));
5035
+ static ASCII_FOLD_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1, 0x017f, 0x017f, 1,
5036
+ // Old English long s (ſ), folds to S/s.
5037
+ 0x212a, 0x212a, 1 // Kelvin K, folds to K/k.
5038
+ ]));
5039
+
3810
5040
  // unicodeTable() returns the Unicode RangeTable identified by name
3811
5041
  // and the table of additional fold-equivalent code points.
3812
5042
  // Returns null if |name| does not identify a Unicode character range.
3813
5043
  static unicodeTable(name) {
3814
5044
  if (name === 'Any') {
3815
- return Pair.of(Parser.ANY_TABLE, Parser.ANY_TABLE);
5045
+ return {
5046
+ tab: Parser.ANY_TABLE,
5047
+ fold: Parser.ANY_TABLE,
5048
+ sign: 1
5049
+ };
5050
+ }
5051
+ if (name === 'Ascii') {
5052
+ return {
5053
+ tab: Parser.ASCII_TABLE,
5054
+ fold: Parser.ASCII_FOLD_TABLE,
5055
+ sign: 1
5056
+ };
5057
+ }
5058
+ if (name === 'Assigned') {
5059
+ // Assigned is the mathematical inversion of Cn (Unassigned)
5060
+ return {
5061
+ tab: UnicodeTables.CATEGORIES.get('Cn'),
5062
+ fold: UnicodeTables.CATEGORIES.get('Cn'),
5063
+ sign: -1
5064
+ };
5065
+ }
5066
+ if (name === 'Lc') {
5067
+ return {
5068
+ tab: UnicodeTables.CATEGORIES.get('LC'),
5069
+ fold: UnicodeTables.FOLD_CATEGORIES.get('LC'),
5070
+ sign: 1
5071
+ };
3816
5072
  }
3817
5073
  if (UnicodeTables.CATEGORIES.has(name)) {
3818
- return Pair.of(UnicodeTables.CATEGORIES.get(name), UnicodeTables.FOLD_CATEGORIES.get(name));
5074
+ return {
5075
+ tab: UnicodeTables.CATEGORIES.get(name),
5076
+ fold: UnicodeTables.FOLD_CATEGORIES.get(name),
5077
+ sign: 1
5078
+ };
3819
5079
  }
3820
5080
  if (UnicodeTables.SCRIPTS.has(name)) {
3821
- return Pair.of(UnicodeTables.SCRIPTS.get(name), UnicodeTables.FOLD_SCRIPT.get(name));
5081
+ return {
5082
+ tab: UnicodeTables.SCRIPTS.get(name),
5083
+ fold: UnicodeTables.FOLD_SCRIPT.get(name),
5084
+ sign: 1
5085
+ };
3822
5086
  }
3823
5087
  return null;
3824
5088
  }
@@ -4127,7 +5391,7 @@ class Parser {
4127
5391
  this.flags = flags;
4128
5392
  // number of capturing groups seen
4129
5393
  this.numCap = 0;
4130
- this.namedGroups = {};
5394
+ this.namedGroups = Object.create(null);
4131
5395
  // Stack of parsed expressions.
4132
5396
  this.stack = [];
4133
5397
  this.free = null;
@@ -4971,9 +6235,11 @@ class Parser {
4971
6235
  const i = lit.indexOf('\\E');
4972
6236
  if (i >= 0) {
4973
6237
  lit = lit.substring(0, i);
6238
+ t.skipString(lit);
6239
+ t.skipString('\\E');
6240
+ } else {
6241
+ t.skipString(lit);
4974
6242
  }
4975
- t.skipString(lit);
4976
- t.skipString('\\E');
4977
6243
  let j = 0;
4978
6244
  while (j < lit.length) {
4979
6245
  const codepoint = lit.codePointAt(j);
@@ -4989,6 +6255,9 @@ class Parser {
4989
6255
  t.rewindTo(savedPos);
4990
6256
  break;
4991
6257
  }
6258
+ } else {
6259
+ // Unconditionally rewind if PERL_X is off, or if string ended abruptly
6260
+ t.rewindTo(savedPos);
4992
6261
  }
4993
6262
  const re = this.newRegexp(Regexp.Op.CHAR_CLASS);
4994
6263
  re.flags = this.flags;
@@ -5314,8 +6583,11 @@ class Parser {
5314
6583
  if (pair === null) {
5315
6584
  throw new RE2JSSyntaxException(Parser.ERR_INVALID_CHAR_RANGE, t.from(startPos));
5316
6585
  }
5317
- const tab = pair.first;
5318
- const fold = pair.second; // fold-equivalent table
6586
+ if (pair.sign < 0) {
6587
+ sign = 0 - sign;
6588
+ }
6589
+ const tab = pair.tab;
6590
+ const fold = pair.fold; // fold-equivalent table
5319
6591
  // Variation of CharClass.appendGroup() for tables.
5320
6592
  if ((this.flags & RE2Flags.FOLD_CASE) === 0 || fold === null) {
5321
6593
  cc.appendTableWithSign(tab, sign);
@@ -5459,6 +6731,7 @@ class RE2 {
5459
6731
  res.prefixUTF8 = re2.prefixUTF8;
5460
6732
  res.prefixComplete = re2.prefixComplete;
5461
6733
  res.prefixRune = re2.prefixRune;
6734
+ res.prefilter = re2.prefilter;
5462
6735
  return res;
5463
6736
  }
5464
6737
 
@@ -5501,8 +6774,10 @@ class RE2 {
5501
6774
  let re = Parser.parse(expr, mode);
5502
6775
  const maxCap = re.maxCap();
5503
6776
  re = Simplify.simplify(re);
6777
+ const prefilter = PrefilterTree.build(re);
5504
6778
  const prog = Compiler.compileRegexp(re);
5505
6779
  const re2 = new RE2(expr, prog, maxCap, longest);
6780
+ re2.prefilter = prefilter.type === Prefilter.Type.NONE ? null : prefilter;
5506
6781
  const [prefixCompl, prefixStr] = prog.prefix();
5507
6782
  re2.prefixComplete = prefixCompl;
5508
6783
  re2.prefix = prefixStr;
@@ -5534,12 +6809,78 @@ class RE2 {
5534
6809
  this.prefixComplete = false; // true if prefix is the entire regexp
5535
6810
  this.prefixRune = 0; // first rune in prefix
5536
6811
  this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
5537
- this.dfa = new DFA(prog); // Initialize the Lazy DFA
6812
+ this.dfa = new DFA(this.prog); // initialize Lazy DFA
6813
+ this.onepass = OnePass.compile(this.prog); // compile OnePass
6814
+ this.prefilter = null;
6815
+ }
6816
+ matchPrefixComplete(input, pos, anchor, ncap) {
6817
+ // If strictly anchored, execution must start at 0
6818
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
6819
+ return null;
6820
+ }
6821
+ let matchStart = -1;
6822
+ let matchEnd = -1;
6823
+ const pLen = input.prefixLength(this);
6824
+ if (anchor === RE2Flags.UNANCHORED) {
6825
+ const idx = input.index(this, pos);
6826
+ if (idx < 0) return null;
6827
+ matchStart = pos + idx;
6828
+ matchEnd = matchStart + pLen;
6829
+ } else if (anchor === RE2Flags.ANCHOR_BOTH) {
6830
+ if (input.endPos() !== pLen) return null;
6831
+ const idx = input.index(this, 0);
6832
+ if (idx !== 0) return null;
6833
+ matchStart = 0;
6834
+ matchEnd = pLen;
6835
+ } else if (anchor === RE2Flags.ANCHOR_START) {
6836
+ const idx = input.index(this, 0);
6837
+ if (idx !== 0) return null;
6838
+ matchStart = 0;
6839
+ matchEnd = pLen;
6840
+ }
6841
+ if (matchStart < 0) return null;
6842
+
6843
+ // If captures are requested (e.g. findSubmatch instead of test), populate bounds
6844
+ if (ncap > 0) {
6845
+ const matchcap = new Int32Array(ncap).fill(-1);
6846
+ matchcap[0] = matchStart;
6847
+ matchcap[1] = matchEnd;
6848
+ return Array.from(matchcap);
6849
+ }
6850
+ return []; // Matched successfully, but no capture data requested
5538
6851
  }
5539
6852
  executeEngine(input, pos, anchor, ncap) {
6853
+ // LITERAL FAST PATH
6854
+ // If the entire regex is just a literal string (and no nested capture boundaries are requested),
6855
+ // bypass all state machines and execute via V8's blistering fast native indexOf
6856
+ if (this.prefixComplete && (ncap === 0 || this.numSubexp === 0)) {
6857
+ return this.matchPrefixComplete(input, pos, anchor, ncap);
6858
+ }
6859
+
6860
+ // PREFILTER FAST PATH
6861
+ // If the unanchored query requires specific literal strings (e.g. "a.*b"),
6862
+ // verify those strings exist using high-speed JS string searches before waking up the state engines.
6863
+ if (this.prefilter !== null && anchor === RE2Flags.UNANCHORED) {
6864
+ if (!this.prefilter.eval(input, pos)) {
6865
+ return null;
6866
+ }
6867
+ }
6868
+
6869
+ // FAST PATH: OnePass DFA engine.
6870
+ // If compiled successfully, it perfectly supports capture groups
6871
+ // and is blisteringly fast since it skips thread queues completely.
6872
+ if (this.onepass !== null) {
6873
+ return OnePass.execute(this, input, pos, anchor, ncap);
6874
+ }
6875
+
5540
6876
  // If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
5541
6877
  // We must use the NFA.
5542
6878
  if (ncap > 0) {
6879
+ // Backtracker bit-state execution bounds check
6880
+ if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
6881
+ return Backtracker.execute(this, input, pos, anchor, ncap);
6882
+ }
6883
+ // NFA execution
5543
6884
  return this.doExecuteNFA(input, pos, anchor, ncap);
5544
6885
  }
5545
6886
  const dfaResult = this.dfa.match(input, pos, anchor);
@@ -5548,6 +6889,11 @@ class RE2 {
5548
6889
  return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
5549
6890
  }
5550
6891
 
6892
+ // Backtracker bit-state execution bounds check
6893
+ if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
6894
+ return Backtracker.execute(this, input, pos, anchor, ncap);
6895
+ }
6896
+
5551
6897
  // Fallback to NFA
5552
6898
  return this.doExecuteNFA(input, pos, anchor, ncap);
5553
6899
  }
@@ -6128,6 +7474,50 @@ class RE2 {
6128
7474
  }
6129
7475
  }
6130
7476
 
7477
+ class RE2Set {
7478
+ constructor(anchor = RE2Flags.UNANCHORED, flags = RE2Flags.PERL) {
7479
+ this.anchor = anchor;
7480
+ this.flags = flags;
7481
+ this.regexps = [];
7482
+ this.prog = null;
7483
+ this.dfa = null;
7484
+ this.dummyRe2 = null;
7485
+ }
7486
+ add(pattern) {
7487
+ if (this.prog) {
7488
+ throw new RE2JSCompileException('Cannot add patterns after compile');
7489
+ }
7490
+ const re = Parser.parse(pattern, this.flags);
7491
+ this.regexps.push(Simplify.simplify(re));
7492
+ return this.regexps.length - 1;
7493
+ }
7494
+ compile() {
7495
+ if (this.prog) return;
7496
+ this.prog = Compiler.compileSet(this.regexps);
7497
+ this.dfa = new DFA(this.prog);
7498
+ this.dummyRe2 = {
7499
+ prog: this.prog,
7500
+ cond: this.prog.startCond(),
7501
+ prefix: '',
7502
+ prefixRune: 0,
7503
+ longest: false
7504
+ };
7505
+ }
7506
+ match(input) {
7507
+ if (!this.prog) this.compile();
7508
+ const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
7509
+
7510
+ // Fast path: Try the blistering fast DFA
7511
+ const dfaResult = this.dfa.matchSet(machineInput, 0, this.anchor);
7512
+ if (dfaResult !== null) return dfaResult;
7513
+
7514
+ // Safe Fallback: Handle boundaries (\b) or massive state explosions via NFA
7515
+ const machine = Machine.fromRE2(this.dummyRe2);
7516
+ machine.init(0);
7517
+ return machine.matchSet(machineInput, 0, this.anchor);
7518
+ }
7519
+ }
7520
+
6131
7521
  /**
6132
7522
  * Transform JS regex string to RE2 regex string
6133
7523
  */
@@ -6210,7 +7600,8 @@ class TranslateRegExpString {
6210
7600
  default:
6211
7601
  {
6212
7602
  result += '\\';
6213
- let symSize = Utils.charCount(ch.codePointAt(0));
7603
+ let cp = data.codePointAt(i + 1);
7604
+ let symSize = Utils.charCount(cp);
6214
7605
  result += data.substring(i + 1, i + 1 + symSize);
6215
7606
  i += symSize + 1;
6216
7607
  continue;
@@ -6230,7 +7621,8 @@ class TranslateRegExpString {
6230
7621
  continue;
6231
7622
  }
6232
7623
  }
6233
- let symSize = Utils.charCount(ch.codePointAt(0));
7624
+ let cp = data.codePointAt(i);
7625
+ let symSize = Utils.charCount(cp);
6234
7626
  result += data.substring(i, i + symSize);
6235
7627
  i += symSize;
6236
7628
  }
@@ -6587,5 +7979,5 @@ class RE2JS {
6587
7979
  }
6588
7980
  }
6589
7981
 
6590
- export { Matcher, RE2JS, RE2JSCompileException, RE2JSException, RE2JSFlagsException, RE2JSGroupException, RE2JSSyntaxException };
7982
+ export { Matcher, RE2JS, RE2JSCompileException, RE2JSException, RE2JSFlagsException, RE2JSGroupException, RE2JSInternalException, RE2JSSyntaxException, RE2Set };
6591
7983
  //# sourceMappingURL=index.esm.js.map