re2js 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.0.2
5
+ * @version v2.1.0
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -60,6 +60,23 @@
60
60
  /**
61
61
  * Various constants and helper for unicode codepoints.
62
62
  */
63
+ const ASCII_SIZE = 128;
64
+ const ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
65
+ const ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
66
+ for (let i = 0; i < ASCII_SIZE; i++) {
67
+ if (i >= 97 && i <= 122) {
68
+ // a-z
69
+ ASCII_TO_UPPER[i] = i - 32;
70
+ } else {
71
+ ASCII_TO_UPPER[i] = i;
72
+ }
73
+ if (i >= 65 && i <= 90) {
74
+ // A-Z
75
+ ASCII_TO_LOWER[i] = i + 32;
76
+ } else {
77
+ ASCII_TO_LOWER[i] = i;
78
+ }
79
+ }
63
80
  class Codepoint {
64
81
  // codePointAt(0)
65
82
  static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
@@ -67,6 +84,7 @@
67
84
  // convert unicode codepoint to upper case codepoint
68
85
  // return same codepoint, if cannot do it (or codepoint not have upper variation)
69
86
  static toUpperCase(codepoint) {
87
+ if (codepoint < ASCII_SIZE) return ASCII_TO_UPPER[codepoint];
70
88
  const s = String.fromCodePoint(codepoint).toUpperCase();
71
89
  if (s.length > 1) {
72
90
  return codepoint;
@@ -81,6 +99,7 @@
81
99
  // convert unicode codepoint to lower case codepoint
82
100
  // return same codepoint, if cannot do it (or codepoint not have lower variation)
83
101
  static toLowerCase(codepoint) {
102
+ if (codepoint < ASCII_SIZE) return ASCII_TO_LOWER[codepoint];
84
103
  const s = String.fromCodePoint(codepoint).toLowerCase();
85
104
  if (s.length > 1) {
86
105
  return codepoint;
@@ -203,9 +222,11 @@
203
222
  C: () => new UnicodeRangeTable(decodeRanges('AfBgDgBBOrWrWBHHBCBICCVuMuMnBBBzBBBE4B4BBGBcDBHQBXhGhGxBBB8BBBmDNB8BBByBBBQddBCCMEBgBHBsCiFiFJBBDBBXIICCBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBPMMBEB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKMMBDBbEByBPBDBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBjoIBvLBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCB-FCBHBBHBBHBBECBIIIBLBDBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIB-BGGBLBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMBxhBPBXJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBF-6DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBrCHBxDUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQ
B7BEBCvOBrB1GBsBDBC-OBIFFDQQmGBBRoBBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEvq3VBxHvw-FB', false)),
204
223
  Cc: () => new UnicodeRangeTable(decodeRanges('AfgDgB', true)),
205
224
  Cf: () => new UnicodeRangeTable(decodeRanges('tFzqBzqBBEBXhGhGyBhMhMBxCxCs5D9-B9-BBDBbEByBEBCJBw03B6H6HBBBimEQQj7IPBhjiBDBwmFHBn0rYffB+CB', false)),
225
+ Cn: () => new UnicodeRangeTable(decodeRanges('4bBBHDBICCVuMuMnBBBzBBBE4B4BBGBcDBHKBvI9B9BBmDmDBMB8BBByBBBQddBCCMEBgBDDBDBuHJJBDDBXXICCBBBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBbFB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKmDmDNBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBDBvzIBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCC-FCBHBBHBBHBBECBIIIBIBGBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIBlCJBCBBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMB3iBJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBJ76DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBjGUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQB7BEBCvOBrB1GBsBDBC-OBIFFDQQmGBB
RoBBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEwi3VBCdBhD-DBxHvw-BB---BBB---BBB', false)),
206
226
  Co: () => new UnicodeRangeTable(decodeRanges('gg4B-nGh4hc9--BD9--B', true)),
207
227
  Cs: () => new UnicodeRangeTable(decodeRanges('gg2B--B', true)),
208
228
  L: () => new UnicodeRangeTable(decodeRanges('hCZBHZBwBLLFGGBVBCeBCpOBFLBPEBICCiEEBCBBDDBCHHCCBCCCBSBCyCBCqEBJlFBClBBDHHBnBBoCaBFDBuBqBBkBBBCiDBCQQBIIBLLBBBDRRCdBe4CBMZZBfBKBBFGGBUBFKKEYYBXBIKBGXBCFBSpBB7B1BBETTIJBQPBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNGB7BBBCCCBDBCXBCCCBIBCBBKDDBDBCWWBCBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNSSBkBBCGGDqBBCsKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPBR1CBDFBErTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBkBFFkC4CBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBzC+C+CBtBBSHB3BdBOBBLrBBbjBBqBCBLjBBDKBGqBBDCBqBDBCFBCBBEGGB+FBhC1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGB1DOORMBmDFFDJBCEEBDBHGCBCBCKBDDBGEBF1B1BB8zC8zCBjHBHDBEBBNlBBCGGD3BBIRRBVBKGBCGBCGBCGBCGBCGBCGBCGBxC2O2OBrBrBBDBGBBF1CBHCBC5CBCDBGqBBC9CBSfBxBPBhQ-tGBhCs0VBkCtBBDsIBEPBLBBVuBBReBDlCByBIBDmDBDiCBDBBCCCBGBWPBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBdRRBDBCJBLEBCoBBYCBCHBVWBEEEBwBBCEEBDDBDBDCCZCBDKBICBNFBDFBDFBKGBCGBCqBBCNBHyDBej9KBNWBFwBBloItLBDpDBnBGBNEBGCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBlBZBHZBM4CBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBxB9EBTjBBFjBBFnBBJzBBNKBCOBCGBCBBCKBCOBCGBCBBEzBBN2JBKVBLHBZFBCpBBCIBmCFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4ByBBOyBBOjBBnBbBKWB7HpBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB1D-BBgBHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBBRDBgBUBGSBxNsBB0G-BBhEqCBGjCjCBLBhCBBCddB2-FBJ1mBBqBJBo3IDBCGBCBBCiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBn7F0CBCmCBCBBDDDBD
DBCBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BeBHFBmI9BBzEsBBLGBRiKiKBcBTrBBlPbBlHdBDwPwPBFBCDBCBBCOBCkGB8BjCBI1lB1lBBCBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
229
+ LC: () => new UnicodeRangeTable(decodeRanges('hCZBHZB7BLLBVBCeBCiGBCDBFvGBCaBhGDBDBBECBCHHCCBCCCBSBCyCBCqEBJlFBClBBKoBB44ClBBCGGDqBBDCBhV1CBDFBjkCKBGqBBDCBhCrBBgCMBChBBmD1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGBmIFFDJBCEEBDBHGCBCBCFBFDDBCBGEBF1B1BB8zC8zCB6DBDmDBHDBEBBNlBBCGGzoetBBTbBnEtCBCWBEDBC9BBDBBCCCBGBZBBE2Z2ZBpBBGIBIvCBh6TGBNEBqgBZBHZBmlBvCBhDjBBFjBB1DKBCOBCGBCBBCKBCOBCGBCBBk2ByBBOyBB+CVBLVB74C-BBhrV-BBhsZ0CBCmCBCBBDDDBDDBCBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BJBCTBHFB2uCjCB', false)),
209
230
  Ll: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBC2BCBQCBuBCDECBBBDCCDEEBFFDEEBBBDDDCCCDCCBCCDEECDDBDDBBBHGDCOCBSCBDDCEEC4BCBFBDDDBCCFICBjCBCaBiGCCEEEBBBTccBhBBCBBECBCWCBDBCGDB0B0BBuBBCgBCK0BCDMCBgDCxBoBBo6CqBBDCB5XFBjkCIBC2D2DBqBBgCMBChBBnD0ECBHBCgDCBHBJFBLHBJHBJFBLHBJHBJNBDHBJHBJHBJEBCBBHEEBBBCBBJDBDBBJHBLCBCBBzIEEBEEcKFDBBJDBF2B2Bs1CvBBCEEBGCFCCBCCBEBGiDCBIICFFNlBBCGG0oesBCUaCoEMCBBBC+BCBGBCCCDICFCCDCCBBBCSCGGGCMCFCCDEECICbEE2ZqBBGIBIvCBh6TGBNEBqhBZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfB7sZZBbGBCRBbZBbDBCCCBFBCKBbZBbZBbZBbZBbZBbZBbZBbZBbbBdYBCFBbYBCFBbYBCFBbYBCFBbYBCFBC15B15BBIBCTBHFB4vChBB', false)),
210
231
  Lm: () => new UnicodeRangeTable(decodeRanges('wVRBFLBPEBICCmEGG-OnHnHlFBBuIBBFgBgBKEEhFoFoF1mBgEgE2R72B72BsDkTkTxOFBvF+BBOjBjBBjBByVOORMBg-CBByHgGgG2OsBsBBDBGiDiDB+C+CBBB34bjnBjnBBEBvIzDzDdBB6DIBxCYYqDCBEBB2OXXqEtDtDWBBoDDBKngVngVuBBBh-BFBCpBBCIB0sBhBhBxuXDB9PCBpBBBnRMBhCBBCtgQtgQBCBCGBCBByhM9BBqGGBudgjBgjB', false)),
211
232
  Lo: () => new UnicodeRangeTable(decodeRanges('qFQQhIFFBCBxG8Z8ZBZBFDBuBfBCJBkBBBCiDBCZZBLLBBBDRRCdBe4CBMZZBfBWVBrBYBIKBGXBCFBSoBB8B1BBETTIJBROBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNFB8BBBCCCBDBCXBCCCBIBCBBKDDBDBYDBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNyDyDBnKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPByDrTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBpBkCkCBhBBC0BBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBxFuBBSHB3BdBOBBLrBBbjBBqBCBLdByDDBCFBCBBE7hB7hBBCB4-C3BBZWBKGBCGBCGBCGBCGBCGBCGBCGBoR2B2BF1CBJCCB4CBFGGBpBBC9CBSfBxBPBhQ-tGBhC0wUBC2jBBkCnBBJrIBFPBLBBjCyByBBkCBqFoDoDEGBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBuBEBDIBLEBCoBBYCBCHBVPBCFBEEEBwBBCEEBDDBDBDCCZBBEKBIPPBEBDFBDFBKGBCGByEiBBej9KBNWBFwBBloItLBDpDBkCCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBqDJBCsBBDeBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBhEtCBjDnBBJzBB9CzBBN2JBKVBLHB5EFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4FjBBnBDBCxJxJBoBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB0GHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBB0BUBGSB0NnBB2MqCBGwFwFB2-FBJ1mBBqBJB43IiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBxzI2P2PBrBBiBiKiKBcBTrBBlPaBmHdBDwPwPBFBCDBCBBCOBCkGB8pBDBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
@@ -414,8 +435,11 @@
414
435
  Zanabazar_Square: () => new UnicodeRangeTable(decodeRanges('gwmCnC', true))
415
436
  });
416
437
  static FOLD_CATEGORIES = new LazyMap({
417
- L: () => new UnicodeRangeTable(decodeRanges('laA', true)),
418
- Ll: () => new UnicodeRangeTable(decodeRanges('hCZBmDWBCGBiBuBCEECDOCDuBCBECEBBCCCBCCBBBDDBCBBCCBEBBCBBCECBCCDCCBCCBBBCCCBEEIBBCBBCBBCOCDQCDBBCCCBBBC4BCIBBCBBDCCBCBCGC3HrBrBCEEJHHCCBCCCBCCBPBCIBkBJJCUCGDDCBBDyBBxBgBCK2BCBMCD+CCDlBBq6ClBBCGGzW1CB0kCHHBpBBDCBhK0ECKgDCKHBJFBLHBJHBJFBMGCJHBZHBJHBJHBJEBMEBMDBNEBMEBqJEEBHHxC9zC9zCBuBBxBCCBBBDGCBCBCDDJCBCgDCJCCFuqeuqeCqBCUaCoEMCE8BCLECBICFCCDCCEUCBDBCEBCOCBCBCCCBEEGGCZs5Vs5VBYBmmBnBBpEjBB9EKBCOBCGBCBBr3ByBB+EVB75CfBhsVfBh1ehBB', false)),
438
+ C: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
439
+ Cn: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
440
+ L: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
441
+ LC: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
442
+ Ll: () => new UnicodeRangeTable(decodeRanges('hCZBmDWBCGBiBuBCEECDOCDuBCBECEBBCCCBCCBBBDDBCBBCCBEBBCBBCECBCCDCCBCCBBBCCCBEEIBBCBBCBBCOCDQCDBBCCCBBBC4BCIBBCBBDCCBCBCGC3HrBrBCEEJHHCCBCCCBCCBPBCIBkBJJCUCGDDCBBDyBBxBgBCK2BCBMCD+CCDlBBq6ClBBCGGzW1CB0kCHHBpBBDCBhK0ECKgDCKHBJFBLHBJHBJFBMGCJHBZHBJHBJHBJEBMEBMDBNEBMEBqJEEBHHxC9zC9zCBuBBxBCCBBBDGCBCBCDDJCBCgDCJCCFuqeuqeCqBCUaCoEMCE8BCLECBICFCCDCCEUCBDBCEBCOCBCBCCCBEECKCZs5Vs5VBYBmmBnBBpEjBB9EKBCOBCGBCBBr3ByBB+EVB75CfBhsVfBh1ehBB', false)),
419
443
  Lt: () => new UnicodeRangeTable(decodeRanges('kOCCBCCBCClBCCtsHHBJHBJHBMQQwBAB', false)),
420
444
  Lu: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBCuBCEECDOCDsBCDECBBBDCCDEEGDDECBDDDCCCDFFDEECDDECCGBBCBBCBBCOCBSCDBBCEECkBCEQCJDDBCCFICBEBCBBCCCBEEBCCBCBCEBDCCBDDIDDCBBEFBGLLBnFnFsBCCEEEBBBvBDBCdBCBBECBCWCBDBCGD1BvBBCgBCK0BCDMCBgDCyBlBBq6CqBBDCB5XFBjkCIBCvHvHERRzD0ECGGGC8CCBHBJFBLHBJHBJFBMGCJHBJNBzBBBNSSBPPBEEpL2B2Bs1CvBBCEEBGCHDDLiDCJCCFNNBkBBCGG0oesBCUaCoEMCE8BCLCCDICFFFCBBDSCMOCFCCDEEGECb9a9advCBi8UZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfBj1ehBB', false)),
421
445
  M: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
@@ -424,7 +448,9 @@
424
448
  static FOLD_SCRIPT = new LazyMap({
425
449
  Common: () => new UnicodeRangeTable(decodeRanges('8cgBgB', false)),
426
450
  Greek: () => new UnicodeRangeTable(decodeRanges('1FwUwU', false)),
427
- Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false))
451
+ Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
452
+ Latin: () => new UnicodeRangeTable(decodeRanges('y+pBCC', false)),
453
+ Unknown: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false))
428
454
  });
429
455
  }
430
456
 
@@ -678,7 +704,7 @@
678
704
 
679
705
  // Returns the array of runes in the specified Java UTF-16 string.
680
706
  static stringToRunes(str) {
681
- return String(str).split('').map(s => s.codePointAt(0));
707
+ return Array.from(String(str)).map(s => s.codePointAt(0));
682
708
  }
683
709
 
684
710
  // Returns the Java UTF-16 string containing the single rune |r|.
@@ -949,6 +975,14 @@
949
975
  endPos() {
950
976
  return this.end;
951
977
  }
978
+ hasString() {
979
+ return false;
980
+ }
981
+
982
+ // Helper for the exact-literal fast-path execution router
983
+ prefixLength() {
984
+ return 0;
985
+ }
952
986
  }
953
987
 
954
988
  // An implementation of MachineInput for UTF-8 byte arrays.
@@ -960,6 +994,14 @@
960
994
  this.start = start;
961
995
  this.end = end;
962
996
  }
997
+ hasString(prefilter, pos) {
998
+ const target = prefilter.bytes;
999
+ if (target.length === 0) return true;
1000
+
1001
+ // Reuse the high-speed indexOf method already implemented below
1002
+ const idx = this.indexOf(this.bytes, target, this.start + pos);
1003
+ return idx !== -1 && idx <= this.end - target.length;
1004
+ }
963
1005
 
964
1006
  // Returns the rune at the specified index; the units are
965
1007
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
@@ -1036,10 +1078,10 @@
1036
1078
  indexOf(source, target, fromIndex = 0) {
1037
1079
  let targetLength = target.length;
1038
1080
  if (targetLength === 0) {
1039
- return -1;
1081
+ return fromIndex <= this.end ? fromIndex : -1;
1040
1082
  }
1041
- let sourceLength = source.length;
1042
- for (let i = fromIndex; i <= sourceLength - targetLength; i++) {
1083
+ let limit = this.end - targetLength;
1084
+ for (let i = fromIndex; i <= limit; i++) {
1043
1085
  for (let j = 0; j < targetLength; j++) {
1044
1086
  if (source[i + j] !== target[j]) {
1045
1087
  break;
@@ -1050,6 +1092,9 @@
1050
1092
  }
1051
1093
  return -1;
1052
1094
  }
1095
+ prefixLength(re2) {
1096
+ return re2.prefixUTF8.length;
1097
+ }
1053
1098
  }
1054
1099
 
1055
1100
  // |pos| and |width| are in JS "char" units.
@@ -1060,6 +1105,10 @@
1060
1105
  this.start = start;
1061
1106
  this.end = end;
1062
1107
  }
1108
+ hasString(prefilter, pos) {
1109
+ const idx = this.charSequence.indexOf(prefilter.str, this.start + pos);
1110
+ return idx !== -1 && idx <= this.end - prefilter.str.length;
1111
+ }
1063
1112
 
1064
1113
  // Returns the rune at the specified index; the units are
1065
1114
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
@@ -1105,6 +1154,9 @@
1105
1154
  const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
1106
1155
  return Utils.emptyOpContext(r1, r2);
1107
1156
  }
1157
+ prefixLength(re2) {
1158
+ return re2.prefix.length;
1159
+ }
1108
1160
  }
1109
1161
  class MachineInput {
1110
1162
  static fromUTF8(bytes, start = 0, end = bytes.length) {
@@ -1195,6 +1247,17 @@
1195
1247
  }
1196
1248
  }
1197
1249
 
1250
+ /**
1251
+ * An exception thrown for internal engine errors, such as corrupted bytecodes.
1252
+ */
1253
+ class RE2JSInternalException extends RE2JSException {
1254
+ /** @param {string} message */
1255
+ constructor(message) {
1256
+ super(message);
1257
+ this.name = 'RE2JSInternalException';
1258
+ }
1259
+ }
1260
+
1198
1261
  /**
1199
1262
  * A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
1200
1263
  *
@@ -1397,6 +1460,23 @@
1397
1460
  }
1398
1461
  return this.substring(start, end);
1399
1462
  }
1463
+
1464
+ /**
1465
+ * Returns a dictionary map of all named capturing groups and their matched values.
1466
+ * If a group was not matched, its value will be `null`.
1467
+ * @returns {Record<string, string|null>}
1468
+ */
1469
+ getNamedGroups() {
1470
+ if (!this.hasMatch) {
1471
+ throw new RE2JSGroupException('perhaps no match attempted');
1472
+ }
1473
+ const result = {};
1474
+ for (const name of Object.keys(this.namedGroups)) {
1475
+ result[name] = this.group(name);
1476
+ }
1477
+ return result;
1478
+ }
1479
+
1400
1480
  /**
1401
1481
  * Returns the number of subgroups in this pattern.
1402
1482
  *
@@ -1821,16 +1901,20 @@
1821
1901
  }
1822
1902
  return r === r0;
1823
1903
  }
1824
- // Peek at the first few pairs.
1825
- // Should handle ASCII well.
1826
- for (let j = 0; j < this.runes.length && j <= 8; j += 2) {
1827
- if (r < this.runes[j]) {
1828
- return false;
1829
- }
1830
- if (r <= this.runes[j + 1]) {
1831
- return true;
1904
+ const len = this.runes.length;
1905
+ // If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
1906
+ if (len === 2 || len === 4 || len === 6 || len === 8) {
1907
+ for (let j = 0; j < len; j += 2) {
1908
+ if (r < this.runes[j]) {
1909
+ return false;
1910
+ }
1911
+ if (r <= this.runes[j + 1]) {
1912
+ return true;
1913
+ }
1832
1914
  }
1915
+ return false; // Stop here
1833
1916
  }
1917
+
1834
1918
  // Otherwise binary search.
1835
1919
  let lo = 0;
1836
1920
  let hi = this.runes.length / 2 | 0;
@@ -1848,6 +1932,40 @@
1848
1932
  }
1849
1933
  return false;
1850
1934
  }
1935
+
1936
+ // matchRunePos checks whether the instruction matches (and consumes) r.
1937
+ // If so, it returns the index of the matching rune pair.
1938
+ // If not, it returns -1.
1939
+ matchRunePos(r) {
1940
+ if (this.runes.length === 1) {
1941
+ const r0 = this.runes[0];
1942
+ if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
1943
+ return Unicode.equalsIgnoreCase(r0, r) ? 0 : -1;
1944
+ }
1945
+ return r === r0 ? 0 : -1;
1946
+ }
1947
+ const len = this.runes.length;
1948
+ if (len === 2 || len === 4 || len === 6 || len === 8) {
1949
+ for (let j = 0; j < len; j += 2) {
1950
+ if (r < this.runes[j]) return -1;
1951
+ if (r <= this.runes[j + 1]) return Math.floor(j / 2);
1952
+ }
1953
+ return -1;
1954
+ }
1955
+ let lo = 0;
1956
+ let hi = Math.floor(len / 2);
1957
+ while (lo < hi) {
1958
+ const m = lo + hi >> 1;
1959
+ const c = this.runes[2 * m];
1960
+ if (c <= r) {
1961
+ if (r <= this.runes[2 * m + 1]) return m;
1962
+ lo = m + 1;
1963
+ } else {
1964
+ hi = m;
1965
+ }
1966
+ }
1967
+ return -1;
1968
+ }
1851
1969
  /**
1852
1970
  *
1853
1971
  * @returns {string}
@@ -1863,7 +1981,7 @@
1863
1981
  case Inst.EMPTY_WIDTH:
1864
1982
  return `empty ${this.arg} -> ${this.out}`;
1865
1983
  case Inst.MATCH:
1866
- return 'match';
1984
+ return `match${this.arg !== 0 ? ` ${this.arg}` : ''}`;
1867
1985
  case Inst.FAIL:
1868
1986
  return 'fail';
1869
1987
  case Inst.NOP:
@@ -1889,7 +2007,7 @@
1889
2007
  class Thread {
1890
2008
  constructor() {
1891
2009
  this.inst = null;
1892
- this.cap = [];
2010
+ this.cap = null; // Initialized to Int32Array later
1893
2011
  }
1894
2012
  }
1895
2013
 
@@ -1917,9 +2035,11 @@
1917
2035
  return j;
1918
2036
  }
1919
2037
  clear() {
1920
- this.sparse = [];
1921
- this.densePcs = [];
1922
- this.denseThreads = [];
2038
+ // Prevent memory leaks by nulling out used object references
2039
+ for (let i = 0; i < this.size; i++) {
2040
+ this.denseThreads[i] = null;
2041
+ }
2042
+ // The sparse set logic safely ignores stale integers in Typed Arrays.
1923
2043
  this.size = 0;
1924
2044
  }
1925
2045
  toString() {
@@ -1948,7 +2068,8 @@
1948
2068
  m.pool = [];
1949
2069
  m.poolSize = 0;
1950
2070
  m.matched = false;
1951
- m.matchcap = Array(m.prog.numCap < 2 ? 2 : m.prog.numCap).fill(0);
2071
+ // Use Int32Array instead of standard JS array
2072
+ m.matchcap = new Int32Array(m.prog.numCap < 2 ? 2 : m.prog.numCap);
1952
2073
  m.ncap = 0;
1953
2074
  return m;
1954
2075
  }
@@ -1962,27 +2083,30 @@
1962
2083
  if (ncap > this.matchcap.length) {
1963
2084
  this.initNewCap(ncap);
1964
2085
  } else {
1965
- this.resetCap(ncap);
2086
+ this.resetCap();
1966
2087
  }
1967
2088
  }
1968
- resetCap(ncap) {
2089
+
2090
+ // Wipes existing typed array memory without reallocating
2091
+ resetCap() {
1969
2092
  for (let i = 0; i < this.poolSize; i++) {
1970
2093
  const t = this.pool[i];
1971
- t.cap = Array(ncap).fill(0);
2094
+ t.cap.fill(0);
1972
2095
  }
1973
2096
  }
1974
2097
  initNewCap(ncap) {
1975
2098
  for (let i = 0; i < this.poolSize; i++) {
1976
2099
  const t = this.pool[i];
1977
- t.cap = Array(ncap).fill(0);
2100
+ t.cap = new Int32Array(ncap);
1978
2101
  }
1979
- this.matchcap = Array(ncap).fill(0);
2102
+ this.matchcap = new Int32Array(ncap);
1980
2103
  }
1981
2104
  submatches() {
1982
2105
  if (this.ncap === 0) {
1983
2106
  return Utils.emptyInts();
1984
2107
  }
1985
- return this.matchcap.slice(0, this.ncap);
2108
+ // Use subarray() to create a zero-allocation view before converting
2109
+ return Array.from(this.matchcap.subarray(0, this.ncap));
1986
2110
  }
1987
2111
 
1988
2112
  // alloc() allocates a new thread with the given instruction.
@@ -1994,6 +2118,7 @@
1994
2118
  t = this.pool[this.poolSize];
1995
2119
  } else {
1996
2120
  t = new Thread();
2121
+ t.cap = new Int32Array(this.matchcap.length);
1997
2122
  }
1998
2123
  t.inst = inst;
1999
2124
  return t;
@@ -2023,7 +2148,7 @@
2023
2148
  return false;
2024
2149
  }
2025
2150
  this.matched = false;
2026
- this.matchcap = Array(this.prog.numCap).fill(-1);
2151
+ this.matchcap.fill(-1);
2027
2152
  let runq = this.q0;
2028
2153
  let nextq = this.q1;
2029
2154
  let r = input.step(pos);
@@ -2094,6 +2219,85 @@
2094
2219
  this.freeQueue(nextq);
2095
2220
  return this.matched;
2096
2221
  }
2222
+ matchSet(input, pos, anchor) {
2223
+ const startCond = this.re2.cond;
2224
+ if (startCond === Utils.EMPTY_ALL) return [];
2225
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2226
+ return [];
2227
+ }
2228
+ let runq = this.q0;
2229
+ let nextq = this.q1;
2230
+ let r = input.step(pos);
2231
+ let rune = r >> 3;
2232
+ let width = r & 7;
2233
+ let rune1 = -1;
2234
+ let width1 = 0;
2235
+ if (r !== MachineInputBase.EOF()) {
2236
+ r = input.step(pos + width);
2237
+ rune1 = r >> 3;
2238
+ width1 = r & 7;
2239
+ }
2240
+ let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
2241
+ const matches = new Set();
2242
+ while (true) {
2243
+ if (runq.isEmpty()) {
2244
+ if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
2245
+ }
2246
+ if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
2247
+ this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
2248
+ }
2249
+ const nextPos = pos + width;
2250
+ flag = input.context(nextPos);
2251
+ for (let j = 0; j < runq.size; j++) {
2252
+ let t = runq.denseThreads[j];
2253
+ if (t === null) continue;
2254
+ const i = t.inst;
2255
+ let add = false;
2256
+ switch (i.op) {
2257
+ case Inst.MATCH:
2258
+ if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) break;
2259
+ matches.add(i.arg); // Record the matched Set ID
2260
+ break;
2261
+ case Inst.RUNE:
2262
+ add = i.matchRune(rune);
2263
+ break;
2264
+ case Inst.RUNE1:
2265
+ add = rune === i.runes[0];
2266
+ break;
2267
+ case Inst.RUNE_ANY:
2268
+ add = true;
2269
+ break;
2270
+ case Inst.RUNE_ANY_NOT_NL:
2271
+ add = rune !== Codepoint.CODES.get('\n');
2272
+ break;
2273
+ default:
2274
+ throw new RE2JSInternalException('bad inst');
2275
+ }
2276
+ if (add) {
2277
+ t = this.add(nextq, i.out, nextPos, t.cap, flag, t);
2278
+ }
2279
+ if (t !== null) {
2280
+ this.freeThread(t);
2281
+ runq.denseThreads[j] = null;
2282
+ }
2283
+ }
2284
+ runq.clear();
2285
+ if (width === 0) break;
2286
+ pos += width;
2287
+ rune = rune1;
2288
+ width = width1;
2289
+ if (rune !== -1) {
2290
+ r = input.step(pos + width);
2291
+ rune1 = r >> 3;
2292
+ width1 = r & 7;
2293
+ }
2294
+ const tmpq = runq;
2295
+ runq = nextq;
2296
+ nextq = tmpq;
2297
+ }
2298
+ this.freeQueue(nextq);
2299
+ return Array.from(matches).sort((a, b) => a - b);
2300
+ }
2097
2301
  step(runq, nextq, pos, nextPos, c, nextCond, anchor, atEnd) {
2098
2302
  const longest = this.re2.longest;
2099
2303
  for (let j = 0; j < runq.size; j++) {
@@ -2114,7 +2318,9 @@
2114
2318
  }
2115
2319
  if (this.ncap > 0 && (!longest || !this.matched || this.matchcap[1] < pos)) {
2116
2320
  t.cap[1] = pos;
2117
- this.matchcap = t.cap.slice(0, this.ncap);
2321
+ // Using subarray creates a fast view, avoiding a full array copy
2322
+ // until the submatches are finalized at the very end.
2323
+ this.matchcap.set(t.cap.subarray(0, this.ncap));
2118
2324
  }
2119
2325
  if (!longest) {
2120
2326
  this.freeQueue(runq, j + 1);
@@ -2134,7 +2340,7 @@
2134
2340
  add = c !== Codepoint.CODES.get('\n');
2135
2341
  break;
2136
2342
  default:
2137
- throw new Error('bad inst');
2343
+ throw new RE2JSInternalException('bad inst');
2138
2344
  }
2139
2345
  if (add) {
2140
2346
  t = this.add(nextq, i.out, nextPos, t.cap, nextCond, t);
@@ -2192,6 +2398,7 @@
2192
2398
  t.inst = inst;
2193
2399
  }
2194
2400
  if (this.ncap > 0 && t.cap !== cap) {
2401
+ // Direct assignment utilizing Typed Array performance
2195
2402
  for (let c = 0; c < this.ncap; c++) {
2196
2403
  t.cap[c] = cap[c];
2197
2404
  }
@@ -2226,20 +2433,23 @@
2226
2433
  return true;
2227
2434
  };
2228
2435
  class DFAState {
2229
- constructor(nfaStates, isMatch) {
2436
+ constructor(nfaStates, isMatch, matchIDs = []) {
2230
2437
  this.nfaStates = nfaStates; // Int32Array of Instruction PCs
2231
2438
  this.isMatch = isMatch; // Boolean
2439
+ this.matchIDs = matchIDs; // Array of integers indicating which Set patterns matched
2232
2440
  this.nextAscii = new Array(Unicode.MAX_ASCII + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2233
2441
  this.nextMap = new Map(); // Cache of Char -> DFAState
2234
2442
  }
2235
2443
  }
2236
2444
  class DFA {
2445
+ static MAX_CACHE_CLEARS = 5;
2237
2446
  constructor(prog) {
2238
2447
  this.prog = prog;
2239
2448
  this.stateCache = new Map(); // hash(number) -> DFAState[]
2240
2449
  this.stateCount = 0; // Tracks total states for memory limits
2241
2450
  this.startState = null;
2242
2451
  this.stateLimit = 10000; // Prevent memory explosion (ReDoS protection)
2452
+ this.cacheClears = 0; // Track thrashing
2243
2453
  this.failed = false; // mark if DFA cannot work with provided prog
2244
2454
  }
2245
2455
 
@@ -2248,6 +2458,7 @@
2248
2458
  const closure = new Set();
2249
2459
  const stack = [...pcs];
2250
2460
  let isMatch = false;
2461
+ const matchIDs = [];
2251
2462
  while (stack.length > 0) {
2252
2463
  const pc = stack.pop();
2253
2464
  if (closure.has(pc)) continue;
@@ -2256,6 +2467,7 @@
2256
2467
  switch (inst.op) {
2257
2468
  case Inst.MATCH:
2258
2469
  isMatch = true;
2470
+ if (!matchIDs.includes(inst.arg)) matchIDs.push(inst.arg);
2259
2471
  break;
2260
2472
  case Inst.ALT:
2261
2473
  case Inst.ALT_MATCH:
@@ -2273,9 +2485,11 @@
2273
2485
  }
2274
2486
  }
2275
2487
  const sortedPCs = Int32Array.from(closure).sort();
2488
+ matchIDs.sort((a, b) => a - b);
2276
2489
  return {
2277
2490
  pcs: sortedPCs,
2278
- isMatch
2491
+ isMatch,
2492
+ matchIDs
2279
2493
  };
2280
2494
  }
2281
2495
 
@@ -2301,6 +2515,8 @@
2301
2515
  bucket = [];
2302
2516
  this.stateCache.set(hash, bucket);
2303
2517
  }
2518
+
2519
+ // DFA already failed once - exit
2304
2520
  if (this.failed) return null;
2305
2521
 
2306
2522
  // Safety: prevent memory exhaustion from state explosion
@@ -2309,12 +2525,18 @@
2309
2525
  this.stateCache.clear();
2310
2526
  this.stateCount = 0;
2311
2527
  this.startState = null;
2312
- this.failed = true;
2528
+ this.cacheClears++;
2529
+
2530
+ // If this regex causes continuous cache thrashing, permanently fall back to NFA
2531
+ // to avoid spending CPU cycles constantly rebuilding the DFA tree.
2532
+ if (this.cacheClears >= DFA.MAX_CACHE_CLEARS) {
2533
+ this.failed = true;
2534
+ }
2313
2535
  return null;
2314
2536
  }
2315
2537
 
2316
2538
  // State not found, create it and add to bucket
2317
- const state = new DFAState(sortedPCs, closureResult.isMatch);
2539
+ const state = new DFAState(sortedPCs, closureResult.isMatch, closureResult.matchIDs);
2318
2540
  bucket.push(state);
2319
2541
  this.stateCount++;
2320
2542
  return state;
@@ -2334,76 +2556,808 @@
2334
2556
  return state.nextMap.get(key);
2335
2557
  }
2336
2558
  }
2337
- const nextPCs = [];
2338
- for (let i = 0; i < state.nfaStates.length; i++) {
2339
- const pc = state.nfaStates[i];
2340
- const inst = this.prog.getInst(pc);
2341
- if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
2342
- nextPCs.push(inst.out);
2559
+ const nextPCs = [];
2560
+ for (let i = 0; i < state.nfaStates.length; i++) {
2561
+ const pc = state.nfaStates[i];
2562
+ const inst = this.prog.getInst(pc);
2563
+ if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
2564
+ nextPCs.push(inst.out);
2565
+ }
2566
+ }
2567
+ if (anchor === RE2Flags.UNANCHORED) {
2568
+ nextPCs.push(this.prog.start);
2569
+ }
2570
+ const nextState = this.getState(nextPCs);
2571
+
2572
+ // Cache the result
2573
+ if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2574
+ state.nextAscii[charCode] = nextState;
2575
+ } else {
2576
+ const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2577
+ state.nextMap.set(key, nextState);
2578
+ }
2579
+ return nextState;
2580
+ }
2581
+
2582
+ // The hot loop: Execute the Lazy DFA
2583
+ match(input, pos, anchor) {
2584
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2585
+ return false;
2586
+ }
2587
+ if (!this.startState) {
2588
+ this.startState = this.getState([this.prog.start]);
2589
+ if (!this.startState) return null; // Fallback to NFA
2590
+ }
2591
+ let endPos = input.endPos();
2592
+ let currentState = this.startState;
2593
+ if (currentState.isMatch) {
2594
+ if (anchor === RE2Flags.ANCHOR_BOTH) {
2595
+ if (pos === endPos) return true;
2596
+ } else {
2597
+ return true;
2598
+ }
2599
+ }
2600
+ let i = pos;
2601
+ while (i < endPos) {
2602
+ const r = input.step(i);
2603
+ const rune = r >> 3;
2604
+ const width = r & 7;
2605
+
2606
+ // prevent infinite loop on EOF
2607
+ if (width === 0) {
2608
+ break;
2609
+ }
2610
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2611
+
2612
+ // If we hit an unrecoverable DFA error or bailout, signal fallback
2613
+ if (currentState === null) return null;
2614
+ if (currentState.isMatch) {
2615
+ if (anchor === RE2Flags.ANCHOR_BOTH) {
2616
+ if (i + width === endPos) return true;
2617
+ } else {
2618
+ return true;
2619
+ }
2620
+ }
2621
+
2622
+ // If we hit a dead end, and anchored, fail early
2623
+ if (currentState.nfaStates.length === 0) {
2624
+ if (anchor !== RE2Flags.UNANCHORED) return false;
2625
+ }
2626
+ i += width;
2627
+ }
2628
+ return false;
2629
+ }
2630
+
2631
+ // The hot loop for evaluating Multi-Pattern Sets
2632
+ matchSet(input, pos, anchor) {
2633
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2634
+ return [];
2635
+ }
2636
+ if (!this.startState) {
2637
+ this.startState = this.getState([this.prog.start]);
2638
+ if (!this.startState) return null; // Fallback to NFA
2639
+ }
2640
+ let endPos = input.endPos();
2641
+ let currentState = this.startState;
2642
+ const matches = new Set();
2643
+ const checkMatch = (state, currentPos) => {
2644
+ if (state.isMatch) {
2645
+ if (anchor === RE2Flags.ANCHOR_BOTH) {
2646
+ if (currentPos === endPos) {
2647
+ state.matchIDs.forEach(id => matches.add(id));
2648
+ }
2649
+ } else {
2650
+ state.matchIDs.forEach(id => matches.add(id));
2651
+ }
2652
+ }
2653
+ };
2654
+ checkMatch(currentState, pos);
2655
+ let i = pos;
2656
+ while (i < endPos) {
2657
+ const r = input.step(i);
2658
+ const rune = r >> 3;
2659
+ const width = r & 7;
2660
+ if (width === 0) break;
2661
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2662
+ if (currentState === null) return null; // Bailout to NFA
2663
+
2664
+ i += width;
2665
+ checkMatch(currentState, i);
2666
+ if (currentState.nfaStates.length === 0) {
2667
+ if (anchor !== RE2Flags.UNANCHORED) break;
2668
+ }
2669
+ }
2670
+ return Array.from(matches).sort((a, b) => a - b);
2671
+ }
2672
+ }
2673
+
2674
const VISITED_BITS = 32; // bits per word of the visited bitmap
const MAX_BACKTRACK_PROG = 500; // largest program (in instructions) the backtracker accepts
const INITIAL_JOB_CAPACITY = 256; // Starting size for the job stack arrays
const MAX_BACKTRACK_VECTOR = 256 * 1024; // visited bitmap budget in bits (32 KB)

/**
 * Reusable state for the bit-state backtracker: a job stack stored in
 * parallel typed arrays, a bitmap with one bit per (pc, pos) pair, and the
 * capture registers.
 */
class BitState {
  constructor() {
    this.end = 0;
    this.cap = new Int32Array(0);
    this.matchcap = new Int32Array(0);
    this.ncap = 0;

    // Parallel arrays acting as the backtrack job stack
    this.jobPc = new Int32Array(INITIAL_JOB_CAPACITY);
    this.jobArg = new Uint8Array(INITIAL_JOB_CAPACITY);
    this.jobPos = new Int32Array(INITIAL_JOB_CAPACITY);
    this.jobLen = 0;
    this.visited = new Uint32Array(0);
  }

  /**
   * Prepares the state for a new search.
   *
   * @param {object} prog - program exposing `numInst()`
   * @param {number} end - end position of the input
   * @param {number} ncap - number of capture slots the caller wants
   */
  reset(prog, end, ncap) {
    this.end = end;
    this.jobLen = 0;
    this.ncap = ncap;

    // One bit per (pc, pos) pair, rounded up to whole 32-bit words.
    const visitedSize = (prog.numInst() * (end + 1) + VISITED_BITS - 1) >>> 5;
    if (this.visited.length < visitedSize) {
      // Allocate at least the default budget so the buffer can be reused, but
      // never less than this search needs: typed-array writes past the end
      // are silently dropped, which would make shouldVisit() return true
      // forever for those cells.
      const defaultSize = Math.floor(MAX_BACKTRACK_VECTOR / VISITED_BITS);
      this.visited = new Uint32Array(Math.max(visitedSize, defaultSize));
    } else {
      this.visited.fill(0, 0, visitedSize);
    }

    // Capture slots must be explicitly -1: Int32Array defaults to 0.
    if (this.cap.length < ncap) {
      this.cap = new Int32Array(ncap).fill(-1);
    } else {
      this.cap.fill(-1, 0, ncap);
    }
    if (this.matchcap.length < ncap) {
      this.matchcap = new Int32Array(ncap).fill(-1);
    } else {
      this.matchcap.fill(-1, 0, ncap);
    }
  }

  /**
   * Marks (pc, pos) as visited.
   *
   * @returns {boolean} true if the pair had not been visited before
   */
  shouldVisit(pc, pos) {
    const n = pc * (this.end + 1) + pos;
    const idx = n >>> 5; // word index: floor(n / 32)
    const mask = 1 << (n & 31); // bit within the word: n % 32

    if ((this.visited[idx] & mask) !== 0) {
      return false;
    }
    this.visited[idx] |= mask;
    return true;
  }

  /**
   * Pushes a job unless `pc` is a FAIL instruction or, for a first visit
   * (`arg` falsy), the (pc, pos) pair was already visited. The job arrays
   * double in size when full.
   */
  push(re2, pc, pos, arg) {
    if (re2.prog.getInst(pc).op !== Inst.FAIL && (arg || this.shouldVisit(pc, pos))) {
      if (this.jobLen >= this.jobPc.length) {
        const newSize = this.jobPc.length * 2;
        const newPc = new Int32Array(newSize);
        newPc.set(this.jobPc);
        this.jobPc = newPc;
        const newArg = new Uint8Array(newSize);
        newArg.set(this.jobArg);
        this.jobArg = newArg;
        const newPos = new Int32Array(newSize);
        newPos.set(this.jobPos);
        this.jobPos = newPos;
      }
      this.jobPc[this.jobLen] = pc;
      this.jobArg[this.jobLen] = arg ? 1 : 0;
      this.jobPos[this.jobLen] = pos;
      this.jobLen++;
    }
  }

  /**
   * Runs a backtracking search from (pc, pos).
   *
   * @returns {boolean} true if a match was found; capture positions of the
   *   chosen match are left in `this.matchcap`
   * @throws {RE2JSInternalException} on a FAIL or unknown instruction
   */
  tryBacktrack(re2, input, pc, pos, anchor) {
    const longest = re2.longest;
    this.push(re2, pc, pos, false);
    while (this.jobLen > 0) {
      this.jobLen--;
      let currentPc = this.jobPc[this.jobLen];
      let arg = this.jobArg[this.jobLen] === 1;
      let currentPos = this.jobPos[this.jobLen];

      // push() already ran the visited check for a freshly popped job, so it
      // is skipped once; later hops inside the inner loop are checked here.
      let skipShouldVisit = true;
      while (true) {
        if (!skipShouldVisit) {
          if (!this.shouldVisit(currentPc, currentPos)) {
            break;
          }
        }
        skipShouldVisit = false;
        const inst = re2.prog.getInst(currentPc);
        // Inside the switch, `continue` follows the next instruction and
        // `break` abandons the current job.
        switch (inst.op) {
          case Inst.FAIL:
            {
              throw new RE2JSInternalException('unexpected InstFail');
            }
          case Inst.ALT:
            {
              if (arg) {
                // Second visit: the first branch was explored, take the other.
                arg = false;
                currentPc = inst.arg;
                continue;
              } else {
                // First visit: remember to come back, then follow `out`.
                this.push(re2, currentPc, currentPos, true);
                currentPc = inst.out;
                continue;
              }
            }
          case Inst.ALT_MATCH:
            {
              const outInst = re2.prog.getInst(inst.out);
              if (Inst.isRuneOp(outInst.op)) {
                this.push(re2, inst.arg, currentPos, false);
                currentPc = inst.out;
                continue;
              }
              this.push(re2, inst.out, this.end, false);
              currentPc = inst.arg;
              continue;
            }
          case Inst.RUNE:
            {
              const r = input.step(currentPos);
              if (r === MachineInputBase.EOF()) break;
              if (!inst.matchRune(r >> 3)) break;
              currentPos += r & 7;
              currentPc = inst.out;
              continue;
            }
          case Inst.RUNE1:
            {
              const r = input.step(currentPos);
              if (r === MachineInputBase.EOF()) break;
              if (r >> 3 !== inst.runes[0]) break;
              currentPos += r & 7;
              currentPc = inst.out;
              continue;
            }
          case Inst.RUNE_ANY_NOT_NL:
            {
              const r = input.step(currentPos);
              if (r === MachineInputBase.EOF()) break;
              if (r >> 3 === 10) break; // 10 is '\n'
              currentPos += r & 7;
              currentPc = inst.out;
              continue;
            }
          case Inst.RUNE_ANY:
            {
              const r = input.step(currentPos);
              if (r === MachineInputBase.EOF()) break;
              currentPos += r & 7;
              currentPc = inst.out;
              continue;
            }
          case Inst.CAPTURE:
            {
              if (arg) {
                // Undo job: restore the capture value saved in the job's pos.
                this.cap[inst.arg] = currentPos;
                break;
              } else {
                if (inst.arg < this.ncap) {
                  // Save the old value so it is restored on backtrack.
                  this.push(re2, currentPc, this.cap[inst.arg], true);
                  this.cap[inst.arg] = currentPos;
                }
                currentPc = inst.out;
                continue;
              }
            }
          case Inst.EMPTY_WIDTH:
            {
              const flag = input.context(currentPos);
              if ((inst.arg & ~flag) !== 0) break;
              currentPc = inst.out;
              continue;
            }
          case Inst.NOP:
            {
              currentPc = inst.out;
              continue;
            }
          case Inst.MATCH:
            {
              if (anchor === RE2Flags.ANCHOR_BOTH && currentPos !== this.end) {
                break;
              }
              // Caller does not want positions: any match will do.
              if (this.ncap === 0) return true;
              if (this.ncap > 1) {
                this.cap[1] = currentPos;
              }
              // Keep the first match, or a longer one in longest-match mode.
              const old = this.matchcap[1];
              if (old === -1 || longest && currentPos > 0 && currentPos > old) {
                this.matchcap.set(this.cap);
              }
              if (!longest) return true;
              // Nothing can be longer than a match that ends at the input end.
              if (currentPos === this.end) return true;
              break;
            }
          default:
            {
              throw new RE2JSInternalException('bad inst');
            }
        }
        break;
      }
    }
    return longest && this.matchcap.length > 1 && this.matchcap[1] >= 0;
  }
}
2883
// Pool of BitState objects so their typed arrays are reused between searches.
const bitStatePool = [];

/**
 * Entry points for the bit-state backtracking matcher.
 */
class Backtracker {
  /**
   * @returns {boolean} true if `prog` has at most MAX_BACKTRACK_PROG instructions
   */
  static shouldBacktrack(prog) {
    return prog.numInst() <= MAX_BACKTRACK_PROG;
  }

  /**
   * @returns {number} 0 if `prog` is too large for the backtracker, otherwise
   *   the bitmap budget divided by the instruction count
   */
  static maxBitStateLen(prog) {
    if (!Backtracker.shouldBacktrack(prog)) {
      return 0;
    }
    return Math.floor(MAX_BACKTRACK_VECTOR / prog.numInst());
  }

  /**
   * Runs the backtracker over `input`.
   *
   * @param {object} re2 - compiled regexp (reads `cond`, `prog`, `prefix`)
   * @param {object} input - machine input
   * @param {number} pos - position to start searching from
   * @param {number} anchor - one of the RE2Flags anchor modes
   * @param {number} ncap - number of capture slots to report
   * @returns {number[]|null} the first `ncap` capture positions, or null when
   *   there is no match
   */
  static execute(re2, input, pos, anchor, ncap) {
    const startCond = re2.cond;
    if (startCond === Utils.EMPTY_ALL) {
      return null;
    }
    const anchoredAtStart = anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH;
    if (anchoredAtStart && pos !== 0) {
      return null;
    }
    const needsBeginText = (startCond & Utils.EMPTY_BEGIN_TEXT) !== 0;
    if (needsBeginText && pos !== 0) {
      return null;
    }

    const b = bitStatePool.length > 0 ? bitStatePool.pop() : new BitState();
    try {
      const end = input.endPos();
      b.reset(re2.prog, end, ncap);
      let matched = false;
      if (needsBeginText || anchoredAtStart) {
        // Only one possible start position.
        if (b.ncap > 0) {
          b.cap[0] = pos;
        }
        matched = b.tryBacktrack(re2, input, re2.prog.start, pos, anchor);
      } else {
        // Unanchored: try every start position, using the literal prefix
        // (when there is one) to skip ahead.
        let at = pos;
        let width = -1;
        for (; at <= end && width !== 0; at += width) {
          if (re2.prefix.length > 0) {
            const advance = input.index(re2, at);
            if (advance < 0) {
              break;
            }
            at += advance;
          }
          if (b.ncap > 0) {
            b.cap[0] = at;
          }
          if (b.tryBacktrack(re2, input, re2.prog.start, at, anchor)) {
            matched = true;
            break;
          }
          const r = input.step(at);
          width = r === MachineInputBase.EOF() ? 0 : r & 7;
        }
      }
      if (!matched) {
        return null;
      }

      // Copy only the requested slots: a recycled matchcap may be longer than
      // ncap and carry stale values past it.
      return ncap === 0 ? [] : Array.from(b.matchcap.subarray(0, ncap));
    } finally {
      // Return the state to the pool even if the search threw; reset()
      // reinitialises it before the next use.
      bitStatePool.push(b);
    }
  }
}
2948
+
2949
/**
 * Sparse-set work queue of unsigned integers below a fixed bound.
 * `dense` holds the inserted values in insertion order and `sparse[u]` is the
 * slot of `u` inside `dense`; `nextIndex` is the read cursor.
 */
class QueueOnePass {
  /**
   * @param {number} size - exclusive upper bound for stored values
   */
  constructor(size) {
    this.sparse = new Uint32Array(size);
    this.dense = new Uint32Array(size);
    this.size = 0;
    this.nextIndex = 0;
  }

  /** @returns {boolean} true once every inserted value has been read */
  empty() {
    return !(this.nextIndex < this.size);
  }

  /** @returns {number} the value at the read cursor; advances the cursor */
  next() {
    const value = this.dense[this.nextIndex];
    this.nextIndex += 1;
    return value;
  }

  /** Forgets all values and rewinds the read cursor. */
  clear() {
    this.nextIndex = 0;
    this.size = 0;
  }

  /** @returns {boolean} true if `u` is currently in the set */
  contains(u) {
    if (!(u < this.sparse.length)) return false;
    const slot = this.sparse[u];
    return slot < this.size && this.dense[slot] === u;
  }

  /** Adds `u` unless it is already present. */
  insert(u) {
    if (this.contains(u)) return;
    this.insertNew(u);
  }

  /** Adds `u` without a membership check; out-of-range values are ignored. */
  insertNew(u) {
    if (u >= this.sparse.length) return;
    const slot = this.size;
    this.sparse[u] = slot;
    this.dense[slot] = u;
    this.size = slot + 1;
  }
}
2979
/**
 * Merges two sorted lists of [lo, hi] rune pairs into one list, recording for
 * every merged pair which program counter (leftPC or rightPC) it came from.
 *
 * @param {number[]} leftRunes - flat list of lo/hi pairs
 * @param {number[]} rightRunes - flat list of lo/hi pairs
 * @param {number} leftPC - pc recorded for pairs taken from leftRunes
 * @param {number} rightPC - pc recorded for pairs taken from rightRunes
 * @returns {{merged: number[], next: number[]}|null} null when a pair starts
 *   at or before the end of the previously merged pair
 */
const mergeRuneSets = (leftRunes, rightRunes, leftPC, rightPC) => {
  const merged = [];
  const next = [];
  let li = 0;
  let ri = 0;
  while (li < leftRunes.length || ri < rightRunes.length) {
    // Take from the left unless the right side has the strictly smaller start.
    const takeLeft = ri >= rightRunes.length || (li < leftRunes.length && !(rightRunes[ri] < leftRunes[li]));
    const source = takeLeft ? leftRunes : rightRunes;
    const at = takeLeft ? li : ri;

    // Each new pair must start after the last merged upper bound.
    if (merged.length > 0 && source[at] <= merged[merged.length - 1]) {
      return null;
    }
    merged.push(source[at], source[at + 1]);
    next.push(takeLeft ? leftPC : rightPC);
    if (takeLeft) {
      li += 2;
    } else {
      ri += 2;
    }
  }
  return { merged, next };
};
3016
/**
 * Private copy of a program for one-pass analysis. Every instruction is
 * cloned (op, out, arg and a copy of runes) and gets a `next` table slot, so
 * the analysis can rewrite instructions without touching the original.
 */
class OnePassProg {
  /**
   * @param {object} prog - source program (reads `start`, `numCap`, `inst`)
   */
  constructor(prog) {
    this.start = prog.start;
    this.numCap = prog.numCap;
    this.inst = Array.from(prog.inst, source => {
      const clone = new Inst(source.op);
      clone.out = source.out;
      clone.arg = source.arg;
      clone.runes = source.runes ? source.runes.slice() : [];
      clone.next = null;
      return clone;
    });
  }
}
3032
/**
 * Copies `prog` into a OnePassProg and rewrites a common construct (an ALT
 * whose one branch is another ALT pointing back at it) so that some otherwise
 * non-onepass programs become onepass.
 *
 * @param {object} prog - source program
 * @returns {OnePassProg} the rewritten copy
 */
const onePassCopy = prog => {
  const copy = new OnePassProg(prog);
  const isAlt = node => node.op === Inst.ALT || node.op === Inst.ALT_MATCH;

  copy.inst.forEach((inst, pc) => {
    if (!isAlt(inst)) return;

    // Find which field of `inst` leads to another ALT (altField) and which
    // leads to a non-ALT instruction (otherField).
    let otherField = 'out';
    let altField = 'arg';
    let altInst = copy.inst[inst[altField]];
    if (!isAlt(altInst)) {
      otherField = 'arg';
      altField = 'out';
      altInst = copy.inst[inst[altField]];
      if (!isAlt(altInst)) return;
    }

    // Both branches being ALTs is not the pattern handled here.
    const otherInst = copy.inst[inst[otherField]];
    if (isAlt(otherInst)) return;

    // Does the inner ALT point back at this instruction, and through which field?
    let backField = 'out';
    let exitField = 'arg';
    let patch = false;
    if (altInst.out === pc) {
      patch = true;
    } else if (altInst.arg === pc) {
      patch = true;
      backField = 'arg';
      exitField = 'out';
    }

    // Redirect the back edge to the non-ALT target ...
    if (patch) altInst[backField] = inst[otherField];
    // ... and, once both point at the same target, bypass the inner ALT.
    if (inst[otherField] === altInst[backField]) inst[altField] = altInst[exitField];
  });
  return copy;
};
3066
/**
 * Tries to turn `p` into a one-pass program: for each reachable instruction it
 * computes the rune ranges that can come next and a `next` table giving the
 * target pc for each range.
 *
 * @param {OnePassProg} p - program copy; modified in place
 * @returns {OnePassProg|null} `p` on success; null when the program has 1000
 *   or more instructions or the analysis finds it is not one-pass
 */
const makeOnePass = p => {
  const instCount = p.inst.length;
  if (instCount >= 1000) return null;

  const instQueue = new QueueOnePass(instCount);
  const visitQueue = new QueueOnePass(instCount);
  const onePassRunes = new Array(instCount);
  // m[pc] is true when a MATCH is reachable from pc without consuming a rune.
  const m = new Array(instCount).fill(false);

  // A next table with one slot per rune pair plus one, all pointing at `target`.
  const uniformNext = (pairs, target) => new Uint32Array(Math.floor(pairs.length / 2) + 1).fill(target);

  // Sorted [r, r] pairs for every rune in the simple-fold orbit of r0.
  const foldOrbitPairs = r0 => {
    const pairs = [r0, r0];
    for (let r1 = Unicode.simpleFold(r0); r1 !== r0; r1 = Unicode.simpleFold(r1)) {
      pairs.push(r1, r1);
    }
    pairs.sort((a, b) => a - b);
    return pairs;
  };

  const check = pc => {
    const inst = p.inst[pc];
    if (visitQueue.contains(pc)) return true;
    visitQueue.insert(pc);

    let ok = true;
    switch (inst.op) {
      case Inst.ALT:
      case Inst.ALT_MATCH:
        {
          ok = check(inst.out) && check(inst.arg);
          const matchOut = m[inst.out];
          const matchArg = m[inst.arg];
          // Both branches reaching MATCH is ambiguous.
          if (matchOut && matchArg) return false;
          // Normalise so that the matching branch, if any, is `out`.
          if (matchArg) {
            [inst.out, inst.arg] = [inst.arg, inst.out];
          }
          if (matchOut || matchArg) {
            m[pc] = true;
            inst.op = Inst.ALT_MATCH;
          }
          const leftRunes = onePassRunes[inst.out] || [];
          const rightRunes = onePassRunes[inst.arg] || [];
          const mergeRes = mergeRuneSets(leftRunes, rightRunes, inst.out, inst.arg);
          if (!mergeRes) return false;
          onePassRunes[pc] = mergeRes.merged;
          inst.next = new Uint32Array(mergeRes.next);
          break;
        }
      case Inst.CAPTURE:
      case Inst.EMPTY_WIDTH:
      case Inst.NOP:
        {
          // Pass-through instructions inherit everything from their target.
          ok = check(inst.out);
          m[pc] = m[inst.out];
          onePassRunes[pc] = onePassRunes[inst.out] ? onePassRunes[inst.out].slice() : [];
          inst.next = uniformNext(onePassRunes[pc], inst.out);
          break;
        }
      case Inst.MATCH:
      case Inst.FAIL:
        {
          m[pc] = inst.op === Inst.MATCH;
          break;
        }
      case Inst.RUNE:
        {
          m[pc] = false;
          if (inst.next && inst.next.length > 0) break; // already processed
          instQueue.insert(inst.out);
          if (!inst.runes || inst.runes.length === 0) {
            onePassRunes[pc] = [];
            inst.next = new Uint32Array([inst.out]);
            break;
          }
          const isFoldedSingle = inst.runes.length === 1 && (inst.arg & RE2Flags.FOLD_CASE) !== 0;
          const runes = isFoldedSingle ? foldOrbitPairs(inst.runes[0]) : [...inst.runes];
          onePassRunes[pc] = runes;
          inst.next = uniformNext(runes, inst.out);
          inst.op = Inst.RUNE;
          break;
        }
      case Inst.RUNE1:
        {
          m[pc] = false;
          if (inst.next && inst.next.length > 0) break; // already processed
          instQueue.insert(inst.out);
          const isFolded = (inst.arg & RE2Flags.FOLD_CASE) !== 0;
          const runes = isFolded ? foldOrbitPairs(inst.runes[0]) : [inst.runes[0], inst.runes[0]];
          onePassRunes[pc] = runes;
          inst.next = uniformNext(runes, inst.out);
          // Rewritten to a range-based RUNE for the analysis.
          inst.op = Inst.RUNE;
          break;
        }
      case Inst.RUNE_ANY:
        {
          m[pc] = false;
          if (inst.next && inst.next.length > 0) break; // already processed
          instQueue.insert(inst.out);
          onePassRunes[pc] = [0, Unicode.MAX_RUNE];
          inst.next = new Uint32Array([inst.out]);
          break;
        }
      case Inst.RUNE_ANY_NOT_NL:
        {
          m[pc] = false;
          if (inst.next && inst.next.length > 0) break; // already processed
          instQueue.insert(inst.out);
          onePassRunes[pc] = [0, 9, 11, Unicode.MAX_RUNE]; // \n is 10
          inst.next = uniformNext(onePassRunes[pc], inst.out);
          break;
        }
    }
    return ok;
  };

  // Work-list traversal starting from the program entry; each rune
  // instruction queues its target for a later, separate check.
  instQueue.clear();
  instQueue.insert(p.start);
  while (!instQueue.empty()) {
    visitQueue.clear();
    const pc = instQueue.next();
    if (!check(pc)) return null;
  }

  // Install the computed rune ranges on the instructions that got one.
  onePassRunes.forEach((runes, i) => {
    if (runes) p.inst[i].runes = runes;
  });
  return p;
};
3201
/**
 * Undoes the temporary changes makeOnePass made to instructions that do not
 * need a `next` table, using the original program as the reference.
 *
 * @param {OnePassProg} p - analysed program; modified in place
 * @param {object} original - the program `p` was copied from
 */
const cleanupOnePass = (p, original) => {
  original.inst.forEach((source, ix) => {
    switch (source.op) {
      case Inst.CAPTURE:
      case Inst.EMPTY_WIDTH:
      case Inst.NOP:
      case Inst.MATCH:
      case Inst.FAIL:
        // No rune dispatch happens on these: drop the table.
        p.inst[ix].next = null;
        return;
      case Inst.RUNE1:
      case Inst.RUNE_ANY:
      case Inst.RUNE_ANY_NOT_NL:
        {
          // Restore the original opcode and runes.
          const target = p.inst[ix];
          target.next = null;
          target.op = source.op;
          target.runes = source.runes ? source.runes.slice() : [];
          return;
        }
      default:
        // ALT, ALT_MATCH and RUNE keep their computed tables.
        return;
    }
  });
};
3226
/**
 * One-pass matcher: compiles eligible programs into a OnePassProg and runs
 * them in a single forward pass over the input.
 */
class OnePass {
  /**
   * @param {object} prog - compiled program
   * @returns {OnePassProg|null} the one-pass program, or null if `prog` is not eligible
   */
  static compile(prog) {
    if (prog.start === 0) return null;

    // onepass regexps must be strictly anchored
    const startInst = prog.inst[prog.start];
    if (startInst.op !== Inst.EMPTY_WIDTH || (startInst.arg & Utils.EMPTY_BEGIN_TEXT) === 0) {
      return null;
    }

    const hasAlt = prog.inst.some(inst => inst.op === Inst.ALT || inst.op === Inst.ALT_MATCH);

    // Reject programs in which MATCH can be reached in a way the one-pass
    // executor does not handle.
    for (const inst of prog.inst) {
      const opOut = prog.inst[inst.out].op;
      if (inst.op === Inst.ALT || inst.op === Inst.ALT_MATCH) {
        if (opOut === Inst.MATCH || prog.inst[inst.arg].op === Inst.MATCH) {
          return null;
        }
      } else if (inst.op === Inst.EMPTY_WIDTH) {
        // A MATCH right after an empty-width check is only fine for end-of-text.
        if (opOut === Inst.MATCH && (inst.arg & Utils.EMPTY_END_TEXT) !== Utils.EMPTY_END_TEXT) {
          return null;
        }
      } else if (opOut === Inst.MATCH && hasAlt) {
        return null;
      }
    }

    const analysed = makeOnePass(onePassCopy(prog));
    if (analysed !== null) {
      cleanupOnePass(analysed, prog);
    }
    return analysed;
  }

  /**
   * @param {object} inst - ALT / ALT_MATCH instruction with a `next` table
   * @param {number} r - current rune
   * @returns {number} the pc to continue at; 0 means fail
   */
  static next(inst, r) {
    const nextIdx = inst.matchRunePos(r);
    if (nextIdx >= 0) return inst.next[nextIdx];
    return inst.op === Inst.ALT_MATCH ? inst.out : 0;
  }

  /**
   * Runs the one-pass program stored on `re2.onepass`.
   *
   * NOTE(review): `anchor` is not consulted here; presumably the caller
   * enforces end anchoring - confirm against the call site.
   *
   * @returns {number[]|null} `ncap` capture positions on a match, else null
   */
  static execute(re2, input, pos, anchor, ncap) {
    const onepass = re2.onepass;
    if (!onepass) return null;

    const isEOF = packed => packed === MachineInputBase.EOF();
    const matchcap = new Int32Array(ncap).fill(-1);

    // Current rune plus one rune of lookahead (rune1), as the empty-width
    // context needs both sides of the current position.
    const first = input.step(pos);
    let at = pos;
    let rune = first >> 3;
    let width = first & 7;
    let rune1 = -1;
    let width1 = 0;
    const readAhead = from => {
      const packed = input.step(from);
      if (isEOF(packed)) {
        rune1 = -1;
        width1 = 0;
      } else {
        rune1 = packed >> 3;
        width1 = packed & 7;
      }
    };
    if (!isEOF(first)) {
      readAhead(at + width);
    }

    let flag = at === 0 ? Utils.emptyOpContext(-1, rune) : input.context(at);
    let pc = onepass.start;
    while (true) {
      const inst = onepass.inst[pc];
      pc = inst.out;
      // `continue` moves to the next instruction without consuming input;
      // `break` falls through to the advance step below.
      switch (inst.op) {
        case Inst.MATCH:
          {
            if (matchcap.length > 0) {
              matchcap[0] = 0;
              matchcap[1] = at;
            }
            return ncap === 0 ? [] : Array.from(matchcap);
          }
        case Inst.RUNE:
          if (!inst.matchRune(rune)) return null;
          break;
        case Inst.RUNE1:
          if (rune !== inst.runes[0]) return null;
          break;
        case Inst.RUNE_ANY:
          break;
        case Inst.RUNE_ANY_NOT_NL:
          if (rune === 10) return null; // 10 is '\n'
          break;
        case Inst.ALT:
        case Inst.ALT_MATCH:
          pc = OnePass.next(inst, rune);
          continue;
        case Inst.FAIL:
          return null;
        case Inst.NOP:
          continue;
        case Inst.EMPTY_WIDTH:
          if ((inst.arg & ~flag) !== 0) return null;
          continue;
        case Inst.CAPTURE:
          if (inst.arg < matchcap.length) {
            matchcap[inst.arg] = at;
          }
          continue;
        default:
          throw new RE2JSInternalException('bad inst');
      }

      // A rune instruction matched: advance by one rune. A zero width means
      // the input ended before MATCH was reached.
      if (width === 0) return null;
      flag = Utils.emptyOpContext(rune, rune1);
      at += width;
      rune = rune1;
      width = width1;
      if (rune !== -1) {
        readAhead(at + width);
      }
    }
  }
}
2409
3363
 
@@ -2488,7 +3442,7 @@
2488
3442
  this.max = 0; // max for REPEAT
2489
3443
  this.cap = 0; // capturing index, for CAPTURE
2490
3444
  this.name = null; // capturing name, for CAPTURE
2491
- this.namedGroups = {}; // map of group name -> capturing index
3445
+ this.namedGroups = Object.create(null); // map of group name -> capturing index
2492
3446
  }
2493
3447
  reinit() {
2494
3448
  this.flags = 0;
@@ -2498,7 +3452,7 @@
2498
3452
  this.min = 0;
2499
3453
  this.max = 0;
2500
3454
  this.name = null;
2501
- this.namedGroups = {};
3455
+ this.namedGroups = Object.create(null);
2502
3456
  }
2503
3457
  toString() {
2504
3458
  return this.appendTo();
@@ -2758,6 +3712,188 @@
2758
3712
  }
2759
3713
  }
2760
3714
 
3715
/**
 * Node of a prefilter tree: a cheap literal-presence test for an input.
 * NONE imposes no requirement, EXACT requires one literal, AND / OR combine
 * child nodes.
 */
class Prefilter {
  static Type = {
    NONE: 0,
    EXACT: 1,
    AND: 2,
    OR: 3
  };

  /**
   * @param {number} type - one of Prefilter.Type
   */
  constructor(type) {
    this.type = type;
    this.subs = []; // child nodes, for AND / OR
    this.str = ''; // literal text, for EXACT
    this.bytes = null; // literal as bytes, for EXACT
  }

  /**
   * @param {object} input - machine input exposing `hasString(prefilter, pos)`
   * @param {number} pos - position to test from
   * @returns {boolean} false only when the requirement is provably not met
   */
  eval(input, pos) {
    const { EXACT, AND, OR } = Prefilter.Type;
    if (this.type === EXACT) {
      return input.hasString(this, pos);
    }
    if (this.type === AND) {
      return this.subs.every(sub => sub.eval(input, pos));
    }
    if (this.type === OR) {
      return this.subs.some(sub => sub.eval(input, pos));
    }
    // NONE and unknown types impose no requirement.
    return true;
  }
}
3749
/**
 * Builds Prefilter trees from parsed regular expressions.
 */
class PrefilterTree {
  /**
   * @param {object} re - parsed Regexp
   * @returns {Prefilter} simplified prefilter for `re`
   */
  static build(re) {
    return PrefilterTree.simplify(PrefilterTree.fromRegexp(re));
  }

  /**
   * Translates a Regexp node into an unsimplified Prefilter tree.
   *
   * @param {object} re - parsed Regexp (may be null/undefined)
   * @returns {Prefilter}
   */
  static fromRegexp(re) {
    if (!re) return new Prefilter(Prefilter.Type.NONE);
    switch (re.op) {
      case Regexp.Op.LITERAL:
        {
          if (re.runes.length === 0 || (re.flags & RE2Flags.FOLD_CASE) !== 0) {
            // Skip case-folded literals for simplicity
            return new Prefilter(Prefilter.Type.NONE);
          }
          const pf = new Prefilter(Prefilter.Type.EXACT);
          pf.str = Array.from(re.runes, rune => String.fromCodePoint(rune)).join('');
          pf.bytes = Utils.stringToUtf8ByteArray(pf.str);
          return pf;
        }
      case Regexp.Op.CAPTURE:
      case Regexp.Op.PLUS:
        // The sub-expression occurs at least once, so its requirement holds.
        return PrefilterTree.fromRegexp(re.subs[0]);
      case Regexp.Op.REPEAT:
        // Only a repeat that cannot match zero times implies its sub-expression.
        return re.min >= 1 ? PrefilterTree.fromRegexp(re.subs[0]) : new Prefilter(Prefilter.Type.NONE);
      case Regexp.Op.CONCAT:
        {
          const pf = new Prefilter(Prefilter.Type.AND);
          pf.subs = re.subs.map(sub => PrefilterTree.fromRegexp(sub));
          return pf;
        }
      case Regexp.Op.ALTERNATE:
        {
          const pf = new Prefilter(Prefilter.Type.OR);
          pf.subs = re.subs.map(sub => PrefilterTree.fromRegexp(sub));
          return pf;
        }
      default:
        // Empty matches, anchors, word boundaries, character classes, any-char
        // and every other operator yield no literal requirement.
        return new Prefilter(Prefilter.Type.NONE);
    }
  }

  /**
   * Simplifies a Prefilter tree: drops NONE children of AND, turns an OR with
   * a NONE branch into NONE, flattens nested nodes of the same type, removes
   * duplicate EXACT branches of an OR and collapses one-child nodes.
   *
   * @param {Prefilter} pf - tree to simplify; AND / OR nodes are updated in place
   * @returns {Prefilter} the simplified tree
   */
  static simplify(pf) {
    const { NONE, EXACT, AND, OR } = Prefilter.Type;
    if (pf.type === EXACT || pf.type === NONE) {
      return pf;
    }

    if (pf.type === AND) {
      const kept = [];
      for (const sub of pf.subs) {
        const s = PrefilterTree.simplify(sub);
        if (s.type === NONE) continue; // no requirement: nothing to add
        if (s.type === AND) {
          kept.push(...s.subs);
        } else {
          kept.push(s);
        }
      }
      if (kept.length === 0) return new Prefilter(NONE);
      if (kept.length === 1) return kept[0];
      pf.subs = kept;
      return pf;
    }

    if (pf.type === OR) {
      const branches = [];
      for (const sub of pf.subs) {
        const s = PrefilterTree.simplify(sub);
        if (s.type === NONE) {
          // If any branch of an OR has no requirements, the whole OR has no requirements
          return new Prefilter(NONE);
        }
        if (s.type === OR) {
          branches.push(...s.subs);
        } else {
          branches.push(s);
        }
      }
      if (branches.length === 0) return new Prefilter(NONE);
      if (branches.length === 1) return branches[0];

      // De-duplicate EXACT branches
      const seen = new Set();
      const unique = branches.filter(sub => {
        if (sub.type !== EXACT) return true;
        if (seen.has(sub.str)) return false;
        seen.add(sub.str);
        return true;
      });

      // De-duplication can leave a single branch; collapse it like above
      // instead of keeping a redundant one-child OR node.
      if (unique.length === 1) return unique[0];
      pf.subs = unique;
      return pf;
    }
    return pf;
  }
}
3875
+
3876
/**
 * A list of instruction pointers waiting to be patched.
 * Tracks both `head` and `tail` to allow O(1) appending during compilation.
 *
 * Values are encoded integers, not standard memory pointers:
 * - Program instruction index: `l >> 1`
 * - Patch `.out` field if: `(l & 1) === 0`
 * - Patch `.arg` field if: `(l & 1) === 1`
 * - `0` denotes an empty list.
 *
 * @see https://swtch.com/~rsc/regexp/regexp1.html
 */
class PatchList {
  /**
   * @param {number} [head=0] - Encoded pointer to the start of the patch list.
   * @param {number} [tail=0] - Encoded pointer to the end of the patch list.
   */
  constructor(head = 0, tail = 0) {
    Object.assign(this, { head, tail });
  }
}
3896
+
2761
3897
  /**
2762
3898
  * A Prog is a compiled regular expression program.
2763
3899
  */
@@ -2859,39 +3995,30 @@
2859
3995
  return i.arg;
2860
3996
  }
2861
3997
  patch(l, val) {
2862
- while (l !== 0) {
2863
- const i = this.inst[l >> 1];
2864
- if ((l & 1) === 0) {
2865
- l = i.out;
3998
+ let head = l.head;
3999
+ while (head !== 0) {
4000
+ const i = this.inst[head >> 1];
4001
+ if ((head & 1) === 0) {
4002
+ head = i.out;
2866
4003
  i.out = val;
2867
4004
  } else {
2868
- l = i.arg;
4005
+ head = i.arg;
2869
4006
  i.arg = val;
2870
4007
  }
2871
4008
  }
2872
4009
  }
2873
4010
  append(l1, l2) {
2874
- if (l1 === 0) {
2875
- return l2;
2876
- }
2877
- if (l2 === 0) {
2878
- return l1;
2879
- }
2880
- let last = l1;
2881
- for (;;) {
2882
- const next = this.next(last);
2883
- if (next === 0) {
2884
- break;
2885
- }
2886
- last = next;
2887
- }
2888
- const i = this.inst[last >> 1];
2889
- if ((last & 1) === 0) {
2890
- i.out = l2;
4011
+ if (l1.head === 0) return l2;
4012
+ if (l2.head === 0) return l1;
4013
+
4014
+ // We know exactly where the tail is
4015
+ const i = this.inst[l1.tail >> 1];
4016
+ if ((l1.tail & 1) === 0) {
4017
+ i.out = l2.head;
2891
4018
  } else {
2892
- i.arg = l2;
4019
+ i.arg = l2.head;
2893
4020
  }
2894
- return l1;
4021
+ return new PatchList(l1.head, l2.tail);
2895
4022
  }
2896
4023
  /**
2897
4024
  *
@@ -2920,7 +4047,7 @@
2920
4047
  * @class
2921
4048
  */
2922
4049
  class Frag {
2923
- constructor(i = 0, out = 0, nullable = false) {
4050
+ constructor(i = 0, out = new PatchList(), nullable = false) {
2924
4051
  this.i = i; // an instruction address (pc).
2925
4052
  this.out = out; // a patch list; see explanation in Prog.js
2926
4053
  this.nullable = nullable; // whether the fragment can match the empty string
@@ -2945,6 +4072,33 @@
2945
4072
  c.prog.start = f.i;
2946
4073
  return c.prog;
2947
4074
  }
4075
+ static compileSet(regexps) {
4076
+ const c = new Compiler();
4077
+ if (regexps.length === 0) {
4078
+ c.prog.start = c.newInst(Inst.FAIL).i;
4079
+ return c.prog;
4080
+ }
4081
+ let starts = [];
4082
+ for (let i = 0; i < regexps.length; i++) {
4083
+ const f = c.compile(regexps[i]);
4084
+ const m = c.newInst(Inst.MATCH);
4085
+ c.prog.getInst(m.i).arg = i; // Store the regex index
4086
+ c.prog.patch(f.out, m.i);
4087
+ starts.push(f.i);
4088
+ }
4089
+
4090
+ // Link starts together via ALT
4091
+ let start = starts[0];
4092
+ for (let i = 1; i < starts.length; i++) {
4093
+ const f = c.newInst(Inst.ALT);
4094
+ const inst = c.prog.getInst(f.i);
4095
+ inst.out = start;
4096
+ inst.arg = starts[i];
4097
+ start = f.i;
4098
+ }
4099
+ c.prog.start = start;
4100
+ return c.prog;
4101
+ }
2948
4102
  constructor() {
2949
4103
  this.prog = new Prog();
2950
4104
  this.newInst(Inst.FAIL);
@@ -2957,7 +4111,7 @@
2957
4111
  // Returns a no-op fragment. Sometimes unavoidable.
2958
4112
  nop() {
2959
4113
  const f = this.newInst(Inst.NOP);
2960
- f.out = f.i << 1;
4114
+ f.out = new PatchList(f.i << 1, f.i << 1);
2961
4115
  return f;
2962
4116
  }
2963
4117
  fail() {
@@ -2968,7 +4122,7 @@
2968
4122
  // Given a fragment a, returns a fragment with capturing parens around a.
2969
4123
  cap(arg) {
2970
4124
  const f = this.newInst(Inst.CAPTURE);
2971
- f.out = f.i << 1;
4125
+ f.out = new PatchList(f.i << 1, f.i << 1);
2972
4126
  this.prog.getInst(f.i).arg = arg;
2973
4127
  if (this.prog.numCap < arg + 1) {
2974
4128
  this.prog.numCap = arg + 1;
@@ -3016,10 +4170,10 @@
3016
4170
  const i = this.prog.getInst(f.i);
3017
4171
  if (nongreedy) {
3018
4172
  i.arg = f1.i;
3019
- f.out = f.i << 1;
4173
+ f.out = new PatchList(f.i << 1, f.i << 1);
3020
4174
  } else {
3021
4175
  i.out = f1.i;
3022
- f.out = f.i << 1 | 1;
4176
+ f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
3023
4177
  }
3024
4178
  this.prog.patch(f1.out, f.i);
3025
4179
  return f;
@@ -3031,10 +4185,10 @@
3031
4185
  const i = this.prog.getInst(f.i);
3032
4186
  if (nongreedy) {
3033
4187
  i.arg = f1.i;
3034
- f.out = f.i << 1;
4188
+ f.out = new PatchList(f.i << 1, f.i << 1);
3035
4189
  } else {
3036
4190
  i.out = f1.i;
3037
- f.out = f.i << 1 | 1;
4191
+ f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
3038
4192
  }
3039
4193
  f.out = this.prog.append(f.out, f1.out);
3040
4194
  return f;
@@ -3057,7 +4211,7 @@
3057
4211
  empty(op) {
3058
4212
  const f = this.newInst(Inst.EMPTY_WIDTH);
3059
4213
  this.prog.getInst(f.i).arg = op;
3060
- f.out = f.i << 1;
4214
+ f.out = new PatchList(f.i << 1, f.i << 1);
3061
4215
  return f;
3062
4216
  }
3063
4217
 
@@ -3072,7 +4226,7 @@
3072
4226
  flags &= -2;
3073
4227
  }
3074
4228
  i.arg = flags;
3075
- f.out = f.i << 1;
4229
+ f.out = new PatchList(f.i << 1, f.i << 1);
3076
4230
  if ((flags & RE2Flags.FOLD_CASE) === 0 && runes.length === 1 || runes.length === 2 && runes[0] === runes[1]) {
3077
4231
  i.op = Inst.RUNE1;
3078
4232
  } else if (runes.length === 2 && runes[0] === 0 && runes[1] === Unicode.MAX_RUNE) {
@@ -3177,23 +4331,92 @@
3177
4331
  }
3178
4332
  switch (re.op) {
3179
4333
  case Regexp.Op.CAPTURE:
4334
+ {
4335
+ const sub = Simplify.simplify(re.subs[0]);
4336
+ if (sub !== re.subs[0]) {
4337
+ const nre = Regexp.fromRegexp(re);
4338
+ nre.runes = [];
4339
+ nre.subs = [sub];
4340
+ return nre;
4341
+ }
4342
+ return re;
4343
+ }
3180
4344
  case Regexp.Op.CONCAT:
3181
4345
  case Regexp.Op.ALTERNATE:
3182
4346
  {
3183
- let nre = re;
4347
+ const newSubs = [];
4348
+ let changed = false;
3184
4349
  for (let i = 0; i < re.subs.length; i++) {
3185
4350
  const sub = re.subs[i];
3186
4351
  const nsub = Simplify.simplify(sub);
3187
- if (nre === re && nsub !== sub) {
3188
- nre = Regexp.fromRegexp(re);
3189
- nre.runes = [];
3190
- nre.subs = re.subs.slice(0, re.subs.length);
4352
+ if (nsub !== sub) {
4353
+ changed = true;
4354
+ }
4355
+ if (re.op === Regexp.Op.CONCAT) {
4356
+ // If any part of a CONCAT is mathematically impossible,
4357
+ // the entire CONCAT sequence becomes impossible.
4358
+ if (nsub.op === Regexp.Op.NO_MATCH) {
4359
+ return new Regexp(Regexp.Op.NO_MATCH);
4360
+ }
4361
+ // Drop empty 0-width match nodes entirely from sequences
4362
+ if (nsub.op === Regexp.Op.EMPTY_MATCH) {
4363
+ changed = true;
4364
+ continue;
4365
+ }
4366
+ // Flatten nested concatenations
4367
+ if (nsub.op === Regexp.Op.CONCAT) {
4368
+ changed = true;
4369
+ newSubs.push(...nsub.subs);
4370
+ continue;
4371
+ }
4372
+ } else if (re.op === Regexp.Op.ALTERNATE) {
4373
+ // Drop impossible branches from alternations
4374
+ if (nsub.op === Regexp.Op.NO_MATCH) {
4375
+ changed = true;
4376
+ continue;
4377
+ }
4378
+ // Flatten nested alternations
4379
+ if (nsub.op === Regexp.Op.ALTERNATE) {
4380
+ changed = true;
4381
+ newSubs.push(...nsub.subs);
4382
+ continue;
4383
+ }
3191
4384
  }
3192
- if (nre !== re) {
3193
- nre.subs[i] = nsub;
4385
+ newSubs.push(nsub);
4386
+ }
4387
+ if (changed) {
4388
+ // If we filtered out all nodes, return the mathematically correct fallback
4389
+ if (newSubs.length === 0) {
4390
+ return new Regexp(re.op === Regexp.Op.CONCAT ? Regexp.Op.EMPTY_MATCH : Regexp.Op.NO_MATCH);
4391
+ }
4392
+ // If only 1 node remains, we don't need a CONCAT/ALT container at all
4393
+ if (newSubs.length === 1) {
4394
+ return newSubs[0];
3194
4395
  }
4396
+ const nre = Regexp.fromRegexp(re);
4397
+ nre.runes = [];
4398
+ nre.subs = newSubs;
4399
+ return nre;
4400
+ }
4401
+ return re;
4402
+ }
4403
+ case Regexp.Op.CHAR_CLASS:
4404
+ {
4405
+ if (re.runes === null) return re;
4406
+
4407
+ // Empty character classes match nothing.
4408
+ if (re.runes.length === 0) {
4409
+ return new Regexp(Regexp.Op.NO_MATCH);
3195
4410
  }
3196
- return nre;
4411
+ // Full character classes match everything.
4412
+ if (re.runes.length === 2 && re.runes[0] === 0 && re.runes[1] === Unicode.MAX_RUNE) {
4413
+ return new Regexp(Regexp.Op.ANY_CHAR);
4414
+ }
4415
+ // Standard catch-all except newline
4416
+ if (re.runes.length === 4 && re.runes[0] === 0 && re.runes[1] === Codepoint.CODES.get('\n') - 1 && re.runes[2] === Codepoint.CODES.get('\n') + 1 && re.runes[3] === Unicode.MAX_RUNE) {
4417
+ return new Regexp(Regexp.Op.ANY_CHAR_NOT_NL);
4418
+ }
4419
+ return re;
3197
4420
  }
3198
4421
  case Regexp.Op.STAR:
3199
4422
  case Regexp.Op.PLUS:
@@ -3230,7 +4453,9 @@
3230
4453
  }
3231
4454
  subs.push(Simplify.simplify1(Regexp.Op.PLUS, re.flags, sub, null));
3232
4455
  nre.subs = subs.slice(0);
3233
- return nre;
4456
+
4457
+ // Ensure newly created CONCAT is properly flattened
4458
+ return Simplify.simplify(nre);
3234
4459
  }
3235
4460
  // Special case x{0} handled above.
3236
4461
 
@@ -3268,7 +4493,8 @@
3268
4493
  if (prefixSubs !== null) {
3269
4494
  const prefix = new Regexp(Regexp.Op.CONCAT);
3270
4495
  prefix.subs = prefixSubs.slice(0);
3271
- return prefix;
4496
+ // Ensure newly created CONCAT is properly flattened
4497
+ return Simplify.simplify(prefix);
3272
4498
  }
3273
4499
 
3274
4500
  // Some degenerate case like min > max or min < max < 0.
@@ -3301,6 +4527,13 @@
3301
4527
  return sub;
3302
4528
  }
3303
4529
 
4530
+ // Handle impossible targets gracefully.
4531
+ // e.g. Trying to match "NO_MATCH" 0 or 1 times (QUEST/STAR) evaluates to EMPTY_MATCH.
4532
+ if (sub.op === Regexp.Op.NO_MATCH) {
4533
+ if (op === Regexp.Op.PLUS) return sub; // 1+ times is impossible
4534
+ return new Regexp(Regexp.Op.EMPTY_MATCH);
4535
+ }
4536
+
3304
4537
  // The operators are idempotent if the flags match.
3305
4538
  if (op === sub.op && (flags & RE2Flags.NON_GREEDY) === (sub.flags & RE2Flags.NON_GREEDY)) {
3306
4539
  return sub;
@@ -3308,10 +4541,10 @@
3308
4541
  if (re !== null && re.op === op && (re.flags & RE2Flags.NON_GREEDY) === (flags & RE2Flags.NON_GREEDY) && sub === re.subs[0]) {
3309
4542
  return re;
3310
4543
  }
3311
- re = new Regexp(op);
3312
- re.flags = flags;
3313
- re.subs = [sub];
3314
- return re;
4544
+ const nre = new Regexp(op);
4545
+ nre.flags = flags;
4546
+ nre.subs = [sub];
4547
+ return nre;
3315
4548
  }
3316
4549
  }
3317
4550
 
@@ -3657,16 +4890,6 @@
3657
4890
  }
3658
4891
  }
3659
4892
 
3660
- class Pair {
3661
- static of(first, second) {
3662
- return new Pair(first, second);
3663
- }
3664
- constructor(first, second) {
3665
- this.first = first;
3666
- this.second = second;
3667
- }
3668
- }
3669
-
3670
4893
  // StringIterator: a stream of runes with an opaque cursor, permitting
3671
4894
  // rewinding. The units of the cursor are not specified beyond the
3672
4895
  // fact that ASCII characters are single width. (Cursor positions
@@ -3813,18 +5036,59 @@
3813
5036
  // stride).
3814
5037
  static ANY_TABLE = new UnicodeRangeTable(new Uint32Array([0, Unicode.MAX_RUNE, 1]));
3815
5038
 
5039
+ // Ascii tables
5040
+ static ASCII_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1]));
5041
+ static ASCII_FOLD_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1, 0x017f, 0x017f, 1,
5042
+ // Old English long s (ſ), folds to S/s.
5043
+ 0x212a, 0x212a, 1 // Kelvin K, folds to K/k.
5044
+ ]));
5045
+
3816
5046
  // unicodeTable() returns the Unicode RangeTable identified by name
3817
5047
  // and the table of additional fold-equivalent code points.
3818
5048
  // Returns null if |name| does not identify a Unicode character range.
3819
5049
  static unicodeTable(name) {
3820
5050
  if (name === 'Any') {
3821
- return Pair.of(Parser.ANY_TABLE, Parser.ANY_TABLE);
5051
+ return {
5052
+ tab: Parser.ANY_TABLE,
5053
+ fold: Parser.ANY_TABLE,
5054
+ sign: 1
5055
+ };
5056
+ }
5057
+ if (name === 'Ascii') {
5058
+ return {
5059
+ tab: Parser.ASCII_TABLE,
5060
+ fold: Parser.ASCII_FOLD_TABLE,
5061
+ sign: 1
5062
+ };
5063
+ }
5064
+ if (name === 'Assigned') {
5065
+ // Assigned is the mathematical inversion of Cn (Unassigned)
5066
+ return {
5067
+ tab: UnicodeTables.CATEGORIES.get('Cn'),
5068
+ fold: UnicodeTables.CATEGORIES.get('Cn'),
5069
+ sign: -1
5070
+ };
5071
+ }
5072
+ if (name === 'Lc') {
5073
+ return {
5074
+ tab: UnicodeTables.CATEGORIES.get('LC'),
5075
+ fold: UnicodeTables.FOLD_CATEGORIES.get('LC'),
5076
+ sign: 1
5077
+ };
3822
5078
  }
3823
5079
  if (UnicodeTables.CATEGORIES.has(name)) {
3824
- return Pair.of(UnicodeTables.CATEGORIES.get(name), UnicodeTables.FOLD_CATEGORIES.get(name));
5080
+ return {
5081
+ tab: UnicodeTables.CATEGORIES.get(name),
5082
+ fold: UnicodeTables.FOLD_CATEGORIES.get(name),
5083
+ sign: 1
5084
+ };
3825
5085
  }
3826
5086
  if (UnicodeTables.SCRIPTS.has(name)) {
3827
- return Pair.of(UnicodeTables.SCRIPTS.get(name), UnicodeTables.FOLD_SCRIPT.get(name));
5087
+ return {
5088
+ tab: UnicodeTables.SCRIPTS.get(name),
5089
+ fold: UnicodeTables.FOLD_SCRIPT.get(name),
5090
+ sign: 1
5091
+ };
3828
5092
  }
3829
5093
  return null;
3830
5094
  }
@@ -4133,7 +5397,7 @@
4133
5397
  this.flags = flags;
4134
5398
  // number of capturing groups seen
4135
5399
  this.numCap = 0;
4136
- this.namedGroups = {};
5400
+ this.namedGroups = Object.create(null);
4137
5401
  // Stack of parsed expressions.
4138
5402
  this.stack = [];
4139
5403
  this.free = null;
@@ -4977,9 +6241,11 @@
4977
6241
  const i = lit.indexOf('\\E');
4978
6242
  if (i >= 0) {
4979
6243
  lit = lit.substring(0, i);
6244
+ t.skipString(lit);
6245
+ t.skipString('\\E');
6246
+ } else {
6247
+ t.skipString(lit);
4980
6248
  }
4981
- t.skipString(lit);
4982
- t.skipString('\\E');
4983
6249
  let j = 0;
4984
6250
  while (j < lit.length) {
4985
6251
  const codepoint = lit.codePointAt(j);
@@ -4995,6 +6261,9 @@
4995
6261
  t.rewindTo(savedPos);
4996
6262
  break;
4997
6263
  }
6264
+ } else {
6265
+ // Unconditionally rewind if PERL_X is off, or if string ended abruptly
6266
+ t.rewindTo(savedPos);
4998
6267
  }
4999
6268
  const re = this.newRegexp(Regexp.Op.CHAR_CLASS);
5000
6269
  re.flags = this.flags;
@@ -5320,8 +6589,11 @@
5320
6589
  if (pair === null) {
5321
6590
  throw new RE2JSSyntaxException(Parser.ERR_INVALID_CHAR_RANGE, t.from(startPos));
5322
6591
  }
5323
- const tab = pair.first;
5324
- const fold = pair.second; // fold-equivalent table
6592
+ if (pair.sign < 0) {
6593
+ sign = 0 - sign;
6594
+ }
6595
+ const tab = pair.tab;
6596
+ const fold = pair.fold; // fold-equivalent table
5325
6597
  // Variation of CharClass.appendGroup() for tables.
5326
6598
  if ((this.flags & RE2Flags.FOLD_CASE) === 0 || fold === null) {
5327
6599
  cc.appendTableWithSign(tab, sign);
@@ -5465,6 +6737,7 @@
5465
6737
  res.prefixUTF8 = re2.prefixUTF8;
5466
6738
  res.prefixComplete = re2.prefixComplete;
5467
6739
  res.prefixRune = re2.prefixRune;
6740
+ res.prefilter = re2.prefilter;
5468
6741
  return res;
5469
6742
  }
5470
6743
 
@@ -5507,8 +6780,10 @@
5507
6780
  let re = Parser.parse(expr, mode);
5508
6781
  const maxCap = re.maxCap();
5509
6782
  re = Simplify.simplify(re);
6783
+ const prefilter = PrefilterTree.build(re);
5510
6784
  const prog = Compiler.compileRegexp(re);
5511
6785
  const re2 = new RE2(expr, prog, maxCap, longest);
6786
+ re2.prefilter = prefilter.type === Prefilter.Type.NONE ? null : prefilter;
5512
6787
  const [prefixCompl, prefixStr] = prog.prefix();
5513
6788
  re2.prefixComplete = prefixCompl;
5514
6789
  re2.prefix = prefixStr;
@@ -5540,12 +6815,78 @@
5540
6815
  this.prefixComplete = false; // true if prefix is the entire regexp
5541
6816
  this.prefixRune = 0; // first rune in prefix
5542
6817
  this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
5543
- this.dfa = new DFA(prog); // Initialize the Lazy DFA
6818
+ this.dfa = new DFA(this.prog); // initialize Lazy DFA
6819
+ this.onepass = OnePass.compile(this.prog); // compile OnePass
6820
+ this.prefilter = null;
6821
+ }
6822
+ matchPrefixComplete(input, pos, anchor, ncap) {
6823
+ // If strictly anchored, execution must start at 0
6824
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
6825
+ return null;
6826
+ }
6827
+ let matchStart = -1;
6828
+ let matchEnd = -1;
6829
+ const pLen = input.prefixLength(this);
6830
+ if (anchor === RE2Flags.UNANCHORED) {
6831
+ const idx = input.index(this, pos);
6832
+ if (idx < 0) return null;
6833
+ matchStart = pos + idx;
6834
+ matchEnd = matchStart + pLen;
6835
+ } else if (anchor === RE2Flags.ANCHOR_BOTH) {
6836
+ if (input.endPos() !== pLen) return null;
6837
+ const idx = input.index(this, 0);
6838
+ if (idx !== 0) return null;
6839
+ matchStart = 0;
6840
+ matchEnd = pLen;
6841
+ } else if (anchor === RE2Flags.ANCHOR_START) {
6842
+ const idx = input.index(this, 0);
6843
+ if (idx !== 0) return null;
6844
+ matchStart = 0;
6845
+ matchEnd = pLen;
6846
+ }
6847
+ if (matchStart < 0) return null;
6848
+
6849
+ // If captures are requested (e.g. findSubmatch instead of test), populate bounds
6850
+ if (ncap > 0) {
6851
+ const matchcap = new Int32Array(ncap).fill(-1);
6852
+ matchcap[0] = matchStart;
6853
+ matchcap[1] = matchEnd;
6854
+ return Array.from(matchcap);
6855
+ }
6856
+ return []; // Matched successfully, but no capture data requested
5544
6857
  }
5545
6858
  executeEngine(input, pos, anchor, ncap) {
6859
+ // LITERAL FAST PATH
6860
+ // If the entire regex is just a literal string (and no nested capture boundaries are requested),
6861
+ // bypass all state machines and execute via V8's blistering fast native indexOf
6862
+ if (this.prefixComplete && (ncap === 0 || this.numSubexp === 0)) {
6863
+ return this.matchPrefixComplete(input, pos, anchor, ncap);
6864
+ }
6865
+
6866
+ // PREFILTER FAST PATH
6867
+ // If the unanchored query requires specific literal strings (e.g. "a.*b"),
6868
+ // verify those strings exist using high-speed JS string searches before waking up the state engines.
6869
+ if (this.prefilter !== null && anchor === RE2Flags.UNANCHORED) {
6870
+ if (!this.prefilter.eval(input, pos)) {
6871
+ return null;
6872
+ }
6873
+ }
6874
+
6875
+ // FAST PATH: OnePass DFA engine.
6876
+ // If compiled successfully, it perfectly supports capture groups
6877
+ // and is blisteringly fast since it skips thread queues completely.
6878
+ if (this.onepass !== null) {
6879
+ return OnePass.execute(this, input, pos, anchor, ncap);
6880
+ }
6881
+
5546
6882
  // If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
5547
6883
  // We must use the NFA.
5548
6884
  if (ncap > 0) {
6885
+ // Backtracker bit-state execution bounds check
6886
+ if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
6887
+ return Backtracker.execute(this, input, pos, anchor, ncap);
6888
+ }
6889
+ // NFA execution
5549
6890
  return this.doExecuteNFA(input, pos, anchor, ncap);
5550
6891
  }
5551
6892
  const dfaResult = this.dfa.match(input, pos, anchor);
@@ -5554,6 +6895,11 @@
5554
6895
  return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
5555
6896
  }
5556
6897
 
6898
+ // Backtracker bit-state execution bounds check
6899
+ if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
6900
+ return Backtracker.execute(this, input, pos, anchor, ncap);
6901
+ }
6902
+
5557
6903
  // Fallback to NFA
5558
6904
  return this.doExecuteNFA(input, pos, anchor, ncap);
5559
6905
  }
@@ -6134,6 +7480,50 @@
6134
7480
  }
6135
7481
  }
6136
7482
 
7483
+ class RE2Set {
7484
+ constructor(anchor = RE2Flags.UNANCHORED, flags = RE2Flags.PERL) {
7485
+ this.anchor = anchor;
7486
+ this.flags = flags;
7487
+ this.regexps = [];
7488
+ this.prog = null;
7489
+ this.dfa = null;
7490
+ this.dummyRe2 = null;
7491
+ }
7492
+ add(pattern) {
7493
+ if (this.prog) {
7494
+ throw new RE2JSCompileException('Cannot add patterns after compile');
7495
+ }
7496
+ const re = Parser.parse(pattern, this.flags);
7497
+ this.regexps.push(Simplify.simplify(re));
7498
+ return this.regexps.length - 1;
7499
+ }
7500
+ compile() {
7501
+ if (this.prog) return;
7502
+ this.prog = Compiler.compileSet(this.regexps);
7503
+ this.dfa = new DFA(this.prog);
7504
+ this.dummyRe2 = {
7505
+ prog: this.prog,
7506
+ cond: this.prog.startCond(),
7507
+ prefix: '',
7508
+ prefixRune: 0,
7509
+ longest: false
7510
+ };
7511
+ }
7512
+ match(input) {
7513
+ if (!this.prog) this.compile();
7514
+ const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
7515
+
7516
+ // Fast path: Try the blistering fast DFA
7517
+ const dfaResult = this.dfa.matchSet(machineInput, 0, this.anchor);
7518
+ if (dfaResult !== null) return dfaResult;
7519
+
7520
+ // Safe Fallback: Handle boundaries (\b) or massive state explosions via NFA
7521
+ const machine = Machine.fromRE2(this.dummyRe2);
7522
+ machine.init(0);
7523
+ return machine.matchSet(machineInput, 0, this.anchor);
7524
+ }
7525
+ }
7526
+
6137
7527
  /**
6138
7528
  * Transform JS regex string to RE2 regex string
6139
7529
  */
@@ -6216,7 +7606,8 @@
6216
7606
  default:
6217
7607
  {
6218
7608
  result += '\\';
6219
- let symSize = Utils.charCount(ch.codePointAt(0));
7609
+ let cp = data.codePointAt(i + 1);
7610
+ let symSize = Utils.charCount(cp);
6220
7611
  result += data.substring(i + 1, i + 1 + symSize);
6221
7612
  i += symSize + 1;
6222
7613
  continue;
@@ -6236,7 +7627,8 @@
6236
7627
  continue;
6237
7628
  }
6238
7629
  }
6239
- let symSize = Utils.charCount(ch.codePointAt(0));
7630
+ let cp = data.codePointAt(i);
7631
+ let symSize = Utils.charCount(cp);
6240
7632
  result += data.substring(i, i + symSize);
6241
7633
  i += symSize;
6242
7634
  }
@@ -6599,7 +7991,9 @@
6599
7991
  exports.RE2JSException = RE2JSException;
6600
7992
  exports.RE2JSFlagsException = RE2JSFlagsException;
6601
7993
  exports.RE2JSGroupException = RE2JSGroupException;
7994
+ exports.RE2JSInternalException = RE2JSInternalException;
6602
7995
  exports.RE2JSSyntaxException = RE2JSSyntaxException;
7996
+ exports.RE2Set = RE2Set;
6603
7997
 
6604
7998
  }));
6605
7999
  //# sourceMappingURL=index.umd.js.map