re2js 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.0.2
5
+ * @version v2.1.0
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -56,6 +56,23 @@ class RE2Flags {
56
56
  /**
57
57
  * Various constants and helper for unicode codepoints.
58
58
  */
59
+ const ASCII_SIZE = 128;
60
+ const ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
61
+ const ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
62
+ for (let i = 0; i < ASCII_SIZE; i++) {
63
+ if (i >= 97 && i <= 122) {
64
+ // a-z
65
+ ASCII_TO_UPPER[i] = i - 32;
66
+ } else {
67
+ ASCII_TO_UPPER[i] = i;
68
+ }
69
+ if (i >= 65 && i <= 90) {
70
+ // A-Z
71
+ ASCII_TO_LOWER[i] = i + 32;
72
+ } else {
73
+ ASCII_TO_LOWER[i] = i;
74
+ }
75
+ }
59
76
  class Codepoint {
60
77
  // codePointAt(0)
61
78
  static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
@@ -63,6 +80,7 @@ class Codepoint {
63
80
  // convert unicode codepoint to upper case codepoint
64
81
  // return same codepoint, if cannot do it (or codepoint not have upper variation)
65
82
  static toUpperCase(codepoint) {
83
+ if (codepoint < ASCII_SIZE) return ASCII_TO_UPPER[codepoint];
66
84
  const s = String.fromCodePoint(codepoint).toUpperCase();
67
85
  if (s.length > 1) {
68
86
  return codepoint;
@@ -77,6 +95,7 @@ class Codepoint {
77
95
  // convert unicode codepoint to lower case codepoint
78
96
  // return same codepoint, if cannot do it (or codepoint not have lower variation)
79
97
  static toLowerCase(codepoint) {
98
+ if (codepoint < ASCII_SIZE) return ASCII_TO_LOWER[codepoint];
80
99
  const s = String.fromCodePoint(codepoint).toLowerCase();
81
100
  if (s.length > 1) {
82
101
  return codepoint;
@@ -199,9 +218,11 @@ class UnicodeTables {
199
218
  C: () => new UnicodeRangeTable(decodeRanges('AfBgDgBBOrWrWBHHBCBICCVuMuMnBBBzBBBE4B4BBGBcDBHQBXhGhGxBBB8BBBmDNB8BBByBBBQddBCCMEBgBHBsCiFiFJBBDBBXIICCBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBPMMBEB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKMMBDBbEByBPBDBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBjoIBvLBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCB-FCBHBBHBBHBBECBIIIBLBDBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIB-BGGBLBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMBxhBPBXJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBF-6DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBrCHBxDUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQB7BEBCvOBrB1GBsBDBC-OBIFFDQQmGBBRoBBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEvq3VBxHvw-FB', false)),
200
219
  Cc: () => new UnicodeRangeTable(decodeRanges('AfgDgB', true)),
201
220
  Cf: () => new UnicodeRangeTable(decodeRanges('tFzqBzqBBEBXhGhGyBhMhMBxCxCs5D9-B9-BBDBbEByBEBCJBw03B6H6HBBBimEQQj7IPBhjiBDBwmFHBn0rYffB+CB', false)),
221
+ Cn: () => new UnicodeRangeTable(decodeRanges('4bBBHDBICCVuMuMnBBBzBBBE4B4BBGBcDBHKBvI9B9BBmDmDBMB8BBByBBBQddBCCMEBgBDDBDBuHJJBDDBXXICCBBBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBbFB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKmDmDNBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBDBvzIBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCC-FCBHBBHBBHBBECBIIIBIBGBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIBlCJBCBBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMB3iBJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBJ76DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBjGUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQB7BEBCvOBrB1GBsBDBC-OBIFFDQQmGBBRoBBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEwi3VBCdBhD-DBxHvw-BB---BBB---BBB', false)),
202
222
  Co: () => new UnicodeRangeTable(decodeRanges('gg4B-nGh4hc9--BD9--B', true)),
203
223
  Cs: () => new UnicodeRangeTable(decodeRanges('gg2B--B', true)),
204
224
  L: () => new UnicodeRangeTable(decodeRanges('hCZBHZBwBLLFGGBVBCeBCpOBFLBPEBICCiEEBCBBDDBCHHCCBCCCBSBCyCBCqEBJlFBClBBDHHBnBBoCaBFDBuBqBBkBBBCiDBCQQBIIBLLBBBDRRCdBe4CBMZZBfBKBBFGGBUBFKKEYYBXBIKBGXBCFBSpBB7B1BBETTIJBQPBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNGB7BBBCCCBDBCXBCCCBIBCBBKDDBDBCWWBCBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNSSBkBBCGGDqBBCsKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPBR1CBDFBErTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBkBFFkC4CBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBzC+C+CBtBBSHB3BdBOBBLrBBbjBBqBCBLjBBDKBGqBBDCBqBDBCFBCBBEGGB+FBhC1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGB1DOORMBmDFFDJBCEEBDBHGCBCBCKBDDBGEBF1B1BB8zC8zCBjHBHDBEBBNlBBCGGD3BBIRRBVBKGBCGBCGBCGBCGBCGBCGBCGBxC2O2OBrBrBBDBGBBF1CBHCBC5CBCDBGqBBC9CBSfBxBPBhQ-tGBhCs0VBkCtBBDsIBEPBLBBVuBBReBDlCByBIBDmDBDiCBDBBCCCBGBWPBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBdRRBDBCJBLEBCoBBYCBCHBVWBEEEBwBBCEEBDDBDBDCCZCBDKBICBNFBDFBDFBKGBCGBCqBBCNBHyDBej9KBNWBFwBBloItLBDpDBnBGBNEBGCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBlBZBHZBM4CBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBxB9EBTjBBFjBBFnBBJzBBNKBCOBCGBCBBCKBCOBCGBCBBEzBBN2JBKVBLHBZFBCpBBCIBmCFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4ByBBOyBBOjBBnBbBKWB7HpBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB1D-BBgBHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBBRDBgBUBGSBxNsBB0G-BBhEqCBGjCjCBLBhCBBCddB2-FBJ1mBBqBJBo3IDBCGBCBBCiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBn7F0CBCmCBCBBDDDBDDBCBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BeBHFBmI9BBzEsBBLGBRiKiKBcBTrBBlPbBlHdBDwPwPBFBCDBCBBCOBCkGB8BjCBI1lB1lBBCBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
225
+ LC: () => new UnicodeRangeTable(decodeRanges('hCZBHZB7BLLBVBCeBCiGBCDBFvGBCaBhGDBDBBECBCHHCCBCCCBSBCyCBCqEBJlFBClBBKoBB44ClBBCGGDqBBDCBhV1CBDFBjkCKBGqBBDCBhCrBBgCMBChBBmD1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGBmIFFDJBCEEBDBHGCBCBCFBFDDBCBGEBF1B1BB8zC8zCB6DBDmDBHDBEBBNlBBCGGzoetBBTbBnEtCBCWBEDBC9BBDBBCCCBGBZBBE2Z2ZBpBBGIBIvCBh6TGBNEBqgBZBHZBmlBvCBhDjBBFjBB1DKBCOBCGBCBBCKBCOBCGBCBBk2ByBBOyBB+CVBLVB74C-BBhrV-BBhsZ0CBCmCBCBBDDDBDDBCBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BJBCTBHFB2uCjCB', false)),
205
226
  Ll: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBC2BCBQCBuBCDECBBBDCCDEEBFFDEEBBBDDDCCCDCCBCCDEECDDBDDBBBHGDCOCBSCBDDCEEC4BCBFBDDDBCCFICBjCBCaBiGCCEEEBBBTccBhBBCBBECBCWCBDBCGDB0B0BBuBBCgBCK0BCDMCBgDCxBoBBo6CqBBDCB5XFBjkCIBC2D2DBqBBgCMBChBBnD0ECBHBCgDCBHBJFBLHBJHBJFBLHBJHBJNBDHBJHBJHBJEBCBBHEEBBBCBBJDBDBBJHBLCBCBBzIEEBEEcKFDBBJDBF2B2Bs1CvBBCEEBGCFCCBCCBEBGiDCBIICFFNlBBCGG0oesBCUaCoEMCBBBC+BCBGBCCCDICFCCDCCBBBCSCGGGCMCFCCDEECICbEE2ZqBBGIBIvCBh6TGBNEBqhBZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfB7sZZBbGBCRBbZBbDBCCCBFBCKBbZBbZBbZBbZBbZBbZBbZBbZBbbBdYBCFBbYBCFBbYBCFBbYBCFBbYBCFBC15B15BBIBCTBHFB4vChBB', false)),
206
227
  Lm: () => new UnicodeRangeTable(decodeRanges('wVRBFLBPEBICCmEGG-OnHnHlFBBuIBBFgBgBKEEhFoFoF1mBgEgE2R72B72BsDkTkTxOFBvF+BBOjBjBBjBByVOORMBg-CBByHgGgG2OsBsBBDBGiDiDB+C+CBBB34bjnBjnBBEBvIzDzDdBB6DIBxCYYqDCBEBB2OXXqEtDtDWBBoDDBKngVngVuBBBh-BFBCpBBCIB0sBhBhBxuXDB9PCBpBBBnRMBhCBBCtgQtgQBCBCGBCBByhM9BBqGGBudgjBgjB', false)),
207
228
  Lo: () => new UnicodeRangeTable(decodeRanges('qFQQhIFFBCBxG8Z8ZBZBFDBuBfBCJBkBBBCiDBCZZBLLBBBDRRCdBe4CBMZZBfBWVBrBYBIKBGXBCFBSoBB8B1BBETTIJBROBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNFB8BBBCCCBDBCXBCCCBIBCBBKDDBDBYDBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNyDyDBnKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPByDrTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBpBkCkCBhBBC0BBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBxFuBBSHB3BdBOBBLrBBbjBBqBCBLdByDDBCFBCBBE7hB7hBBCB4-C3BBZWBKGBCGBCGBCGBCGBCGBCGBCGBoR2B2BF1CBJCCB4CBFGGBpBBC9CBSfBxBPBhQ-tGBhC0wUBC2jBBkCnBBJrIBFPBLBBjCyByBBkCBqFoDoDEGBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBuBEBDIBLEBCoBBYCBCHBVPBCFBEEEBwBBCEEBDDBDBDCCZBBEKBIPPBEBDFBDFBKGBCGByEiBBej9KBNWBFwBBloItLBDpDBkCCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBqDJBCsBBDeBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBhEtCBjDnBBJzBB9CzBBN2JBKVBLHB5EFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4FjBBnBDBCxJxJBoBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB0GHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBB0BUBGSB0NnBB2MqCBGwFwFB2-FBJ1mBBqBJB43IiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBxzI2P2PBrBBiBiKiKBcBTrBBlPaBmHdBDwPwPBFBCDBCBBCOBCkGB8pBDBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
@@ -410,8 +431,11 @@ class UnicodeTables {
410
431
  Zanabazar_Square: () => new UnicodeRangeTable(decodeRanges('gwmCnC', true))
411
432
  });
412
433
  static FOLD_CATEGORIES = new LazyMap({
413
- L: () => new UnicodeRangeTable(decodeRanges('laA', true)),
414
- Ll: () => new UnicodeRangeTable(decodeRanges('hCZBmDWBCGBiBuBCEECDOCDuBCBECEBBCCCBCCBBBDDBCBBCCBEBBCBBCECBCCDCCBCCBBBCCCBEEIBBCBBCBBCOCDQCDBBCCCBBBC4BCIBBCBBDCCBCBCGC3HrBrBCEEJHHCCBCCCBCCBPBCIBkBJJCUCGDDCBBDyBBxBgBCK2BCBMCD+CCDlBBq6ClBBCGGzW1CB0kCHHBpBBDCBhK0ECKgDCKHBJFBLHBJHBJFBMGCJHBZHBJHBJHBJEBMEBMDBNEBMEBqJEEBHHxC9zC9zCBuBBxBCCBBBDGCBCBCDDJCBCgDCJCCFuqeuqeCqBCUaCoEMCE8BCLECBICFCCDCCEUCBDBCEBCOCBCBCCCBEEGGCZs5Vs5VBYBmmBnBBpEjBB9EKBCOBCGBCBBr3ByBB+EVB75CfBhsVfBh1ehBB', false)),
434
+ C: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
435
+ Cn: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
436
+ L: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
437
+ LC: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
438
+ Ll: () => new UnicodeRangeTable(decodeRanges('hCZBmDWBCGBiBuBCEECDOCDuBCBECEBBCCCBCCBBBDDBCBBCCBEBBCBBCECBCCDCCBCCBBBCCCBEEIBBCBBCBBCOCDQCDBBCCCBBBC4BCIBBCBBDCCBCBCGC3HrBrBCEEJHHCCBCCCBCCBPBCIBkBJJCUCGDDCBBDyBBxBgBCK2BCBMCD+CCDlBBq6ClBBCGGzW1CB0kCHHBpBBDCBhK0ECKgDCKHBJFBLHBJHBJFBMGCJHBZHBJHBJHBJEBMEBMDBNEBMEBqJEEBHHxC9zC9zCBuBBxBCCBBBDGCBCBCDDJCBCgDCJCCFuqeuqeCqBCUaCoEMCE8BCLECBICFCCDCCEUCBDBCEBCOCBCBCCCBEECKCZs5Vs5VBYBmmBnBBpEjBB9EKBCOBCGBCBBr3ByBB+EVB75CfBhsVfBh1ehBB', false)),
415
439
  Lt: () => new UnicodeRangeTable(decodeRanges('kOCCBCCBCClBCCtsHHBJHBJHBMQQwBAB', false)),
416
440
  Lu: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBCuBCEECDOCDsBCDECBBBDCCDEEGDDECBDDDCCCDFFDEECDDECCGBBCBBCBBCOCBSCDBBCEECkBCEQCJDDBCCFICBEBCBBCCCBEEBCCBCBCEBDCCBDDIDDCBBEFBGLLBnFnFsBCCEEEBBBvBDBCdBCBBECBCWCBDBCGD1BvBBCgBCK0BCDMCBgDCyBlBBq6CqBBDCB5XFBjkCIBCvHvHERRzD0ECGGGC8CCBHBJFBLHBJHBJFBMGCJHBJNBzBBBNSSBPPBEEpL2B2Bs1CvBBCEEBGCHDDLiDCJCCFNNBkBBCGG0oesBCUaCoEMCE8BCLCCDICFFFCBBDSCMOCFCCDEEGECb9a9advCBi8UZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfBj1ehBB', false)),
417
441
  M: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
@@ -420,7 +444,9 @@ class UnicodeTables {
420
444
  static FOLD_SCRIPT = new LazyMap({
421
445
  Common: () => new UnicodeRangeTable(decodeRanges('8cgBgB', false)),
422
446
  Greek: () => new UnicodeRangeTable(decodeRanges('1FwUwU', false)),
423
- Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false))
447
+ Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
448
+ Latin: () => new UnicodeRangeTable(decodeRanges('y+pBCC', false)),
449
+ Unknown: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false))
424
450
  });
425
451
  }
426
452
 
@@ -674,7 +700,7 @@ class Utils {
674
700
 
675
701
  // Returns the array of runes in the specified Java UTF-16 string.
676
702
  static stringToRunes(str) {
677
- return String(str).split('').map(s => s.codePointAt(0));
703
+ return Array.from(String(str)).map(s => s.codePointAt(0));
678
704
  }
679
705
 
680
706
  // Returns the Java UTF-16 string containing the single rune |r|.
@@ -945,6 +971,14 @@ class MachineInputBase {
945
971
  endPos() {
946
972
  return this.end;
947
973
  }
974
+ hasString() {
975
+ return false;
976
+ }
977
+
978
+ // Helper for the exact-literal fast-path execution router
979
+ prefixLength() {
980
+ return 0;
981
+ }
948
982
  }
949
983
 
950
984
  // An implementation of MachineInput for UTF-8 byte arrays.
@@ -956,6 +990,14 @@ class MachineUTF8Input extends MachineInputBase {
956
990
  this.start = start;
957
991
  this.end = end;
958
992
  }
993
+ hasString(prefilter, pos) {
994
+ const target = prefilter.bytes;
995
+ if (target.length === 0) return true;
996
+
997
+ // Reuse the high-speed indexOf method already implemented below
998
+ const idx = this.indexOf(this.bytes, target, this.start + pos);
999
+ return idx !== -1 && idx <= this.end - target.length;
1000
+ }
959
1001
 
960
1002
  // Returns the rune at the specified index; the units are
961
1003
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
@@ -1032,10 +1074,10 @@ class MachineUTF8Input extends MachineInputBase {
1032
1074
  indexOf(source, target, fromIndex = 0) {
1033
1075
  let targetLength = target.length;
1034
1076
  if (targetLength === 0) {
1035
- return -1;
1077
+ return fromIndex <= this.end ? fromIndex : -1;
1036
1078
  }
1037
- let sourceLength = source.length;
1038
- for (let i = fromIndex; i <= sourceLength - targetLength; i++) {
1079
+ let limit = this.end - targetLength;
1080
+ for (let i = fromIndex; i <= limit; i++) {
1039
1081
  for (let j = 0; j < targetLength; j++) {
1040
1082
  if (source[i + j] !== target[j]) {
1041
1083
  break;
@@ -1046,6 +1088,9 @@ class MachineUTF8Input extends MachineInputBase {
1046
1088
  }
1047
1089
  return -1;
1048
1090
  }
1091
+ prefixLength(re2) {
1092
+ return re2.prefixUTF8.length;
1093
+ }
1049
1094
  }
1050
1095
 
1051
1096
  // |pos| and |width| are in JS "char" units.
@@ -1056,6 +1101,10 @@ class MachineUTF16Input extends MachineInputBase {
1056
1101
  this.start = start;
1057
1102
  this.end = end;
1058
1103
  }
1104
+ hasString(prefilter, pos) {
1105
+ const idx = this.charSequence.indexOf(prefilter.str, this.start + pos);
1106
+ return idx !== -1 && idx <= this.end - prefilter.str.length;
1107
+ }
1059
1108
 
1060
1109
  // Returns the rune at the specified index; the units are
1061
1110
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
@@ -1101,6 +1150,9 @@ class MachineUTF16Input extends MachineInputBase {
1101
1150
  const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
1102
1151
  return Utils.emptyOpContext(r1, r2);
1103
1152
  }
1153
+ prefixLength(re2) {
1154
+ return re2.prefix.length;
1155
+ }
1104
1156
  }
1105
1157
  class MachineInput {
1106
1158
  static fromUTF8(bytes, start = 0, end = bytes.length) {
@@ -1191,6 +1243,17 @@ class RE2JSFlagsException extends RE2JSException {
1191
1243
  }
1192
1244
  }
1193
1245
 
1246
+ /**
1247
+ * An exception thrown for internal engine errors, such as corrupted bytecodes.
1248
+ */
1249
+ class RE2JSInternalException extends RE2JSException {
1250
+ /** @param {string} message */
1251
+ constructor(message) {
1252
+ super(message);
1253
+ this.name = 'RE2JSInternalException';
1254
+ }
1255
+ }
1256
+
1194
1257
  /**
1195
1258
  * A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
1196
1259
  *
@@ -1393,6 +1456,23 @@ class Matcher {
1393
1456
  }
1394
1457
  return this.substring(start, end);
1395
1458
  }
1459
+
1460
+ /**
1461
+ * Returns a dictionary map of all named capturing groups and their matched values.
1462
+ * If a group was not matched, its value will be `null`.
1463
+ * @returns {Record<string, string|null>}
1464
+ */
1465
+ getNamedGroups() {
1466
+ if (!this.hasMatch) {
1467
+ throw new RE2JSGroupException('perhaps no match attempted');
1468
+ }
1469
+ const result = {};
1470
+ for (const name of Object.keys(this.namedGroups)) {
1471
+ result[name] = this.group(name);
1472
+ }
1473
+ return result;
1474
+ }
1475
+
1396
1476
  /**
1397
1477
  * Returns the number of subgroups in this pattern.
1398
1478
  *
@@ -1817,16 +1897,20 @@ class Inst {
1817
1897
  }
1818
1898
  return r === r0;
1819
1899
  }
1820
- // Peek at the first few pairs.
1821
- // Should handle ASCII well.
1822
- for (let j = 0; j < this.runes.length && j <= 8; j += 2) {
1823
- if (r < this.runes[j]) {
1824
- return false;
1825
- }
1826
- if (r <= this.runes[j + 1]) {
1827
- return true;
1900
+ const len = this.runes.length;
1901
+ // If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
1902
+ if (len === 2 || len === 4 || len === 6 || len === 8) {
1903
+ for (let j = 0; j < len; j += 2) {
1904
+ if (r < this.runes[j]) {
1905
+ return false;
1906
+ }
1907
+ if (r <= this.runes[j + 1]) {
1908
+ return true;
1909
+ }
1828
1910
  }
1911
+ return false; // Stop here
1829
1912
  }
1913
+
1830
1914
  // Otherwise binary search.
1831
1915
  let lo = 0;
1832
1916
  let hi = this.runes.length / 2 | 0;
@@ -1844,6 +1928,40 @@ class Inst {
1844
1928
  }
1845
1929
  return false;
1846
1930
  }
1931
+
1932
+ // matchRunePos checks whether the instruction matches (and consumes) r.
1933
+ // If so, it returns the index of the matching rune pair.
1934
+ // If not, it returns -1.
1935
+ matchRunePos(r) {
1936
+ if (this.runes.length === 1) {
1937
+ const r0 = this.runes[0];
1938
+ if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
1939
+ return Unicode.equalsIgnoreCase(r0, r) ? 0 : -1;
1940
+ }
1941
+ return r === r0 ? 0 : -1;
1942
+ }
1943
+ const len = this.runes.length;
1944
+ if (len === 2 || len === 4 || len === 6 || len === 8) {
1945
+ for (let j = 0; j < len; j += 2) {
1946
+ if (r < this.runes[j]) return -1;
1947
+ if (r <= this.runes[j + 1]) return Math.floor(j / 2);
1948
+ }
1949
+ return -1;
1950
+ }
1951
+ let lo = 0;
1952
+ let hi = Math.floor(len / 2);
1953
+ while (lo < hi) {
1954
+ const m = lo + hi >> 1;
1955
+ const c = this.runes[2 * m];
1956
+ if (c <= r) {
1957
+ if (r <= this.runes[2 * m + 1]) return m;
1958
+ lo = m + 1;
1959
+ } else {
1960
+ hi = m;
1961
+ }
1962
+ }
1963
+ return -1;
1964
+ }
1847
1965
  /**
1848
1966
  *
1849
1967
  * @returns {string}
@@ -1859,7 +1977,7 @@ class Inst {
1859
1977
  case Inst.EMPTY_WIDTH:
1860
1978
  return `empty ${this.arg} -> ${this.out}`;
1861
1979
  case Inst.MATCH:
1862
- return 'match';
1980
+ return `match${this.arg !== 0 ? ` ${this.arg}` : ''}`;
1863
1981
  case Inst.FAIL:
1864
1982
  return 'fail';
1865
1983
  case Inst.NOP:
@@ -1885,7 +2003,7 @@ class Inst {
1885
2003
  class Thread {
1886
2004
  constructor() {
1887
2005
  this.inst = null;
1888
- this.cap = [];
2006
+ this.cap = null; // Initialized to Int32Array later
1889
2007
  }
1890
2008
  }
1891
2009
 
@@ -1913,9 +2031,11 @@ class Queue {
1913
2031
  return j;
1914
2032
  }
1915
2033
  clear() {
1916
- this.sparse = [];
1917
- this.densePcs = [];
1918
- this.denseThreads = [];
2034
+ // Prevent memory leaks by nulling out used object references
2035
+ for (let i = 0; i < this.size; i++) {
2036
+ this.denseThreads[i] = null;
2037
+ }
2038
+ // The sparse set logic safely ignores stale integers in Typed Arrays.
1919
2039
  this.size = 0;
1920
2040
  }
1921
2041
  toString() {
@@ -1944,7 +2064,8 @@ class Machine {
1944
2064
  m.pool = [];
1945
2065
  m.poolSize = 0;
1946
2066
  m.matched = false;
1947
- m.matchcap = Array(m.prog.numCap < 2 ? 2 : m.prog.numCap).fill(0);
2067
+ // Use Int32Array instead of standard JS array
2068
+ m.matchcap = new Int32Array(m.prog.numCap < 2 ? 2 : m.prog.numCap);
1948
2069
  m.ncap = 0;
1949
2070
  return m;
1950
2071
  }
@@ -1958,27 +2079,30 @@ class Machine {
1958
2079
  if (ncap > this.matchcap.length) {
1959
2080
  this.initNewCap(ncap);
1960
2081
  } else {
1961
- this.resetCap(ncap);
2082
+ this.resetCap();
1962
2083
  }
1963
2084
  }
1964
- resetCap(ncap) {
2085
+
2086
+ // Wipes existing typed array memory without reallocating
2087
+ resetCap() {
1965
2088
  for (let i = 0; i < this.poolSize; i++) {
1966
2089
  const t = this.pool[i];
1967
- t.cap = Array(ncap).fill(0);
2090
+ t.cap.fill(0);
1968
2091
  }
1969
2092
  }
1970
2093
  initNewCap(ncap) {
1971
2094
  for (let i = 0; i < this.poolSize; i++) {
1972
2095
  const t = this.pool[i];
1973
- t.cap = Array(ncap).fill(0);
2096
+ t.cap = new Int32Array(ncap);
1974
2097
  }
1975
- this.matchcap = Array(ncap).fill(0);
2098
+ this.matchcap = new Int32Array(ncap);
1976
2099
  }
1977
2100
  submatches() {
1978
2101
  if (this.ncap === 0) {
1979
2102
  return Utils.emptyInts();
1980
2103
  }
1981
- return this.matchcap.slice(0, this.ncap);
2104
+ // Use subarray() to create a zero-allocation view before converting
2105
+ return Array.from(this.matchcap.subarray(0, this.ncap));
1982
2106
  }
1983
2107
 
1984
2108
  // alloc() allocates a new thread with the given instruction.
@@ -1990,6 +2114,7 @@ class Machine {
1990
2114
  t = this.pool[this.poolSize];
1991
2115
  } else {
1992
2116
  t = new Thread();
2117
+ t.cap = new Int32Array(this.matchcap.length);
1993
2118
  }
1994
2119
  t.inst = inst;
1995
2120
  return t;
@@ -2019,7 +2144,7 @@ class Machine {
2019
2144
  return false;
2020
2145
  }
2021
2146
  this.matched = false;
2022
- this.matchcap = Array(this.prog.numCap).fill(-1);
2147
+ this.matchcap.fill(-1);
2023
2148
  let runq = this.q0;
2024
2149
  let nextq = this.q1;
2025
2150
  let r = input.step(pos);
@@ -2090,6 +2215,85 @@ class Machine {
2090
2215
  this.freeQueue(nextq);
2091
2216
  return this.matched;
2092
2217
  }
2218
+ matchSet(input, pos, anchor) {
2219
+ const startCond = this.re2.cond;
2220
+ if (startCond === Utils.EMPTY_ALL) return [];
2221
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2222
+ return [];
2223
+ }
2224
+ let runq = this.q0;
2225
+ let nextq = this.q1;
2226
+ let r = input.step(pos);
2227
+ let rune = r >> 3;
2228
+ let width = r & 7;
2229
+ let rune1 = -1;
2230
+ let width1 = 0;
2231
+ if (r !== MachineInputBase.EOF()) {
2232
+ r = input.step(pos + width);
2233
+ rune1 = r >> 3;
2234
+ width1 = r & 7;
2235
+ }
2236
+ let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
2237
+ const matches = new Set();
2238
+ while (true) {
2239
+ if (runq.isEmpty()) {
2240
+ if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
2241
+ }
2242
+ if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
2243
+ this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
2244
+ }
2245
+ const nextPos = pos + width;
2246
+ flag = input.context(nextPos);
2247
+ for (let j = 0; j < runq.size; j++) {
2248
+ let t = runq.denseThreads[j];
2249
+ if (t === null) continue;
2250
+ const i = t.inst;
2251
+ let add = false;
2252
+ switch (i.op) {
2253
+ case Inst.MATCH:
2254
+ if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) break;
2255
+ matches.add(i.arg); // Record the matched Set ID
2256
+ break;
2257
+ case Inst.RUNE:
2258
+ add = i.matchRune(rune);
2259
+ break;
2260
+ case Inst.RUNE1:
2261
+ add = rune === i.runes[0];
2262
+ break;
2263
+ case Inst.RUNE_ANY:
2264
+ add = true;
2265
+ break;
2266
+ case Inst.RUNE_ANY_NOT_NL:
2267
+ add = rune !== Codepoint.CODES.get('\n');
2268
+ break;
2269
+ default:
2270
+ throw new RE2JSInternalException('bad inst');
2271
+ }
2272
+ if (add) {
2273
+ t = this.add(nextq, i.out, nextPos, t.cap, flag, t);
2274
+ }
2275
+ if (t !== null) {
2276
+ this.freeThread(t);
2277
+ runq.denseThreads[j] = null;
2278
+ }
2279
+ }
2280
+ runq.clear();
2281
+ if (width === 0) break;
2282
+ pos += width;
2283
+ rune = rune1;
2284
+ width = width1;
2285
+ if (rune !== -1) {
2286
+ r = input.step(pos + width);
2287
+ rune1 = r >> 3;
2288
+ width1 = r & 7;
2289
+ }
2290
+ const tmpq = runq;
2291
+ runq = nextq;
2292
+ nextq = tmpq;
2293
+ }
2294
+ this.freeQueue(nextq);
2295
+ return Array.from(matches).sort((a, b) => a - b);
2296
+ }
2093
2297
  step(runq, nextq, pos, nextPos, c, nextCond, anchor, atEnd) {
2094
2298
  const longest = this.re2.longest;
2095
2299
  for (let j = 0; j < runq.size; j++) {
@@ -2110,7 +2314,9 @@ class Machine {
2110
2314
  }
2111
2315
  if (this.ncap > 0 && (!longest || !this.matched || this.matchcap[1] < pos)) {
2112
2316
  t.cap[1] = pos;
2113
- this.matchcap = t.cap.slice(0, this.ncap);
2317
+ // Using subarray creates a fast view, avoiding a full array copy
2318
+ // until the submatches are finalized at the very end.
2319
+ this.matchcap.set(t.cap.subarray(0, this.ncap));
2114
2320
  }
2115
2321
  if (!longest) {
2116
2322
  this.freeQueue(runq, j + 1);
@@ -2130,7 +2336,7 @@ class Machine {
2130
2336
  add = c !== Codepoint.CODES.get('\n');
2131
2337
  break;
2132
2338
  default:
2133
- throw new Error('bad inst');
2339
+ throw new RE2JSInternalException('bad inst');
2134
2340
  }
2135
2341
  if (add) {
2136
2342
  t = this.add(nextq, i.out, nextPos, t.cap, nextCond, t);
@@ -2188,6 +2394,7 @@ class Machine {
2188
2394
  t.inst = inst;
2189
2395
  }
2190
2396
  if (this.ncap > 0 && t.cap !== cap) {
2397
+ // Direct assignment utilizing Typed Array performance
2191
2398
  for (let c = 0; c < this.ncap; c++) {
2192
2399
  t.cap[c] = cap[c];
2193
2400
  }
@@ -2222,20 +2429,23 @@ const arraysEqual = (a, b) => {
2222
2429
  return true;
2223
2430
  };
2224
2431
  class DFAState {
2225
- constructor(nfaStates, isMatch) {
2432
+ constructor(nfaStates, isMatch, matchIDs = []) {
2226
2433
  this.nfaStates = nfaStates; // Int32Array of Instruction PCs
2227
2434
  this.isMatch = isMatch; // Boolean
2435
+ this.matchIDs = matchIDs; // Array of integers indicating which Set patterns matched
2228
2436
  this.nextAscii = new Array(Unicode.MAX_ASCII + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
2229
2437
  this.nextMap = new Map(); // Cache of Char -> DFAState
2230
2438
  }
2231
2439
  }
2232
2440
  class DFA {
2441
+ static MAX_CACHE_CLEARS = 5;
2233
2442
  constructor(prog) {
2234
2443
  this.prog = prog;
2235
2444
  this.stateCache = new Map(); // hash(number) -> DFAState[]
2236
2445
  this.stateCount = 0; // Tracks total states for memory limits
2237
2446
  this.startState = null;
2238
2447
  this.stateLimit = 10000; // Prevent memory explosion (ReDoS protection)
2448
+ this.cacheClears = 0; // Track thrashing
2239
2449
  this.failed = false; // mark if DFA cannot work with provided prog
2240
2450
  }
2241
2451
 
@@ -2244,6 +2454,7 @@ class DFA {
2244
2454
  const closure = new Set();
2245
2455
  const stack = [...pcs];
2246
2456
  let isMatch = false;
2457
+ const matchIDs = [];
2247
2458
  while (stack.length > 0) {
2248
2459
  const pc = stack.pop();
2249
2460
  if (closure.has(pc)) continue;
@@ -2252,6 +2463,7 @@ class DFA {
2252
2463
  switch (inst.op) {
2253
2464
  case Inst.MATCH:
2254
2465
  isMatch = true;
2466
+ if (!matchIDs.includes(inst.arg)) matchIDs.push(inst.arg);
2255
2467
  break;
2256
2468
  case Inst.ALT:
2257
2469
  case Inst.ALT_MATCH:
@@ -2269,9 +2481,11 @@ class DFA {
2269
2481
  }
2270
2482
  }
2271
2483
  const sortedPCs = Int32Array.from(closure).sort();
2484
+ matchIDs.sort((a, b) => a - b);
2272
2485
  return {
2273
2486
  pcs: sortedPCs,
2274
- isMatch
2487
+ isMatch,
2488
+ matchIDs
2275
2489
  };
2276
2490
  }
2277
2491
 
@@ -2297,6 +2511,8 @@ class DFA {
2297
2511
  bucket = [];
2298
2512
  this.stateCache.set(hash, bucket);
2299
2513
  }
2514
+
2515
+ // DFA already failed once - exit
2300
2516
  if (this.failed) return null;
2301
2517
 
2302
2518
  // Safety: prevent memory exhaustion from state explosion
@@ -2305,12 +2521,18 @@ class DFA {
2305
2521
  this.stateCache.clear();
2306
2522
  this.stateCount = 0;
2307
2523
  this.startState = null;
2308
- this.failed = true;
2524
+ this.cacheClears++;
2525
+
2526
+ // If this regex causes continuous cache thrashing, permanently fall back to NFA
2527
+ // to avoid spending CPU cycles constantly rebuilding the DFA tree.
2528
+ if (this.cacheClears >= DFA.MAX_CACHE_CLEARS) {
2529
+ this.failed = true;
2530
+ }
2309
2531
  return null;
2310
2532
  }
2311
2533
 
2312
2534
  // State not found, create it and add to bucket
2313
- const state = new DFAState(sortedPCs, closureResult.isMatch);
2535
+ const state = new DFAState(sortedPCs, closureResult.isMatch, closureResult.matchIDs);
2314
2536
  bucket.push(state);
2315
2537
  this.stateCount++;
2316
2538
  return state;
@@ -2330,76 +2552,808 @@ class DFA {
2330
2552
  return state.nextMap.get(key);
2331
2553
  }
2332
2554
  }
2333
- const nextPCs = [];
2334
- for (let i = 0; i < state.nfaStates.length; i++) {
2335
- const pc = state.nfaStates[i];
2336
- const inst = this.prog.getInst(pc);
2337
- if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
2338
- nextPCs.push(inst.out);
2555
+ const nextPCs = [];
2556
+ for (let i = 0; i < state.nfaStates.length; i++) {
2557
+ const pc = state.nfaStates[i];
2558
+ const inst = this.prog.getInst(pc);
2559
+ if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
2560
+ nextPCs.push(inst.out);
2561
+ }
2562
+ }
2563
+ if (anchor === RE2Flags.UNANCHORED) {
2564
+ nextPCs.push(this.prog.start);
2565
+ }
2566
+ const nextState = this.getState(nextPCs);
2567
+
2568
+ // Cache the result
2569
+ if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2570
+ state.nextAscii[charCode] = nextState;
2571
+ } else {
2572
+ const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2573
+ state.nextMap.set(key, nextState);
2574
+ }
2575
+ return nextState;
2576
+ }
2577
+
2578
+ // The hot loop: Execute the Lazy DFA
2579
+ match(input, pos, anchor) {
2580
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2581
+ return false;
2582
+ }
2583
+ if (!this.startState) {
2584
+ this.startState = this.getState([this.prog.start]);
2585
+ if (!this.startState) return null; // Fallback to NFA
2586
+ }
2587
+ let endPos = input.endPos();
2588
+ let currentState = this.startState;
2589
+ if (currentState.isMatch) {
2590
+ if (anchor === RE2Flags.ANCHOR_BOTH) {
2591
+ if (pos === endPos) return true;
2592
+ } else {
2593
+ return true;
2594
+ }
2595
+ }
2596
+ let i = pos;
2597
+ while (i < endPos) {
2598
+ const r = input.step(i);
2599
+ const rune = r >> 3;
2600
+ const width = r & 7;
2601
+
2602
+ // prevent infinite loop on EOF
2603
+ if (width === 0) {
2604
+ break;
2605
+ }
2606
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2607
+
2608
+ // If we hit an unrecoverable DFA error or bailout, signal fallback
2609
+ if (currentState === null) return null;
2610
+ if (currentState.isMatch) {
2611
+ if (anchor === RE2Flags.ANCHOR_BOTH) {
2612
+ if (i + width === endPos) return true;
2613
+ } else {
2614
+ return true;
2615
+ }
2616
+ }
2617
+
2618
+ // If we hit a dead end, and anchored, fail early
2619
+ if (currentState.nfaStates.length === 0) {
2620
+ if (anchor !== RE2Flags.UNANCHORED) return false;
2621
+ }
2622
+ i += width;
2623
+ }
2624
+ return false;
2625
+ }
2626
+
2627
+ // The hot loop for evaluating Multi-Pattern Sets
2628
+ matchSet(input, pos, anchor) {
2629
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2630
+ return [];
2631
+ }
2632
+ if (!this.startState) {
2633
+ this.startState = this.getState([this.prog.start]);
2634
+ if (!this.startState) return null; // Fallback to NFA
2635
+ }
2636
+ let endPos = input.endPos();
2637
+ let currentState = this.startState;
2638
+ const matches = new Set();
2639
+ const checkMatch = (state, currentPos) => {
2640
+ if (state.isMatch) {
2641
+ if (anchor === RE2Flags.ANCHOR_BOTH) {
2642
+ if (currentPos === endPos) {
2643
+ state.matchIDs.forEach(id => matches.add(id));
2644
+ }
2645
+ } else {
2646
+ state.matchIDs.forEach(id => matches.add(id));
2647
+ }
2648
+ }
2649
+ };
2650
+ checkMatch(currentState, pos);
2651
+ let i = pos;
2652
+ while (i < endPos) {
2653
+ const r = input.step(i);
2654
+ const rune = r >> 3;
2655
+ const width = r & 7;
2656
+ if (width === 0) break;
2657
+ currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2658
+ if (currentState === null) return null; // Bailout to NFA
2659
+
2660
+ i += width;
2661
+ checkMatch(currentState, i);
2662
+ if (currentState.nfaStates.length === 0) {
2663
+ if (anchor !== RE2Flags.UNANCHORED) break;
2664
+ }
2665
+ }
2666
+ return Array.from(matches).sort((a, b) => a - b);
2667
+ }
2668
+ }
2669
+
2670
+ const VISITED_BITS = 32;
2671
+ const MAX_BACKTRACK_PROG = 500;
2672
+ const INITIAL_JOB_CAPACITY = 256; // Starting size for the job stack arrays
2673
+ const MAX_BACKTRACK_VECTOR = 256 * 1024; // 32 KB limit for the visited bit-mask
2674
+
2675
+ class BitState {
2676
+ constructor() {
2677
+ this.end = 0;
2678
+ this.cap = new Int32Array(0);
2679
+ this.matchcap = new Int32Array(0);
2680
+ this.ncap = 0;
2681
+
2682
+ // Parallel arrays acting as the backtrack job stack
2683
+ this.jobPc = new Int32Array(INITIAL_JOB_CAPACITY);
2684
+ this.jobArg = new Uint8Array(INITIAL_JOB_CAPACITY);
2685
+ this.jobPos = new Int32Array(INITIAL_JOB_CAPACITY);
2686
+ this.jobLen = 0;
2687
+ this.visited = new Uint32Array(0);
2688
+ }
2689
+ reset(prog, end, ncap) {
2690
+ this.end = end;
2691
+ this.jobLen = 0;
2692
+ this.ncap = ncap;
2693
+
2694
+ // Bitwise shift (>>> 5) instead of Math.floor( / 32)
2695
+ const visitedSize = prog.numInst() * (end + 1) + VISITED_BITS - 1 >>> 5;
2696
+ if (this.visited.length < visitedSize) {
2697
+ this.visited = new Uint32Array(Math.floor(MAX_BACKTRACK_VECTOR / VISITED_BITS));
2698
+ } else {
2699
+ this.visited.fill(0, 0, visitedSize);
2700
+ }
2701
+ if (this.cap.length < ncap) {
2702
+ // Must explicitly fill with -1 as Int32Array defaults to 0
2703
+ this.cap = new Int32Array(ncap).fill(-1);
2704
+ } else {
2705
+ this.cap.fill(-1, 0, ncap);
2706
+ }
2707
+ if (this.matchcap.length < ncap) {
2708
+ this.matchcap = new Int32Array(ncap).fill(-1);
2709
+ } else {
2710
+ this.matchcap.fill(-1, 0, ncap);
2711
+ }
2712
+ }
2713
+ shouldVisit(pc, pos) {
2714
+ const n = pc * (this.end + 1) + pos;
2715
+ const idx = n >>> 5; // Equivalent to Math.floor(n / 32)
2716
+ const mask = 1 << (n & 31); // Equivalent to n % 32
2717
+
2718
+ if ((this.visited[idx] & mask) !== 0) {
2719
+ return false;
2720
+ }
2721
+ this.visited[idx] |= mask;
2722
+ return true;
2723
+ }
2724
+ push(re2, pc, pos, arg) {
2725
+ if (re2.prog.getInst(pc).op !== Inst.FAIL && (arg || this.shouldVisit(pc, pos))) {
2726
+ if (this.jobLen >= this.jobPc.length) {
2727
+ const newSize = this.jobPc.length * 2;
2728
+ const newPc = new Int32Array(newSize);
2729
+ newPc.set(this.jobPc);
2730
+ this.jobPc = newPc;
2731
+ const newArg = new Uint8Array(newSize);
2732
+ newArg.set(this.jobArg);
2733
+ this.jobArg = newArg;
2734
+ const newPos = new Int32Array(newSize);
2735
+ newPos.set(this.jobPos);
2736
+ this.jobPos = newPos;
2737
+ }
2738
+ this.jobPc[this.jobLen] = pc;
2739
+ this.jobArg[this.jobLen] = arg ? 1 : 0;
2740
+ this.jobPos[this.jobLen] = pos;
2741
+ this.jobLen++;
2742
+ }
2743
+ }
2744
+ tryBacktrack(re2, input, pc, pos, anchor) {
2745
+ const longest = re2.longest;
2746
+ this.push(re2, pc, pos, false);
2747
+ while (this.jobLen > 0) {
2748
+ this.jobLen--;
2749
+ let currentPc = this.jobPc[this.jobLen];
2750
+ let arg = this.jobArg[this.jobLen] === 1;
2751
+ let currentPos = this.jobPos[this.jobLen];
2752
+ let skipShouldVisit = true;
2753
+ while (true) {
2754
+ if (!skipShouldVisit) {
2755
+ if (!this.shouldVisit(currentPc, currentPos)) {
2756
+ break;
2757
+ }
2758
+ }
2759
+ skipShouldVisit = false;
2760
+ const inst = re2.prog.getInst(currentPc);
2761
+ switch (inst.op) {
2762
+ case Inst.FAIL:
2763
+ {
2764
+ throw new RE2JSInternalException('unexpected InstFail');
2765
+ }
2766
+ case Inst.ALT:
2767
+ {
2768
+ if (arg) {
2769
+ arg = false;
2770
+ currentPc = inst.arg;
2771
+ continue;
2772
+ } else {
2773
+ this.push(re2, currentPc, currentPos, true);
2774
+ currentPc = inst.out;
2775
+ continue;
2776
+ }
2777
+ }
2778
+ case Inst.ALT_MATCH:
2779
+ {
2780
+ const outInst = re2.prog.getInst(inst.out);
2781
+ if (Inst.isRuneOp(outInst.op)) {
2782
+ this.push(re2, inst.arg, currentPos, false);
2783
+ currentPc = inst.out;
2784
+ continue;
2785
+ }
2786
+ this.push(re2, inst.out, this.end, false);
2787
+ currentPc = inst.arg;
2788
+ continue;
2789
+ }
2790
+ case Inst.RUNE:
2791
+ {
2792
+ const r = input.step(currentPos);
2793
+ if (r === MachineInputBase.EOF()) break;
2794
+ if (!inst.matchRune(r >> 3)) break;
2795
+ currentPos += r & 7;
2796
+ currentPc = inst.out;
2797
+ continue;
2798
+ }
2799
+ case Inst.RUNE1:
2800
+ {
2801
+ const r = input.step(currentPos);
2802
+ if (r === MachineInputBase.EOF()) break;
2803
+ if (r >> 3 !== inst.runes[0]) break;
2804
+ currentPos += r & 7;
2805
+ currentPc = inst.out;
2806
+ continue;
2807
+ }
2808
+ case Inst.RUNE_ANY_NOT_NL:
2809
+ {
2810
+ const r = input.step(currentPos);
2811
+ if (r === MachineInputBase.EOF()) break;
2812
+ if (r >> 3 === 10) break;
2813
+ currentPos += r & 7;
2814
+ currentPc = inst.out;
2815
+ continue;
2816
+ }
2817
+ case Inst.RUNE_ANY:
2818
+ {
2819
+ const r = input.step(currentPos);
2820
+ if (r === MachineInputBase.EOF()) break;
2821
+ currentPos += r & 7;
2822
+ currentPc = inst.out;
2823
+ continue;
2824
+ }
2825
+ case Inst.CAPTURE:
2826
+ {
2827
+ if (arg) {
2828
+ this.cap[inst.arg] = currentPos;
2829
+ break;
2830
+ } else {
2831
+ if (inst.arg < this.ncap) {
2832
+ this.push(re2, currentPc, this.cap[inst.arg], true);
2833
+ this.cap[inst.arg] = currentPos;
2834
+ }
2835
+ currentPc = inst.out;
2836
+ continue;
2837
+ }
2838
+ }
2839
+ case Inst.EMPTY_WIDTH:
2840
+ {
2841
+ const flag = input.context(currentPos);
2842
+ if ((inst.arg & ~flag) !== 0) break;
2843
+ currentPc = inst.out;
2844
+ continue;
2845
+ }
2846
+ case Inst.NOP:
2847
+ {
2848
+ currentPc = inst.out;
2849
+ continue;
2850
+ }
2851
+ case Inst.MATCH:
2852
+ {
2853
+ if (anchor === RE2Flags.ANCHOR_BOTH && currentPos !== this.end) {
2854
+ break;
2855
+ }
2856
+ if (this.ncap === 0) return true;
2857
+ if (this.ncap > 1) {
2858
+ this.cap[1] = currentPos;
2859
+ }
2860
+ const old = this.matchcap[1];
2861
+ if (old === -1 || longest && currentPos > 0 && currentPos > old) {
2862
+ this.matchcap.set(this.cap);
2863
+ }
2864
+ if (!longest) return true;
2865
+ if (currentPos === this.end) return true;
2866
+ break;
2867
+ }
2868
+ default:
2869
+ {
2870
+ throw new RE2JSInternalException('bad inst');
2871
+ }
2872
+ }
2873
+ break;
2874
+ }
2875
+ }
2876
+ return longest && this.matchcap.length > 1 && this.matchcap[1] >= 0;
2877
+ }
2878
+ }
2879
+ const bitStatePool = [];
2880
+ class Backtracker {
2881
+ static shouldBacktrack(prog) {
2882
+ return prog.numInst() <= MAX_BACKTRACK_PROG;
2883
+ }
2884
+ static maxBitStateLen(prog) {
2885
+ if (!Backtracker.shouldBacktrack(prog)) {
2886
+ return 0;
2887
+ }
2888
+ return Math.floor(MAX_BACKTRACK_VECTOR / prog.numInst());
2889
+ }
2890
+ static execute(re2, input, pos, anchor, ncap) {
2891
+ const startCond = re2.cond;
2892
+ if (startCond === Utils.EMPTY_ALL) {
2893
+ return null;
2894
+ }
2895
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2896
+ return null;
2897
+ }
2898
+ if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) {
2899
+ return null;
2900
+ }
2901
+ const b = bitStatePool.length > 0 ? bitStatePool.pop() : new BitState();
2902
+ const end = input.endPos();
2903
+ b.reset(re2.prog, end, ncap);
2904
+ let matched = false;
2905
+ if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 || anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) {
2906
+ if (b.ncap > 0) {
2907
+ b.cap[0] = pos;
2908
+ }
2909
+ if (b.tryBacktrack(re2, input, re2.prog.start, pos, anchor)) {
2910
+ matched = true;
2911
+ }
2912
+ } else {
2913
+ let width = -1;
2914
+ for (; pos <= end && width !== 0; pos += width) {
2915
+ if (re2.prefix.length > 0) {
2916
+ const advance = input.index(re2, pos);
2917
+ if (advance < 0) {
2918
+ break;
2919
+ }
2920
+ pos += advance;
2921
+ }
2922
+ if (b.ncap > 0) {
2923
+ b.cap[0] = pos;
2924
+ }
2925
+ if (b.tryBacktrack(re2, input, re2.prog.start, pos, anchor)) {
2926
+ matched = true;
2927
+ break;
2928
+ }
2929
+ const r = input.step(pos);
2930
+ width = r === MachineInputBase.EOF() ? 0 : r & 7;
2931
+ }
2932
+ }
2933
+ if (!matched) {
2934
+ bitStatePool.push(b);
2935
+ return null;
2936
+ }
2937
+
2938
+ // Must slice so we don't accidentally leak trailing arrays from previously recycled typed arrays
2939
+ const result = ncap === 0 ? [] : Array.from(b.matchcap.subarray(0, ncap));
2940
+ bitStatePool.push(b);
2941
+ return result;
2942
+ }
2943
+ }
2944
+
2945
+ class QueueOnePass {
2946
+ constructor(size) {
2947
+ this.sparse = new Uint32Array(size);
2948
+ this.dense = new Uint32Array(size);
2949
+ this.size = 0;
2950
+ this.nextIndex = 0;
2951
+ }
2952
+ empty() {
2953
+ return this.nextIndex >= this.size;
2954
+ }
2955
+ next() {
2956
+ return this.dense[this.nextIndex++];
2957
+ }
2958
+ clear() {
2959
+ this.size = 0;
2960
+ this.nextIndex = 0;
2961
+ }
2962
+ contains(u) {
2963
+ return u < this.sparse.length && this.sparse[u] < this.size && this.dense[this.sparse[u]] === u;
2964
+ }
2965
+ insert(u) {
2966
+ if (!this.contains(u)) this.insertNew(u);
2967
+ }
2968
+ insertNew(u) {
2969
+ if (u >= this.sparse.length) return;
2970
+ this.sparse[u] = this.size;
2971
+ this.dense[this.size] = u;
2972
+ this.size++;
2973
+ }
2974
+ }
2975
+ const mergeRuneSets = (leftRunes, rightRunes, leftPC, rightPC) => {
2976
+ const leftLen = leftRunes.length;
2977
+ const rightLen = rightRunes.length;
2978
+ let lx = 0,
2979
+ rx = 0;
2980
+ const merged = [];
2981
+ const next = [];
2982
+ let ok = true;
2983
+ let ix = -1;
2984
+ const extend = isLeft => {
2985
+ const newArray = isLeft ? leftRunes : rightRunes;
2986
+ const low = isLeft ? lx : rx;
2987
+ const pc = isLeft ? leftPC : rightPC;
2988
+ if (ix > 0 && newArray[low] <= merged[ix]) return false;
2989
+ merged.push(newArray[low], newArray[low + 1]);
2990
+ if (isLeft) lx += 2;else rx += 2;
2991
+ ix += 2;
2992
+ next.push(pc);
2993
+ return true;
2994
+ };
2995
+ while (lx < leftLen || rx < rightLen) {
2996
+ if (rx >= rightLen) {
2997
+ ok = extend(true);
2998
+ } else if (lx >= leftLen) {
2999
+ ok = extend(false);
3000
+ } else if (rightRunes[rx] < leftRunes[lx]) {
3001
+ ok = extend(false);
3002
+ } else {
3003
+ ok = extend(true);
3004
+ }
3005
+ if (!ok) return null;
3006
+ }
3007
+ return {
3008
+ merged,
3009
+ next
3010
+ };
3011
+ };
3012
+ class OnePassProg {
3013
+ constructor(prog) {
3014
+ this.start = prog.start;
3015
+ this.numCap = prog.numCap;
3016
+ this.inst = new Array(prog.inst.length);
3017
+ for (let i = 0; i < prog.inst.length; i++) {
3018
+ const orig = prog.inst[i];
3019
+ const inst = new Inst(orig.op);
3020
+ inst.out = orig.out;
3021
+ inst.arg = orig.arg;
3022
+ inst.runes = orig.runes ? orig.runes.slice() : [];
3023
+ inst.next = null;
3024
+ this.inst[i] = inst;
3025
+ }
3026
+ }
3027
+ }
3028
+ const onePassCopy = prog => {
3029
+ const p = new OnePassProg(prog);
3030
+
3031
+ // Rewrites one or more common Prog constructs that enable some otherwise
3032
+ // non-onepass Progs to be onepass.
3033
+ for (let pc = 0; pc < p.inst.length; pc++) {
3034
+ const inst = p.inst[pc];
3035
+ if (inst.op !== Inst.ALT && inst.op !== Inst.ALT_MATCH) continue;
3036
+ let pAOther = 'out';
3037
+ let pAAlt = 'arg';
3038
+ let instAlt = p.inst[inst[pAAlt]];
3039
+ if (instAlt.op !== Inst.ALT && instAlt.op !== Inst.ALT_MATCH) {
3040
+ pAOther = 'arg';
3041
+ pAAlt = 'out';
3042
+ instAlt = p.inst[inst[pAAlt]];
3043
+ if (instAlt.op !== Inst.ALT && instAlt.op !== Inst.ALT_MATCH) continue;
3044
+ }
3045
+ const instOther = p.inst[inst[pAOther]];
3046
+ if (instOther.op === Inst.ALT || instOther.op === Inst.ALT_MATCH) continue;
3047
+ let pBAlt = 'out';
3048
+ let pBOther = 'arg';
3049
+ let patch = false;
3050
+ if (instAlt.out === pc) {
3051
+ patch = true;
3052
+ } else if (instAlt.arg === pc) {
3053
+ patch = true;
3054
+ pBAlt = 'arg';
3055
+ pBOther = 'out';
3056
+ }
3057
+ if (patch) instAlt[pBAlt] = inst[pAOther];
3058
+ if (inst[pAOther] === instAlt[pBAlt]) inst[pAAlt] = instAlt[pBOther];
3059
+ }
3060
+ return p;
3061
+ };
3062
+ const makeOnePass = p => {
3063
+ if (p.inst.length >= 1000) return null;
3064
+ const instQueue = new QueueOnePass(p.inst.length);
3065
+ const visitQueue = new QueueOnePass(p.inst.length);
3066
+ const onePassRunes = new Array(p.inst.length);
3067
+ const m = new Array(p.inst.length).fill(false);
3068
+ const check = pc => {
3069
+ let ok = true;
3070
+ const inst = p.inst[pc];
3071
+ if (visitQueue.contains(pc)) return true;
3072
+ visitQueue.insert(pc);
3073
+ switch (inst.op) {
3074
+ case Inst.ALT:
3075
+ case Inst.ALT_MATCH:
3076
+ {
3077
+ ok = check(inst.out) && check(inst.arg);
3078
+ let matchOut = m[inst.out];
3079
+ let matchArg = m[inst.arg];
3080
+ if (matchOut && matchArg) return false;
3081
+ if (matchArg) {
3082
+ const tempOut = inst.out;
3083
+ inst.out = inst.arg;
3084
+ inst.arg = tempOut;
3085
+ const tempMatch = matchOut;
3086
+ matchOut = matchArg;
3087
+ matchArg = tempMatch;
3088
+ }
3089
+ if (matchOut) {
3090
+ m[pc] = true;
3091
+ inst.op = Inst.ALT_MATCH;
3092
+ }
3093
+ const leftRunes = onePassRunes[inst.out] || [];
3094
+ const rightRunes = onePassRunes[inst.arg] || [];
3095
+ const mergeRes = mergeRuneSets(leftRunes, rightRunes, inst.out, inst.arg);
3096
+ if (!mergeRes) return false;
3097
+ onePassRunes[pc] = mergeRes.merged;
3098
+ inst.next = new Uint32Array(mergeRes.next);
3099
+ break;
3100
+ }
3101
+ case Inst.CAPTURE:
3102
+ case Inst.EMPTY_WIDTH:
3103
+ case Inst.NOP:
3104
+ {
3105
+ ok = check(inst.out);
3106
+ m[pc] = m[inst.out];
3107
+ onePassRunes[pc] = onePassRunes[inst.out] ? onePassRunes[inst.out].slice() : [];
3108
+ inst.next = new Uint32Array(Math.floor(onePassRunes[pc].length / 2) + 1).fill(inst.out);
3109
+ break;
3110
+ }
3111
+ case Inst.MATCH:
3112
+ case Inst.FAIL:
3113
+ {
3114
+ m[pc] = inst.op === Inst.MATCH;
3115
+ break;
3116
+ }
3117
+ case Inst.RUNE:
3118
+ {
3119
+ m[pc] = false;
3120
+ if (inst.next && inst.next.length > 0) break;
3121
+ instQueue.insert(inst.out);
3122
+ if (!inst.runes || inst.runes.length === 0) {
3123
+ onePassRunes[pc] = [];
3124
+ inst.next = new Uint32Array([inst.out]);
3125
+ break;
3126
+ }
3127
+ let runes = [];
3128
+ if (inst.runes.length === 1 && (inst.arg & RE2Flags.FOLD_CASE) !== 0) {
3129
+ const r0 = inst.runes[0];
3130
+ runes.push(r0, r0);
3131
+ for (let r1 = Unicode.simpleFold(r0); r1 !== r0; r1 = Unicode.simpleFold(r1)) {
3132
+ runes.push(r1, r1);
3133
+ }
3134
+ runes.sort((a, b) => a - b);
3135
+ } else {
3136
+ runes.push(...inst.runes);
3137
+ }
3138
+ onePassRunes[pc] = runes;
3139
+ inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
3140
+ inst.op = Inst.RUNE;
3141
+ break;
3142
+ }
3143
+ case Inst.RUNE1:
3144
+ {
3145
+ m[pc] = false;
3146
+ if (inst.next && inst.next.length > 0) break;
3147
+ instQueue.insert(inst.out);
3148
+ let runes = [];
3149
+ if ((inst.arg & RE2Flags.FOLD_CASE) !== 0) {
3150
+ const r0 = inst.runes[0];
3151
+ runes.push(r0, r0);
3152
+ for (let r1 = Unicode.simpleFold(r0); r1 !== r0; r1 = Unicode.simpleFold(r1)) {
3153
+ runes.push(r1, r1);
3154
+ }
3155
+ runes.sort((a, b) => a - b);
3156
+ } else {
3157
+ runes.push(inst.runes[0], inst.runes[0]);
3158
+ }
3159
+ onePassRunes[pc] = runes;
3160
+ inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
3161
+ inst.op = Inst.RUNE;
3162
+ break;
3163
+ }
3164
+ case Inst.RUNE_ANY:
3165
+ {
3166
+ m[pc] = false;
3167
+ if (inst.next && inst.next.length > 0) break;
3168
+ instQueue.insert(inst.out);
3169
+ onePassRunes[pc] = [0, Unicode.MAX_RUNE];
3170
+ inst.next = new Uint32Array([inst.out]);
3171
+ break;
3172
+ }
3173
+ case Inst.RUNE_ANY_NOT_NL:
3174
+ {
3175
+ m[pc] = false;
3176
+ if (inst.next && inst.next.length > 0) break;
3177
+ instQueue.insert(inst.out);
3178
+ onePassRunes[pc] = [0, 9, 11, Unicode.MAX_RUNE]; // \n is 10
3179
+ inst.next = new Uint32Array(Math.floor(onePassRunes[pc].length / 2) + 1).fill(inst.out);
3180
+ break;
3181
+ }
3182
+ }
3183
+ return ok;
3184
+ };
3185
+ instQueue.clear();
3186
+ instQueue.insert(p.start);
3187
+ while (!instQueue.empty()) {
3188
+ visitQueue.clear();
3189
+ const pc = instQueue.next();
3190
+ if (!check(pc)) return null;
3191
+ }
3192
+ for (let i = 0; i < p.inst.length; i++) {
3193
+ if (onePassRunes[i]) p.inst[i].runes = onePassRunes[i];
3194
+ }
3195
+ return p;
3196
+ };
3197
+ const cleanupOnePass = (p, original) => {
3198
+ for (let ix = 0; ix < original.inst.length; ix++) {
3199
+ const instOriginal = original.inst[ix];
3200
+ switch (instOriginal.op) {
3201
+ case Inst.ALT:
3202
+ case Inst.ALT_MATCH:
3203
+ case Inst.RUNE:
3204
+ break;
3205
+ case Inst.CAPTURE:
3206
+ case Inst.EMPTY_WIDTH:
3207
+ case Inst.NOP:
3208
+ case Inst.MATCH:
3209
+ case Inst.FAIL:
3210
+ p.inst[ix].next = null;
3211
+ break;
3212
+ case Inst.RUNE1:
3213
+ case Inst.RUNE_ANY:
3214
+ case Inst.RUNE_ANY_NOT_NL:
3215
+ p.inst[ix].next = null;
3216
+ p.inst[ix].op = instOriginal.op;
3217
+ p.inst[ix].runes = instOriginal.runes ? instOriginal.runes.slice() : [];
3218
+ break;
3219
+ }
3220
+ }
3221
+ };
3222
+ class OnePass {
3223
+ static compile(prog) {
3224
+ if (prog.start === 0) return null;
3225
+ const startInst = prog.inst[prog.start];
3226
+ // onepass regexps must be strictly anchored
3227
+ if (startInst.op !== Inst.EMPTY_WIDTH || (startInst.arg & Utils.EMPTY_BEGIN_TEXT) === 0) {
3228
+ return null;
3229
+ }
3230
+ let hasAlt = false;
3231
+ for (let i = 0; i < prog.inst.length; i++) {
3232
+ if (prog.inst[i].op === Inst.ALT || prog.inst[i].op === Inst.ALT_MATCH) {
3233
+ hasAlt = true;
3234
+ break;
2339
3235
  }
2340
3236
  }
2341
- if (anchor === RE2Flags.UNANCHORED) {
2342
- nextPCs.push(this.prog.start);
3237
+ for (let i = 0; i < prog.inst.length; i++) {
3238
+ const inst = prog.inst[i];
3239
+ const opOut = prog.inst[inst.out].op;
3240
+ switch (inst.op) {
3241
+ case Inst.ALT:
3242
+ case Inst.ALT_MATCH:
3243
+ if (opOut === Inst.MATCH || prog.inst[inst.arg].op === Inst.MATCH) {
3244
+ return null;
3245
+ }
3246
+ break;
3247
+ case Inst.EMPTY_WIDTH:
3248
+ if (opOut === Inst.MATCH) {
3249
+ if ((inst.arg & Utils.EMPTY_END_TEXT) === Utils.EMPTY_END_TEXT) {
3250
+ continue;
3251
+ }
3252
+ return null;
3253
+ }
3254
+ break;
3255
+ default:
3256
+ if (opOut === Inst.MATCH && hasAlt) {
3257
+ return null;
3258
+ }
3259
+ break;
3260
+ }
2343
3261
  }
2344
- const nextState = this.getState(nextPCs);
2345
-
2346
- // Cache the result
2347
- if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
2348
- state.nextAscii[charCode] = nextState;
2349
- } else {
2350
- const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
2351
- state.nextMap.set(key, nextState);
3262
+ let p = onePassCopy(prog);
3263
+ p = makeOnePass(p);
3264
+ if (p !== null) {
3265
+ cleanupOnePass(p, prog);
2352
3266
  }
2353
- return nextState;
3267
+ return p;
2354
3268
  }
2355
-
2356
- // The hot loop: Execute the Lazy DFA
2357
- match(input, pos, anchor) {
2358
- if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2359
- return false;
2360
- }
2361
- if (!this.startState) {
2362
- this.startState = this.getState([this.prog.start]);
2363
- if (!this.startState) return null; // Fallback to NFA
2364
- }
2365
- let endPos = input.endPos();
2366
- let currentState = this.startState;
2367
- if (currentState.isMatch) {
2368
- if (anchor === RE2Flags.ANCHOR_BOTH) {
2369
- if (pos === endPos) return true;
2370
- } else {
2371
- return true;
3269
+ static next(inst, r) {
3270
+ const nextIdx = inst.matchRunePos(r);
3271
+ if (nextIdx >= 0) return inst.next[nextIdx];
3272
+ if (inst.op === Inst.ALT_MATCH) return inst.out;
3273
+ return 0; // fail
3274
+ }
3275
+ static execute(re2, input, pos, anchor, ncap) {
3276
+ const onepass = re2.onepass;
3277
+ if (!onepass) return null;
3278
+ const matchcap = new Int32Array(ncap).fill(-1);
3279
+ let matched = false;
3280
+ let r = input.step(pos);
3281
+ let rune = r >> 3;
3282
+ let width = r & 7;
3283
+ let r1 = MachineInputBase.EOF();
3284
+ let rune1 = -1;
3285
+ let width1 = 0;
3286
+ if (r !== MachineInputBase.EOF()) {
3287
+ r1 = input.step(pos + width);
3288
+ if (r1 !== MachineInputBase.EOF()) {
3289
+ rune1 = r1 >> 3;
3290
+ width1 = r1 & 7;
2372
3291
  }
2373
3292
  }
2374
- let i = pos;
2375
- while (i < endPos) {
2376
- const r = input.step(i);
2377
- const rune = r >> 3;
2378
- const width = r & 7;
2379
-
2380
- // prevent infinite loop on EOF
2381
- if (width === 0) {
2382
- break;
3293
+ let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
3294
+ let pc = onepass.start;
3295
+ let inst;
3296
+ while (true) {
3297
+ inst = onepass.inst[pc];
3298
+ pc = inst.out;
3299
+ switch (inst.op) {
3300
+ case Inst.MATCH:
3301
+ {
3302
+ matched = true;
3303
+ if (matchcap.length > 0) {
3304
+ matchcap[0] = 0;
3305
+ matchcap[1] = pos;
3306
+ }
3307
+ return ncap === 0 ? [] : Array.from(matchcap);
3308
+ }
3309
+ case Inst.RUNE:
3310
+ if (!inst.matchRune(rune)) return null;
3311
+ break;
3312
+ case Inst.RUNE1:
3313
+ if (rune !== inst.runes[0]) return null;
3314
+ break;
3315
+ case Inst.RUNE_ANY:
3316
+ break;
3317
+ case Inst.RUNE_ANY_NOT_NL:
3318
+ if (rune === 10) return null;
3319
+ break;
3320
+ case Inst.ALT:
3321
+ case Inst.ALT_MATCH:
3322
+ pc = OnePass.next(inst, rune);
3323
+ continue;
3324
+ case Inst.FAIL:
3325
+ return null;
3326
+ case Inst.NOP:
3327
+ continue;
3328
+ case Inst.EMPTY_WIDTH:
3329
+ if ((inst.arg & ~flag) !== 0) return null;
3330
+ continue;
3331
+ case Inst.CAPTURE:
3332
+ if (inst.arg < matchcap.length) {
3333
+ matchcap[inst.arg] = pos;
3334
+ }
3335
+ continue;
3336
+ default:
3337
+ throw new RE2JSInternalException('bad inst');
2383
3338
  }
2384
- currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
2385
-
2386
- // If we hit an unrecoverable DFA error or bailout, signal fallback
2387
- if (currentState === null) return null;
2388
- if (currentState.isMatch) {
2389
- if (anchor === RE2Flags.ANCHOR_BOTH) {
2390
- if (i + width === endPos) return true;
3339
+ if (width === 0) break;
3340
+ flag = Utils.emptyOpContext(rune, rune1);
3341
+ pos += width;
3342
+ rune = rune1;
3343
+ width = width1;
3344
+ if (rune !== -1) {
3345
+ r1 = input.step(pos + width);
3346
+ if (r1 !== MachineInputBase.EOF()) {
3347
+ rune1 = r1 >> 3;
3348
+ width1 = r1 & 7;
2391
3349
  } else {
2392
- return true;
3350
+ rune1 = -1;
3351
+ width1 = 0;
2393
3352
  }
2394
3353
  }
2395
-
2396
- // If we hit a dead end, and anchored, fail early
2397
- if (currentState.nfaStates.length === 0) {
2398
- if (anchor !== RE2Flags.UNANCHORED) return false;
2399
- }
2400
- i += width;
2401
3354
  }
2402
- return false;
3355
+ if (!matched) return null;
3356
+ return ncap === 0 ? [] : Array.from(matchcap);
2403
3357
  }
2404
3358
  }
2405
3359
 
@@ -2484,7 +3438,7 @@ class Regexp {
2484
3438
  this.max = 0; // max for REPEAT
2485
3439
  this.cap = 0; // capturing index, for CAPTURE
2486
3440
  this.name = null; // capturing name, for CAPTURE
2487
- this.namedGroups = {}; // map of group name -> capturing index
3441
+ this.namedGroups = Object.create(null); // map of group name -> capturing index
2488
3442
  }
2489
3443
  reinit() {
2490
3444
  this.flags = 0;
@@ -2494,7 +3448,7 @@ class Regexp {
2494
3448
  this.min = 0;
2495
3449
  this.max = 0;
2496
3450
  this.name = null;
2497
- this.namedGroups = {};
3451
+ this.namedGroups = Object.create(null);
2498
3452
  }
2499
3453
  toString() {
2500
3454
  return this.appendTo();
@@ -2754,6 +3708,188 @@ class Regexp {
2754
3708
  }
2755
3709
  }
2756
3710
 
3711
+ class Prefilter {
3712
+ static Type = {
3713
+ NONE: 0,
3714
+ EXACT: 1,
3715
+ AND: 2,
3716
+ OR: 3
3717
+ };
3718
+ constructor(type) {
3719
+ this.type = type;
3720
+ this.subs = [];
3721
+ this.str = '';
3722
+ this.bytes = null;
3723
+ }
3724
+ eval(input, pos) {
3725
+ switch (this.type) {
3726
+ case Prefilter.Type.NONE:
3727
+ return true;
3728
+ case Prefilter.Type.EXACT:
3729
+ return input.hasString(this, pos);
3730
+ case Prefilter.Type.AND:
3731
+ for (let i = 0; i < this.subs.length; i++) {
3732
+ if (!this.subs[i].eval(input, pos)) return false;
3733
+ }
3734
+ return true;
3735
+ case Prefilter.Type.OR:
3736
+ for (let i = 0; i < this.subs.length; i++) {
3737
+ if (this.subs[i].eval(input, pos)) return true;
3738
+ }
3739
+ return false;
3740
+ default:
3741
+ return true;
3742
+ }
3743
+ }
3744
+ }
3745
+ class PrefilterTree {
3746
+ static build(re) {
3747
+ const pf = PrefilterTree.fromRegexp(re);
3748
+ return PrefilterTree.simplify(pf);
3749
+ }
3750
+ static fromRegexp(re) {
3751
+ if (!re) return new Prefilter(Prefilter.Type.NONE);
3752
+ switch (re.op) {
3753
+ case Regexp.Op.NO_MATCH:
3754
+ case Regexp.Op.EMPTY_MATCH:
3755
+ case Regexp.Op.BEGIN_LINE:
3756
+ case Regexp.Op.END_LINE:
3757
+ case Regexp.Op.BEGIN_TEXT:
3758
+ case Regexp.Op.END_TEXT:
3759
+ case Regexp.Op.WORD_BOUNDARY:
3760
+ case Regexp.Op.NO_WORD_BOUNDARY:
3761
+ case Regexp.Op.CHAR_CLASS:
3762
+ case Regexp.Op.ANY_CHAR_NOT_NL:
3763
+ case Regexp.Op.ANY_CHAR:
3764
+ {
3765
+ return new Prefilter(Prefilter.Type.NONE);
3766
+ }
3767
+ case Regexp.Op.LITERAL:
3768
+ {
3769
+ if (re.runes.length === 0 || (re.flags & RE2Flags.FOLD_CASE) !== 0) {
3770
+ // Skip case-folded literals for simplicity
3771
+ return new Prefilter(Prefilter.Type.NONE);
3772
+ }
3773
+ const pf = new Prefilter(Prefilter.Type.EXACT);
3774
+ let str = '';
3775
+ for (let i = 0; i < re.runes.length; i++) {
3776
+ str += String.fromCodePoint(re.runes[i]);
3777
+ }
3778
+ pf.str = str;
3779
+ pf.bytes = Utils.stringToUtf8ByteArray(pf.str);
3780
+ return pf;
3781
+ }
3782
+ case Regexp.Op.CAPTURE:
3783
+ case Regexp.Op.PLUS:
3784
+ {
3785
+ return PrefilterTree.fromRegexp(re.subs[0]);
3786
+ }
3787
+ case Regexp.Op.REPEAT:
3788
+ {
3789
+ if (re.min >= 1) {
3790
+ return PrefilterTree.fromRegexp(re.subs[0]);
3791
+ }
3792
+ return new Prefilter(Prefilter.Type.NONE);
3793
+ }
3794
+ case Regexp.Op.CONCAT:
3795
+ {
3796
+ const pf = new Prefilter(Prefilter.Type.AND);
3797
+ for (const sub of re.subs) {
3798
+ pf.subs.push(PrefilterTree.fromRegexp(sub));
3799
+ }
3800
+ return pf;
3801
+ }
3802
+ case Regexp.Op.ALTERNATE:
3803
+ {
3804
+ const pf = new Prefilter(Prefilter.Type.OR);
3805
+ for (const sub of re.subs) {
3806
+ pf.subs.push(PrefilterTree.fromRegexp(sub));
3807
+ }
3808
+ return pf;
3809
+ }
3810
+ default:
3811
+ return new Prefilter(Prefilter.Type.NONE);
3812
+ }
3813
+ }
3814
+ static simplify(pf) {
3815
+ if (pf.type === Prefilter.Type.EXACT || pf.type === Prefilter.Type.NONE) {
3816
+ return pf;
3817
+ }
3818
+ if (pf.type === Prefilter.Type.AND) {
3819
+ const newSubs = [];
3820
+ for (const sub of pf.subs) {
3821
+ const s = PrefilterTree.simplify(sub);
3822
+ if (s.type !== Prefilter.Type.NONE) {
3823
+ if (s.type === Prefilter.Type.AND) {
3824
+ newSubs.push(...s.subs);
3825
+ } else {
3826
+ newSubs.push(s);
3827
+ }
3828
+ }
3829
+ }
3830
+ if (newSubs.length === 0) return new Prefilter(Prefilter.Type.NONE);
3831
+ if (newSubs.length === 1) return newSubs[0];
3832
+ pf.subs = newSubs;
3833
+ return pf;
3834
+ }
3835
+ if (pf.type === Prefilter.Type.OR) {
3836
+ const newSubs = [];
3837
+ for (const sub of pf.subs) {
3838
+ const s = PrefilterTree.simplify(sub);
3839
+ if (s.type === Prefilter.Type.NONE) {
3840
+ // If any branch of an OR has no requirements, the whole OR has no requirements
3841
+ return new Prefilter(Prefilter.Type.NONE);
3842
+ }
3843
+ if (s.type === Prefilter.Type.OR) {
3844
+ newSubs.push(...s.subs);
3845
+ } else {
3846
+ newSubs.push(s);
3847
+ }
3848
+ }
3849
+ if (newSubs.length === 0) return new Prefilter(Prefilter.Type.NONE);
3850
+ if (newSubs.length === 1) return newSubs[0];
3851
+
3852
+ // De-duplicate EXACT branches
3853
+ const seen = new Set();
3854
+ const uniqueSubs = [];
3855
+ for (const sub of newSubs) {
3856
+ if (sub.type === Prefilter.Type.EXACT) {
3857
+ if (!seen.has(sub.str)) {
3858
+ seen.add(sub.str);
3859
+ uniqueSubs.push(sub);
3860
+ }
3861
+ } else {
3862
+ uniqueSubs.push(sub);
3863
+ }
3864
+ }
3865
+ pf.subs = uniqueSubs;
3866
+ return pf;
3867
+ }
3868
+ return pf;
3869
+ }
3870
+ }
3871
+
3872
+ /**
3873
+ * A list of instruction pointers waiting to be patched.
3874
+ * Tracks both `head` and `tail` to allow O(1) appending during compilation.
3875
+ * * Values are encoded integers, not standard memory pointers:
3876
+ * - Program instruction index: `l >> 1`
3877
+ * - Patch `.out` field if: `(l & 1) === 0`
3878
+ * - Patch `.arg` field if: `(l & 1) === 1`
3879
+ * - `0` denotes an empty list.
3880
+ * * @see https://swtch.com/~rsc/regexp/regexp1.html
3881
+ */
3882
+ class PatchList {
3883
+ /**
3884
+ * @param {number} head - Encoded pointer to the start of the patch list.
3885
+ * @param {number} tail - Encoded pointer to the end of the patch list.
3886
+ */
3887
+ constructor(head = 0, tail = 0) {
3888
+ this.head = head;
3889
+ this.tail = tail;
3890
+ }
3891
+ }
3892
+
2757
3893
  /**
2758
3894
  * A Prog is a compiled regular expression program.
2759
3895
  */
@@ -2855,39 +3991,30 @@ class Prog {
2855
3991
  return i.arg;
2856
3992
  }
2857
3993
  patch(l, val) {
2858
- while (l !== 0) {
2859
- const i = this.inst[l >> 1];
2860
- if ((l & 1) === 0) {
2861
- l = i.out;
3994
+ let head = l.head;
3995
+ while (head !== 0) {
3996
+ const i = this.inst[head >> 1];
3997
+ if ((head & 1) === 0) {
3998
+ head = i.out;
2862
3999
  i.out = val;
2863
4000
  } else {
2864
- l = i.arg;
4001
+ head = i.arg;
2865
4002
  i.arg = val;
2866
4003
  }
2867
4004
  }
2868
4005
  }
2869
4006
  append(l1, l2) {
2870
- if (l1 === 0) {
2871
- return l2;
2872
- }
2873
- if (l2 === 0) {
2874
- return l1;
2875
- }
2876
- let last = l1;
2877
- for (;;) {
2878
- const next = this.next(last);
2879
- if (next === 0) {
2880
- break;
2881
- }
2882
- last = next;
2883
- }
2884
- const i = this.inst[last >> 1];
2885
- if ((last & 1) === 0) {
2886
- i.out = l2;
4007
+ if (l1.head === 0) return l2;
4008
+ if (l2.head === 0) return l1;
4009
+
4010
+ // We know exactly where the tail is
4011
+ const i = this.inst[l1.tail >> 1];
4012
+ if ((l1.tail & 1) === 0) {
4013
+ i.out = l2.head;
2887
4014
  } else {
2888
- i.arg = l2;
4015
+ i.arg = l2.head;
2889
4016
  }
2890
- return l1;
4017
+ return new PatchList(l1.head, l2.tail);
2891
4018
  }
2892
4019
  /**
2893
4020
  *
@@ -2916,7 +4043,7 @@ class Prog {
2916
4043
  * @class
2917
4044
  */
2918
4045
  class Frag {
2919
- constructor(i = 0, out = 0, nullable = false) {
4046
+ constructor(i = 0, out = new PatchList(), nullable = false) {
2920
4047
  this.i = i; // an instruction address (pc).
2921
4048
  this.out = out; // a patch list; see explanation in Prog.js
2922
4049
  this.nullable = nullable; // whether the fragment can match the empty string
@@ -2941,6 +4068,33 @@ class Compiler {
2941
4068
  c.prog.start = f.i;
2942
4069
  return c.prog;
2943
4070
  }
4071
+ static compileSet(regexps) {
4072
+ const c = new Compiler();
4073
+ if (regexps.length === 0) {
4074
+ c.prog.start = c.newInst(Inst.FAIL).i;
4075
+ return c.prog;
4076
+ }
4077
+ let starts = [];
4078
+ for (let i = 0; i < regexps.length; i++) {
4079
+ const f = c.compile(regexps[i]);
4080
+ const m = c.newInst(Inst.MATCH);
4081
+ c.prog.getInst(m.i).arg = i; // Store the regex index
4082
+ c.prog.patch(f.out, m.i);
4083
+ starts.push(f.i);
4084
+ }
4085
+
4086
+ // Link starts together via ALT
4087
+ let start = starts[0];
4088
+ for (let i = 1; i < starts.length; i++) {
4089
+ const f = c.newInst(Inst.ALT);
4090
+ const inst = c.prog.getInst(f.i);
4091
+ inst.out = start;
4092
+ inst.arg = starts[i];
4093
+ start = f.i;
4094
+ }
4095
+ c.prog.start = start;
4096
+ return c.prog;
4097
+ }
2944
4098
  constructor() {
2945
4099
  this.prog = new Prog();
2946
4100
  this.newInst(Inst.FAIL);
@@ -2953,7 +4107,7 @@ class Compiler {
2953
4107
  // Returns a no-op fragment. Sometimes unavoidable.
2954
4108
  nop() {
2955
4109
  const f = this.newInst(Inst.NOP);
2956
- f.out = f.i << 1;
4110
+ f.out = new PatchList(f.i << 1, f.i << 1);
2957
4111
  return f;
2958
4112
  }
2959
4113
  fail() {
@@ -2964,7 +4118,7 @@ class Compiler {
2964
4118
  // Given a fragment a, returns a fragment with capturing parens around a.
2965
4119
  cap(arg) {
2966
4120
  const f = this.newInst(Inst.CAPTURE);
2967
- f.out = f.i << 1;
4121
+ f.out = new PatchList(f.i << 1, f.i << 1);
2968
4122
  this.prog.getInst(f.i).arg = arg;
2969
4123
  if (this.prog.numCap < arg + 1) {
2970
4124
  this.prog.numCap = arg + 1;
@@ -3012,10 +4166,10 @@ class Compiler {
3012
4166
  const i = this.prog.getInst(f.i);
3013
4167
  if (nongreedy) {
3014
4168
  i.arg = f1.i;
3015
- f.out = f.i << 1;
4169
+ f.out = new PatchList(f.i << 1, f.i << 1);
3016
4170
  } else {
3017
4171
  i.out = f1.i;
3018
- f.out = f.i << 1 | 1;
4172
+ f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
3019
4173
  }
3020
4174
  this.prog.patch(f1.out, f.i);
3021
4175
  return f;
@@ -3027,10 +4181,10 @@ class Compiler {
3027
4181
  const i = this.prog.getInst(f.i);
3028
4182
  if (nongreedy) {
3029
4183
  i.arg = f1.i;
3030
- f.out = f.i << 1;
4184
+ f.out = new PatchList(f.i << 1, f.i << 1);
3031
4185
  } else {
3032
4186
  i.out = f1.i;
3033
- f.out = f.i << 1 | 1;
4187
+ f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
3034
4188
  }
3035
4189
  f.out = this.prog.append(f.out, f1.out);
3036
4190
  return f;
@@ -3053,7 +4207,7 @@ class Compiler {
3053
4207
  empty(op) {
3054
4208
  const f = this.newInst(Inst.EMPTY_WIDTH);
3055
4209
  this.prog.getInst(f.i).arg = op;
3056
- f.out = f.i << 1;
4210
+ f.out = new PatchList(f.i << 1, f.i << 1);
3057
4211
  return f;
3058
4212
  }
3059
4213
 
@@ -3068,7 +4222,7 @@ class Compiler {
3068
4222
  flags &= -2;
3069
4223
  }
3070
4224
  i.arg = flags;
3071
- f.out = f.i << 1;
4225
+ f.out = new PatchList(f.i << 1, f.i << 1);
3072
4226
  if ((flags & RE2Flags.FOLD_CASE) === 0 && runes.length === 1 || runes.length === 2 && runes[0] === runes[1]) {
3073
4227
  i.op = Inst.RUNE1;
3074
4228
  } else if (runes.length === 2 && runes[0] === 0 && runes[1] === Unicode.MAX_RUNE) {
@@ -3173,23 +4327,92 @@ class Simplify {
3173
4327
  }
3174
4328
  switch (re.op) {
3175
4329
  case Regexp.Op.CAPTURE:
4330
+ {
4331
+ const sub = Simplify.simplify(re.subs[0]);
4332
+ if (sub !== re.subs[0]) {
4333
+ const nre = Regexp.fromRegexp(re);
4334
+ nre.runes = [];
4335
+ nre.subs = [sub];
4336
+ return nre;
4337
+ }
4338
+ return re;
4339
+ }
3176
4340
  case Regexp.Op.CONCAT:
3177
4341
  case Regexp.Op.ALTERNATE:
3178
4342
  {
3179
- let nre = re;
4343
+ const newSubs = [];
4344
+ let changed = false;
3180
4345
  for (let i = 0; i < re.subs.length; i++) {
3181
4346
  const sub = re.subs[i];
3182
4347
  const nsub = Simplify.simplify(sub);
3183
- if (nre === re && nsub !== sub) {
3184
- nre = Regexp.fromRegexp(re);
3185
- nre.runes = [];
3186
- nre.subs = re.subs.slice(0, re.subs.length);
4348
+ if (nsub !== sub) {
4349
+ changed = true;
4350
+ }
4351
+ if (re.op === Regexp.Op.CONCAT) {
4352
+ // If any part of a CONCAT is mathematically impossible,
4353
+ // the entire CONCAT sequence becomes impossible.
4354
+ if (nsub.op === Regexp.Op.NO_MATCH) {
4355
+ return new Regexp(Regexp.Op.NO_MATCH);
4356
+ }
4357
+ // Drop empty 0-width match nodes entirely from sequences
4358
+ if (nsub.op === Regexp.Op.EMPTY_MATCH) {
4359
+ changed = true;
4360
+ continue;
4361
+ }
4362
+ // Flatten nested concatenations
4363
+ if (nsub.op === Regexp.Op.CONCAT) {
4364
+ changed = true;
4365
+ newSubs.push(...nsub.subs);
4366
+ continue;
4367
+ }
4368
+ } else if (re.op === Regexp.Op.ALTERNATE) {
4369
+ // Drop impossible branches from alternations
4370
+ if (nsub.op === Regexp.Op.NO_MATCH) {
4371
+ changed = true;
4372
+ continue;
4373
+ }
4374
+ // Flatten nested alternations
4375
+ if (nsub.op === Regexp.Op.ALTERNATE) {
4376
+ changed = true;
4377
+ newSubs.push(...nsub.subs);
4378
+ continue;
4379
+ }
3187
4380
  }
3188
- if (nre !== re) {
3189
- nre.subs[i] = nsub;
4381
+ newSubs.push(nsub);
4382
+ }
4383
+ if (changed) {
4384
+ // If we filtered out all nodes, return the mathematically correct fallback
4385
+ if (newSubs.length === 0) {
4386
+ return new Regexp(re.op === Regexp.Op.CONCAT ? Regexp.Op.EMPTY_MATCH : Regexp.Op.NO_MATCH);
4387
+ }
4388
+ // If only 1 node remains, we don't need a CONCAT/ALT container at all
4389
+ if (newSubs.length === 1) {
4390
+ return newSubs[0];
3190
4391
  }
4392
+ const nre = Regexp.fromRegexp(re);
4393
+ nre.runes = [];
4394
+ nre.subs = newSubs;
4395
+ return nre;
4396
+ }
4397
+ return re;
4398
+ }
4399
+ case Regexp.Op.CHAR_CLASS:
4400
+ {
4401
+ if (re.runes === null) return re;
4402
+
4403
+ // Empty character classes match nothing.
4404
+ if (re.runes.length === 0) {
4405
+ return new Regexp(Regexp.Op.NO_MATCH);
3191
4406
  }
3192
- return nre;
4407
+ // Full character classes match everything.
4408
+ if (re.runes.length === 2 && re.runes[0] === 0 && re.runes[1] === Unicode.MAX_RUNE) {
4409
+ return new Regexp(Regexp.Op.ANY_CHAR);
4410
+ }
4411
+ // Standard catch-all except newline
4412
+ if (re.runes.length === 4 && re.runes[0] === 0 && re.runes[1] === Codepoint.CODES.get('\n') - 1 && re.runes[2] === Codepoint.CODES.get('\n') + 1 && re.runes[3] === Unicode.MAX_RUNE) {
4413
+ return new Regexp(Regexp.Op.ANY_CHAR_NOT_NL);
4414
+ }
4415
+ return re;
3193
4416
  }
3194
4417
  case Regexp.Op.STAR:
3195
4418
  case Regexp.Op.PLUS:
@@ -3226,7 +4449,9 @@ class Simplify {
3226
4449
  }
3227
4450
  subs.push(Simplify.simplify1(Regexp.Op.PLUS, re.flags, sub, null));
3228
4451
  nre.subs = subs.slice(0);
3229
- return nre;
4452
+
4453
+ // Ensure newly created CONCAT is properly flattened
4454
+ return Simplify.simplify(nre);
3230
4455
  }
3231
4456
  // Special case x{0} handled above.
3232
4457
 
@@ -3264,7 +4489,8 @@ class Simplify {
3264
4489
  if (prefixSubs !== null) {
3265
4490
  const prefix = new Regexp(Regexp.Op.CONCAT);
3266
4491
  prefix.subs = prefixSubs.slice(0);
3267
- return prefix;
4492
+ // Ensure newly created CONCAT is properly flattened
4493
+ return Simplify.simplify(prefix);
3268
4494
  }
3269
4495
 
3270
4496
  // Some degenerate case like min > max or min < max < 0.
@@ -3297,6 +4523,13 @@ class Simplify {
3297
4523
  return sub;
3298
4524
  }
3299
4525
 
4526
+ // Handle impossible targets gracefully.
4527
+ // e.g. Trying to match "NO_MATCH" 0 or 1 times (QUEST/STAR) evaluates to EMPTY_MATCH.
4528
+ if (sub.op === Regexp.Op.NO_MATCH) {
4529
+ if (op === Regexp.Op.PLUS) return sub; // 1+ times is impossible
4530
+ return new Regexp(Regexp.Op.EMPTY_MATCH);
4531
+ }
4532
+
3300
4533
  // The operators are idempotent if the flags match.
3301
4534
  if (op === sub.op && (flags & RE2Flags.NON_GREEDY) === (sub.flags & RE2Flags.NON_GREEDY)) {
3302
4535
  return sub;
@@ -3304,10 +4537,10 @@ class Simplify {
3304
4537
  if (re !== null && re.op === op && (re.flags & RE2Flags.NON_GREEDY) === (flags & RE2Flags.NON_GREEDY) && sub === re.subs[0]) {
3305
4538
  return re;
3306
4539
  }
3307
- re = new Regexp(op);
3308
- re.flags = flags;
3309
- re.subs = [sub];
3310
- return re;
4540
+ const nre = new Regexp(op);
4541
+ nre.flags = flags;
4542
+ nre.subs = [sub];
4543
+ return nre;
3311
4544
  }
3312
4545
  }
3313
4546
 
@@ -3653,16 +4886,6 @@ class CharClass {
3653
4886
  }
3654
4887
  }
3655
4888
 
3656
- class Pair {
3657
- static of(first, second) {
3658
- return new Pair(first, second);
3659
- }
3660
- constructor(first, second) {
3661
- this.first = first;
3662
- this.second = second;
3663
- }
3664
- }
3665
-
3666
4889
  // StringIterator: a stream of runes with an opaque cursor, permitting
3667
4890
  // rewinding. The units of the cursor are not specified beyond the
3668
4891
  // fact that ASCII characters are single width. (Cursor positions
@@ -3809,18 +5032,59 @@ class Parser {
3809
5032
  // stride).
3810
5033
  static ANY_TABLE = new UnicodeRangeTable(new Uint32Array([0, Unicode.MAX_RUNE, 1]));
3811
5034
 
5035
+ // Ascii tables
5036
+ static ASCII_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1]));
5037
+ static ASCII_FOLD_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1, 0x017f, 0x017f, 1,
5038
+ // Old English long s (ſ), folds to S/s.
5039
+ 0x212a, 0x212a, 1 // Kelvin K, folds to K/k.
5040
+ ]));
5041
+
3812
5042
  // unicodeTable() returns the Unicode RangeTable identified by name
3813
5043
  // and the table of additional fold-equivalent code points.
3814
5044
  // Returns null if |name| does not identify a Unicode character range.
3815
5045
  static unicodeTable(name) {
3816
5046
  if (name === 'Any') {
3817
- return Pair.of(Parser.ANY_TABLE, Parser.ANY_TABLE);
5047
+ return {
5048
+ tab: Parser.ANY_TABLE,
5049
+ fold: Parser.ANY_TABLE,
5050
+ sign: 1
5051
+ };
5052
+ }
5053
+ if (name === 'Ascii') {
5054
+ return {
5055
+ tab: Parser.ASCII_TABLE,
5056
+ fold: Parser.ASCII_FOLD_TABLE,
5057
+ sign: 1
5058
+ };
5059
+ }
5060
+ if (name === 'Assigned') {
5061
+ // Assigned is the mathematical inversion of Cn (Unassigned)
5062
+ return {
5063
+ tab: UnicodeTables.CATEGORIES.get('Cn'),
5064
+ fold: UnicodeTables.CATEGORIES.get('Cn'),
5065
+ sign: -1
5066
+ };
5067
+ }
5068
+ if (name === 'Lc') {
5069
+ return {
5070
+ tab: UnicodeTables.CATEGORIES.get('LC'),
5071
+ fold: UnicodeTables.FOLD_CATEGORIES.get('LC'),
5072
+ sign: 1
5073
+ };
3818
5074
  }
3819
5075
  if (UnicodeTables.CATEGORIES.has(name)) {
3820
- return Pair.of(UnicodeTables.CATEGORIES.get(name), UnicodeTables.FOLD_CATEGORIES.get(name));
5076
+ return {
5077
+ tab: UnicodeTables.CATEGORIES.get(name),
5078
+ fold: UnicodeTables.FOLD_CATEGORIES.get(name),
5079
+ sign: 1
5080
+ };
3821
5081
  }
3822
5082
  if (UnicodeTables.SCRIPTS.has(name)) {
3823
- return Pair.of(UnicodeTables.SCRIPTS.get(name), UnicodeTables.FOLD_SCRIPT.get(name));
5083
+ return {
5084
+ tab: UnicodeTables.SCRIPTS.get(name),
5085
+ fold: UnicodeTables.FOLD_SCRIPT.get(name),
5086
+ sign: 1
5087
+ };
3824
5088
  }
3825
5089
  return null;
3826
5090
  }
@@ -4129,7 +5393,7 @@ class Parser {
4129
5393
  this.flags = flags;
4130
5394
  // number of capturing groups seen
4131
5395
  this.numCap = 0;
4132
- this.namedGroups = {};
5396
+ this.namedGroups = Object.create(null);
4133
5397
  // Stack of parsed expressions.
4134
5398
  this.stack = [];
4135
5399
  this.free = null;
@@ -4973,9 +6237,11 @@ class Parser {
4973
6237
  const i = lit.indexOf('\\E');
4974
6238
  if (i >= 0) {
4975
6239
  lit = lit.substring(0, i);
6240
+ t.skipString(lit);
6241
+ t.skipString('\\E');
6242
+ } else {
6243
+ t.skipString(lit);
4976
6244
  }
4977
- t.skipString(lit);
4978
- t.skipString('\\E');
4979
6245
  let j = 0;
4980
6246
  while (j < lit.length) {
4981
6247
  const codepoint = lit.codePointAt(j);
@@ -4991,6 +6257,9 @@ class Parser {
4991
6257
  t.rewindTo(savedPos);
4992
6258
  break;
4993
6259
  }
6260
+ } else {
6261
+ // Unconditionally rewind if PERL_X is off, or if string ended abruptly
6262
+ t.rewindTo(savedPos);
4994
6263
  }
4995
6264
  const re = this.newRegexp(Regexp.Op.CHAR_CLASS);
4996
6265
  re.flags = this.flags;
@@ -5316,8 +6585,11 @@ class Parser {
5316
6585
  if (pair === null) {
5317
6586
  throw new RE2JSSyntaxException(Parser.ERR_INVALID_CHAR_RANGE, t.from(startPos));
5318
6587
  }
5319
- const tab = pair.first;
5320
- const fold = pair.second; // fold-equivalent table
6588
+ if (pair.sign < 0) {
6589
+ sign = 0 - sign;
6590
+ }
6591
+ const tab = pair.tab;
6592
+ const fold = pair.fold; // fold-equivalent table
5321
6593
  // Variation of CharClass.appendGroup() for tables.
5322
6594
  if ((this.flags & RE2Flags.FOLD_CASE) === 0 || fold === null) {
5323
6595
  cc.appendTableWithSign(tab, sign);
@@ -5461,6 +6733,7 @@ class RE2 {
5461
6733
  res.prefixUTF8 = re2.prefixUTF8;
5462
6734
  res.prefixComplete = re2.prefixComplete;
5463
6735
  res.prefixRune = re2.prefixRune;
6736
+ res.prefilter = re2.prefilter;
5464
6737
  return res;
5465
6738
  }
5466
6739
 
@@ -5503,8 +6776,10 @@ class RE2 {
5503
6776
  let re = Parser.parse(expr, mode);
5504
6777
  const maxCap = re.maxCap();
5505
6778
  re = Simplify.simplify(re);
6779
+ const prefilter = PrefilterTree.build(re);
5506
6780
  const prog = Compiler.compileRegexp(re);
5507
6781
  const re2 = new RE2(expr, prog, maxCap, longest);
6782
+ re2.prefilter = prefilter.type === Prefilter.Type.NONE ? null : prefilter;
5508
6783
  const [prefixCompl, prefixStr] = prog.prefix();
5509
6784
  re2.prefixComplete = prefixCompl;
5510
6785
  re2.prefix = prefixStr;
@@ -5536,12 +6811,78 @@ class RE2 {
5536
6811
  this.prefixComplete = false; // true if prefix is the entire regexp
5537
6812
  this.prefixRune = 0; // first rune in prefix
5538
6813
  this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
5539
- this.dfa = new DFA(prog); // Initialize the Lazy DFA
6814
+ this.dfa = new DFA(this.prog); // initialize Lazy DFA
6815
+ this.onepass = OnePass.compile(this.prog); // compile OnePass
6816
+ this.prefilter = null;
6817
+ }
6818
+ matchPrefixComplete(input, pos, anchor, ncap) {
6819
+ // If strictly anchored, execution must start at 0
6820
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
6821
+ return null;
6822
+ }
6823
+ let matchStart = -1;
6824
+ let matchEnd = -1;
6825
+ const pLen = input.prefixLength(this);
6826
+ if (anchor === RE2Flags.UNANCHORED) {
6827
+ const idx = input.index(this, pos);
6828
+ if (idx < 0) return null;
6829
+ matchStart = pos + idx;
6830
+ matchEnd = matchStart + pLen;
6831
+ } else if (anchor === RE2Flags.ANCHOR_BOTH) {
6832
+ if (input.endPos() !== pLen) return null;
6833
+ const idx = input.index(this, 0);
6834
+ if (idx !== 0) return null;
6835
+ matchStart = 0;
6836
+ matchEnd = pLen;
6837
+ } else if (anchor === RE2Flags.ANCHOR_START) {
6838
+ const idx = input.index(this, 0);
6839
+ if (idx !== 0) return null;
6840
+ matchStart = 0;
6841
+ matchEnd = pLen;
6842
+ }
6843
+ if (matchStart < 0) return null;
6844
+
6845
+ // If captures are requested (e.g. findSubmatch instead of test), populate bounds
6846
+ if (ncap > 0) {
6847
+ const matchcap = new Int32Array(ncap).fill(-1);
6848
+ matchcap[0] = matchStart;
6849
+ matchcap[1] = matchEnd;
6850
+ return Array.from(matchcap);
6851
+ }
6852
+ return []; // Matched successfully, but no capture data requested
5540
6853
  }
5541
6854
  executeEngine(input, pos, anchor, ncap) {
6855
+ // LITERAL FAST PATH
6856
+ // If the entire regex is just a literal string (and no nested capture boundaries are requested),
6857
+ // bypass all state machines and execute via V8's blistering fast native indexOf
6858
+ if (this.prefixComplete && (ncap === 0 || this.numSubexp === 0)) {
6859
+ return this.matchPrefixComplete(input, pos, anchor, ncap);
6860
+ }
6861
+
6862
+ // PREFILTER FAST PATH
6863
+ // If the unanchored query requires specific literal strings (e.g. "a.*b"),
6864
+ // verify those strings exist using high-speed JS string searches before waking up the state engines.
6865
+ if (this.prefilter !== null && anchor === RE2Flags.UNANCHORED) {
6866
+ if (!this.prefilter.eval(input, pos)) {
6867
+ return null;
6868
+ }
6869
+ }
6870
+
6871
+ // FAST PATH: OnePass DFA engine.
6872
+ // If compiled successfully, it perfectly supports capture groups
6873
+ // and is blisteringly fast since it skips thread queues completely.
6874
+ if (this.onepass !== null) {
6875
+ return OnePass.execute(this, input, pos, anchor, ncap);
6876
+ }
6877
+
5542
6878
  // If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
5543
6879
  // We must use the NFA.
5544
6880
  if (ncap > 0) {
6881
+ // Backtracker bit-state execution bounds check
6882
+ if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
6883
+ return Backtracker.execute(this, input, pos, anchor, ncap);
6884
+ }
6885
+ // NFA execution
5545
6886
  return this.doExecuteNFA(input, pos, anchor, ncap);
5546
6887
  }
5547
6888
  const dfaResult = this.dfa.match(input, pos, anchor);
@@ -5550,6 +6891,11 @@ class RE2 {
5550
6891
  return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
5551
6892
  }
5552
6893
 
6894
+ // Backtracker bit-state execution bounds check
6895
+ if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
6896
+ return Backtracker.execute(this, input, pos, anchor, ncap);
6897
+ }
6898
+
5553
6899
  // Fallback to NFA
5554
6900
  return this.doExecuteNFA(input, pos, anchor, ncap);
5555
6901
  }
@@ -6130,6 +7476,50 @@ class RE2 {
6130
7476
  }
6131
7477
  }
6132
7478
 
7479
+ class RE2Set {
7480
+ constructor(anchor = RE2Flags.UNANCHORED, flags = RE2Flags.PERL) {
7481
+ this.anchor = anchor;
7482
+ this.flags = flags;
7483
+ this.regexps = [];
7484
+ this.prog = null;
7485
+ this.dfa = null;
7486
+ this.dummyRe2 = null;
7487
+ }
7488
+ add(pattern) {
7489
+ if (this.prog) {
7490
+ throw new RE2JSCompileException('Cannot add patterns after compile');
7491
+ }
7492
+ const re = Parser.parse(pattern, this.flags);
7493
+ this.regexps.push(Simplify.simplify(re));
7494
+ return this.regexps.length - 1;
7495
+ }
7496
+ compile() {
7497
+ if (this.prog) return;
7498
+ this.prog = Compiler.compileSet(this.regexps);
7499
+ this.dfa = new DFA(this.prog);
7500
+ this.dummyRe2 = {
7501
+ prog: this.prog,
7502
+ cond: this.prog.startCond(),
7503
+ prefix: '',
7504
+ prefixRune: 0,
7505
+ longest: false
7506
+ };
7507
+ }
7508
+ match(input) {
7509
+ if (!this.prog) this.compile();
7510
+ const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
7511
+
7512
+ // Fast path: Try the blistering fast DFA
7513
+ const dfaResult = this.dfa.matchSet(machineInput, 0, this.anchor);
7514
+ if (dfaResult !== null) return dfaResult;
7515
+
7516
+ // Safe Fallback: Handle boundaries (\b) or massive state explosions via NFA
7517
+ const machine = Machine.fromRE2(this.dummyRe2);
7518
+ machine.init(0);
7519
+ return machine.matchSet(machineInput, 0, this.anchor);
7520
+ }
7521
+ }
7522
+
6133
7523
  /**
6134
7524
  * Transform JS regex string to RE2 regex string
6135
7525
  */
@@ -6212,7 +7602,8 @@ class TranslateRegExpString {
6212
7602
  default:
6213
7603
  {
6214
7604
  result += '\\';
6215
- let symSize = Utils.charCount(ch.codePointAt(0));
7605
+ let cp = data.codePointAt(i + 1);
7606
+ let symSize = Utils.charCount(cp);
6216
7607
  result += data.substring(i + 1, i + 1 + symSize);
6217
7608
  i += symSize + 1;
6218
7609
  continue;
@@ -6232,7 +7623,8 @@ class TranslateRegExpString {
6232
7623
  continue;
6233
7624
  }
6234
7625
  }
6235
- let symSize = Utils.charCount(ch.codePointAt(0));
7626
+ let cp = data.codePointAt(i);
7627
+ let symSize = Utils.charCount(cp);
6236
7628
  result += data.substring(i, i + symSize);
6237
7629
  i += symSize;
6238
7630
  }
@@ -6595,5 +7987,7 @@ exports.RE2JSCompileException = RE2JSCompileException;
6595
7987
  exports.RE2JSException = RE2JSException;
6596
7988
  exports.RE2JSFlagsException = RE2JSFlagsException;
6597
7989
  exports.RE2JSGroupException = RE2JSGroupException;
7990
+ exports.RE2JSInternalException = RE2JSInternalException;
6598
7991
  exports.RE2JSSyntaxException = RE2JSSyntaxException;
7992
+ exports.RE2Set = RE2Set;
6599
7993
  //# sourceMappingURL=index.cjs.cjs.map