re2js 2.0.2 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +111 -30
- package/build/index.cjs.cjs +1557 -163
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +71 -0
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +1556 -164
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +1557 -163
- package/build/index.umd.js.map +1 -1
- package/package.json +2 -2
package/build/index.esm.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.
|
|
5
|
+
* @version v2.1.1
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -54,6 +54,23 @@ class RE2Flags {
|
|
|
54
54
|
/**
|
|
55
55
|
* Various constants and helper for unicode codepoints.
|
|
56
56
|
*/
|
|
57
|
+
const ASCII_SIZE = 128;
|
|
58
|
+
const ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
|
|
59
|
+
const ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
|
|
60
|
+
for (let i = 0; i < ASCII_SIZE; i++) {
|
|
61
|
+
if (i >= 97 && i <= 122) {
|
|
62
|
+
// a-z
|
|
63
|
+
ASCII_TO_UPPER[i] = i - 32;
|
|
64
|
+
} else {
|
|
65
|
+
ASCII_TO_UPPER[i] = i;
|
|
66
|
+
}
|
|
67
|
+
if (i >= 65 && i <= 90) {
|
|
68
|
+
// A-Z
|
|
69
|
+
ASCII_TO_LOWER[i] = i + 32;
|
|
70
|
+
} else {
|
|
71
|
+
ASCII_TO_LOWER[i] = i;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
57
74
|
class Codepoint {
|
|
58
75
|
// codePointAt(0)
|
|
59
76
|
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
@@ -61,6 +78,7 @@ class Codepoint {
|
|
|
61
78
|
// convert unicode codepoint to upper case codepoint
|
|
62
79
|
// return same codepoint, if cannot do it (or codepoint not have upper variation)
|
|
63
80
|
static toUpperCase(codepoint) {
|
|
81
|
+
if (codepoint < ASCII_SIZE) return ASCII_TO_UPPER[codepoint];
|
|
64
82
|
const s = String.fromCodePoint(codepoint).toUpperCase();
|
|
65
83
|
if (s.length > 1) {
|
|
66
84
|
return codepoint;
|
|
@@ -75,6 +93,7 @@ class Codepoint {
|
|
|
75
93
|
// convert unicode codepoint to lower case codepoint
|
|
76
94
|
// return same codepoint, if cannot do it (or codepoint not have lower variation)
|
|
77
95
|
static toLowerCase(codepoint) {
|
|
96
|
+
if (codepoint < ASCII_SIZE) return ASCII_TO_LOWER[codepoint];
|
|
78
97
|
const s = String.fromCodePoint(codepoint).toLowerCase();
|
|
79
98
|
if (s.length > 1) {
|
|
80
99
|
return codepoint;
|
|
@@ -197,9 +216,11 @@ class UnicodeTables {
|
|
|
197
216
|
C: () => new UnicodeRangeTable(decodeRanges('AfBgDgBBOrWrWBHHBCBICCVuMuMnBBBzBBBE4B4BBGBcDBHQBXhGhGxBBB8BBBmDNB8BBByBBBQddBCCMEBgBHBsCiFiFJBBDBBXIICCBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBPMMBEB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKMMBDBbEByBPBDBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBjoIBvLBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCB-FCBHBBHBBHBBECBIIIBLBDBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIB-BGGBLBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMBxhBPBXJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBF-6DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBrCHBxDUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQB7
BEBCvOBrB1GBsBDBC-OBIFFDQQmGBBRoBBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEvq3VBxHvw-FB', false)),
|
|
198
217
|
Cc: () => new UnicodeRangeTable(decodeRanges('AfgDgB', true)),
|
|
199
218
|
Cf: () => new UnicodeRangeTable(decodeRanges('tFzqBzqBBEBXhGhGyBhMhMBxCxCs5D9-B9-BBDBbEByBEBCJBw03B6H6HBBBimEQQj7IPBhjiBDBwmFHBn0rYffB+CB', false)),
|
|
219
|
+
Cn: () => new UnicodeRangeTable(decodeRanges('4bBBHDBICCVuMuMnBBBzBBBE4B4BBGBcDBHKBvI9B9BBmDmDBMB8BBByBBBQddBCCMEBgBDDBDBuHJJBDDBXXICCBBBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBbFB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKmDmDNBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBDBvzIBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCC-FCBHBBHBBHBBECBIIIBIBGBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIBlCJBCBBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMB3iBJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBJ76DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBjGUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQB7BEBCvOBrB1GBsBDBC-OBIFFDQQmGBBRo
BBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEwi3VBCdBhD-DBxHvw-BB---BBB---BBB', false)),
|
|
200
220
|
Co: () => new UnicodeRangeTable(decodeRanges('gg4B-nGh4hc9--BD9--B', true)),
|
|
201
221
|
Cs: () => new UnicodeRangeTable(decodeRanges('gg2B--B', true)),
|
|
202
222
|
L: () => new UnicodeRangeTable(decodeRanges('hCZBHZBwBLLFGGBVBCeBCpOBFLBPEBICCiEEBCBBDDBCHHCCBCCCBSBCyCBCqEBJlFBClBBDHHBnBBoCaBFDBuBqBBkBBBCiDBCQQBIIBLLBBBDRRCdBe4CBMZZBfBKBBFGGBUBFKKEYYBXBIKBGXBCFBSpBB7B1BBETTIJBQPBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNGB7BBBCCCBDBCXBCCCBIBCBBKDDBDBCWWBCBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNSSBkBBCGGDqBBCsKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPBR1CBDFBErTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBkBFFkC4CBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBzC+C+CBtBBSHB3BdBOBBLrBBbjBBqBCBLjBBDKBGqBBDCBqBDBCFBCBBEGGB+FBhC1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGB1DOORMBmDFFDJBCEEBDBHGCBCBCKBDDBGEBF1B1BB8zC8zCBjHBHDBEBBNlBBCGGD3BBIRRBVBKGBCGBCGBCGBCGBCGBCGBCGBxC2O2OBrBrBBDBGBBF1CBHCBC5CBCDBGqBBC9CBSfBxBPBhQ-tGBhCs0VBkCtBBDsIBEPBLBBVuBBReBDlCByBIBDmDBDiCBDBBCCCBGBWPBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBdRRBDBCJBLEBCoBBYCBCHBVWBEEEBwBBCEEBDDBDBDCCZCBDKBICBNFBDFBDFBKGBCGBCqBBCNBHyDBej9KBNWBFwBBloItLBDpDBnBGBNEBGCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBlBZBHZBM4CBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBxB9EBTjBBFjBBFnBBJzBBNKBCOBCGBCBBCKBCOBCGBCBBEzBBN2JBKVBLHBZFBCpBBCIBmCFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4ByBBOyBBOjBBnBbBKWB7HpBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB1D-BBgBHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBBRDBgBUBGSBxNsBB0G-BBhEqCBGjCjCBLBhCBBCddB2-FBJ1mBBqBJBo3IDBCGBCBBCiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBn7F0CBCmCBCBBDDDBDDB
CBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BeBHFBmI9BBzEsBBLGBRiKiKBcBTrBBlPbBlHdBDwPwPBFBCDBCBBCOBCkGB8BjCBI1lB1lBBCBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
|
|
223
|
+
LC: () => new UnicodeRangeTable(decodeRanges('hCZBHZB7BLLBVBCeBCiGBCDBFvGBCaBhGDBDBBECBCHHCCBCCCBSBCyCBCqEBJlFBClBBKoBB44ClBBCGGDqBBDCBhV1CBDFBjkCKBGqBBDCBhCrBBgCMBChBBmD1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGBmIFFDJBCEEBDBHGCBCBCFBFDDBCBGEBF1B1BB8zC8zCB6DBDmDBHDBEBBNlBBCGGzoetBBTbBnEtCBCWBEDBC9BBDBBCCCBGBZBBE2Z2ZBpBBGIBIvCBh6TGBNEBqgBZBHZBmlBvCBhDjBBFjBB1DKBCOBCGBCBBCKBCOBCGBCBBk2ByBBOyBB+CVBLVB74C-BBhrV-BBhsZ0CBCmCBCBBDDDBDDBCBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BJBCTBHFB2uCjCB', false)),
|
|
203
224
|
Ll: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBC2BCBQCBuBCDECBBBDCCDEEBFFDEEBBBDDDCCCDCCBCCDEECDDBDDBBBHGDCOCBSCBDDCEEC4BCBFBDDDBCCFICBjCBCaBiGCCEEEBBBTccBhBBCBBECBCWCBDBCGDB0B0BBuBBCgBCK0BCDMCBgDCxBoBBo6CqBBDCB5XFBjkCIBC2D2DBqBBgCMBChBBnD0ECBHBCgDCBHBJFBLHBJHBJFBLHBJHBJNBDHBJHBJHBJEBCBBHEEBBBCBBJDBDBBJHBLCBCBBzIEEBEEcKFDBBJDBF2B2Bs1CvBBCEEBGCFCCBCCBEBGiDCBIICFFNlBBCGG0oesBCUaCoEMCBBBC+BCBGBCCCDICFCCDCCBBBCSCGGGCMCFCCDEECICbEE2ZqBBGIBIvCBh6TGBNEBqhBZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfB7sZZBbGBCRBbZBbDBCCCBFBCKBbZBbZBbZBbZBbZBbZBbZBbZBbbBdYBCFBbYBCFBbYBCFBbYBCFBbYBCFBC15B15BBIBCTBHFB4vChBB', false)),
|
|
204
225
|
Lm: () => new UnicodeRangeTable(decodeRanges('wVRBFLBPEBICCmEGG-OnHnHlFBBuIBBFgBgBKEEhFoFoF1mBgEgE2R72B72BsDkTkTxOFBvF+BBOjBjBBjBByVOORMBg-CBByHgGgG2OsBsBBDBGiDiDB+C+CBBB34bjnBjnBBEBvIzDzDdBB6DIBxCYYqDCBEBB2OXXqEtDtDWBBoDDBKngVngVuBBBh-BFBCpBBCIB0sBhBhBxuXDB9PCBpBBBnRMBhCBBCtgQtgQBCBCGBCBByhM9BBqGGBudgjBgjB', false)),
|
|
205
226
|
Lo: () => new UnicodeRangeTable(decodeRanges('qFQQhIFFBCBxG8Z8ZBZBFDBuBfBCJBkBBBCiDBCZZBLLBBBDRRCdBe4CBMZZBfBWVBrBYBIKBGXBCFBSoBB8B1BBETTIJBROBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNFB8BBBCCCBDBCXBCCCBIBCBBKDDBDBYDBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNyDyDBnKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPByDrTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBpBkCkCBhBBC0BBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBxFuBBSHB3BdBOBBLrBBbjBBqBCBLdByDDBCFBCBBE7hB7hBBCB4-C3BBZWBKGBCGBCGBCGBCGBCGBCGBCGBoR2B2BF1CBJCCB4CBFGGBpBBC9CBSfBxBPBhQ-tGBhC0wUBC2jBBkCnBBJrIBFPBLBBjCyByBBkCBqFoDoDEGBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBuBEBDIBLEBCoBBYCBCHBVPBCFBEEEBwBBCEEBDDBDBDCCZBBEKBIPPBEBDFBDFBKGBCGByEiBBej9KBNWBFwBBloItLBDpDBkCCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBqDJBCsBBDeBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBhEtCBjDnBBJzBB9CzBBN2JBKVBLHB5EFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4FjBBnBDBCxJxJBoBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB0GHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBB0BUBGSB0NnBB2MqCBGwFwFB2-FBJ1mBBqBJB43IiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBxzI2P2PBrBBiBiKiKBcBTrBBlPaBmHdBDwPwPBFBCDBCBBCOBCkGB8pBDBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
|
|
@@ -408,8 +429,11 @@ class UnicodeTables {
|
|
|
408
429
|
Zanabazar_Square: () => new UnicodeRangeTable(decodeRanges('gwmCnC', true))
|
|
409
430
|
});
|
|
410
431
|
static FOLD_CATEGORIES = new LazyMap({
|
|
411
|
-
|
|
412
|
-
|
|
432
|
+
C: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
|
|
433
|
+
Cn: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
|
|
434
|
+
L: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
|
|
435
|
+
LC: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
|
|
436
|
+
Ll: () => new UnicodeRangeTable(decodeRanges('hCZBmDWBCGBiBuBCEECDOCDuBCBECEBBCCCBCCBBBDDBCBBCCBEBBCBBCECBCCDCCBCCBBBCCCBEEIBBCBBCBBCOCDQCDBBCCCBBBC4BCIBBCBBDCCBCBCGC3HrBrBCEEJHHCCBCCCBCCBPBCIBkBJJCUCGDDCBBDyBBxBgBCK2BCBMCD+CCDlBBq6ClBBCGGzW1CB0kCHHBpBBDCBhK0ECKgDCKHBJFBLHBJHBJFBMGCJHBZHBJHBJHBJEBMEBMDBNEBMEBqJEEBHHxC9zC9zCBuBBxBCCBBBDGCBCBCDDJCBCgDCJCCFuqeuqeCqBCUaCoEMCE8BCLECBICFCCDCCEUCBDBCEBCOCBCBCCCBEECKCZs5Vs5VBYBmmBnBBpEjBB9EKBCOBCGBCBBr3ByBB+EVB75CfBhsVfBh1ehBB', false)),
|
|
413
437
|
Lt: () => new UnicodeRangeTable(decodeRanges('kOCCBCCBCClBCCtsHHBJHBJHBMQQwBAB', false)),
|
|
414
438
|
Lu: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBCuBCEECDOCDsBCDECBBBDCCDEEGDDECBDDDCCCDFFDEECDDECCGBBCBBCBBCOCBSCDBBCEECkBCEQCJDDBCCFICBEBCBBCCCBEEBCCBCBCEBDCCBDDIDDCBBEFBGLLBnFnFsBCCEEEBBBvBDBCdBCBBECBCWCBDBCGD1BvBBCgBCK0BCDMCBgDCyBlBBq6CqBBDCB5XFBjkCIBCvHvHERRzD0ECGGGC8CCBHBJFBLHBJHBJFBMGCJHBJNBzBBBNSSBPPBEEpL2B2Bs1CvBBCEEBGCHDDLiDCJCCFNNBkBBCGG0oesBCUaCoEMCE8BCLCCDICFFFCBBDSCMOCFCCDEEGECb9a9advCBi8UZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfBj1ehBB', false)),
|
|
415
439
|
M: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
|
|
@@ -418,7 +442,9 @@ class UnicodeTables {
|
|
|
418
442
|
static FOLD_SCRIPT = new LazyMap({
|
|
419
443
|
Common: () => new UnicodeRangeTable(decodeRanges('8cgBgB', false)),
|
|
420
444
|
Greek: () => new UnicodeRangeTable(decodeRanges('1FwUwU', false)),
|
|
421
|
-
Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false))
|
|
445
|
+
Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
|
|
446
|
+
Latin: () => new UnicodeRangeTable(decodeRanges('y+pBCC', false)),
|
|
447
|
+
Unknown: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false))
|
|
422
448
|
});
|
|
423
449
|
}
|
|
424
450
|
|
|
@@ -672,7 +698,7 @@ class Utils {
|
|
|
672
698
|
|
|
673
699
|
// Returns the array of runes in the specified Java UTF-16 string.
|
|
674
700
|
static stringToRunes(str) {
|
|
675
|
-
return String(str)
|
|
701
|
+
return Array.from(String(str)).map(s => s.codePointAt(0));
|
|
676
702
|
}
|
|
677
703
|
|
|
678
704
|
// Returns the Java UTF-16 string containing the single rune |r|.
|
|
@@ -943,6 +969,14 @@ class MachineInputBase {
|
|
|
943
969
|
endPos() {
|
|
944
970
|
return this.end;
|
|
945
971
|
}
|
|
972
|
+
hasString() {
|
|
973
|
+
return false;
|
|
974
|
+
}
|
|
975
|
+
|
|
976
|
+
// Helper for the exact-literal fast-path execution router
|
|
977
|
+
prefixLength() {
|
|
978
|
+
return 0;
|
|
979
|
+
}
|
|
946
980
|
}
|
|
947
981
|
|
|
948
982
|
// An implementation of MachineInput for UTF-8 byte arrays.
|
|
@@ -954,6 +988,14 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
954
988
|
this.start = start;
|
|
955
989
|
this.end = end;
|
|
956
990
|
}
|
|
991
|
+
hasString(prefilter, pos) {
|
|
992
|
+
const target = prefilter.bytes;
|
|
993
|
+
if (target.length === 0) return true;
|
|
994
|
+
|
|
995
|
+
// Reuse the high-speed indexOf method already implemented below
|
|
996
|
+
const idx = this.indexOf(this.bytes, target, this.start + pos);
|
|
997
|
+
return idx !== -1 && idx <= this.end - target.length;
|
|
998
|
+
}
|
|
957
999
|
|
|
958
1000
|
// Returns the rune at the specified index; the units are
|
|
959
1001
|
// unspecified, but could be UTF-8 byte, UTF-16 char, or rune
|
|
@@ -1030,10 +1072,10 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1030
1072
|
indexOf(source, target, fromIndex = 0) {
|
|
1031
1073
|
let targetLength = target.length;
|
|
1032
1074
|
if (targetLength === 0) {
|
|
1033
|
-
return -1;
|
|
1075
|
+
return fromIndex <= this.end ? fromIndex : -1;
|
|
1034
1076
|
}
|
|
1035
|
-
let
|
|
1036
|
-
for (let i = fromIndex; i <=
|
|
1077
|
+
let limit = this.end - targetLength;
|
|
1078
|
+
for (let i = fromIndex; i <= limit; i++) {
|
|
1037
1079
|
for (let j = 0; j < targetLength; j++) {
|
|
1038
1080
|
if (source[i + j] !== target[j]) {
|
|
1039
1081
|
break;
|
|
@@ -1044,6 +1086,9 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1044
1086
|
}
|
|
1045
1087
|
return -1;
|
|
1046
1088
|
}
|
|
1089
|
+
prefixLength(re2) {
|
|
1090
|
+
return re2.prefixUTF8.length;
|
|
1091
|
+
}
|
|
1047
1092
|
}
|
|
1048
1093
|
|
|
1049
1094
|
// |pos| and |width| are in JS "char" units.
|
|
@@ -1054,6 +1099,10 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1054
1099
|
this.start = start;
|
|
1055
1100
|
this.end = end;
|
|
1056
1101
|
}
|
|
1102
|
+
hasString(prefilter, pos) {
|
|
1103
|
+
const idx = this.charSequence.indexOf(prefilter.str, this.start + pos);
|
|
1104
|
+
return idx !== -1 && idx <= this.end - prefilter.str.length;
|
|
1105
|
+
}
|
|
1057
1106
|
|
|
1058
1107
|
// Returns the rune at the specified index; the units are
|
|
1059
1108
|
// unspecified, but could be UTF-8 byte, UTF-16 char, or rune
|
|
@@ -1099,6 +1148,9 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1099
1148
|
const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
|
|
1100
1149
|
return Utils.emptyOpContext(r1, r2);
|
|
1101
1150
|
}
|
|
1151
|
+
prefixLength(re2) {
|
|
1152
|
+
return re2.prefix.length;
|
|
1153
|
+
}
|
|
1102
1154
|
}
|
|
1103
1155
|
class MachineInput {
|
|
1104
1156
|
static fromUTF8(bytes, start = 0, end = bytes.length) {
|
|
@@ -1189,6 +1241,17 @@ class RE2JSFlagsException extends RE2JSException {
|
|
|
1189
1241
|
}
|
|
1190
1242
|
}
|
|
1191
1243
|
|
|
1244
|
+
/**
|
|
1245
|
+
* An exception thrown for internal engine errors, such as corrupted bytecodes.
|
|
1246
|
+
*/
|
|
1247
|
+
class RE2JSInternalException extends RE2JSException {
|
|
1248
|
+
/** @param {string} message */
|
|
1249
|
+
constructor(message) {
|
|
1250
|
+
super(message);
|
|
1251
|
+
this.name = 'RE2JSInternalException';
|
|
1252
|
+
}
|
|
1253
|
+
}
|
|
1254
|
+
|
|
1192
1255
|
/**
|
|
1193
1256
|
* A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
|
|
1194
1257
|
*
|
|
@@ -1391,6 +1454,23 @@ class Matcher {
|
|
|
1391
1454
|
}
|
|
1392
1455
|
return this.substring(start, end);
|
|
1393
1456
|
}
|
|
1457
|
+
|
|
1458
|
+
/**
|
|
1459
|
+
* Returns a dictionary map of all named capturing groups and their matched values.
|
|
1460
|
+
* If a group was not matched, its value will be `null`.
|
|
1461
|
+
* @returns {Record<string, string|null>}
|
|
1462
|
+
*/
|
|
1463
|
+
getNamedGroups() {
|
|
1464
|
+
if (!this.hasMatch) {
|
|
1465
|
+
throw new RE2JSGroupException('perhaps no match attempted');
|
|
1466
|
+
}
|
|
1467
|
+
const result = Object.create(null);
|
|
1468
|
+
for (const name of Object.keys(this.namedGroups)) {
|
|
1469
|
+
result[name] = this.group(name);
|
|
1470
|
+
}
|
|
1471
|
+
return result;
|
|
1472
|
+
}
|
|
1473
|
+
|
|
1394
1474
|
/**
|
|
1395
1475
|
* Returns the number of subgroups in this pattern.
|
|
1396
1476
|
*
|
|
@@ -1815,16 +1895,20 @@ class Inst {
|
|
|
1815
1895
|
}
|
|
1816
1896
|
return r === r0;
|
|
1817
1897
|
}
|
|
1818
|
-
|
|
1819
|
-
//
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1898
|
+
const len = this.runes.length;
|
|
1899
|
+
// If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
|
|
1900
|
+
if (len === 2 || len === 4 || len === 6 || len === 8) {
|
|
1901
|
+
for (let j = 0; j < len; j += 2) {
|
|
1902
|
+
if (r < this.runes[j]) {
|
|
1903
|
+
return false;
|
|
1904
|
+
}
|
|
1905
|
+
if (r <= this.runes[j + 1]) {
|
|
1906
|
+
return true;
|
|
1907
|
+
}
|
|
1826
1908
|
}
|
|
1909
|
+
return false; // Stop here
|
|
1827
1910
|
}
|
|
1911
|
+
|
|
1828
1912
|
// Otherwise binary search.
|
|
1829
1913
|
let lo = 0;
|
|
1830
1914
|
let hi = this.runes.length / 2 | 0;
|
|
@@ -1842,6 +1926,40 @@ class Inst {
|
|
|
1842
1926
|
}
|
|
1843
1927
|
return false;
|
|
1844
1928
|
}
|
|
1929
|
+
|
|
1930
|
+
// matchRunePos checks whether the instruction matches (and consumes) r.
|
|
1931
|
+
// If so, it returns the index of the matching rune pair.
|
|
1932
|
+
// If not, it returns -1.
|
|
1933
|
+
matchRunePos(r) {
|
|
1934
|
+
if (this.runes.length === 1) {
|
|
1935
|
+
const r0 = this.runes[0];
|
|
1936
|
+
if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
|
|
1937
|
+
return Unicode.equalsIgnoreCase(r0, r) ? 0 : -1;
|
|
1938
|
+
}
|
|
1939
|
+
return r === r0 ? 0 : -1;
|
|
1940
|
+
}
|
|
1941
|
+
const len = this.runes.length;
|
|
1942
|
+
if (len === 2 || len === 4 || len === 6 || len === 8) {
|
|
1943
|
+
for (let j = 0; j < len; j += 2) {
|
|
1944
|
+
if (r < this.runes[j]) return -1;
|
|
1945
|
+
if (r <= this.runes[j + 1]) return Math.floor(j / 2);
|
|
1946
|
+
}
|
|
1947
|
+
return -1;
|
|
1948
|
+
}
|
|
1949
|
+
let lo = 0;
|
|
1950
|
+
let hi = Math.floor(len / 2);
|
|
1951
|
+
while (lo < hi) {
|
|
1952
|
+
const m = lo + hi >> 1;
|
|
1953
|
+
const c = this.runes[2 * m];
|
|
1954
|
+
if (c <= r) {
|
|
1955
|
+
if (r <= this.runes[2 * m + 1]) return m;
|
|
1956
|
+
lo = m + 1;
|
|
1957
|
+
} else {
|
|
1958
|
+
hi = m;
|
|
1959
|
+
}
|
|
1960
|
+
}
|
|
1961
|
+
return -1;
|
|
1962
|
+
}
|
|
1845
1963
|
/**
|
|
1846
1964
|
*
|
|
1847
1965
|
* @returns {string}
|
|
@@ -1857,7 +1975,7 @@ class Inst {
|
|
|
1857
1975
|
case Inst.EMPTY_WIDTH:
|
|
1858
1976
|
return `empty ${this.arg} -> ${this.out}`;
|
|
1859
1977
|
case Inst.MATCH:
|
|
1860
|
-
return
|
|
1978
|
+
return `match${this.arg !== 0 ? ` ${this.arg}` : ''}`;
|
|
1861
1979
|
case Inst.FAIL:
|
|
1862
1980
|
return 'fail';
|
|
1863
1981
|
case Inst.NOP:
|
|
@@ -1883,7 +2001,7 @@ class Inst {
|
|
|
1883
2001
|
class Thread {
|
|
1884
2002
|
constructor() {
|
|
1885
2003
|
this.inst = null;
|
|
1886
|
-
this.cap =
|
|
2004
|
+
this.cap = null; // Initialized to Int32Array later
|
|
1887
2005
|
}
|
|
1888
2006
|
}
|
|
1889
2007
|
|
|
@@ -1911,9 +2029,11 @@ class Queue {
|
|
|
1911
2029
|
return j;
|
|
1912
2030
|
}
|
|
1913
2031
|
clear() {
|
|
1914
|
-
|
|
1915
|
-
this.
|
|
1916
|
-
|
|
2032
|
+
// Prevent memory leaks by nulling out used object references
|
|
2033
|
+
for (let i = 0; i < this.size; i++) {
|
|
2034
|
+
this.denseThreads[i] = null;
|
|
2035
|
+
}
|
|
2036
|
+
// The sparse set logic safely ignores stale integers in Typed Arrays.
|
|
1917
2037
|
this.size = 0;
|
|
1918
2038
|
}
|
|
1919
2039
|
toString() {
|
|
@@ -1942,7 +2062,8 @@ class Machine {
|
|
|
1942
2062
|
m.pool = [];
|
|
1943
2063
|
m.poolSize = 0;
|
|
1944
2064
|
m.matched = false;
|
|
1945
|
-
|
|
2065
|
+
// Use Int32Array instead of standard JS array
|
|
2066
|
+
m.matchcap = new Int32Array(m.prog.numCap < 2 ? 2 : m.prog.numCap);
|
|
1946
2067
|
m.ncap = 0;
|
|
1947
2068
|
return m;
|
|
1948
2069
|
}
|
|
@@ -1956,27 +2077,30 @@ class Machine {
|
|
|
1956
2077
|
if (ncap > this.matchcap.length) {
|
|
1957
2078
|
this.initNewCap(ncap);
|
|
1958
2079
|
} else {
|
|
1959
|
-
this.resetCap(
|
|
2080
|
+
this.resetCap();
|
|
1960
2081
|
}
|
|
1961
2082
|
}
|
|
1962
|
-
|
|
2083
|
+
|
|
2084
|
+
// Wipes existing typed array memory without reallocating
|
|
2085
|
+
resetCap() {
|
|
1963
2086
|
for (let i = 0; i < this.poolSize; i++) {
|
|
1964
2087
|
const t = this.pool[i];
|
|
1965
|
-
t.cap
|
|
2088
|
+
t.cap.fill(0);
|
|
1966
2089
|
}
|
|
1967
2090
|
}
|
|
1968
2091
|
initNewCap(ncap) {
|
|
1969
2092
|
for (let i = 0; i < this.poolSize; i++) {
|
|
1970
2093
|
const t = this.pool[i];
|
|
1971
|
-
t.cap =
|
|
2094
|
+
t.cap = new Int32Array(ncap);
|
|
1972
2095
|
}
|
|
1973
|
-
this.matchcap =
|
|
2096
|
+
this.matchcap = new Int32Array(ncap);
|
|
1974
2097
|
}
|
|
1975
2098
|
submatches() {
|
|
1976
2099
|
if (this.ncap === 0) {
|
|
1977
2100
|
return Utils.emptyInts();
|
|
1978
2101
|
}
|
|
1979
|
-
|
|
2102
|
+
// Use subarray() to create a zero-allocation view before converting
|
|
2103
|
+
return Array.from(this.matchcap.subarray(0, this.ncap));
|
|
1980
2104
|
}
|
|
1981
2105
|
|
|
1982
2106
|
// alloc() allocates a new thread with the given instruction.
|
|
@@ -1988,6 +2112,7 @@ class Machine {
|
|
|
1988
2112
|
t = this.pool[this.poolSize];
|
|
1989
2113
|
} else {
|
|
1990
2114
|
t = new Thread();
|
|
2115
|
+
t.cap = new Int32Array(this.matchcap.length);
|
|
1991
2116
|
}
|
|
1992
2117
|
t.inst = inst;
|
|
1993
2118
|
return t;
|
|
@@ -2017,7 +2142,7 @@ class Machine {
|
|
|
2017
2142
|
return false;
|
|
2018
2143
|
}
|
|
2019
2144
|
this.matched = false;
|
|
2020
|
-
this.matchcap
|
|
2145
|
+
this.matchcap.fill(-1);
|
|
2021
2146
|
let runq = this.q0;
|
|
2022
2147
|
let nextq = this.q1;
|
|
2023
2148
|
let r = input.step(pos);
|
|
@@ -2088,6 +2213,85 @@ class Machine {
|
|
|
2088
2213
|
this.freeQueue(nextq);
|
|
2089
2214
|
return this.matched;
|
|
2090
2215
|
}
|
|
2216
|
+
matchSet(input, pos, anchor) {
|
|
2217
|
+
const startCond = this.re2.cond;
|
|
2218
|
+
if (startCond === Utils.EMPTY_ALL) return [];
|
|
2219
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2220
|
+
return [];
|
|
2221
|
+
}
|
|
2222
|
+
let runq = this.q0;
|
|
2223
|
+
let nextq = this.q1;
|
|
2224
|
+
let r = input.step(pos);
|
|
2225
|
+
let rune = r >> 3;
|
|
2226
|
+
let width = r & 7;
|
|
2227
|
+
let rune1 = -1;
|
|
2228
|
+
let width1 = 0;
|
|
2229
|
+
if (r !== MachineInputBase.EOF()) {
|
|
2230
|
+
r = input.step(pos + width);
|
|
2231
|
+
rune1 = r >> 3;
|
|
2232
|
+
width1 = r & 7;
|
|
2233
|
+
}
|
|
2234
|
+
let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
|
|
2235
|
+
const matches = new Set();
|
|
2236
|
+
while (true) {
|
|
2237
|
+
if (runq.isEmpty()) {
|
|
2238
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
|
|
2239
|
+
}
|
|
2240
|
+
if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
|
|
2241
|
+
this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
|
|
2242
|
+
}
|
|
2243
|
+
const nextPos = pos + width;
|
|
2244
|
+
flag = input.context(nextPos);
|
|
2245
|
+
for (let j = 0; j < runq.size; j++) {
|
|
2246
|
+
let t = runq.denseThreads[j];
|
|
2247
|
+
if (t === null) continue;
|
|
2248
|
+
const i = t.inst;
|
|
2249
|
+
let add = false;
|
|
2250
|
+
switch (i.op) {
|
|
2251
|
+
case Inst.MATCH:
|
|
2252
|
+
if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) break;
|
|
2253
|
+
matches.add(i.arg); // Record the matched Set ID
|
|
2254
|
+
break;
|
|
2255
|
+
case Inst.RUNE:
|
|
2256
|
+
add = i.matchRune(rune);
|
|
2257
|
+
break;
|
|
2258
|
+
case Inst.RUNE1:
|
|
2259
|
+
add = rune === i.runes[0];
|
|
2260
|
+
break;
|
|
2261
|
+
case Inst.RUNE_ANY:
|
|
2262
|
+
add = true;
|
|
2263
|
+
break;
|
|
2264
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
2265
|
+
add = rune !== Codepoint.CODES.get('\n');
|
|
2266
|
+
break;
|
|
2267
|
+
default:
|
|
2268
|
+
throw new RE2JSInternalException('bad inst');
|
|
2269
|
+
}
|
|
2270
|
+
if (add) {
|
|
2271
|
+
t = this.add(nextq, i.out, nextPos, t.cap, flag, t);
|
|
2272
|
+
}
|
|
2273
|
+
if (t !== null) {
|
|
2274
|
+
this.freeThread(t);
|
|
2275
|
+
runq.denseThreads[j] = null;
|
|
2276
|
+
}
|
|
2277
|
+
}
|
|
2278
|
+
runq.clear();
|
|
2279
|
+
if (width === 0) break;
|
|
2280
|
+
pos += width;
|
|
2281
|
+
rune = rune1;
|
|
2282
|
+
width = width1;
|
|
2283
|
+
if (rune !== -1) {
|
|
2284
|
+
r = input.step(pos + width);
|
|
2285
|
+
rune1 = r >> 3;
|
|
2286
|
+
width1 = r & 7;
|
|
2287
|
+
}
|
|
2288
|
+
const tmpq = runq;
|
|
2289
|
+
runq = nextq;
|
|
2290
|
+
nextq = tmpq;
|
|
2291
|
+
}
|
|
2292
|
+
this.freeQueue(nextq);
|
|
2293
|
+
return Array.from(matches).sort((a, b) => a - b);
|
|
2294
|
+
}
|
|
2091
2295
|
step(runq, nextq, pos, nextPos, c, nextCond, anchor, atEnd) {
|
|
2092
2296
|
const longest = this.re2.longest;
|
|
2093
2297
|
for (let j = 0; j < runq.size; j++) {
|
|
@@ -2108,7 +2312,9 @@ class Machine {
|
|
|
2108
2312
|
}
|
|
2109
2313
|
if (this.ncap > 0 && (!longest || !this.matched || this.matchcap[1] < pos)) {
|
|
2110
2314
|
t.cap[1] = pos;
|
|
2111
|
-
|
|
2315
|
+
// Using subarray creates a fast view, avoiding a full array copy
|
|
2316
|
+
// until the submatches are finalized at the very end.
|
|
2317
|
+
this.matchcap.set(t.cap.subarray(0, this.ncap));
|
|
2112
2318
|
}
|
|
2113
2319
|
if (!longest) {
|
|
2114
2320
|
this.freeQueue(runq, j + 1);
|
|
@@ -2128,7 +2334,7 @@ class Machine {
|
|
|
2128
2334
|
add = c !== Codepoint.CODES.get('\n');
|
|
2129
2335
|
break;
|
|
2130
2336
|
default:
|
|
2131
|
-
throw new
|
|
2337
|
+
throw new RE2JSInternalException('bad inst');
|
|
2132
2338
|
}
|
|
2133
2339
|
if (add) {
|
|
2134
2340
|
t = this.add(nextq, i.out, nextPos, t.cap, nextCond, t);
|
|
@@ -2186,6 +2392,7 @@ class Machine {
|
|
|
2186
2392
|
t.inst = inst;
|
|
2187
2393
|
}
|
|
2188
2394
|
if (this.ncap > 0 && t.cap !== cap) {
|
|
2395
|
+
// Direct assignment utilizing Typed Array performance
|
|
2189
2396
|
for (let c = 0; c < this.ncap; c++) {
|
|
2190
2397
|
t.cap[c] = cap[c];
|
|
2191
2398
|
}
|
|
@@ -2220,20 +2427,23 @@ const arraysEqual = (a, b) => {
|
|
|
2220
2427
|
return true;
|
|
2221
2428
|
};
|
|
2222
2429
|
class DFAState {
|
|
2223
|
-
constructor(nfaStates, isMatch) {
|
|
2430
|
+
constructor(nfaStates, isMatch, matchIDs = []) {
|
|
2224
2431
|
this.nfaStates = nfaStates; // Int32Array of Instruction PCs
|
|
2225
2432
|
this.isMatch = isMatch; // Boolean
|
|
2433
|
+
this.matchIDs = matchIDs; // Array of integers indicating which Set patterns matched
|
|
2226
2434
|
this.nextAscii = new Array(Unicode.MAX_ASCII + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
|
|
2227
2435
|
this.nextMap = new Map(); // Cache of Char -> DFAState
|
|
2228
2436
|
}
|
|
2229
2437
|
}
|
|
2230
2438
|
class DFA {
|
|
2439
|
+
static MAX_CACHE_CLEARS = 5;
|
|
2231
2440
|
constructor(prog) {
|
|
2232
2441
|
this.prog = prog;
|
|
2233
2442
|
this.stateCache = new Map(); // hash(number) -> DFAState[]
|
|
2234
2443
|
this.stateCount = 0; // Tracks total states for memory limits
|
|
2235
2444
|
this.startState = null;
|
|
2236
2445
|
this.stateLimit = 10000; // Prevent memory explosion (ReDoS protection)
|
|
2446
|
+
this.cacheClears = 0; // Track thrashing
|
|
2237
2447
|
this.failed = false; // mark if DFA cannot work with provided prog
|
|
2238
2448
|
}
|
|
2239
2449
|
|
|
@@ -2242,6 +2452,7 @@ class DFA {
|
|
|
2242
2452
|
const closure = new Set();
|
|
2243
2453
|
const stack = [...pcs];
|
|
2244
2454
|
let isMatch = false;
|
|
2455
|
+
const matchIDs = [];
|
|
2245
2456
|
while (stack.length > 0) {
|
|
2246
2457
|
const pc = stack.pop();
|
|
2247
2458
|
if (closure.has(pc)) continue;
|
|
@@ -2250,6 +2461,7 @@ class DFA {
|
|
|
2250
2461
|
switch (inst.op) {
|
|
2251
2462
|
case Inst.MATCH:
|
|
2252
2463
|
isMatch = true;
|
|
2464
|
+
if (!matchIDs.includes(inst.arg)) matchIDs.push(inst.arg);
|
|
2253
2465
|
break;
|
|
2254
2466
|
case Inst.ALT:
|
|
2255
2467
|
case Inst.ALT_MATCH:
|
|
@@ -2267,9 +2479,11 @@ class DFA {
|
|
|
2267
2479
|
}
|
|
2268
2480
|
}
|
|
2269
2481
|
const sortedPCs = Int32Array.from(closure).sort();
|
|
2482
|
+
matchIDs.sort((a, b) => a - b);
|
|
2270
2483
|
return {
|
|
2271
2484
|
pcs: sortedPCs,
|
|
2272
|
-
isMatch
|
|
2485
|
+
isMatch,
|
|
2486
|
+
matchIDs
|
|
2273
2487
|
};
|
|
2274
2488
|
}
|
|
2275
2489
|
|
|
@@ -2295,6 +2509,8 @@ class DFA {
|
|
|
2295
2509
|
bucket = [];
|
|
2296
2510
|
this.stateCache.set(hash, bucket);
|
|
2297
2511
|
}
|
|
2512
|
+
|
|
2513
|
+
// DFA already failed once - exit
|
|
2298
2514
|
if (this.failed) return null;
|
|
2299
2515
|
|
|
2300
2516
|
// Safety: prevent memory exhaustion from state explosion
|
|
@@ -2303,12 +2519,18 @@ class DFA {
|
|
|
2303
2519
|
this.stateCache.clear();
|
|
2304
2520
|
this.stateCount = 0;
|
|
2305
2521
|
this.startState = null;
|
|
2306
|
-
this.
|
|
2522
|
+
this.cacheClears++;
|
|
2523
|
+
|
|
2524
|
+
// If this regex causes continuous cache thrashing, permanently fall back to NFA
|
|
2525
|
+
// to avoid spending CPU cycles constantly rebuilding the DFA tree.
|
|
2526
|
+
if (this.cacheClears >= DFA.MAX_CACHE_CLEARS) {
|
|
2527
|
+
this.failed = true;
|
|
2528
|
+
}
|
|
2307
2529
|
return null;
|
|
2308
2530
|
}
|
|
2309
2531
|
|
|
2310
2532
|
// State not found, create it and add to bucket
|
|
2311
|
-
const state = new DFAState(sortedPCs, closureResult.isMatch);
|
|
2533
|
+
const state = new DFAState(sortedPCs, closureResult.isMatch, closureResult.matchIDs);
|
|
2312
2534
|
bucket.push(state);
|
|
2313
2535
|
this.stateCount++;
|
|
2314
2536
|
return state;
|
|
@@ -2328,76 +2550,808 @@ class DFA {
|
|
|
2328
2550
|
return state.nextMap.get(key);
|
|
2329
2551
|
}
|
|
2330
2552
|
}
|
|
2331
|
-
const nextPCs = [];
|
|
2332
|
-
for (let i = 0; i < state.nfaStates.length; i++) {
|
|
2333
|
-
const pc = state.nfaStates[i];
|
|
2334
|
-
const inst = this.prog.getInst(pc);
|
|
2335
|
-
if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
|
|
2336
|
-
nextPCs.push(inst.out);
|
|
2553
|
+
const nextPCs = [];
|
|
2554
|
+
for (let i = 0; i < state.nfaStates.length; i++) {
|
|
2555
|
+
const pc = state.nfaStates[i];
|
|
2556
|
+
const inst = this.prog.getInst(pc);
|
|
2557
|
+
if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
|
|
2558
|
+
nextPCs.push(inst.out);
|
|
2559
|
+
}
|
|
2560
|
+
}
|
|
2561
|
+
if (anchor === RE2Flags.UNANCHORED) {
|
|
2562
|
+
nextPCs.push(this.prog.start);
|
|
2563
|
+
}
|
|
2564
|
+
const nextState = this.getState(nextPCs);
|
|
2565
|
+
|
|
2566
|
+
// Cache the result
|
|
2567
|
+
if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
|
|
2568
|
+
state.nextAscii[charCode] = nextState;
|
|
2569
|
+
} else {
|
|
2570
|
+
const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
|
|
2571
|
+
state.nextMap.set(key, nextState);
|
|
2572
|
+
}
|
|
2573
|
+
return nextState;
|
|
2574
|
+
}
|
|
2575
|
+
|
|
2576
|
+
// The hot loop: Execute the Lazy DFA
|
|
2577
|
+
match(input, pos, anchor) {
|
|
2578
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2579
|
+
return false;
|
|
2580
|
+
}
|
|
2581
|
+
if (!this.startState) {
|
|
2582
|
+
this.startState = this.getState([this.prog.start]);
|
|
2583
|
+
if (!this.startState) return null; // Fallback to NFA
|
|
2584
|
+
}
|
|
2585
|
+
let endPos = input.endPos();
|
|
2586
|
+
let currentState = this.startState;
|
|
2587
|
+
if (currentState.isMatch) {
|
|
2588
|
+
if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2589
|
+
if (pos === endPos) return true;
|
|
2590
|
+
} else {
|
|
2591
|
+
return true;
|
|
2592
|
+
}
|
|
2593
|
+
}
|
|
2594
|
+
let i = pos;
|
|
2595
|
+
while (i < endPos) {
|
|
2596
|
+
const r = input.step(i);
|
|
2597
|
+
const rune = r >> 3;
|
|
2598
|
+
const width = r & 7;
|
|
2599
|
+
|
|
2600
|
+
// prevent infinite loop on EOF
|
|
2601
|
+
if (width === 0) {
|
|
2602
|
+
break;
|
|
2603
|
+
}
|
|
2604
|
+
currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
|
|
2605
|
+
|
|
2606
|
+
// If we hit an unrecoverable DFA error or bailout, signal fallback
|
|
2607
|
+
if (currentState === null) return null;
|
|
2608
|
+
if (currentState.isMatch) {
|
|
2609
|
+
if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2610
|
+
if (i + width === endPos) return true;
|
|
2611
|
+
} else {
|
|
2612
|
+
return true;
|
|
2613
|
+
}
|
|
2614
|
+
}
|
|
2615
|
+
|
|
2616
|
+
// If we hit a dead end, and anchored, fail early
|
|
2617
|
+
if (currentState.nfaStates.length === 0) {
|
|
2618
|
+
if (anchor !== RE2Flags.UNANCHORED) return false;
|
|
2619
|
+
}
|
|
2620
|
+
i += width;
|
|
2621
|
+
}
|
|
2622
|
+
return false;
|
|
2623
|
+
}
|
|
2624
|
+
|
|
2625
|
+
// The hot loop for evaluating Multi-Pattern Sets
|
|
2626
|
+
matchSet(input, pos, anchor) {
|
|
2627
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2628
|
+
return [];
|
|
2629
|
+
}
|
|
2630
|
+
if (!this.startState) {
|
|
2631
|
+
this.startState = this.getState([this.prog.start]);
|
|
2632
|
+
if (!this.startState) return null; // Fallback to NFA
|
|
2633
|
+
}
|
|
2634
|
+
let endPos = input.endPos();
|
|
2635
|
+
let currentState = this.startState;
|
|
2636
|
+
const matches = new Set();
|
|
2637
|
+
const checkMatch = (state, currentPos) => {
|
|
2638
|
+
if (state.isMatch) {
|
|
2639
|
+
if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2640
|
+
if (currentPos === endPos) {
|
|
2641
|
+
state.matchIDs.forEach(id => matches.add(id));
|
|
2642
|
+
}
|
|
2643
|
+
} else {
|
|
2644
|
+
state.matchIDs.forEach(id => matches.add(id));
|
|
2645
|
+
}
|
|
2646
|
+
}
|
|
2647
|
+
};
|
|
2648
|
+
checkMatch(currentState, pos);
|
|
2649
|
+
let i = pos;
|
|
2650
|
+
while (i < endPos) {
|
|
2651
|
+
const r = input.step(i);
|
|
2652
|
+
const rune = r >> 3;
|
|
2653
|
+
const width = r & 7;
|
|
2654
|
+
if (width === 0) break;
|
|
2655
|
+
currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
|
|
2656
|
+
if (currentState === null) return null; // Bailout to NFA
|
|
2657
|
+
|
|
2658
|
+
i += width;
|
|
2659
|
+
checkMatch(currentState, i);
|
|
2660
|
+
if (currentState.nfaStates.length === 0) {
|
|
2661
|
+
if (anchor !== RE2Flags.UNANCHORED) break;
|
|
2662
|
+
}
|
|
2663
|
+
}
|
|
2664
|
+
return Array.from(matches).sort((a, b) => a - b);
|
|
2665
|
+
}
|
|
2666
|
+
}
|
|
2667
|
+
|
|
2668
|
+
const VISITED_BITS = 32;
|
|
2669
|
+
const MAX_BACKTRACK_PROG = 500;
|
|
2670
|
+
const INITIAL_JOB_CAPACITY = 256; // Starting size for the job stack arrays
|
|
2671
|
+
const MAX_BACKTRACK_VECTOR = 256 * 1024; // 32 KB limit for the visited bit-mask
|
|
2672
|
+
|
|
2673
|
+
class BitState {
|
|
2674
|
+
constructor() {
|
|
2675
|
+
this.end = 0;
|
|
2676
|
+
this.cap = new Int32Array(0);
|
|
2677
|
+
this.matchcap = new Int32Array(0);
|
|
2678
|
+
this.ncap = 0;
|
|
2679
|
+
|
|
2680
|
+
// Parallel arrays acting as the backtrack job stack
|
|
2681
|
+
this.jobPc = new Int32Array(INITIAL_JOB_CAPACITY);
|
|
2682
|
+
this.jobArg = new Uint8Array(INITIAL_JOB_CAPACITY);
|
|
2683
|
+
this.jobPos = new Int32Array(INITIAL_JOB_CAPACITY);
|
|
2684
|
+
this.jobLen = 0;
|
|
2685
|
+
this.visited = new Uint32Array(0);
|
|
2686
|
+
}
|
|
2687
|
+
reset(prog, end, ncap) {
|
|
2688
|
+
this.end = end;
|
|
2689
|
+
this.jobLen = 0;
|
|
2690
|
+
this.ncap = ncap;
|
|
2691
|
+
|
|
2692
|
+
// Bitwise shift (>>> 5) instead of Math.floor( / 32)
|
|
2693
|
+
const visitedSize = prog.numInst() * (end + 1) + VISITED_BITS - 1 >>> 5;
|
|
2694
|
+
if (this.visited.length < visitedSize) {
|
|
2695
|
+
this.visited = new Uint32Array(Math.floor(MAX_BACKTRACK_VECTOR / VISITED_BITS));
|
|
2696
|
+
} else {
|
|
2697
|
+
this.visited.fill(0, 0, visitedSize);
|
|
2698
|
+
}
|
|
2699
|
+
if (this.cap.length < ncap) {
|
|
2700
|
+
// Must explicitly fill with -1 as Int32Array defaults to 0
|
|
2701
|
+
this.cap = new Int32Array(ncap).fill(-1);
|
|
2702
|
+
} else {
|
|
2703
|
+
this.cap.fill(-1, 0, ncap);
|
|
2704
|
+
}
|
|
2705
|
+
if (this.matchcap.length < ncap) {
|
|
2706
|
+
this.matchcap = new Int32Array(ncap).fill(-1);
|
|
2707
|
+
} else {
|
|
2708
|
+
this.matchcap.fill(-1, 0, ncap);
|
|
2709
|
+
}
|
|
2710
|
+
}
|
|
2711
|
+
shouldVisit(pc, pos) {
|
|
2712
|
+
const n = pc * (this.end + 1) + pos;
|
|
2713
|
+
const idx = n >>> 5; // Equivalent to Math.floor(n / 32)
|
|
2714
|
+
const mask = 1 << (n & 31); // Equivalent to n % 32
|
|
2715
|
+
|
|
2716
|
+
if ((this.visited[idx] & mask) !== 0) {
|
|
2717
|
+
return false;
|
|
2718
|
+
}
|
|
2719
|
+
this.visited[idx] |= mask;
|
|
2720
|
+
return true;
|
|
2721
|
+
}
|
|
2722
|
+
push(re2, pc, pos, arg) {
|
|
2723
|
+
if (re2.prog.getInst(pc).op !== Inst.FAIL && (arg || this.shouldVisit(pc, pos))) {
|
|
2724
|
+
if (this.jobLen >= this.jobPc.length) {
|
|
2725
|
+
const newSize = this.jobPc.length * 2;
|
|
2726
|
+
const newPc = new Int32Array(newSize);
|
|
2727
|
+
newPc.set(this.jobPc);
|
|
2728
|
+
this.jobPc = newPc;
|
|
2729
|
+
const newArg = new Uint8Array(newSize);
|
|
2730
|
+
newArg.set(this.jobArg);
|
|
2731
|
+
this.jobArg = newArg;
|
|
2732
|
+
const newPos = new Int32Array(newSize);
|
|
2733
|
+
newPos.set(this.jobPos);
|
|
2734
|
+
this.jobPos = newPos;
|
|
2735
|
+
}
|
|
2736
|
+
this.jobPc[this.jobLen] = pc;
|
|
2737
|
+
this.jobArg[this.jobLen] = arg ? 1 : 0;
|
|
2738
|
+
this.jobPos[this.jobLen] = pos;
|
|
2739
|
+
this.jobLen++;
|
|
2740
|
+
}
|
|
2741
|
+
}
|
|
2742
|
+
tryBacktrack(re2, input, pc, pos, anchor) {
|
|
2743
|
+
const longest = re2.longest;
|
|
2744
|
+
this.push(re2, pc, pos, false);
|
|
2745
|
+
while (this.jobLen > 0) {
|
|
2746
|
+
this.jobLen--;
|
|
2747
|
+
let currentPc = this.jobPc[this.jobLen];
|
|
2748
|
+
let arg = this.jobArg[this.jobLen] === 1;
|
|
2749
|
+
let currentPos = this.jobPos[this.jobLen];
|
|
2750
|
+
let skipShouldVisit = true;
|
|
2751
|
+
while (true) {
|
|
2752
|
+
if (!skipShouldVisit) {
|
|
2753
|
+
if (!this.shouldVisit(currentPc, currentPos)) {
|
|
2754
|
+
break;
|
|
2755
|
+
}
|
|
2756
|
+
}
|
|
2757
|
+
skipShouldVisit = false;
|
|
2758
|
+
const inst = re2.prog.getInst(currentPc);
|
|
2759
|
+
switch (inst.op) {
|
|
2760
|
+
case Inst.FAIL:
|
|
2761
|
+
{
|
|
2762
|
+
throw new RE2JSInternalException('unexpected InstFail');
|
|
2763
|
+
}
|
|
2764
|
+
case Inst.ALT:
|
|
2765
|
+
{
|
|
2766
|
+
if (arg) {
|
|
2767
|
+
arg = false;
|
|
2768
|
+
currentPc = inst.arg;
|
|
2769
|
+
continue;
|
|
2770
|
+
} else {
|
|
2771
|
+
this.push(re2, currentPc, currentPos, true);
|
|
2772
|
+
currentPc = inst.out;
|
|
2773
|
+
continue;
|
|
2774
|
+
}
|
|
2775
|
+
}
|
|
2776
|
+
case Inst.ALT_MATCH:
|
|
2777
|
+
{
|
|
2778
|
+
const outInst = re2.prog.getInst(inst.out);
|
|
2779
|
+
if (Inst.isRuneOp(outInst.op)) {
|
|
2780
|
+
this.push(re2, inst.arg, currentPos, false);
|
|
2781
|
+
currentPc = inst.out;
|
|
2782
|
+
continue;
|
|
2783
|
+
}
|
|
2784
|
+
this.push(re2, inst.out, this.end, false);
|
|
2785
|
+
currentPc = inst.arg;
|
|
2786
|
+
continue;
|
|
2787
|
+
}
|
|
2788
|
+
case Inst.RUNE:
|
|
2789
|
+
{
|
|
2790
|
+
const r = input.step(currentPos);
|
|
2791
|
+
if (r === MachineInputBase.EOF()) break;
|
|
2792
|
+
if (!inst.matchRune(r >> 3)) break;
|
|
2793
|
+
currentPos += r & 7;
|
|
2794
|
+
currentPc = inst.out;
|
|
2795
|
+
continue;
|
|
2796
|
+
}
|
|
2797
|
+
case Inst.RUNE1:
|
|
2798
|
+
{
|
|
2799
|
+
const r = input.step(currentPos);
|
|
2800
|
+
if (r === MachineInputBase.EOF()) break;
|
|
2801
|
+
if (r >> 3 !== inst.runes[0]) break;
|
|
2802
|
+
currentPos += r & 7;
|
|
2803
|
+
currentPc = inst.out;
|
|
2804
|
+
continue;
|
|
2805
|
+
}
|
|
2806
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
2807
|
+
{
|
|
2808
|
+
const r = input.step(currentPos);
|
|
2809
|
+
if (r === MachineInputBase.EOF()) break;
|
|
2810
|
+
if (r >> 3 === 10) break;
|
|
2811
|
+
currentPos += r & 7;
|
|
2812
|
+
currentPc = inst.out;
|
|
2813
|
+
continue;
|
|
2814
|
+
}
|
|
2815
|
+
case Inst.RUNE_ANY:
|
|
2816
|
+
{
|
|
2817
|
+
const r = input.step(currentPos);
|
|
2818
|
+
if (r === MachineInputBase.EOF()) break;
|
|
2819
|
+
currentPos += r & 7;
|
|
2820
|
+
currentPc = inst.out;
|
|
2821
|
+
continue;
|
|
2822
|
+
}
|
|
2823
|
+
case Inst.CAPTURE:
|
|
2824
|
+
{
|
|
2825
|
+
if (arg) {
|
|
2826
|
+
this.cap[inst.arg] = currentPos;
|
|
2827
|
+
break;
|
|
2828
|
+
} else {
|
|
2829
|
+
if (inst.arg < this.ncap) {
|
|
2830
|
+
this.push(re2, currentPc, this.cap[inst.arg], true);
|
|
2831
|
+
this.cap[inst.arg] = currentPos;
|
|
2832
|
+
}
|
|
2833
|
+
currentPc = inst.out;
|
|
2834
|
+
continue;
|
|
2835
|
+
}
|
|
2836
|
+
}
|
|
2837
|
+
case Inst.EMPTY_WIDTH:
|
|
2838
|
+
{
|
|
2839
|
+
const flag = input.context(currentPos);
|
|
2840
|
+
if ((inst.arg & ~flag) !== 0) break;
|
|
2841
|
+
currentPc = inst.out;
|
|
2842
|
+
continue;
|
|
2843
|
+
}
|
|
2844
|
+
case Inst.NOP:
|
|
2845
|
+
{
|
|
2846
|
+
currentPc = inst.out;
|
|
2847
|
+
continue;
|
|
2848
|
+
}
|
|
2849
|
+
case Inst.MATCH:
|
|
2850
|
+
{
|
|
2851
|
+
if (anchor === RE2Flags.ANCHOR_BOTH && currentPos !== this.end) {
|
|
2852
|
+
break;
|
|
2853
|
+
}
|
|
2854
|
+
if (this.ncap === 0) return true;
|
|
2855
|
+
if (this.ncap > 1) {
|
|
2856
|
+
this.cap[1] = currentPos;
|
|
2857
|
+
}
|
|
2858
|
+
const old = this.matchcap[1];
|
|
2859
|
+
if (old === -1 || longest && currentPos > 0 && currentPos > old) {
|
|
2860
|
+
this.matchcap.set(this.cap);
|
|
2861
|
+
}
|
|
2862
|
+
if (!longest) return true;
|
|
2863
|
+
if (currentPos === this.end) return true;
|
|
2864
|
+
break;
|
|
2865
|
+
}
|
|
2866
|
+
default:
|
|
2867
|
+
{
|
|
2868
|
+
throw new RE2JSInternalException('bad inst');
|
|
2869
|
+
}
|
|
2870
|
+
}
|
|
2871
|
+
break;
|
|
2872
|
+
}
|
|
2873
|
+
}
|
|
2874
|
+
return longest && this.matchcap.length > 1 && this.matchcap[1] >= 0;
|
|
2875
|
+
}
|
|
2876
|
+
}
|
|
2877
|
+
const bitStatePool = [];
|
|
2878
|
+
class Backtracker {
|
|
2879
|
+
static shouldBacktrack(prog) {
|
|
2880
|
+
return prog.numInst() <= MAX_BACKTRACK_PROG;
|
|
2881
|
+
}
|
|
2882
|
+
static maxBitStateLen(prog) {
|
|
2883
|
+
if (!Backtracker.shouldBacktrack(prog)) {
|
|
2884
|
+
return 0;
|
|
2885
|
+
}
|
|
2886
|
+
return Math.floor(MAX_BACKTRACK_VECTOR / prog.numInst());
|
|
2887
|
+
}
|
|
2888
|
+
static execute(re2, input, pos, anchor, ncap) {
|
|
2889
|
+
const startCond = re2.cond;
|
|
2890
|
+
if (startCond === Utils.EMPTY_ALL) {
|
|
2891
|
+
return null;
|
|
2892
|
+
}
|
|
2893
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2894
|
+
return null;
|
|
2895
|
+
}
|
|
2896
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) {
|
|
2897
|
+
return null;
|
|
2898
|
+
}
|
|
2899
|
+
const b = bitStatePool.length > 0 ? bitStatePool.pop() : new BitState();
|
|
2900
|
+
const end = input.endPos();
|
|
2901
|
+
b.reset(re2.prog, end, ncap);
|
|
2902
|
+
let matched = false;
|
|
2903
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 || anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2904
|
+
if (b.ncap > 0) {
|
|
2905
|
+
b.cap[0] = pos;
|
|
2906
|
+
}
|
|
2907
|
+
if (b.tryBacktrack(re2, input, re2.prog.start, pos, anchor)) {
|
|
2908
|
+
matched = true;
|
|
2909
|
+
}
|
|
2910
|
+
} else {
|
|
2911
|
+
let width = -1;
|
|
2912
|
+
for (; pos <= end && width !== 0; pos += width) {
|
|
2913
|
+
if (re2.prefix.length > 0) {
|
|
2914
|
+
const advance = input.index(re2, pos);
|
|
2915
|
+
if (advance < 0) {
|
|
2916
|
+
break;
|
|
2917
|
+
}
|
|
2918
|
+
pos += advance;
|
|
2919
|
+
}
|
|
2920
|
+
if (b.ncap > 0) {
|
|
2921
|
+
b.cap[0] = pos;
|
|
2922
|
+
}
|
|
2923
|
+
if (b.tryBacktrack(re2, input, re2.prog.start, pos, anchor)) {
|
|
2924
|
+
matched = true;
|
|
2925
|
+
break;
|
|
2926
|
+
}
|
|
2927
|
+
const r = input.step(pos);
|
|
2928
|
+
width = r === MachineInputBase.EOF() ? 0 : r & 7;
|
|
2929
|
+
}
|
|
2930
|
+
}
|
|
2931
|
+
if (!matched) {
|
|
2932
|
+
bitStatePool.push(b);
|
|
2933
|
+
return null;
|
|
2934
|
+
}
|
|
2935
|
+
|
|
2936
|
+
// Must slice so we don't accidentally leak trailing arrays from previously recycled typed arrays
|
|
2937
|
+
const result = ncap === 0 ? [] : Array.from(b.matchcap.subarray(0, ncap));
|
|
2938
|
+
bitStatePool.push(b);
|
|
2939
|
+
return result;
|
|
2940
|
+
}
|
|
2941
|
+
}
|
|
2942
|
+
|
|
2943
|
+
class QueueOnePass {
|
|
2944
|
+
constructor(size) {
|
|
2945
|
+
this.sparse = new Uint32Array(size);
|
|
2946
|
+
this.dense = new Uint32Array(size);
|
|
2947
|
+
this.size = 0;
|
|
2948
|
+
this.nextIndex = 0;
|
|
2949
|
+
}
|
|
2950
|
+
empty() {
|
|
2951
|
+
return this.nextIndex >= this.size;
|
|
2952
|
+
}
|
|
2953
|
+
next() {
|
|
2954
|
+
return this.dense[this.nextIndex++];
|
|
2955
|
+
}
|
|
2956
|
+
clear() {
|
|
2957
|
+
this.size = 0;
|
|
2958
|
+
this.nextIndex = 0;
|
|
2959
|
+
}
|
|
2960
|
+
contains(u) {
|
|
2961
|
+
return u < this.sparse.length && this.sparse[u] < this.size && this.dense[this.sparse[u]] === u;
|
|
2962
|
+
}
|
|
2963
|
+
insert(u) {
|
|
2964
|
+
if (!this.contains(u)) this.insertNew(u);
|
|
2965
|
+
}
|
|
2966
|
+
insertNew(u) {
|
|
2967
|
+
if (u >= this.sparse.length) return;
|
|
2968
|
+
this.sparse[u] = this.size;
|
|
2969
|
+
this.dense[this.size] = u;
|
|
2970
|
+
this.size++;
|
|
2971
|
+
}
|
|
2972
|
+
}
|
|
2973
|
+
const mergeRuneSets = (leftRunes, rightRunes, leftPC, rightPC) => {
|
|
2974
|
+
const leftLen = leftRunes.length;
|
|
2975
|
+
const rightLen = rightRunes.length;
|
|
2976
|
+
let lx = 0,
|
|
2977
|
+
rx = 0;
|
|
2978
|
+
const merged = [];
|
|
2979
|
+
const next = [];
|
|
2980
|
+
let ok = true;
|
|
2981
|
+
let ix = -1;
|
|
2982
|
+
const extend = isLeft => {
|
|
2983
|
+
const newArray = isLeft ? leftRunes : rightRunes;
|
|
2984
|
+
const low = isLeft ? lx : rx;
|
|
2985
|
+
const pc = isLeft ? leftPC : rightPC;
|
|
2986
|
+
if (ix > 0 && newArray[low] <= merged[ix]) return false;
|
|
2987
|
+
merged.push(newArray[low], newArray[low + 1]);
|
|
2988
|
+
if (isLeft) lx += 2;else rx += 2;
|
|
2989
|
+
ix += 2;
|
|
2990
|
+
next.push(pc);
|
|
2991
|
+
return true;
|
|
2992
|
+
};
|
|
2993
|
+
while (lx < leftLen || rx < rightLen) {
|
|
2994
|
+
if (rx >= rightLen) {
|
|
2995
|
+
ok = extend(true);
|
|
2996
|
+
} else if (lx >= leftLen) {
|
|
2997
|
+
ok = extend(false);
|
|
2998
|
+
} else if (rightRunes[rx] < leftRunes[lx]) {
|
|
2999
|
+
ok = extend(false);
|
|
3000
|
+
} else {
|
|
3001
|
+
ok = extend(true);
|
|
3002
|
+
}
|
|
3003
|
+
if (!ok) return null;
|
|
3004
|
+
}
|
|
3005
|
+
return {
|
|
3006
|
+
merged,
|
|
3007
|
+
next
|
|
3008
|
+
};
|
|
3009
|
+
};
|
|
3010
|
+
class OnePassProg {
|
|
3011
|
+
constructor(prog) {
|
|
3012
|
+
this.start = prog.start;
|
|
3013
|
+
this.numCap = prog.numCap;
|
|
3014
|
+
this.inst = new Array(prog.inst.length);
|
|
3015
|
+
for (let i = 0; i < prog.inst.length; i++) {
|
|
3016
|
+
const orig = prog.inst[i];
|
|
3017
|
+
const inst = new Inst(orig.op);
|
|
3018
|
+
inst.out = orig.out;
|
|
3019
|
+
inst.arg = orig.arg;
|
|
3020
|
+
inst.runes = orig.runes ? orig.runes.slice() : [];
|
|
3021
|
+
inst.next = null;
|
|
3022
|
+
this.inst[i] = inst;
|
|
3023
|
+
}
|
|
3024
|
+
}
|
|
3025
|
+
}
|
|
3026
|
+
const onePassCopy = prog => {
|
|
3027
|
+
const p = new OnePassProg(prog);
|
|
3028
|
+
|
|
3029
|
+
// Rewrites one or more common Prog constructs that enable some otherwise
|
|
3030
|
+
// non-onepass Progs to be onepass.
|
|
3031
|
+
for (let pc = 0; pc < p.inst.length; pc++) {
|
|
3032
|
+
const inst = p.inst[pc];
|
|
3033
|
+
if (inst.op !== Inst.ALT && inst.op !== Inst.ALT_MATCH) continue;
|
|
3034
|
+
let pAOther = 'out';
|
|
3035
|
+
let pAAlt = 'arg';
|
|
3036
|
+
let instAlt = p.inst[inst[pAAlt]];
|
|
3037
|
+
if (instAlt.op !== Inst.ALT && instAlt.op !== Inst.ALT_MATCH) {
|
|
3038
|
+
pAOther = 'arg';
|
|
3039
|
+
pAAlt = 'out';
|
|
3040
|
+
instAlt = p.inst[inst[pAAlt]];
|
|
3041
|
+
if (instAlt.op !== Inst.ALT && instAlt.op !== Inst.ALT_MATCH) continue;
|
|
3042
|
+
}
|
|
3043
|
+
const instOther = p.inst[inst[pAOther]];
|
|
3044
|
+
if (instOther.op === Inst.ALT || instOther.op === Inst.ALT_MATCH) continue;
|
|
3045
|
+
let pBAlt = 'out';
|
|
3046
|
+
let pBOther = 'arg';
|
|
3047
|
+
let patch = false;
|
|
3048
|
+
if (instAlt.out === pc) {
|
|
3049
|
+
patch = true;
|
|
3050
|
+
} else if (instAlt.arg === pc) {
|
|
3051
|
+
patch = true;
|
|
3052
|
+
pBAlt = 'arg';
|
|
3053
|
+
pBOther = 'out';
|
|
3054
|
+
}
|
|
3055
|
+
if (patch) instAlt[pBAlt] = inst[pAOther];
|
|
3056
|
+
if (inst[pAOther] === instAlt[pBAlt]) inst[pAAlt] = instAlt[pBOther];
|
|
3057
|
+
}
|
|
3058
|
+
return p;
|
|
3059
|
+
};
|
|
3060
|
+
const makeOnePass = p => {
|
|
3061
|
+
if (p.inst.length >= 1000) return null;
|
|
3062
|
+
const instQueue = new QueueOnePass(p.inst.length);
|
|
3063
|
+
const visitQueue = new QueueOnePass(p.inst.length);
|
|
3064
|
+
const onePassRunes = new Array(p.inst.length);
|
|
3065
|
+
const m = new Array(p.inst.length).fill(false);
|
|
3066
|
+
const check = pc => {
|
|
3067
|
+
let ok = true;
|
|
3068
|
+
const inst = p.inst[pc];
|
|
3069
|
+
if (visitQueue.contains(pc)) return true;
|
|
3070
|
+
visitQueue.insert(pc);
|
|
3071
|
+
switch (inst.op) {
|
|
3072
|
+
case Inst.ALT:
|
|
3073
|
+
case Inst.ALT_MATCH:
|
|
3074
|
+
{
|
|
3075
|
+
ok = check(inst.out) && check(inst.arg);
|
|
3076
|
+
let matchOut = m[inst.out];
|
|
3077
|
+
let matchArg = m[inst.arg];
|
|
3078
|
+
if (matchOut && matchArg) return false;
|
|
3079
|
+
if (matchArg) {
|
|
3080
|
+
const tempOut = inst.out;
|
|
3081
|
+
inst.out = inst.arg;
|
|
3082
|
+
inst.arg = tempOut;
|
|
3083
|
+
const tempMatch = matchOut;
|
|
3084
|
+
matchOut = matchArg;
|
|
3085
|
+
matchArg = tempMatch;
|
|
3086
|
+
}
|
|
3087
|
+
if (matchOut) {
|
|
3088
|
+
m[pc] = true;
|
|
3089
|
+
inst.op = Inst.ALT_MATCH;
|
|
3090
|
+
}
|
|
3091
|
+
const leftRunes = onePassRunes[inst.out] || [];
|
|
3092
|
+
const rightRunes = onePassRunes[inst.arg] || [];
|
|
3093
|
+
const mergeRes = mergeRuneSets(leftRunes, rightRunes, inst.out, inst.arg);
|
|
3094
|
+
if (!mergeRes) return false;
|
|
3095
|
+
onePassRunes[pc] = mergeRes.merged;
|
|
3096
|
+
inst.next = new Uint32Array(mergeRes.next);
|
|
3097
|
+
break;
|
|
3098
|
+
}
|
|
3099
|
+
case Inst.CAPTURE:
|
|
3100
|
+
case Inst.EMPTY_WIDTH:
|
|
3101
|
+
case Inst.NOP:
|
|
3102
|
+
{
|
|
3103
|
+
ok = check(inst.out);
|
|
3104
|
+
m[pc] = m[inst.out];
|
|
3105
|
+
onePassRunes[pc] = onePassRunes[inst.out] ? onePassRunes[inst.out].slice() : [];
|
|
3106
|
+
inst.next = new Uint32Array(Math.floor(onePassRunes[pc].length / 2) + 1).fill(inst.out);
|
|
3107
|
+
break;
|
|
3108
|
+
}
|
|
3109
|
+
case Inst.MATCH:
|
|
3110
|
+
case Inst.FAIL:
|
|
3111
|
+
{
|
|
3112
|
+
m[pc] = inst.op === Inst.MATCH;
|
|
3113
|
+
break;
|
|
3114
|
+
}
|
|
3115
|
+
case Inst.RUNE:
|
|
3116
|
+
{
|
|
3117
|
+
m[pc] = false;
|
|
3118
|
+
if (inst.next && inst.next.length > 0) break;
|
|
3119
|
+
instQueue.insert(inst.out);
|
|
3120
|
+
if (!inst.runes || inst.runes.length === 0) {
|
|
3121
|
+
onePassRunes[pc] = [];
|
|
3122
|
+
inst.next = new Uint32Array([inst.out]);
|
|
3123
|
+
break;
|
|
3124
|
+
}
|
|
3125
|
+
let runes = [];
|
|
3126
|
+
if (inst.runes.length === 1 && (inst.arg & RE2Flags.FOLD_CASE) !== 0) {
|
|
3127
|
+
const r0 = inst.runes[0];
|
|
3128
|
+
runes.push(r0, r0);
|
|
3129
|
+
for (let r1 = Unicode.simpleFold(r0); r1 !== r0; r1 = Unicode.simpleFold(r1)) {
|
|
3130
|
+
runes.push(r1, r1);
|
|
3131
|
+
}
|
|
3132
|
+
runes.sort((a, b) => a - b);
|
|
3133
|
+
} else {
|
|
3134
|
+
runes.push(...inst.runes);
|
|
3135
|
+
}
|
|
3136
|
+
onePassRunes[pc] = runes;
|
|
3137
|
+
inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
|
|
3138
|
+
inst.op = Inst.RUNE;
|
|
3139
|
+
break;
|
|
3140
|
+
}
|
|
3141
|
+
case Inst.RUNE1:
|
|
3142
|
+
{
|
|
3143
|
+
m[pc] = false;
|
|
3144
|
+
if (inst.next && inst.next.length > 0) break;
|
|
3145
|
+
instQueue.insert(inst.out);
|
|
3146
|
+
let runes = [];
|
|
3147
|
+
if ((inst.arg & RE2Flags.FOLD_CASE) !== 0) {
|
|
3148
|
+
const r0 = inst.runes[0];
|
|
3149
|
+
runes.push(r0, r0);
|
|
3150
|
+
for (let r1 = Unicode.simpleFold(r0); r1 !== r0; r1 = Unicode.simpleFold(r1)) {
|
|
3151
|
+
runes.push(r1, r1);
|
|
3152
|
+
}
|
|
3153
|
+
runes.sort((a, b) => a - b);
|
|
3154
|
+
} else {
|
|
3155
|
+
runes.push(inst.runes[0], inst.runes[0]);
|
|
3156
|
+
}
|
|
3157
|
+
onePassRunes[pc] = runes;
|
|
3158
|
+
inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
|
|
3159
|
+
inst.op = Inst.RUNE;
|
|
3160
|
+
break;
|
|
3161
|
+
}
|
|
3162
|
+
case Inst.RUNE_ANY:
|
|
3163
|
+
{
|
|
3164
|
+
m[pc] = false;
|
|
3165
|
+
if (inst.next && inst.next.length > 0) break;
|
|
3166
|
+
instQueue.insert(inst.out);
|
|
3167
|
+
onePassRunes[pc] = [0, Unicode.MAX_RUNE];
|
|
3168
|
+
inst.next = new Uint32Array([inst.out]);
|
|
3169
|
+
break;
|
|
3170
|
+
}
|
|
3171
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
3172
|
+
{
|
|
3173
|
+
m[pc] = false;
|
|
3174
|
+
if (inst.next && inst.next.length > 0) break;
|
|
3175
|
+
instQueue.insert(inst.out);
|
|
3176
|
+
onePassRunes[pc] = [0, 9, 11, Unicode.MAX_RUNE]; // \n is 10
|
|
3177
|
+
inst.next = new Uint32Array(Math.floor(onePassRunes[pc].length / 2) + 1).fill(inst.out);
|
|
3178
|
+
break;
|
|
3179
|
+
}
|
|
3180
|
+
}
|
|
3181
|
+
return ok;
|
|
3182
|
+
};
|
|
3183
|
+
instQueue.clear();
|
|
3184
|
+
instQueue.insert(p.start);
|
|
3185
|
+
while (!instQueue.empty()) {
|
|
3186
|
+
visitQueue.clear();
|
|
3187
|
+
const pc = instQueue.next();
|
|
3188
|
+
if (!check(pc)) return null;
|
|
3189
|
+
}
|
|
3190
|
+
for (let i = 0; i < p.inst.length; i++) {
|
|
3191
|
+
if (onePassRunes[i]) p.inst[i].runes = onePassRunes[i];
|
|
3192
|
+
}
|
|
3193
|
+
return p;
|
|
3194
|
+
};
|
|
3195
|
+
const cleanupOnePass = (p, original) => {
|
|
3196
|
+
for (let ix = 0; ix < original.inst.length; ix++) {
|
|
3197
|
+
const instOriginal = original.inst[ix];
|
|
3198
|
+
switch (instOriginal.op) {
|
|
3199
|
+
case Inst.ALT:
|
|
3200
|
+
case Inst.ALT_MATCH:
|
|
3201
|
+
case Inst.RUNE:
|
|
3202
|
+
break;
|
|
3203
|
+
case Inst.CAPTURE:
|
|
3204
|
+
case Inst.EMPTY_WIDTH:
|
|
3205
|
+
case Inst.NOP:
|
|
3206
|
+
case Inst.MATCH:
|
|
3207
|
+
case Inst.FAIL:
|
|
3208
|
+
p.inst[ix].next = null;
|
|
3209
|
+
break;
|
|
3210
|
+
case Inst.RUNE1:
|
|
3211
|
+
case Inst.RUNE_ANY:
|
|
3212
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
3213
|
+
p.inst[ix].next = null;
|
|
3214
|
+
p.inst[ix].op = instOriginal.op;
|
|
3215
|
+
p.inst[ix].runes = instOriginal.runes ? instOriginal.runes.slice() : [];
|
|
3216
|
+
break;
|
|
3217
|
+
}
|
|
3218
|
+
}
|
|
3219
|
+
};
|
|
3220
|
+
class OnePass {
|
|
3221
|
+
static compile(prog) {
|
|
3222
|
+
if (prog.start === 0) return null;
|
|
3223
|
+
const startInst = prog.inst[prog.start];
|
|
3224
|
+
// onepass regexps must be strictly anchored
|
|
3225
|
+
if (startInst.op !== Inst.EMPTY_WIDTH || (startInst.arg & Utils.EMPTY_BEGIN_TEXT) === 0) {
|
|
3226
|
+
return null;
|
|
3227
|
+
}
|
|
3228
|
+
let hasAlt = false;
|
|
3229
|
+
for (let i = 0; i < prog.inst.length; i++) {
|
|
3230
|
+
if (prog.inst[i].op === Inst.ALT || prog.inst[i].op === Inst.ALT_MATCH) {
|
|
3231
|
+
hasAlt = true;
|
|
3232
|
+
break;
|
|
2337
3233
|
}
|
|
2338
3234
|
}
|
|
2339
|
-
|
|
2340
|
-
|
|
3235
|
+
for (let i = 0; i < prog.inst.length; i++) {
|
|
3236
|
+
const inst = prog.inst[i];
|
|
3237
|
+
const opOut = prog.inst[inst.out].op;
|
|
3238
|
+
switch (inst.op) {
|
|
3239
|
+
case Inst.ALT:
|
|
3240
|
+
case Inst.ALT_MATCH:
|
|
3241
|
+
if (opOut === Inst.MATCH || prog.inst[inst.arg].op === Inst.MATCH) {
|
|
3242
|
+
return null;
|
|
3243
|
+
}
|
|
3244
|
+
break;
|
|
3245
|
+
case Inst.EMPTY_WIDTH:
|
|
3246
|
+
if (opOut === Inst.MATCH) {
|
|
3247
|
+
if ((inst.arg & Utils.EMPTY_END_TEXT) === Utils.EMPTY_END_TEXT) {
|
|
3248
|
+
continue;
|
|
3249
|
+
}
|
|
3250
|
+
return null;
|
|
3251
|
+
}
|
|
3252
|
+
break;
|
|
3253
|
+
default:
|
|
3254
|
+
if (opOut === Inst.MATCH && hasAlt) {
|
|
3255
|
+
return null;
|
|
3256
|
+
}
|
|
3257
|
+
break;
|
|
3258
|
+
}
|
|
2341
3259
|
}
|
|
2342
|
-
|
|
2343
|
-
|
|
2344
|
-
|
|
2345
|
-
|
|
2346
|
-
state.nextAscii[charCode] = nextState;
|
|
2347
|
-
} else {
|
|
2348
|
-
const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
|
|
2349
|
-
state.nextMap.set(key, nextState);
|
|
3260
|
+
let p = onePassCopy(prog);
|
|
3261
|
+
p = makeOnePass(p);
|
|
3262
|
+
if (p !== null) {
|
|
3263
|
+
cleanupOnePass(p, prog);
|
|
2350
3264
|
}
|
|
2351
|
-
return
|
|
3265
|
+
return p;
|
|
2352
3266
|
}
|
|
2353
|
-
|
|
2354
|
-
|
|
2355
|
-
|
|
2356
|
-
if (
|
|
2357
|
-
|
|
2358
|
-
|
|
2359
|
-
|
|
2360
|
-
|
|
2361
|
-
|
|
2362
|
-
|
|
2363
|
-
let
|
|
2364
|
-
let
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
3267
|
+
static next(inst, r) {
|
|
3268
|
+
const nextIdx = inst.matchRunePos(r);
|
|
3269
|
+
if (nextIdx >= 0) return inst.next[nextIdx];
|
|
3270
|
+
if (inst.op === Inst.ALT_MATCH) return inst.out;
|
|
3271
|
+
return 0; // fail
|
|
3272
|
+
}
|
|
3273
|
+
static execute(re2, input, pos, anchor, ncap) {
|
|
3274
|
+
const onepass = re2.onepass;
|
|
3275
|
+
if (!onepass) return null;
|
|
3276
|
+
const matchcap = new Int32Array(ncap).fill(-1);
|
|
3277
|
+
let matched = false;
|
|
3278
|
+
let r = input.step(pos);
|
|
3279
|
+
let rune = r >> 3;
|
|
3280
|
+
let width = r & 7;
|
|
3281
|
+
let r1 = MachineInputBase.EOF();
|
|
3282
|
+
let rune1 = -1;
|
|
3283
|
+
let width1 = 0;
|
|
3284
|
+
if (r !== MachineInputBase.EOF()) {
|
|
3285
|
+
r1 = input.step(pos + width);
|
|
3286
|
+
if (r1 !== MachineInputBase.EOF()) {
|
|
3287
|
+
rune1 = r1 >> 3;
|
|
3288
|
+
width1 = r1 & 7;
|
|
2370
3289
|
}
|
|
2371
3290
|
}
|
|
2372
|
-
let
|
|
2373
|
-
|
|
2374
|
-
|
|
2375
|
-
|
|
2376
|
-
|
|
2377
|
-
|
|
2378
|
-
|
|
2379
|
-
|
|
2380
|
-
|
|
3291
|
+
let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
|
|
3292
|
+
let pc = onepass.start;
|
|
3293
|
+
let inst;
|
|
3294
|
+
while (true) {
|
|
3295
|
+
inst = onepass.inst[pc];
|
|
3296
|
+
pc = inst.out;
|
|
3297
|
+
switch (inst.op) {
|
|
3298
|
+
case Inst.MATCH:
|
|
3299
|
+
{
|
|
3300
|
+
matched = true;
|
|
3301
|
+
if (matchcap.length > 0) {
|
|
3302
|
+
matchcap[0] = 0;
|
|
3303
|
+
matchcap[1] = pos;
|
|
3304
|
+
}
|
|
3305
|
+
return ncap === 0 ? [] : Array.from(matchcap);
|
|
3306
|
+
}
|
|
3307
|
+
case Inst.RUNE:
|
|
3308
|
+
if (!inst.matchRune(rune)) return null;
|
|
3309
|
+
break;
|
|
3310
|
+
case Inst.RUNE1:
|
|
3311
|
+
if (rune !== inst.runes[0]) return null;
|
|
3312
|
+
break;
|
|
3313
|
+
case Inst.RUNE_ANY:
|
|
3314
|
+
break;
|
|
3315
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
3316
|
+
if (rune === 10) return null;
|
|
3317
|
+
break;
|
|
3318
|
+
case Inst.ALT:
|
|
3319
|
+
case Inst.ALT_MATCH:
|
|
3320
|
+
pc = OnePass.next(inst, rune);
|
|
3321
|
+
continue;
|
|
3322
|
+
case Inst.FAIL:
|
|
3323
|
+
return null;
|
|
3324
|
+
case Inst.NOP:
|
|
3325
|
+
continue;
|
|
3326
|
+
case Inst.EMPTY_WIDTH:
|
|
3327
|
+
if ((inst.arg & ~flag) !== 0) return null;
|
|
3328
|
+
continue;
|
|
3329
|
+
case Inst.CAPTURE:
|
|
3330
|
+
if (inst.arg < matchcap.length) {
|
|
3331
|
+
matchcap[inst.arg] = pos;
|
|
3332
|
+
}
|
|
3333
|
+
continue;
|
|
3334
|
+
default:
|
|
3335
|
+
throw new RE2JSInternalException('bad inst');
|
|
2381
3336
|
}
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
3337
|
+
if (width === 0) break;
|
|
3338
|
+
flag = Utils.emptyOpContext(rune, rune1);
|
|
3339
|
+
pos += width;
|
|
3340
|
+
rune = rune1;
|
|
3341
|
+
width = width1;
|
|
3342
|
+
if (rune !== -1) {
|
|
3343
|
+
r1 = input.step(pos + width);
|
|
3344
|
+
if (r1 !== MachineInputBase.EOF()) {
|
|
3345
|
+
rune1 = r1 >> 3;
|
|
3346
|
+
width1 = r1 & 7;
|
|
2389
3347
|
} else {
|
|
2390
|
-
|
|
3348
|
+
rune1 = -1;
|
|
3349
|
+
width1 = 0;
|
|
2391
3350
|
}
|
|
2392
3351
|
}
|
|
2393
|
-
|
|
2394
|
-
// If we hit a dead end, and anchored, fail early
|
|
2395
|
-
if (currentState.nfaStates.length === 0) {
|
|
2396
|
-
if (anchor !== RE2Flags.UNANCHORED) return false;
|
|
2397
|
-
}
|
|
2398
|
-
i += width;
|
|
2399
3352
|
}
|
|
2400
|
-
return
|
|
3353
|
+
if (!matched) return null;
|
|
3354
|
+
return ncap === 0 ? [] : Array.from(matchcap);
|
|
2401
3355
|
}
|
|
2402
3356
|
}
|
|
2403
3357
|
|
|
@@ -2482,7 +3436,7 @@ class Regexp {
|
|
|
2482
3436
|
this.max = 0; // max for REPEAT
|
|
2483
3437
|
this.cap = 0; // capturing index, for CAPTURE
|
|
2484
3438
|
this.name = null; // capturing name, for CAPTURE
|
|
2485
|
-
this.namedGroups =
|
|
3439
|
+
this.namedGroups = Object.create(null); // map of group name -> capturing index
|
|
2486
3440
|
}
|
|
2487
3441
|
reinit() {
|
|
2488
3442
|
this.flags = 0;
|
|
@@ -2492,7 +3446,7 @@ class Regexp {
|
|
|
2492
3446
|
this.min = 0;
|
|
2493
3447
|
this.max = 0;
|
|
2494
3448
|
this.name = null;
|
|
2495
|
-
this.namedGroups =
|
|
3449
|
+
this.namedGroups = Object.create(null);
|
|
2496
3450
|
}
|
|
2497
3451
|
toString() {
|
|
2498
3452
|
return this.appendTo();
|
|
@@ -2752,6 +3706,188 @@ class Regexp {
|
|
|
2752
3706
|
}
|
|
2753
3707
|
}
|
|
2754
3708
|
|
|
3709
|
+
class Prefilter {
|
|
3710
|
+
static Type = {
|
|
3711
|
+
NONE: 0,
|
|
3712
|
+
EXACT: 1,
|
|
3713
|
+
AND: 2,
|
|
3714
|
+
OR: 3
|
|
3715
|
+
};
|
|
3716
|
+
constructor(type) {
|
|
3717
|
+
this.type = type;
|
|
3718
|
+
this.subs = [];
|
|
3719
|
+
this.str = '';
|
|
3720
|
+
this.bytes = null;
|
|
3721
|
+
}
|
|
3722
|
+
eval(input, pos) {
|
|
3723
|
+
switch (this.type) {
|
|
3724
|
+
case Prefilter.Type.NONE:
|
|
3725
|
+
return true;
|
|
3726
|
+
case Prefilter.Type.EXACT:
|
|
3727
|
+
return input.hasString(this, pos);
|
|
3728
|
+
case Prefilter.Type.AND:
|
|
3729
|
+
for (let i = 0; i < this.subs.length; i++) {
|
|
3730
|
+
if (!this.subs[i].eval(input, pos)) return false;
|
|
3731
|
+
}
|
|
3732
|
+
return true;
|
|
3733
|
+
case Prefilter.Type.OR:
|
|
3734
|
+
for (let i = 0; i < this.subs.length; i++) {
|
|
3735
|
+
if (this.subs[i].eval(input, pos)) return true;
|
|
3736
|
+
}
|
|
3737
|
+
return false;
|
|
3738
|
+
default:
|
|
3739
|
+
return true;
|
|
3740
|
+
}
|
|
3741
|
+
}
|
|
3742
|
+
}
|
|
3743
|
+
class PrefilterTree {
|
|
3744
|
+
static build(re) {
|
|
3745
|
+
const pf = PrefilterTree.fromRegexp(re);
|
|
3746
|
+
return PrefilterTree.simplify(pf);
|
|
3747
|
+
}
|
|
3748
|
+
static fromRegexp(re) {
|
|
3749
|
+
if (!re) return new Prefilter(Prefilter.Type.NONE);
|
|
3750
|
+
switch (re.op) {
|
|
3751
|
+
case Regexp.Op.NO_MATCH:
|
|
3752
|
+
case Regexp.Op.EMPTY_MATCH:
|
|
3753
|
+
case Regexp.Op.BEGIN_LINE:
|
|
3754
|
+
case Regexp.Op.END_LINE:
|
|
3755
|
+
case Regexp.Op.BEGIN_TEXT:
|
|
3756
|
+
case Regexp.Op.END_TEXT:
|
|
3757
|
+
case Regexp.Op.WORD_BOUNDARY:
|
|
3758
|
+
case Regexp.Op.NO_WORD_BOUNDARY:
|
|
3759
|
+
case Regexp.Op.CHAR_CLASS:
|
|
3760
|
+
case Regexp.Op.ANY_CHAR_NOT_NL:
|
|
3761
|
+
case Regexp.Op.ANY_CHAR:
|
|
3762
|
+
{
|
|
3763
|
+
return new Prefilter(Prefilter.Type.NONE);
|
|
3764
|
+
}
|
|
3765
|
+
case Regexp.Op.LITERAL:
|
|
3766
|
+
{
|
|
3767
|
+
if (re.runes.length === 0 || (re.flags & RE2Flags.FOLD_CASE) !== 0) {
|
|
3768
|
+
// Skip case-folded literals for simplicity
|
|
3769
|
+
return new Prefilter(Prefilter.Type.NONE);
|
|
3770
|
+
}
|
|
3771
|
+
const pf = new Prefilter(Prefilter.Type.EXACT);
|
|
3772
|
+
let str = '';
|
|
3773
|
+
for (let i = 0; i < re.runes.length; i++) {
|
|
3774
|
+
str += String.fromCodePoint(re.runes[i]);
|
|
3775
|
+
}
|
|
3776
|
+
pf.str = str;
|
|
3777
|
+
pf.bytes = Utils.stringToUtf8ByteArray(pf.str);
|
|
3778
|
+
return pf;
|
|
3779
|
+
}
|
|
3780
|
+
case Regexp.Op.CAPTURE:
|
|
3781
|
+
case Regexp.Op.PLUS:
|
|
3782
|
+
{
|
|
3783
|
+
return PrefilterTree.fromRegexp(re.subs[0]);
|
|
3784
|
+
}
|
|
3785
|
+
case Regexp.Op.REPEAT:
|
|
3786
|
+
{
|
|
3787
|
+
if (re.min >= 1) {
|
|
3788
|
+
return PrefilterTree.fromRegexp(re.subs[0]);
|
|
3789
|
+
}
|
|
3790
|
+
return new Prefilter(Prefilter.Type.NONE);
|
|
3791
|
+
}
|
|
3792
|
+
case Regexp.Op.CONCAT:
|
|
3793
|
+
{
|
|
3794
|
+
const pf = new Prefilter(Prefilter.Type.AND);
|
|
3795
|
+
for (const sub of re.subs) {
|
|
3796
|
+
pf.subs.push(PrefilterTree.fromRegexp(sub));
|
|
3797
|
+
}
|
|
3798
|
+
return pf;
|
|
3799
|
+
}
|
|
3800
|
+
case Regexp.Op.ALTERNATE:
|
|
3801
|
+
{
|
|
3802
|
+
const pf = new Prefilter(Prefilter.Type.OR);
|
|
3803
|
+
for (const sub of re.subs) {
|
|
3804
|
+
pf.subs.push(PrefilterTree.fromRegexp(sub));
|
|
3805
|
+
}
|
|
3806
|
+
return pf;
|
|
3807
|
+
}
|
|
3808
|
+
default:
|
|
3809
|
+
return new Prefilter(Prefilter.Type.NONE);
|
|
3810
|
+
}
|
|
3811
|
+
}
|
|
3812
|
+
static simplify(pf) {
|
|
3813
|
+
if (pf.type === Prefilter.Type.EXACT || pf.type === Prefilter.Type.NONE) {
|
|
3814
|
+
return pf;
|
|
3815
|
+
}
|
|
3816
|
+
if (pf.type === Prefilter.Type.AND) {
|
|
3817
|
+
const newSubs = [];
|
|
3818
|
+
for (const sub of pf.subs) {
|
|
3819
|
+
const s = PrefilterTree.simplify(sub);
|
|
3820
|
+
if (s.type !== Prefilter.Type.NONE) {
|
|
3821
|
+
if (s.type === Prefilter.Type.AND) {
|
|
3822
|
+
newSubs.push(...s.subs);
|
|
3823
|
+
} else {
|
|
3824
|
+
newSubs.push(s);
|
|
3825
|
+
}
|
|
3826
|
+
}
|
|
3827
|
+
}
|
|
3828
|
+
if (newSubs.length === 0) return new Prefilter(Prefilter.Type.NONE);
|
|
3829
|
+
if (newSubs.length === 1) return newSubs[0];
|
|
3830
|
+
pf.subs = newSubs;
|
|
3831
|
+
return pf;
|
|
3832
|
+
}
|
|
3833
|
+
if (pf.type === Prefilter.Type.OR) {
|
|
3834
|
+
const newSubs = [];
|
|
3835
|
+
for (const sub of pf.subs) {
|
|
3836
|
+
const s = PrefilterTree.simplify(sub);
|
|
3837
|
+
if (s.type === Prefilter.Type.NONE) {
|
|
3838
|
+
// If any branch of an OR has no requirements, the whole OR has no requirements
|
|
3839
|
+
return new Prefilter(Prefilter.Type.NONE);
|
|
3840
|
+
}
|
|
3841
|
+
if (s.type === Prefilter.Type.OR) {
|
|
3842
|
+
newSubs.push(...s.subs);
|
|
3843
|
+
} else {
|
|
3844
|
+
newSubs.push(s);
|
|
3845
|
+
}
|
|
3846
|
+
}
|
|
3847
|
+
if (newSubs.length === 0) return new Prefilter(Prefilter.Type.NONE);
|
|
3848
|
+
if (newSubs.length === 1) return newSubs[0];
|
|
3849
|
+
|
|
3850
|
+
// De-duplicate EXACT branches
|
|
3851
|
+
const seen = new Set();
|
|
3852
|
+
const uniqueSubs = [];
|
|
3853
|
+
for (const sub of newSubs) {
|
|
3854
|
+
if (sub.type === Prefilter.Type.EXACT) {
|
|
3855
|
+
if (!seen.has(sub.str)) {
|
|
3856
|
+
seen.add(sub.str);
|
|
3857
|
+
uniqueSubs.push(sub);
|
|
3858
|
+
}
|
|
3859
|
+
} else {
|
|
3860
|
+
uniqueSubs.push(sub);
|
|
3861
|
+
}
|
|
3862
|
+
}
|
|
3863
|
+
pf.subs = uniqueSubs;
|
|
3864
|
+
return pf;
|
|
3865
|
+
}
|
|
3866
|
+
return pf;
|
|
3867
|
+
}
|
|
3868
|
+
}
|
|
3869
|
+
|
|
3870
|
+
/**
|
|
3871
|
+
* A list of instruction pointers waiting to be patched.
|
|
3872
|
+
* Tracks both `head` and `tail` to allow O(1) appending during compilation.
|
|
3873
|
+
* * Values are encoded integers, not standard memory pointers:
|
|
3874
|
+
* - Program instruction index: `l >> 1`
|
|
3875
|
+
* - Patch `.out` field if: `(l & 1) === 0`
|
|
3876
|
+
* - Patch `.arg` field if: `(l & 1) === 1`
|
|
3877
|
+
* - `0` denotes an empty list.
|
|
3878
|
+
* * @see https://swtch.com/~rsc/regexp/regexp1.html
|
|
3879
|
+
*/
|
|
3880
|
+
class PatchList {
|
|
3881
|
+
/**
|
|
3882
|
+
* @param {number} head - Encoded pointer to the start of the patch list.
|
|
3883
|
+
* @param {number} tail - Encoded pointer to the end of the patch list.
|
|
3884
|
+
*/
|
|
3885
|
+
constructor(head = 0, tail = 0) {
|
|
3886
|
+
this.head = head;
|
|
3887
|
+
this.tail = tail;
|
|
3888
|
+
}
|
|
3889
|
+
}
|
|
3890
|
+
|
|
2755
3891
|
/**
|
|
2756
3892
|
* A Prog is a compiled regular expression program.
|
|
2757
3893
|
*/
|
|
@@ -2853,39 +3989,30 @@ class Prog {
|
|
|
2853
3989
|
return i.arg;
|
|
2854
3990
|
}
|
|
2855
3991
|
patch(l, val) {
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
|
|
3992
|
+
let head = l.head;
|
|
3993
|
+
while (head !== 0) {
|
|
3994
|
+
const i = this.inst[head >> 1];
|
|
3995
|
+
if ((head & 1) === 0) {
|
|
3996
|
+
head = i.out;
|
|
2860
3997
|
i.out = val;
|
|
2861
3998
|
} else {
|
|
2862
|
-
|
|
3999
|
+
head = i.arg;
|
|
2863
4000
|
i.arg = val;
|
|
2864
4001
|
}
|
|
2865
4002
|
}
|
|
2866
4003
|
}
|
|
2867
4004
|
append(l1, l2) {
|
|
2868
|
-
if (l1 === 0)
|
|
2869
|
-
|
|
2870
|
-
|
|
2871
|
-
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
for (;;) {
|
|
2876
|
-
const next = this.next(last);
|
|
2877
|
-
if (next === 0) {
|
|
2878
|
-
break;
|
|
2879
|
-
}
|
|
2880
|
-
last = next;
|
|
2881
|
-
}
|
|
2882
|
-
const i = this.inst[last >> 1];
|
|
2883
|
-
if ((last & 1) === 0) {
|
|
2884
|
-
i.out = l2;
|
|
4005
|
+
if (l1.head === 0) return l2;
|
|
4006
|
+
if (l2.head === 0) return l1;
|
|
4007
|
+
|
|
4008
|
+
// We know exactly where the tail is
|
|
4009
|
+
const i = this.inst[l1.tail >> 1];
|
|
4010
|
+
if ((l1.tail & 1) === 0) {
|
|
4011
|
+
i.out = l2.head;
|
|
2885
4012
|
} else {
|
|
2886
|
-
i.arg = l2;
|
|
4013
|
+
i.arg = l2.head;
|
|
2887
4014
|
}
|
|
2888
|
-
return l1;
|
|
4015
|
+
return new PatchList(l1.head, l2.tail);
|
|
2889
4016
|
}
|
|
2890
4017
|
/**
|
|
2891
4018
|
*
|
|
@@ -2914,7 +4041,7 @@ class Prog {
|
|
|
2914
4041
|
* @class
|
|
2915
4042
|
*/
|
|
2916
4043
|
class Frag {
|
|
2917
|
-
constructor(i = 0, out =
|
|
4044
|
+
constructor(i = 0, out = new PatchList(), nullable = false) {
|
|
2918
4045
|
this.i = i; // an instruction address (pc).
|
|
2919
4046
|
this.out = out; // a patch list; see explanation in Prog.js
|
|
2920
4047
|
this.nullable = nullable; // whether the fragment can match the empty string
|
|
@@ -2939,6 +4066,33 @@ class Compiler {
|
|
|
2939
4066
|
c.prog.start = f.i;
|
|
2940
4067
|
return c.prog;
|
|
2941
4068
|
}
|
|
4069
|
+
static compileSet(regexps) {
|
|
4070
|
+
const c = new Compiler();
|
|
4071
|
+
if (regexps.length === 0) {
|
|
4072
|
+
c.prog.start = c.newInst(Inst.FAIL).i;
|
|
4073
|
+
return c.prog;
|
|
4074
|
+
}
|
|
4075
|
+
let starts = [];
|
|
4076
|
+
for (let i = 0; i < regexps.length; i++) {
|
|
4077
|
+
const f = c.compile(regexps[i]);
|
|
4078
|
+
const m = c.newInst(Inst.MATCH);
|
|
4079
|
+
c.prog.getInst(m.i).arg = i; // Store the regex index
|
|
4080
|
+
c.prog.patch(f.out, m.i);
|
|
4081
|
+
starts.push(f.i);
|
|
4082
|
+
}
|
|
4083
|
+
|
|
4084
|
+
// Link starts together via ALT
|
|
4085
|
+
let start = starts[0];
|
|
4086
|
+
for (let i = 1; i < starts.length; i++) {
|
|
4087
|
+
const f = c.newInst(Inst.ALT);
|
|
4088
|
+
const inst = c.prog.getInst(f.i);
|
|
4089
|
+
inst.out = start;
|
|
4090
|
+
inst.arg = starts[i];
|
|
4091
|
+
start = f.i;
|
|
4092
|
+
}
|
|
4093
|
+
c.prog.start = start;
|
|
4094
|
+
return c.prog;
|
|
4095
|
+
}
|
|
2942
4096
|
constructor() {
|
|
2943
4097
|
this.prog = new Prog();
|
|
2944
4098
|
this.newInst(Inst.FAIL);
|
|
@@ -2951,7 +4105,7 @@ class Compiler {
|
|
|
2951
4105
|
// Returns a no-op fragment. Sometimes unavoidable.
|
|
2952
4106
|
nop() {
|
|
2953
4107
|
const f = this.newInst(Inst.NOP);
|
|
2954
|
-
f.out = f.i << 1;
|
|
4108
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
2955
4109
|
return f;
|
|
2956
4110
|
}
|
|
2957
4111
|
fail() {
|
|
@@ -2962,7 +4116,7 @@ class Compiler {
|
|
|
2962
4116
|
// Given a fragment a, returns a fragment with capturing parens around a.
|
|
2963
4117
|
cap(arg) {
|
|
2964
4118
|
const f = this.newInst(Inst.CAPTURE);
|
|
2965
|
-
f.out = f.i << 1;
|
|
4119
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
2966
4120
|
this.prog.getInst(f.i).arg = arg;
|
|
2967
4121
|
if (this.prog.numCap < arg + 1) {
|
|
2968
4122
|
this.prog.numCap = arg + 1;
|
|
@@ -3010,10 +4164,10 @@ class Compiler {
|
|
|
3010
4164
|
const i = this.prog.getInst(f.i);
|
|
3011
4165
|
if (nongreedy) {
|
|
3012
4166
|
i.arg = f1.i;
|
|
3013
|
-
f.out = f.i << 1;
|
|
4167
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3014
4168
|
} else {
|
|
3015
4169
|
i.out = f1.i;
|
|
3016
|
-
f.out = f.i << 1 | 1;
|
|
4170
|
+
f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
|
|
3017
4171
|
}
|
|
3018
4172
|
this.prog.patch(f1.out, f.i);
|
|
3019
4173
|
return f;
|
|
@@ -3025,10 +4179,10 @@ class Compiler {
|
|
|
3025
4179
|
const i = this.prog.getInst(f.i);
|
|
3026
4180
|
if (nongreedy) {
|
|
3027
4181
|
i.arg = f1.i;
|
|
3028
|
-
f.out = f.i << 1;
|
|
4182
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3029
4183
|
} else {
|
|
3030
4184
|
i.out = f1.i;
|
|
3031
|
-
f.out = f.i << 1 | 1;
|
|
4185
|
+
f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
|
|
3032
4186
|
}
|
|
3033
4187
|
f.out = this.prog.append(f.out, f1.out);
|
|
3034
4188
|
return f;
|
|
@@ -3051,7 +4205,7 @@ class Compiler {
|
|
|
3051
4205
|
empty(op) {
|
|
3052
4206
|
const f = this.newInst(Inst.EMPTY_WIDTH);
|
|
3053
4207
|
this.prog.getInst(f.i).arg = op;
|
|
3054
|
-
f.out = f.i << 1;
|
|
4208
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3055
4209
|
return f;
|
|
3056
4210
|
}
|
|
3057
4211
|
|
|
@@ -3066,7 +4220,7 @@ class Compiler {
|
|
|
3066
4220
|
flags &= -2;
|
|
3067
4221
|
}
|
|
3068
4222
|
i.arg = flags;
|
|
3069
|
-
f.out = f.i << 1;
|
|
4223
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3070
4224
|
if ((flags & RE2Flags.FOLD_CASE) === 0 && runes.length === 1 || runes.length === 2 && runes[0] === runes[1]) {
|
|
3071
4225
|
i.op = Inst.RUNE1;
|
|
3072
4226
|
} else if (runes.length === 2 && runes[0] === 0 && runes[1] === Unicode.MAX_RUNE) {
|
|
@@ -3171,23 +4325,92 @@ class Simplify {
|
|
|
3171
4325
|
}
|
|
3172
4326
|
switch (re.op) {
|
|
3173
4327
|
case Regexp.Op.CAPTURE:
|
|
4328
|
+
{
|
|
4329
|
+
const sub = Simplify.simplify(re.subs[0]);
|
|
4330
|
+
if (sub !== re.subs[0]) {
|
|
4331
|
+
const nre = Regexp.fromRegexp(re);
|
|
4332
|
+
nre.runes = [];
|
|
4333
|
+
nre.subs = [sub];
|
|
4334
|
+
return nre;
|
|
4335
|
+
}
|
|
4336
|
+
return re;
|
|
4337
|
+
}
|
|
3174
4338
|
case Regexp.Op.CONCAT:
|
|
3175
4339
|
case Regexp.Op.ALTERNATE:
|
|
3176
4340
|
{
|
|
3177
|
-
|
|
4341
|
+
const newSubs = [];
|
|
4342
|
+
let changed = false;
|
|
3178
4343
|
for (let i = 0; i < re.subs.length; i++) {
|
|
3179
4344
|
const sub = re.subs[i];
|
|
3180
4345
|
const nsub = Simplify.simplify(sub);
|
|
3181
|
-
if (
|
|
3182
|
-
|
|
3183
|
-
|
|
3184
|
-
|
|
4346
|
+
if (nsub !== sub) {
|
|
4347
|
+
changed = true;
|
|
4348
|
+
}
|
|
4349
|
+
if (re.op === Regexp.Op.CONCAT) {
|
|
4350
|
+
// If any part of a CONCAT is mathematically impossible,
|
|
4351
|
+
// the entire CONCAT sequence becomes impossible.
|
|
4352
|
+
if (nsub.op === Regexp.Op.NO_MATCH) {
|
|
4353
|
+
return new Regexp(Regexp.Op.NO_MATCH);
|
|
4354
|
+
}
|
|
4355
|
+
// Drop empty 0-width match nodes entirely from sequences
|
|
4356
|
+
if (nsub.op === Regexp.Op.EMPTY_MATCH) {
|
|
4357
|
+
changed = true;
|
|
4358
|
+
continue;
|
|
4359
|
+
}
|
|
4360
|
+
// Flatten nested concatenations
|
|
4361
|
+
if (nsub.op === Regexp.Op.CONCAT) {
|
|
4362
|
+
changed = true;
|
|
4363
|
+
newSubs.push(...nsub.subs);
|
|
4364
|
+
continue;
|
|
4365
|
+
}
|
|
4366
|
+
} else if (re.op === Regexp.Op.ALTERNATE) {
|
|
4367
|
+
// Drop impossible branches from alternations
|
|
4368
|
+
if (nsub.op === Regexp.Op.NO_MATCH) {
|
|
4369
|
+
changed = true;
|
|
4370
|
+
continue;
|
|
4371
|
+
}
|
|
4372
|
+
// Flatten nested alternations
|
|
4373
|
+
if (nsub.op === Regexp.Op.ALTERNATE) {
|
|
4374
|
+
changed = true;
|
|
4375
|
+
newSubs.push(...nsub.subs);
|
|
4376
|
+
continue;
|
|
4377
|
+
}
|
|
3185
4378
|
}
|
|
3186
|
-
|
|
3187
|
-
|
|
4379
|
+
newSubs.push(nsub);
|
|
4380
|
+
}
|
|
4381
|
+
if (changed) {
|
|
4382
|
+
// If we filtered out all nodes, return the mathematically correct fallback
|
|
4383
|
+
if (newSubs.length === 0) {
|
|
4384
|
+
return new Regexp(re.op === Regexp.Op.CONCAT ? Regexp.Op.EMPTY_MATCH : Regexp.Op.NO_MATCH);
|
|
4385
|
+
}
|
|
4386
|
+
// If only 1 node remains, we don't need a CONCAT/ALT container at all
|
|
4387
|
+
if (newSubs.length === 1) {
|
|
4388
|
+
return newSubs[0];
|
|
3188
4389
|
}
|
|
4390
|
+
const nre = Regexp.fromRegexp(re);
|
|
4391
|
+
nre.runes = [];
|
|
4392
|
+
nre.subs = newSubs;
|
|
4393
|
+
return nre;
|
|
4394
|
+
}
|
|
4395
|
+
return re;
|
|
4396
|
+
}
|
|
4397
|
+
case Regexp.Op.CHAR_CLASS:
|
|
4398
|
+
{
|
|
4399
|
+
if (re.runes === null) return re;
|
|
4400
|
+
|
|
4401
|
+
// Empty character classes match nothing.
|
|
4402
|
+
if (re.runes.length === 0) {
|
|
4403
|
+
return new Regexp(Regexp.Op.NO_MATCH);
|
|
3189
4404
|
}
|
|
3190
|
-
|
|
4405
|
+
// Full character classes match everything.
|
|
4406
|
+
if (re.runes.length === 2 && re.runes[0] === 0 && re.runes[1] === Unicode.MAX_RUNE) {
|
|
4407
|
+
return new Regexp(Regexp.Op.ANY_CHAR);
|
|
4408
|
+
}
|
|
4409
|
+
// Standard catch-all except newline
|
|
4410
|
+
if (re.runes.length === 4 && re.runes[0] === 0 && re.runes[1] === Codepoint.CODES.get('\n') - 1 && re.runes[2] === Codepoint.CODES.get('\n') + 1 && re.runes[3] === Unicode.MAX_RUNE) {
|
|
4411
|
+
return new Regexp(Regexp.Op.ANY_CHAR_NOT_NL);
|
|
4412
|
+
}
|
|
4413
|
+
return re;
|
|
3191
4414
|
}
|
|
3192
4415
|
case Regexp.Op.STAR:
|
|
3193
4416
|
case Regexp.Op.PLUS:
|
|
@@ -3224,7 +4447,9 @@ class Simplify {
|
|
|
3224
4447
|
}
|
|
3225
4448
|
subs.push(Simplify.simplify1(Regexp.Op.PLUS, re.flags, sub, null));
|
|
3226
4449
|
nre.subs = subs.slice(0);
|
|
3227
|
-
|
|
4450
|
+
|
|
4451
|
+
// Ensure newly created CONCAT is properly flattened
|
|
4452
|
+
return Simplify.simplify(nre);
|
|
3228
4453
|
}
|
|
3229
4454
|
// Special case x{0} handled above.
|
|
3230
4455
|
|
|
@@ -3262,7 +4487,8 @@ class Simplify {
|
|
|
3262
4487
|
if (prefixSubs !== null) {
|
|
3263
4488
|
const prefix = new Regexp(Regexp.Op.CONCAT);
|
|
3264
4489
|
prefix.subs = prefixSubs.slice(0);
|
|
3265
|
-
|
|
4490
|
+
// Ensure newly created CONCAT is properly flattened
|
|
4491
|
+
return Simplify.simplify(prefix);
|
|
3266
4492
|
}
|
|
3267
4493
|
|
|
3268
4494
|
// Some degenerate case like min > max or min < max < 0.
|
|
@@ -3295,6 +4521,13 @@ class Simplify {
|
|
|
3295
4521
|
return sub;
|
|
3296
4522
|
}
|
|
3297
4523
|
|
|
4524
|
+
// Handle impossible targets gracefully.
|
|
4525
|
+
// e.g. Trying to match "NO_MATCH" 0 or 1 times (QUEST/STAR) evaluates to EMPTY_MATCH.
|
|
4526
|
+
if (sub.op === Regexp.Op.NO_MATCH) {
|
|
4527
|
+
if (op === Regexp.Op.PLUS) return sub; // 1+ times is impossible
|
|
4528
|
+
return new Regexp(Regexp.Op.EMPTY_MATCH);
|
|
4529
|
+
}
|
|
4530
|
+
|
|
3298
4531
|
// The operators are idempotent if the flags match.
|
|
3299
4532
|
if (op === sub.op && (flags & RE2Flags.NON_GREEDY) === (sub.flags & RE2Flags.NON_GREEDY)) {
|
|
3300
4533
|
return sub;
|
|
@@ -3302,10 +4535,10 @@ class Simplify {
|
|
|
3302
4535
|
if (re !== null && re.op === op && (re.flags & RE2Flags.NON_GREEDY) === (flags & RE2Flags.NON_GREEDY) && sub === re.subs[0]) {
|
|
3303
4536
|
return re;
|
|
3304
4537
|
}
|
|
3305
|
-
|
|
3306
|
-
|
|
3307
|
-
|
|
3308
|
-
return
|
|
4538
|
+
const nre = new Regexp(op);
|
|
4539
|
+
nre.flags = flags;
|
|
4540
|
+
nre.subs = [sub];
|
|
4541
|
+
return nre;
|
|
3309
4542
|
}
|
|
3310
4543
|
}
|
|
3311
4544
|
|
|
@@ -3651,16 +4884,6 @@ class CharClass {
|
|
|
3651
4884
|
}
|
|
3652
4885
|
}
|
|
3653
4886
|
|
|
3654
|
-
class Pair {
|
|
3655
|
-
static of(first, second) {
|
|
3656
|
-
return new Pair(first, second);
|
|
3657
|
-
}
|
|
3658
|
-
constructor(first, second) {
|
|
3659
|
-
this.first = first;
|
|
3660
|
-
this.second = second;
|
|
3661
|
-
}
|
|
3662
|
-
}
|
|
3663
|
-
|
|
3664
4887
|
// StringIterator: a stream of runes with an opaque cursor, permitting
|
|
3665
4888
|
// rewinding. The units of the cursor are not specified beyond the
|
|
3666
4889
|
// fact that ASCII characters are single width. (Cursor positions
|
|
@@ -3807,18 +5030,59 @@ class Parser {
|
|
|
3807
5030
|
// stride).
|
|
3808
5031
|
static ANY_TABLE = new UnicodeRangeTable(new Uint32Array([0, Unicode.MAX_RUNE, 1]));
|
|
3809
5032
|
|
|
5033
|
+
// Ascii tables
|
|
5034
|
+
static ASCII_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1]));
|
|
5035
|
+
static ASCII_FOLD_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1, 0x017f, 0x017f, 1,
|
|
5036
|
+
// Old English long s (ſ), folds to S/s.
|
|
5037
|
+
0x212a, 0x212a, 1 // Kelvin K, folds to K/k.
|
|
5038
|
+
]));
|
|
5039
|
+
|
|
3810
5040
|
// unicodeTable() returns the Unicode RangeTable identified by name
|
|
3811
5041
|
// and the table of additional fold-equivalent code points.
|
|
3812
5042
|
// Returns null if |name| does not identify a Unicode character range.
|
|
3813
5043
|
static unicodeTable(name) {
|
|
3814
5044
|
if (name === 'Any') {
|
|
3815
|
-
return
|
|
5045
|
+
return {
|
|
5046
|
+
tab: Parser.ANY_TABLE,
|
|
5047
|
+
fold: Parser.ANY_TABLE,
|
|
5048
|
+
sign: 1
|
|
5049
|
+
};
|
|
5050
|
+
}
|
|
5051
|
+
if (name === 'Ascii') {
|
|
5052
|
+
return {
|
|
5053
|
+
tab: Parser.ASCII_TABLE,
|
|
5054
|
+
fold: Parser.ASCII_FOLD_TABLE,
|
|
5055
|
+
sign: 1
|
|
5056
|
+
};
|
|
5057
|
+
}
|
|
5058
|
+
if (name === 'Assigned') {
|
|
5059
|
+
// Assigned is the mathematical inversion of Cn (Unassigned)
|
|
5060
|
+
return {
|
|
5061
|
+
tab: UnicodeTables.CATEGORIES.get('Cn'),
|
|
5062
|
+
fold: UnicodeTables.CATEGORIES.get('Cn'),
|
|
5063
|
+
sign: -1
|
|
5064
|
+
};
|
|
5065
|
+
}
|
|
5066
|
+
if (name === 'Lc') {
|
|
5067
|
+
return {
|
|
5068
|
+
tab: UnicodeTables.CATEGORIES.get('LC'),
|
|
5069
|
+
fold: UnicodeTables.FOLD_CATEGORIES.get('LC'),
|
|
5070
|
+
sign: 1
|
|
5071
|
+
};
|
|
3816
5072
|
}
|
|
3817
5073
|
if (UnicodeTables.CATEGORIES.has(name)) {
|
|
3818
|
-
return
|
|
5074
|
+
return {
|
|
5075
|
+
tab: UnicodeTables.CATEGORIES.get(name),
|
|
5076
|
+
fold: UnicodeTables.FOLD_CATEGORIES.get(name),
|
|
5077
|
+
sign: 1
|
|
5078
|
+
};
|
|
3819
5079
|
}
|
|
3820
5080
|
if (UnicodeTables.SCRIPTS.has(name)) {
|
|
3821
|
-
return
|
|
5081
|
+
return {
|
|
5082
|
+
tab: UnicodeTables.SCRIPTS.get(name),
|
|
5083
|
+
fold: UnicodeTables.FOLD_SCRIPT.get(name),
|
|
5084
|
+
sign: 1
|
|
5085
|
+
};
|
|
3822
5086
|
}
|
|
3823
5087
|
return null;
|
|
3824
5088
|
}
|
|
@@ -4127,7 +5391,7 @@ class Parser {
|
|
|
4127
5391
|
this.flags = flags;
|
|
4128
5392
|
// number of capturing groups seen
|
|
4129
5393
|
this.numCap = 0;
|
|
4130
|
-
this.namedGroups =
|
|
5394
|
+
this.namedGroups = Object.create(null);
|
|
4131
5395
|
// Stack of parsed expressions.
|
|
4132
5396
|
this.stack = [];
|
|
4133
5397
|
this.free = null;
|
|
@@ -4971,9 +6235,11 @@ class Parser {
|
|
|
4971
6235
|
const i = lit.indexOf('\\E');
|
|
4972
6236
|
if (i >= 0) {
|
|
4973
6237
|
lit = lit.substring(0, i);
|
|
6238
|
+
t.skipString(lit);
|
|
6239
|
+
t.skipString('\\E');
|
|
6240
|
+
} else {
|
|
6241
|
+
t.skipString(lit);
|
|
4974
6242
|
}
|
|
4975
|
-
t.skipString(lit);
|
|
4976
|
-
t.skipString('\\E');
|
|
4977
6243
|
let j = 0;
|
|
4978
6244
|
while (j < lit.length) {
|
|
4979
6245
|
const codepoint = lit.codePointAt(j);
|
|
@@ -4989,6 +6255,9 @@ class Parser {
|
|
|
4989
6255
|
t.rewindTo(savedPos);
|
|
4990
6256
|
break;
|
|
4991
6257
|
}
|
|
6258
|
+
} else {
|
|
6259
|
+
// Unconditionally rewind if PERL_X is off, or if string ended abruptly
|
|
6260
|
+
t.rewindTo(savedPos);
|
|
4992
6261
|
}
|
|
4993
6262
|
const re = this.newRegexp(Regexp.Op.CHAR_CLASS);
|
|
4994
6263
|
re.flags = this.flags;
|
|
@@ -5314,8 +6583,11 @@ class Parser {
|
|
|
5314
6583
|
if (pair === null) {
|
|
5315
6584
|
throw new RE2JSSyntaxException(Parser.ERR_INVALID_CHAR_RANGE, t.from(startPos));
|
|
5316
6585
|
}
|
|
5317
|
-
|
|
5318
|
-
|
|
6586
|
+
if (pair.sign < 0) {
|
|
6587
|
+
sign = 0 - sign;
|
|
6588
|
+
}
|
|
6589
|
+
const tab = pair.tab;
|
|
6590
|
+
const fold = pair.fold; // fold-equivalent table
|
|
5319
6591
|
// Variation of CharClass.appendGroup() for tables.
|
|
5320
6592
|
if ((this.flags & RE2Flags.FOLD_CASE) === 0 || fold === null) {
|
|
5321
6593
|
cc.appendTableWithSign(tab, sign);
|
|
@@ -5459,6 +6731,7 @@ class RE2 {
|
|
|
5459
6731
|
res.prefixUTF8 = re2.prefixUTF8;
|
|
5460
6732
|
res.prefixComplete = re2.prefixComplete;
|
|
5461
6733
|
res.prefixRune = re2.prefixRune;
|
|
6734
|
+
res.prefilter = re2.prefilter;
|
|
5462
6735
|
return res;
|
|
5463
6736
|
}
|
|
5464
6737
|
|
|
@@ -5501,8 +6774,10 @@ class RE2 {
|
|
|
5501
6774
|
let re = Parser.parse(expr, mode);
|
|
5502
6775
|
const maxCap = re.maxCap();
|
|
5503
6776
|
re = Simplify.simplify(re);
|
|
6777
|
+
const prefilter = PrefilterTree.build(re);
|
|
5504
6778
|
const prog = Compiler.compileRegexp(re);
|
|
5505
6779
|
const re2 = new RE2(expr, prog, maxCap, longest);
|
|
6780
|
+
re2.prefilter = prefilter.type === Prefilter.Type.NONE ? null : prefilter;
|
|
5506
6781
|
const [prefixCompl, prefixStr] = prog.prefix();
|
|
5507
6782
|
re2.prefixComplete = prefixCompl;
|
|
5508
6783
|
re2.prefix = prefixStr;
|
|
@@ -5534,12 +6809,78 @@ class RE2 {
|
|
|
5534
6809
|
this.prefixComplete = false; // true if prefix is the entire regexp
|
|
5535
6810
|
this.prefixRune = 0; // first rune in prefix
|
|
5536
6811
|
this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
|
|
5537
|
-
this.dfa = new DFA(prog); //
|
|
6812
|
+
this.dfa = new DFA(this.prog); // initialize Lazy DFA
|
|
6813
|
+
this.onepass = OnePass.compile(this.prog); // compile OnePass
|
|
6814
|
+
this.prefilter = null;
|
|
6815
|
+
}
|
|
6816
|
+
matchPrefixComplete(input, pos, anchor, ncap) {
|
|
6817
|
+
// If strictly anchored, execution must start at 0
|
|
6818
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
6819
|
+
return null;
|
|
6820
|
+
}
|
|
6821
|
+
let matchStart = -1;
|
|
6822
|
+
let matchEnd = -1;
|
|
6823
|
+
const pLen = input.prefixLength(this);
|
|
6824
|
+
if (anchor === RE2Flags.UNANCHORED) {
|
|
6825
|
+
const idx = input.index(this, pos);
|
|
6826
|
+
if (idx < 0) return null;
|
|
6827
|
+
matchStart = pos + idx;
|
|
6828
|
+
matchEnd = matchStart + pLen;
|
|
6829
|
+
} else if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
6830
|
+
if (input.endPos() !== pLen) return null;
|
|
6831
|
+
const idx = input.index(this, 0);
|
|
6832
|
+
if (idx !== 0) return null;
|
|
6833
|
+
matchStart = 0;
|
|
6834
|
+
matchEnd = pLen;
|
|
6835
|
+
} else if (anchor === RE2Flags.ANCHOR_START) {
|
|
6836
|
+
const idx = input.index(this, 0);
|
|
6837
|
+
if (idx !== 0) return null;
|
|
6838
|
+
matchStart = 0;
|
|
6839
|
+
matchEnd = pLen;
|
|
6840
|
+
}
|
|
6841
|
+
if (matchStart < 0) return null;
|
|
6842
|
+
|
|
6843
|
+
// If captures are requested (e.g. findSubmatch instead of test), populate bounds
|
|
6844
|
+
if (ncap > 0) {
|
|
6845
|
+
const matchcap = new Int32Array(ncap).fill(-1);
|
|
6846
|
+
matchcap[0] = matchStart;
|
|
6847
|
+
matchcap[1] = matchEnd;
|
|
6848
|
+
return Array.from(matchcap);
|
|
6849
|
+
}
|
|
6850
|
+
return []; // Matched successfully, but no capture data requested
|
|
5538
6851
|
}
|
|
5539
6852
|
executeEngine(input, pos, anchor, ncap) {
|
|
6853
|
+
// LITERAL FAST PATH
|
|
6854
|
+
// If the entire regex is just a literal string (and no nested capture boundaries are requested),
|
|
6855
|
+
// bypass all state machines and execute via V8's blistering fast native indexOf
|
|
6856
|
+
if (this.prefixComplete && (ncap === 0 || this.numSubexp === 0)) {
|
|
6857
|
+
return this.matchPrefixComplete(input, pos, anchor, ncap);
|
|
6858
|
+
}
|
|
6859
|
+
|
|
6860
|
+
// PREFILTER FAST PATH
|
|
6861
|
+
// If the unanchored query requires specific literal strings (e.g. "a.*b"),
|
|
6862
|
+
// verify those strings exist using high-speed JS string searches before waking up the state engines.
|
|
6863
|
+
if (this.prefilter !== null && anchor === RE2Flags.UNANCHORED) {
|
|
6864
|
+
if (!this.prefilter.eval(input, pos)) {
|
|
6865
|
+
return null;
|
|
6866
|
+
}
|
|
6867
|
+
}
|
|
6868
|
+
|
|
6869
|
+
// FAST PATH: OnePass DFA engine.
|
|
6870
|
+
// If compiled successfully, it perfectly supports capture groups
|
|
6871
|
+
// and is blisteringly fast since it skips thread queues completely.
|
|
6872
|
+
if (this.onepass !== null) {
|
|
6873
|
+
return OnePass.execute(this, input, pos, anchor, ncap);
|
|
6874
|
+
}
|
|
6875
|
+
|
|
5540
6876
|
// If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
|
|
5541
6877
|
// We must use the NFA.
|
|
5542
6878
|
if (ncap > 0) {
|
|
6879
|
+
// Backtracker bit-state execution bounds check
|
|
6880
|
+
if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
|
|
6881
|
+
return Backtracker.execute(this, input, pos, anchor, ncap);
|
|
6882
|
+
}
|
|
6883
|
+
// NFA execution
|
|
5543
6884
|
return this.doExecuteNFA(input, pos, anchor, ncap);
|
|
5544
6885
|
}
|
|
5545
6886
|
const dfaResult = this.dfa.match(input, pos, anchor);
|
|
@@ -5548,6 +6889,11 @@ class RE2 {
|
|
|
5548
6889
|
return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
|
|
5549
6890
|
}
|
|
5550
6891
|
|
|
6892
|
+
// Backtracker bit-state execution bounds check
|
|
6893
|
+
if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
|
|
6894
|
+
return Backtracker.execute(this, input, pos, anchor, ncap);
|
|
6895
|
+
}
|
|
6896
|
+
|
|
5551
6897
|
// Fallback to NFA
|
|
5552
6898
|
return this.doExecuteNFA(input, pos, anchor, ncap);
|
|
5553
6899
|
}
|
|
@@ -6128,6 +7474,50 @@ class RE2 {
|
|
|
6128
7474
|
}
|
|
6129
7475
|
}
|
|
6130
7476
|
|
|
7477
|
+
/**
 * A collection of patterns compiled into a single program so that one call
 * to `match()` can test an input against all of them.
 *
 * Lifecycle: `add()` patterns, then `compile()` (or let `match()` compile
 * lazily). Once compiled, no further patterns can be added.
 */
class RE2Set {
  /**
   * @param {number} [anchor=RE2Flags.UNANCHORED] anchor mode forwarded to the
   *   DFA and NFA `matchSet` calls.
   * @param {number} [flags=RE2Flags.PERL] parser flags applied to every
   *   pattern passed to `add()`.
   */
  constructor(anchor = RE2Flags.UNANCHORED, flags = RE2Flags.PERL) {
    this.anchor = anchor;
    this.flags = flags;
    // Parsed and simplified patterns, in insertion order.
    this.regexps = [];
    // The three fields below stay null until compile() succeeds.
    this.prog = null;
    this.dfa = null;
    this.dummyRe2 = null;
  }

  /**
   * Parses and simplifies `pattern` and appends it to the set.
   *
   * @param {string} pattern regular expression source handed to `Parser.parse`.
   * @returns {number} zero-based index of the pattern within the set.
   * @throws {RE2JSCompileException} if the set has already been compiled.
   */
  add(pattern) {
    if (this.prog) {
      throw new RE2JSCompileException('Cannot add patterns after compile');
    }
    const parsed = Parser.parse(pattern, this.flags);
    this.regexps.push(Simplify.simplify(parsed));
    return this.regexps.length - 1;
  }

  /**
   * Compiles every added pattern into one program and prepares the DFA and
   * the RE2-shaped descriptor used by the NFA fallback. Calling it again
   * after a successful compile is a no-op.
   */
  compile() {
    if (this.prog) {
      return;
    }
    // Build everything in locals first. Assigning `this.prog` before the DFA
    // and start condition exist would leave the set half-compiled if a later
    // step throws: `match()` would skip recompiling and dereference a null
    // `dfa`, while `add()` would refuse any further patterns.
    const prog = Compiler.compileSet(this.regexps);
    const dfa = new DFA(prog);
    // Minimal stand-in exposing the fields passed to `Machine.fromRE2`.
    const dummyRe2 = {
      prog,
      cond: prog.startCond(),
      prefix: '',
      prefixRune: 0,
      longest: false
    };
    this.prog = prog;
    this.dfa = dfa;
    this.dummyRe2 = dummyRe2;
  }

  /**
   * Matches `input` against the whole set, compiling first if necessary.
   *
   * @param {string|number[]} input an Array is wrapped with
   *   `MachineInput.fromUTF8`; anything else goes to `MachineInput.fromUTF16`.
   *   NOTE(review): `Array.isArray` is false for typed arrays such as
   *   Uint8Array, so those take the UTF-16 path - confirm that is intended.
   * @returns {*} the result of `matchSet` - presumably the indices of the
   *   patterns that matched; confirm against `DFA.matchSet`/`Machine.matchSet`.
   */
  match(input) {
    if (!this.prog) {
      this.compile();
    }
    const machineInput = Array.isArray(input)
      ? MachineInput.fromUTF8(input)
      : MachineInput.fromUTF16(input);

    // Fast path: the DFA answers directly unless it returns null.
    const dfaResult = this.dfa.matchSet(machineInput, 0, this.anchor);
    if (dfaResult !== null) {
      return dfaResult;
    }

    // Fallback: the DFA gave up (word boundaries such as \b, or state
    // explosion), so run the NFA machine over the same input.
    const machine = Machine.fromRE2(this.dummyRe2);
    machine.init(0);
    return machine.matchSet(machineInput, 0, this.anchor);
  }
}
|
|
7520
|
+
|
|
6131
7521
|
/**
|
|
6132
7522
|
* Transform JS regex string to RE2 regex string
|
|
6133
7523
|
*/
|
|
@@ -6210,7 +7600,8 @@ class TranslateRegExpString {
|
|
|
6210
7600
|
default:
|
|
6211
7601
|
{
|
|
6212
7602
|
result += '\\';
|
|
6213
|
-
let
|
|
7603
|
+
let cp = data.codePointAt(i + 1);
|
|
7604
|
+
let symSize = Utils.charCount(cp);
|
|
6214
7605
|
result += data.substring(i + 1, i + 1 + symSize);
|
|
6215
7606
|
i += symSize + 1;
|
|
6216
7607
|
continue;
|
|
@@ -6230,7 +7621,8 @@ class TranslateRegExpString {
|
|
|
6230
7621
|
continue;
|
|
6231
7622
|
}
|
|
6232
7623
|
}
|
|
6233
|
-
let
|
|
7624
|
+
let cp = data.codePointAt(i);
|
|
7625
|
+
let symSize = Utils.charCount(cp);
|
|
6234
7626
|
result += data.substring(i, i + symSize);
|
|
6235
7627
|
i += symSize;
|
|
6236
7628
|
}
|
|
@@ -6587,5 +7979,5 @@ class RE2JS {
|
|
|
6587
7979
|
}
|
|
6588
7980
|
}
|
|
6589
7981
|
|
|
6590
|
-
export { Matcher, RE2JS, RE2JSCompileException, RE2JSException, RE2JSFlagsException, RE2JSGroupException, RE2JSSyntaxException };
|
|
7982
|
+
export { Matcher, RE2JS, RE2JSCompileException, RE2JSException, RE2JSFlagsException, RE2JSGroupException, RE2JSInternalException, RE2JSSyntaxException, RE2Set };
|
|
6591
7983
|
//# sourceMappingURL=index.esm.js.map
|