re2js 2.0.2 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +111 -30
- package/build/index.cjs.cjs +1557 -163
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +71 -0
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +1556 -164
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +1557 -163
- package/build/index.umd.js.map +1 -1
- package/package.json +2 -2
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.0
|
|
5
|
+
* @version v2.1.0
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -60,6 +60,23 @@
|
|
|
60
60
|
/**
|
|
61
61
|
* Various constants and helper for unicode codepoints.
|
|
62
62
|
*/
|
|
63
|
+
const ASCII_SIZE = 128;
|
|
64
|
+
const ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
|
|
65
|
+
const ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
|
|
66
|
+
for (let i = 0; i < ASCII_SIZE; i++) {
|
|
67
|
+
if (i >= 97 && i <= 122) {
|
|
68
|
+
// a-z
|
|
69
|
+
ASCII_TO_UPPER[i] = i - 32;
|
|
70
|
+
} else {
|
|
71
|
+
ASCII_TO_UPPER[i] = i;
|
|
72
|
+
}
|
|
73
|
+
if (i >= 65 && i <= 90) {
|
|
74
|
+
// A-Z
|
|
75
|
+
ASCII_TO_LOWER[i] = i + 32;
|
|
76
|
+
} else {
|
|
77
|
+
ASCII_TO_LOWER[i] = i;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
63
80
|
class Codepoint {
|
|
64
81
|
// codePointAt(0)
|
|
65
82
|
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
@@ -67,6 +84,7 @@
|
|
|
67
84
|
// convert unicode codepoint to upper case codepoint
|
|
68
85
|
// return same codepoint, if cannot do it (or codepoint not have upper variation)
|
|
69
86
|
static toUpperCase(codepoint) {
|
|
87
|
+
if (codepoint < ASCII_SIZE) return ASCII_TO_UPPER[codepoint];
|
|
70
88
|
const s = String.fromCodePoint(codepoint).toUpperCase();
|
|
71
89
|
if (s.length > 1) {
|
|
72
90
|
return codepoint;
|
|
@@ -81,6 +99,7 @@
|
|
|
81
99
|
// convert unicode codepoint to lower case codepoint
|
|
82
100
|
// return same codepoint, if cannot do it (or codepoint not have lower variation)
|
|
83
101
|
static toLowerCase(codepoint) {
|
|
102
|
+
if (codepoint < ASCII_SIZE) return ASCII_TO_LOWER[codepoint];
|
|
84
103
|
const s = String.fromCodePoint(codepoint).toLowerCase();
|
|
85
104
|
if (s.length > 1) {
|
|
86
105
|
return codepoint;
|
|
@@ -203,9 +222,11 @@
|
|
|
203
222
|
C: () => new UnicodeRangeTable(decodeRanges('AfBgDgBBOrWrWBHHBCBICCVuMuMnBBBzBBBE4B4BBGBcDBHQBXhGhGxBBB8BBBmDNB8BBByBBBQddBCCMEBgBHBsCiFiFJBBDBBXIICCBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBPMMBEB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKMMBDBbEByBPBDBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBjoIBvLBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCB-FCBHBBHBBHBBECBIIIBLBDBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIB-BGGBLBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMBxhBPBXJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBF-6DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBrCHBxDUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQB7
BEBCvOBrB1GBsBDBC-OBIFFDQQmGBBRoBBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEvq3VBxHvw-FB', false)),
|
|
204
223
|
Cc: () => new UnicodeRangeTable(decodeRanges('AfgDgB', true)),
|
|
205
224
|
Cf: () => new UnicodeRangeTable(decodeRanges('tFzqBzqBBEBXhGhGyBhMhMBxCxCs5D9-B9-BBDBbEByBEBCJBw03B6H6HBBBimEQQj7IPBhjiBDBwmFHBn0rYffB+CB', false)),
|
|
225
|
+
Cn: () => new UnicodeRangeTable(decodeRanges('4bBBHDBICCVuMuMnBBBzBBBE4B4BBGBcDBHKBvI9B9BBmDmDBMB8BBByBBBQddBCCMEBgBDDBDBuHJJBDDBXXICCBBBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBbFB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKmDmDNBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBDBvzIBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCC-FCBHBBHBBHBBECBIIIBIBGBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIBlCJBCBBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMB3iBJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBJ76DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBjGUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQB7BEBCvOBrB1GBsBDBC-OBIFFDQQmGBBRo
BBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEwi3VBCdBhD-DBxHvw-BB---BBB---BBB', false)),
|
|
206
226
|
Co: () => new UnicodeRangeTable(decodeRanges('gg4B-nGh4hc9--BD9--B', true)),
|
|
207
227
|
Cs: () => new UnicodeRangeTable(decodeRanges('gg2B--B', true)),
|
|
208
228
|
L: () => new UnicodeRangeTable(decodeRanges('hCZBHZBwBLLFGGBVBCeBCpOBFLBPEBICCiEEBCBBDDBCHHCCBCCCBSBCyCBCqEBJlFBClBBDHHBnBBoCaBFDBuBqBBkBBBCiDBCQQBIIBLLBBBDRRCdBe4CBMZZBfBKBBFGGBUBFKKEYYBXBIKBGXBCFBSpBB7B1BBETTIJBQPBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNGB7BBBCCCBDBCXBCCCBIBCBBKDDBDBCWWBCBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNSSBkBBCGGDqBBCsKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPBR1CBDFBErTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBkBFFkC4CBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBzC+C+CBtBBSHB3BdBOBBLrBBbjBBqBCBLjBBDKBGqBBDCBqBDBCFBCBBEGGB+FBhC1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGB1DOORMBmDFFDJBCEEBDBHGCBCBCKBDDBGEBF1B1BB8zC8zCBjHBHDBEBBNlBBCGGD3BBIRRBVBKGBCGBCGBCGBCGBCGBCGBCGBxC2O2OBrBrBBDBGBBF1CBHCBC5CBCDBGqBBC9CBSfBxBPBhQ-tGBhCs0VBkCtBBDsIBEPBLBBVuBBReBDlCByBIBDmDBDiCBDBBCCCBGBWPBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBdRRBDBCJBLEBCoBBYCBCHBVWBEEEBwBBCEEBDDBDBDCCZCBDKBICBNFBDFBDFBKGBCGBCqBBCNBHyDBej9KBNWBFwBBloItLBDpDBnBGBNEBGCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBlBZBHZBM4CBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBxB9EBTjBBFjBBFnBBJzBBNKBCOBCGBCBBCKBCOBCGBCBBEzBBN2JBKVBLHBZFBCpBBCIBmCFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4ByBBOyBBOjBBnBbBKWB7HpBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB1D-BBgBHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBBRDBgBUBGSBxNsBB0G-BBhEqCBGjCjCBLBhCBBCddB2-FBJ1mBBqBJBo3IDBCGBCBBCiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBn7F0CBCmCBCBBDDDBDDB
CBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BeBHFBmI9BBzEsBBLGBRiKiKBcBTrBBlPbBlHdBDwPwPBFBCDBCBBCOBCkGB8BjCBI1lB1lBBCBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
|
|
229
|
+
LC: () => new UnicodeRangeTable(decodeRanges('hCZBHZB7BLLBVBCeBCiGBCDBFvGBCaBhGDBDBBECBCHHCCBCCCBSBCyCBCqEBJlFBClBBKoBB44ClBBCGGDqBBDCBhV1CBDFBjkCKBGqBBDCBhCrBBgCMBChBBmD1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGBmIFFDJBCEEBDBHGCBCBCFBFDDBCBGEBF1B1BB8zC8zCB6DBDmDBHDBEBBNlBBCGGzoetBBTbBnEtCBCWBEDBC9BBDBBCCCBGBZBBE2Z2ZBpBBGIBIvCBh6TGBNEBqgBZBHZBmlBvCBhDjBBFjBB1DKBCOBCGBCBBCKBCOBCGBCBBk2ByBBOyBB+CVBLVB74C-BBhrV-BBhsZ0CBCmCBCBBDDDBDDBCBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BJBCTBHFB2uCjCB', false)),
|
|
209
230
|
Ll: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBC2BCBQCBuBCDECBBBDCCDEEBFFDEEBBBDDDCCCDCCBCCDEECDDBDDBBBHGDCOCBSCBDDCEEC4BCBFBDDDBCCFICBjCBCaBiGCCEEEBBBTccBhBBCBBECBCWCBDBCGDB0B0BBuBBCgBCK0BCDMCBgDCxBoBBo6CqBBDCB5XFBjkCIBC2D2DBqBBgCMBChBBnD0ECBHBCgDCBHBJFBLHBJHBJFBLHBJHBJNBDHBJHBJHBJEBCBBHEEBBBCBBJDBDBBJHBLCBCBBzIEEBEEcKFDBBJDBF2B2Bs1CvBBCEEBGCFCCBCCBEBGiDCBIICFFNlBBCGG0oesBCUaCoEMCBBBC+BCBGBCCCDICFCCDCCBBBCSCGGGCMCFCCDEECICbEE2ZqBBGIBIvCBh6TGBNEBqhBZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfB7sZZBbGBCRBbZBbDBCCCBFBCKBbZBbZBbZBbZBbZBbZBbZBbZBbbBdYBCFBbYBCFBbYBCFBbYBCFBbYBCFBC15B15BBIBCTBHFB4vChBB', false)),
|
|
210
231
|
Lm: () => new UnicodeRangeTable(decodeRanges('wVRBFLBPEBICCmEGG-OnHnHlFBBuIBBFgBgBKEEhFoFoF1mBgEgE2R72B72BsDkTkTxOFBvF+BBOjBjBBjBByVOORMBg-CBByHgGgG2OsBsBBDBGiDiDB+C+CBBB34bjnBjnBBEBvIzDzDdBB6DIBxCYYqDCBEBB2OXXqEtDtDWBBoDDBKngVngVuBBBh-BFBCpBBCIB0sBhBhBxuXDB9PCBpBBBnRMBhCBBCtgQtgQBCBCGBCBByhM9BBqGGBudgjBgjB', false)),
|
|
211
232
|
Lo: () => new UnicodeRangeTable(decodeRanges('qFQQhIFFBCBxG8Z8ZBZBFDBuBfBCJBkBBBCiDBCZZBLLBBBDRRCdBe4CBMZZBfBWVBrBYBIKBGXBCFBSoBB8B1BBETTIJBROBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNFB8BBBCCCBDBCXBCCCBIBCBBKDDBDBYDBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNyDyDBnKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPByDrTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBpBkCkCBhBBC0BBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBxFuBBSHB3BdBOBBLrBBbjBBqBCBLdByDDBCFBCBBE7hB7hBBCB4-C3BBZWBKGBCGBCGBCGBCGBCGBCGBCGBoR2B2BF1CBJCCB4CBFGGBpBBC9CBSfBxBPBhQ-tGBhC0wUBC2jBBkCnBBJrIBFPBLBBjCyByBBkCBqFoDoDEGBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBuBEBDIBLEBCoBBYCBCHBVPBCFBEEEBwBBCEEBDDBDBDCCZBBEKBIPPBEBDFBDFBKGBCGByEiBBej9KBNWBFwBBloItLBDpDBkCCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBqDJBCsBBDeBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBhEtCBjDnBBJzBB9CzBBN2JBKVBLHB5EFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4FjBBnBDBCxJxJBoBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB0GHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBB0BUBGSB0NnBB2MqCBGwFwFB2-FBJ1mBBqBJB43IiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBxzI2P2PBrBBiBiKiKBcBTrBBlPaBmHdBDwPwPBFBCDBCBBCOBCkGB8pBDBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
|
|
@@ -414,8 +435,11 @@
|
|
|
414
435
|
Zanabazar_Square: () => new UnicodeRangeTable(decodeRanges('gwmCnC', true))
|
|
415
436
|
});
|
|
416
437
|
static FOLD_CATEGORIES = new LazyMap({
|
|
417
|
-
|
|
418
|
-
|
|
438
|
+
C: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
|
|
439
|
+
Cn: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
|
|
440
|
+
L: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
|
|
441
|
+
LC: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
|
|
442
|
+
Ll: () => new UnicodeRangeTable(decodeRanges('hCZBmDWBCGBiBuBCEECDOCDuBCBECEBBCCCBCCBBBDDBCBBCCBEBBCBBCECBCCDCCBCCBBBCCCBEEIBBCBBCBBCOCDQCDBBCCCBBBC4BCIBBCBBDCCBCBCGC3HrBrBCEEJHHCCBCCCBCCBPBCIBkBJJCUCGDDCBBDyBBxBgBCK2BCBMCD+CCDlBBq6ClBBCGGzW1CB0kCHHBpBBDCBhK0ECKgDCKHBJFBLHBJHBJFBMGCJHBZHBJHBJHBJEBMEBMDBNEBMEBqJEEBHHxC9zC9zCBuBBxBCCBBBDGCBCBCDDJCBCgDCJCCFuqeuqeCqBCUaCoEMCE8BCLECBICFCCDCCEUCBDBCEBCOCBCBCCCBEECKCZs5Vs5VBYBmmBnBBpEjBB9EKBCOBCGBCBBr3ByBB+EVB75CfBhsVfBh1ehBB', false)),
|
|
419
443
|
Lt: () => new UnicodeRangeTable(decodeRanges('kOCCBCCBCClBCCtsHHBJHBJHBMQQwBAB', false)),
|
|
420
444
|
Lu: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBCuBCEECDOCDsBCDECBBBDCCDEEGDDECBDDDCCCDFFDEECDDECCGBBCBBCBBCOCBSCDBBCEECkBCEQCJDDBCCFICBEBCBBCCCBEEBCCBCBCEBDCCBDDIDDCBBEFBGLLBnFnFsBCCEEEBBBvBDBCdBCBBECBCWCBDBCGD1BvBBCgBCK0BCDMCBgDCyBlBBq6CqBBDCB5XFBjkCIBCvHvHERRzD0ECGGGC8CCBHBJFBLHBJHBJFBMGCJHBJNBzBBBNSSBPPBEEpL2B2Bs1CvBBCEEBGCHDDLiDCJCCFNNBkBBCGG0oesBCUaCoEMCE8BCLCCDICFFFCBBDSCMOCFCCDEEGECb9a9advCBi8UZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfBj1ehBB', false)),
|
|
421
445
|
M: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
|
|
@@ -424,7 +448,9 @@
|
|
|
424
448
|
static FOLD_SCRIPT = new LazyMap({
|
|
425
449
|
Common: () => new UnicodeRangeTable(decodeRanges('8cgBgB', false)),
|
|
426
450
|
Greek: () => new UnicodeRangeTable(decodeRanges('1FwUwU', false)),
|
|
427
|
-
Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false))
|
|
451
|
+
Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
|
|
452
|
+
Latin: () => new UnicodeRangeTable(decodeRanges('y+pBCC', false)),
|
|
453
|
+
Unknown: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false))
|
|
428
454
|
});
|
|
429
455
|
}
|
|
430
456
|
|
|
@@ -678,7 +704,7 @@
|
|
|
678
704
|
|
|
679
705
|
// Returns the array of runes in the specified Java UTF-16 string.
|
|
680
706
|
static stringToRunes(str) {
|
|
681
|
-
return String(str)
|
|
707
|
+
return Array.from(String(str)).map(s => s.codePointAt(0));
|
|
682
708
|
}
|
|
683
709
|
|
|
684
710
|
// Returns the Java UTF-16 string containing the single rune |r|.
|
|
@@ -949,6 +975,14 @@
|
|
|
949
975
|
endPos() {
|
|
950
976
|
return this.end;
|
|
951
977
|
}
|
|
978
|
+
hasString() {
|
|
979
|
+
return false;
|
|
980
|
+
}
|
|
981
|
+
|
|
982
|
+
// Helper for the exact-literal fast-path execution router
|
|
983
|
+
prefixLength() {
|
|
984
|
+
return 0;
|
|
985
|
+
}
|
|
952
986
|
}
|
|
953
987
|
|
|
954
988
|
// An implementation of MachineInput for UTF-8 byte arrays.
|
|
@@ -960,6 +994,14 @@
|
|
|
960
994
|
this.start = start;
|
|
961
995
|
this.end = end;
|
|
962
996
|
}
|
|
997
|
+
hasString(prefilter, pos) {
|
|
998
|
+
const target = prefilter.bytes;
|
|
999
|
+
if (target.length === 0) return true;
|
|
1000
|
+
|
|
1001
|
+
// Reuse the high-speed indexOf method already implemented below
|
|
1002
|
+
const idx = this.indexOf(this.bytes, target, this.start + pos);
|
|
1003
|
+
return idx !== -1 && idx <= this.end - target.length;
|
|
1004
|
+
}
|
|
963
1005
|
|
|
964
1006
|
// Returns the rune at the specified index; the units are
|
|
965
1007
|
// unspecified, but could be UTF-8 byte, UTF-16 char, or rune
|
|
@@ -1036,10 +1078,10 @@
|
|
|
1036
1078
|
indexOf(source, target, fromIndex = 0) {
|
|
1037
1079
|
let targetLength = target.length;
|
|
1038
1080
|
if (targetLength === 0) {
|
|
1039
|
-
return -1;
|
|
1081
|
+
return fromIndex <= this.end ? fromIndex : -1;
|
|
1040
1082
|
}
|
|
1041
|
-
let
|
|
1042
|
-
for (let i = fromIndex; i <=
|
|
1083
|
+
let limit = this.end - targetLength;
|
|
1084
|
+
for (let i = fromIndex; i <= limit; i++) {
|
|
1043
1085
|
for (let j = 0; j < targetLength; j++) {
|
|
1044
1086
|
if (source[i + j] !== target[j]) {
|
|
1045
1087
|
break;
|
|
@@ -1050,6 +1092,9 @@
|
|
|
1050
1092
|
}
|
|
1051
1093
|
return -1;
|
|
1052
1094
|
}
|
|
1095
|
+
prefixLength(re2) {
|
|
1096
|
+
return re2.prefixUTF8.length;
|
|
1097
|
+
}
|
|
1053
1098
|
}
|
|
1054
1099
|
|
|
1055
1100
|
// |pos| and |width| are in JS "char" units.
|
|
@@ -1060,6 +1105,10 @@
|
|
|
1060
1105
|
this.start = start;
|
|
1061
1106
|
this.end = end;
|
|
1062
1107
|
}
|
|
1108
|
+
hasString(prefilter, pos) {
|
|
1109
|
+
const idx = this.charSequence.indexOf(prefilter.str, this.start + pos);
|
|
1110
|
+
return idx !== -1 && idx <= this.end - prefilter.str.length;
|
|
1111
|
+
}
|
|
1063
1112
|
|
|
1064
1113
|
// Returns the rune at the specified index; the units are
|
|
1065
1114
|
// unspecified, but could be UTF-8 byte, UTF-16 char, or rune
|
|
@@ -1105,6 +1154,9 @@
|
|
|
1105
1154
|
const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
|
|
1106
1155
|
return Utils.emptyOpContext(r1, r2);
|
|
1107
1156
|
}
|
|
1157
|
+
prefixLength(re2) {
|
|
1158
|
+
return re2.prefix.length;
|
|
1159
|
+
}
|
|
1108
1160
|
}
|
|
1109
1161
|
class MachineInput {
|
|
1110
1162
|
static fromUTF8(bytes, start = 0, end = bytes.length) {
|
|
@@ -1195,6 +1247,17 @@
|
|
|
1195
1247
|
}
|
|
1196
1248
|
}
|
|
1197
1249
|
|
|
1250
|
+
/**
|
|
1251
|
+
* An exception thrown for internal engine errors, such as corrupted bytecodes.
|
|
1252
|
+
*/
|
|
1253
|
+
class RE2JSInternalException extends RE2JSException {
|
|
1254
|
+
/** @param {string} message */
|
|
1255
|
+
constructor(message) {
|
|
1256
|
+
super(message);
|
|
1257
|
+
this.name = 'RE2JSInternalException';
|
|
1258
|
+
}
|
|
1259
|
+
}
|
|
1260
|
+
|
|
1198
1261
|
/**
|
|
1199
1262
|
* A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
|
|
1200
1263
|
*
|
|
@@ -1397,6 +1460,23 @@
|
|
|
1397
1460
|
}
|
|
1398
1461
|
return this.substring(start, end);
|
|
1399
1462
|
}
|
|
1463
|
+
|
|
1464
|
+
/**
|
|
1465
|
+
* Returns a dictionary map of all named capturing groups and their matched values.
|
|
1466
|
+
* If a group was not matched, its value will be `null`.
|
|
1467
|
+
* @returns {Record<string, string|null>}
|
|
1468
|
+
*/
|
|
1469
|
+
getNamedGroups() {
|
|
1470
|
+
if (!this.hasMatch) {
|
|
1471
|
+
throw new RE2JSGroupException('perhaps no match attempted');
|
|
1472
|
+
}
|
|
1473
|
+
const result = {};
|
|
1474
|
+
for (const name of Object.keys(this.namedGroups)) {
|
|
1475
|
+
result[name] = this.group(name);
|
|
1476
|
+
}
|
|
1477
|
+
return result;
|
|
1478
|
+
}
|
|
1479
|
+
|
|
1400
1480
|
/**
|
|
1401
1481
|
* Returns the number of subgroups in this pattern.
|
|
1402
1482
|
*
|
|
@@ -1821,16 +1901,20 @@
|
|
|
1821
1901
|
}
|
|
1822
1902
|
return r === r0;
|
|
1823
1903
|
}
|
|
1824
|
-
|
|
1825
|
-
//
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1904
|
+
const len = this.runes.length;
|
|
1905
|
+
// If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
|
|
1906
|
+
if (len === 2 || len === 4 || len === 6 || len === 8) {
|
|
1907
|
+
for (let j = 0; j < len; j += 2) {
|
|
1908
|
+
if (r < this.runes[j]) {
|
|
1909
|
+
return false;
|
|
1910
|
+
}
|
|
1911
|
+
if (r <= this.runes[j + 1]) {
|
|
1912
|
+
return true;
|
|
1913
|
+
}
|
|
1832
1914
|
}
|
|
1915
|
+
return false; // Stop here
|
|
1833
1916
|
}
|
|
1917
|
+
|
|
1834
1918
|
// Otherwise binary search.
|
|
1835
1919
|
let lo = 0;
|
|
1836
1920
|
let hi = this.runes.length / 2 | 0;
|
|
@@ -1848,6 +1932,40 @@
|
|
|
1848
1932
|
}
|
|
1849
1933
|
return false;
|
|
1850
1934
|
}
|
|
1935
|
+
|
|
1936
|
+
// matchRunePos checks whether the instruction matches (and consumes) r.
|
|
1937
|
+
// If so, it returns the index of the matching rune pair.
|
|
1938
|
+
// If not, it returns -1.
|
|
1939
|
+
matchRunePos(r) {
|
|
1940
|
+
if (this.runes.length === 1) {
|
|
1941
|
+
const r0 = this.runes[0];
|
|
1942
|
+
if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
|
|
1943
|
+
return Unicode.equalsIgnoreCase(r0, r) ? 0 : -1;
|
|
1944
|
+
}
|
|
1945
|
+
return r === r0 ? 0 : -1;
|
|
1946
|
+
}
|
|
1947
|
+
const len = this.runes.length;
|
|
1948
|
+
if (len === 2 || len === 4 || len === 6 || len === 8) {
|
|
1949
|
+
for (let j = 0; j < len; j += 2) {
|
|
1950
|
+
if (r < this.runes[j]) return -1;
|
|
1951
|
+
if (r <= this.runes[j + 1]) return Math.floor(j / 2);
|
|
1952
|
+
}
|
|
1953
|
+
return -1;
|
|
1954
|
+
}
|
|
1955
|
+
let lo = 0;
|
|
1956
|
+
let hi = Math.floor(len / 2);
|
|
1957
|
+
while (lo < hi) {
|
|
1958
|
+
const m = lo + hi >> 1;
|
|
1959
|
+
const c = this.runes[2 * m];
|
|
1960
|
+
if (c <= r) {
|
|
1961
|
+
if (r <= this.runes[2 * m + 1]) return m;
|
|
1962
|
+
lo = m + 1;
|
|
1963
|
+
} else {
|
|
1964
|
+
hi = m;
|
|
1965
|
+
}
|
|
1966
|
+
}
|
|
1967
|
+
return -1;
|
|
1968
|
+
}
|
|
1851
1969
|
/**
|
|
1852
1970
|
*
|
|
1853
1971
|
* @returns {string}
|
|
@@ -1863,7 +1981,7 @@
|
|
|
1863
1981
|
case Inst.EMPTY_WIDTH:
|
|
1864
1982
|
return `empty ${this.arg} -> ${this.out}`;
|
|
1865
1983
|
case Inst.MATCH:
|
|
1866
|
-
return
|
|
1984
|
+
return `match${this.arg !== 0 ? ` ${this.arg}` : ''}`;
|
|
1867
1985
|
case Inst.FAIL:
|
|
1868
1986
|
return 'fail';
|
|
1869
1987
|
case Inst.NOP:
|
|
@@ -1889,7 +2007,7 @@
|
|
|
1889
2007
|
class Thread {
|
|
1890
2008
|
constructor() {
|
|
1891
2009
|
this.inst = null;
|
|
1892
|
-
this.cap =
|
|
2010
|
+
this.cap = null; // Initialized to Int32Array later
|
|
1893
2011
|
}
|
|
1894
2012
|
}
|
|
1895
2013
|
|
|
@@ -1917,9 +2035,11 @@
|
|
|
1917
2035
|
return j;
|
|
1918
2036
|
}
|
|
1919
2037
|
clear() {
|
|
1920
|
-
|
|
1921
|
-
this.
|
|
1922
|
-
|
|
2038
|
+
// Prevent memory leaks by nulling out used object references
|
|
2039
|
+
for (let i = 0; i < this.size; i++) {
|
|
2040
|
+
this.denseThreads[i] = null;
|
|
2041
|
+
}
|
|
2042
|
+
// The sparse set logic safely ignores stale integers in Typed Arrays.
|
|
1923
2043
|
this.size = 0;
|
|
1924
2044
|
}
|
|
1925
2045
|
toString() {
|
|
@@ -1948,7 +2068,8 @@
|
|
|
1948
2068
|
m.pool = [];
|
|
1949
2069
|
m.poolSize = 0;
|
|
1950
2070
|
m.matched = false;
|
|
1951
|
-
|
|
2071
|
+
// Use Int32Array instead of standard JS array
|
|
2072
|
+
m.matchcap = new Int32Array(m.prog.numCap < 2 ? 2 : m.prog.numCap);
|
|
1952
2073
|
m.ncap = 0;
|
|
1953
2074
|
return m;
|
|
1954
2075
|
}
|
|
@@ -1962,27 +2083,30 @@
|
|
|
1962
2083
|
if (ncap > this.matchcap.length) {
|
|
1963
2084
|
this.initNewCap(ncap);
|
|
1964
2085
|
} else {
|
|
1965
|
-
this.resetCap(
|
|
2086
|
+
this.resetCap();
|
|
1966
2087
|
}
|
|
1967
2088
|
}
|
|
1968
|
-
|
|
2089
|
+
|
|
2090
|
+
// Wipes existing typed array memory without reallocating
|
|
2091
|
+
resetCap() {
|
|
1969
2092
|
for (let i = 0; i < this.poolSize; i++) {
|
|
1970
2093
|
const t = this.pool[i];
|
|
1971
|
-
t.cap
|
|
2094
|
+
t.cap.fill(0);
|
|
1972
2095
|
}
|
|
1973
2096
|
}
|
|
1974
2097
|
initNewCap(ncap) {
|
|
1975
2098
|
for (let i = 0; i < this.poolSize; i++) {
|
|
1976
2099
|
const t = this.pool[i];
|
|
1977
|
-
t.cap =
|
|
2100
|
+
t.cap = new Int32Array(ncap);
|
|
1978
2101
|
}
|
|
1979
|
-
this.matchcap =
|
|
2102
|
+
this.matchcap = new Int32Array(ncap);
|
|
1980
2103
|
}
|
|
1981
2104
|
submatches() {
|
|
1982
2105
|
if (this.ncap === 0) {
|
|
1983
2106
|
return Utils.emptyInts();
|
|
1984
2107
|
}
|
|
1985
|
-
|
|
2108
|
+
// Use subarray() to create a zero-allocation view before converting
|
|
2109
|
+
return Array.from(this.matchcap.subarray(0, this.ncap));
|
|
1986
2110
|
}
|
|
1987
2111
|
|
|
1988
2112
|
// alloc() allocates a new thread with the given instruction.
|
|
@@ -1994,6 +2118,7 @@
|
|
|
1994
2118
|
t = this.pool[this.poolSize];
|
|
1995
2119
|
} else {
|
|
1996
2120
|
t = new Thread();
|
|
2121
|
+
t.cap = new Int32Array(this.matchcap.length);
|
|
1997
2122
|
}
|
|
1998
2123
|
t.inst = inst;
|
|
1999
2124
|
return t;
|
|
@@ -2023,7 +2148,7 @@
|
|
|
2023
2148
|
return false;
|
|
2024
2149
|
}
|
|
2025
2150
|
this.matched = false;
|
|
2026
|
-
this.matchcap
|
|
2151
|
+
this.matchcap.fill(-1);
|
|
2027
2152
|
let runq = this.q0;
|
|
2028
2153
|
let nextq = this.q1;
|
|
2029
2154
|
let r = input.step(pos);
|
|
@@ -2094,6 +2219,85 @@
|
|
|
2094
2219
|
this.freeQueue(nextq);
|
|
2095
2220
|
return this.matched;
|
|
2096
2221
|
}
|
|
2222
|
+
matchSet(input, pos, anchor) {
|
|
2223
|
+
const startCond = this.re2.cond;
|
|
2224
|
+
if (startCond === Utils.EMPTY_ALL) return [];
|
|
2225
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2226
|
+
return [];
|
|
2227
|
+
}
|
|
2228
|
+
let runq = this.q0;
|
|
2229
|
+
let nextq = this.q1;
|
|
2230
|
+
let r = input.step(pos);
|
|
2231
|
+
let rune = r >> 3;
|
|
2232
|
+
let width = r & 7;
|
|
2233
|
+
let rune1 = -1;
|
|
2234
|
+
let width1 = 0;
|
|
2235
|
+
if (r !== MachineInputBase.EOF()) {
|
|
2236
|
+
r = input.step(pos + width);
|
|
2237
|
+
rune1 = r >> 3;
|
|
2238
|
+
width1 = r & 7;
|
|
2239
|
+
}
|
|
2240
|
+
let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
|
|
2241
|
+
const matches = new Set();
|
|
2242
|
+
while (true) {
|
|
2243
|
+
if (runq.isEmpty()) {
|
|
2244
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
|
|
2245
|
+
}
|
|
2246
|
+
if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
|
|
2247
|
+
this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
|
|
2248
|
+
}
|
|
2249
|
+
const nextPos = pos + width;
|
|
2250
|
+
flag = input.context(nextPos);
|
|
2251
|
+
for (let j = 0; j < runq.size; j++) {
|
|
2252
|
+
let t = runq.denseThreads[j];
|
|
2253
|
+
if (t === null) continue;
|
|
2254
|
+
const i = t.inst;
|
|
2255
|
+
let add = false;
|
|
2256
|
+
switch (i.op) {
|
|
2257
|
+
case Inst.MATCH:
|
|
2258
|
+
if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) break;
|
|
2259
|
+
matches.add(i.arg); // Record the matched Set ID
|
|
2260
|
+
break;
|
|
2261
|
+
case Inst.RUNE:
|
|
2262
|
+
add = i.matchRune(rune);
|
|
2263
|
+
break;
|
|
2264
|
+
case Inst.RUNE1:
|
|
2265
|
+
add = rune === i.runes[0];
|
|
2266
|
+
break;
|
|
2267
|
+
case Inst.RUNE_ANY:
|
|
2268
|
+
add = true;
|
|
2269
|
+
break;
|
|
2270
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
2271
|
+
add = rune !== Codepoint.CODES.get('\n');
|
|
2272
|
+
break;
|
|
2273
|
+
default:
|
|
2274
|
+
throw new RE2JSInternalException('bad inst');
|
|
2275
|
+
}
|
|
2276
|
+
if (add) {
|
|
2277
|
+
t = this.add(nextq, i.out, nextPos, t.cap, flag, t);
|
|
2278
|
+
}
|
|
2279
|
+
if (t !== null) {
|
|
2280
|
+
this.freeThread(t);
|
|
2281
|
+
runq.denseThreads[j] = null;
|
|
2282
|
+
}
|
|
2283
|
+
}
|
|
2284
|
+
runq.clear();
|
|
2285
|
+
if (width === 0) break;
|
|
2286
|
+
pos += width;
|
|
2287
|
+
rune = rune1;
|
|
2288
|
+
width = width1;
|
|
2289
|
+
if (rune !== -1) {
|
|
2290
|
+
r = input.step(pos + width);
|
|
2291
|
+
rune1 = r >> 3;
|
|
2292
|
+
width1 = r & 7;
|
|
2293
|
+
}
|
|
2294
|
+
const tmpq = runq;
|
|
2295
|
+
runq = nextq;
|
|
2296
|
+
nextq = tmpq;
|
|
2297
|
+
}
|
|
2298
|
+
this.freeQueue(nextq);
|
|
2299
|
+
return Array.from(matches).sort((a, b) => a - b);
|
|
2300
|
+
}
|
|
2097
2301
|
step(runq, nextq, pos, nextPos, c, nextCond, anchor, atEnd) {
|
|
2098
2302
|
const longest = this.re2.longest;
|
|
2099
2303
|
for (let j = 0; j < runq.size; j++) {
|
|
@@ -2114,7 +2318,9 @@
|
|
|
2114
2318
|
}
|
|
2115
2319
|
if (this.ncap > 0 && (!longest || !this.matched || this.matchcap[1] < pos)) {
|
|
2116
2320
|
t.cap[1] = pos;
|
|
2117
|
-
|
|
2321
|
+
// Using subarray creates a fast view, avoiding a full array copy
|
|
2322
|
+
// until the submatches are finalized at the very end.
|
|
2323
|
+
this.matchcap.set(t.cap.subarray(0, this.ncap));
|
|
2118
2324
|
}
|
|
2119
2325
|
if (!longest) {
|
|
2120
2326
|
this.freeQueue(runq, j + 1);
|
|
@@ -2134,7 +2340,7 @@
|
|
|
2134
2340
|
add = c !== Codepoint.CODES.get('\n');
|
|
2135
2341
|
break;
|
|
2136
2342
|
default:
|
|
2137
|
-
throw new
|
|
2343
|
+
throw new RE2JSInternalException('bad inst');
|
|
2138
2344
|
}
|
|
2139
2345
|
if (add) {
|
|
2140
2346
|
t = this.add(nextq, i.out, nextPos, t.cap, nextCond, t);
|
|
@@ -2192,6 +2398,7 @@
|
|
|
2192
2398
|
t.inst = inst;
|
|
2193
2399
|
}
|
|
2194
2400
|
if (this.ncap > 0 && t.cap !== cap) {
|
|
2401
|
+
// Direct assignment utilizing Typed Array performance
|
|
2195
2402
|
for (let c = 0; c < this.ncap; c++) {
|
|
2196
2403
|
t.cap[c] = cap[c];
|
|
2197
2404
|
}
|
|
@@ -2226,20 +2433,23 @@
|
|
|
2226
2433
|
return true;
|
|
2227
2434
|
};
|
|
2228
2435
|
class DFAState {
|
|
2229
|
-
constructor(nfaStates, isMatch) {
|
|
2436
|
+
constructor(nfaStates, isMatch, matchIDs = []) {
|
|
2230
2437
|
this.nfaStates = nfaStates; // Int32Array of Instruction PCs
|
|
2231
2438
|
this.isMatch = isMatch; // Boolean
|
|
2439
|
+
this.matchIDs = matchIDs; // Array of integers indicating which Set patterns matched
|
|
2232
2440
|
this.nextAscii = new Array(Unicode.MAX_ASCII + 1).fill(null); // Flat array for blisteringly fast ASCII lookups
|
|
2233
2441
|
this.nextMap = new Map(); // Cache of Char -> DFAState
|
|
2234
2442
|
}
|
|
2235
2443
|
}
|
|
2236
2444
|
class DFA {
|
|
2445
|
+
static MAX_CACHE_CLEARS = 5;
|
|
2237
2446
|
constructor(prog) {
|
|
2238
2447
|
this.prog = prog;
|
|
2239
2448
|
this.stateCache = new Map(); // hash(number) -> DFAState[]
|
|
2240
2449
|
this.stateCount = 0; // Tracks total states for memory limits
|
|
2241
2450
|
this.startState = null;
|
|
2242
2451
|
this.stateLimit = 10000; // Prevent memory explosion (ReDoS protection)
|
|
2452
|
+
this.cacheClears = 0; // Track thrashing
|
|
2243
2453
|
this.failed = false; // mark if DFA cannot work with provided prog
|
|
2244
2454
|
}
|
|
2245
2455
|
|
|
@@ -2248,6 +2458,7 @@
|
|
|
2248
2458
|
const closure = new Set();
|
|
2249
2459
|
const stack = [...pcs];
|
|
2250
2460
|
let isMatch = false;
|
|
2461
|
+
const matchIDs = [];
|
|
2251
2462
|
while (stack.length > 0) {
|
|
2252
2463
|
const pc = stack.pop();
|
|
2253
2464
|
if (closure.has(pc)) continue;
|
|
@@ -2256,6 +2467,7 @@
|
|
|
2256
2467
|
switch (inst.op) {
|
|
2257
2468
|
case Inst.MATCH:
|
|
2258
2469
|
isMatch = true;
|
|
2470
|
+
if (!matchIDs.includes(inst.arg)) matchIDs.push(inst.arg);
|
|
2259
2471
|
break;
|
|
2260
2472
|
case Inst.ALT:
|
|
2261
2473
|
case Inst.ALT_MATCH:
|
|
@@ -2273,9 +2485,11 @@
|
|
|
2273
2485
|
}
|
|
2274
2486
|
}
|
|
2275
2487
|
const sortedPCs = Int32Array.from(closure).sort();
|
|
2488
|
+
matchIDs.sort((a, b) => a - b);
|
|
2276
2489
|
return {
|
|
2277
2490
|
pcs: sortedPCs,
|
|
2278
|
-
isMatch
|
|
2491
|
+
isMatch,
|
|
2492
|
+
matchIDs
|
|
2279
2493
|
};
|
|
2280
2494
|
}
|
|
2281
2495
|
|
|
@@ -2301,6 +2515,8 @@
|
|
|
2301
2515
|
bucket = [];
|
|
2302
2516
|
this.stateCache.set(hash, bucket);
|
|
2303
2517
|
}
|
|
2518
|
+
|
|
2519
|
+
// DFA already failed once - exit
|
|
2304
2520
|
if (this.failed) return null;
|
|
2305
2521
|
|
|
2306
2522
|
// Safety: prevent memory exhaustion from state explosion
|
|
@@ -2309,12 +2525,18 @@
|
|
|
2309
2525
|
this.stateCache.clear();
|
|
2310
2526
|
this.stateCount = 0;
|
|
2311
2527
|
this.startState = null;
|
|
2312
|
-
this.
|
|
2528
|
+
this.cacheClears++;
|
|
2529
|
+
|
|
2530
|
+
// If this regex causes continuous cache thrashing, permanently fall back to NFA
|
|
2531
|
+
// to avoid spending CPU cycles constantly rebuilding the DFA tree.
|
|
2532
|
+
if (this.cacheClears >= DFA.MAX_CACHE_CLEARS) {
|
|
2533
|
+
this.failed = true;
|
|
2534
|
+
}
|
|
2313
2535
|
return null;
|
|
2314
2536
|
}
|
|
2315
2537
|
|
|
2316
2538
|
// State not found, create it and add to bucket
|
|
2317
|
-
const state = new DFAState(sortedPCs, closureResult.isMatch);
|
|
2539
|
+
const state = new DFAState(sortedPCs, closureResult.isMatch, closureResult.matchIDs);
|
|
2318
2540
|
bucket.push(state);
|
|
2319
2541
|
this.stateCount++;
|
|
2320
2542
|
return state;
|
|
@@ -2334,76 +2556,808 @@
|
|
|
2334
2556
|
return state.nextMap.get(key);
|
|
2335
2557
|
}
|
|
2336
2558
|
}
|
|
2337
|
-
const nextPCs = [];
|
|
2338
|
-
for (let i = 0; i < state.nfaStates.length; i++) {
|
|
2339
|
-
const pc = state.nfaStates[i];
|
|
2340
|
-
const inst = this.prog.getInst(pc);
|
|
2341
|
-
if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
|
|
2342
|
-
nextPCs.push(inst.out);
|
|
2559
|
+
const nextPCs = [];
|
|
2560
|
+
for (let i = 0; i < state.nfaStates.length; i++) {
|
|
2561
|
+
const pc = state.nfaStates[i];
|
|
2562
|
+
const inst = this.prog.getInst(pc);
|
|
2563
|
+
if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
|
|
2564
|
+
nextPCs.push(inst.out);
|
|
2565
|
+
}
|
|
2566
|
+
}
|
|
2567
|
+
if (anchor === RE2Flags.UNANCHORED) {
|
|
2568
|
+
nextPCs.push(this.prog.start);
|
|
2569
|
+
}
|
|
2570
|
+
const nextState = this.getState(nextPCs);
|
|
2571
|
+
|
|
2572
|
+
// Cache the result
|
|
2573
|
+
if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
|
|
2574
|
+
state.nextAscii[charCode] = nextState;
|
|
2575
|
+
} else {
|
|
2576
|
+
const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
|
|
2577
|
+
state.nextMap.set(key, nextState);
|
|
2578
|
+
}
|
|
2579
|
+
return nextState;
|
|
2580
|
+
}
|
|
2581
|
+
|
|
2582
|
+
// The hot loop: Execute the Lazy DFA
|
|
2583
|
+
match(input, pos, anchor) {
|
|
2584
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2585
|
+
return false;
|
|
2586
|
+
}
|
|
2587
|
+
if (!this.startState) {
|
|
2588
|
+
this.startState = this.getState([this.prog.start]);
|
|
2589
|
+
if (!this.startState) return null; // Fallback to NFA
|
|
2590
|
+
}
|
|
2591
|
+
let endPos = input.endPos();
|
|
2592
|
+
let currentState = this.startState;
|
|
2593
|
+
if (currentState.isMatch) {
|
|
2594
|
+
if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2595
|
+
if (pos === endPos) return true;
|
|
2596
|
+
} else {
|
|
2597
|
+
return true;
|
|
2598
|
+
}
|
|
2599
|
+
}
|
|
2600
|
+
let i = pos;
|
|
2601
|
+
while (i < endPos) {
|
|
2602
|
+
const r = input.step(i);
|
|
2603
|
+
const rune = r >> 3;
|
|
2604
|
+
const width = r & 7;
|
|
2605
|
+
|
|
2606
|
+
// prevent infinite loop on EOF
|
|
2607
|
+
if (width === 0) {
|
|
2608
|
+
break;
|
|
2609
|
+
}
|
|
2610
|
+
currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
|
|
2611
|
+
|
|
2612
|
+
// If we hit an unrecoverable DFA error or bailout, signal fallback
|
|
2613
|
+
if (currentState === null) return null;
|
|
2614
|
+
if (currentState.isMatch) {
|
|
2615
|
+
if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2616
|
+
if (i + width === endPos) return true;
|
|
2617
|
+
} else {
|
|
2618
|
+
return true;
|
|
2619
|
+
}
|
|
2620
|
+
}
|
|
2621
|
+
|
|
2622
|
+
// If we hit a dead end, and anchored, fail early
|
|
2623
|
+
if (currentState.nfaStates.length === 0) {
|
|
2624
|
+
if (anchor !== RE2Flags.UNANCHORED) return false;
|
|
2625
|
+
}
|
|
2626
|
+
i += width;
|
|
2627
|
+
}
|
|
2628
|
+
return false;
|
|
2629
|
+
}
|
|
2630
|
+
|
|
2631
|
+
// The hot loop for evaluating Multi-Pattern Sets
|
|
2632
|
+
matchSet(input, pos, anchor) {
|
|
2633
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2634
|
+
return [];
|
|
2635
|
+
}
|
|
2636
|
+
if (!this.startState) {
|
|
2637
|
+
this.startState = this.getState([this.prog.start]);
|
|
2638
|
+
if (!this.startState) return null; // Fallback to NFA
|
|
2639
|
+
}
|
|
2640
|
+
let endPos = input.endPos();
|
|
2641
|
+
let currentState = this.startState;
|
|
2642
|
+
const matches = new Set();
|
|
2643
|
+
const checkMatch = (state, currentPos) => {
|
|
2644
|
+
if (state.isMatch) {
|
|
2645
|
+
if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2646
|
+
if (currentPos === endPos) {
|
|
2647
|
+
state.matchIDs.forEach(id => matches.add(id));
|
|
2648
|
+
}
|
|
2649
|
+
} else {
|
|
2650
|
+
state.matchIDs.forEach(id => matches.add(id));
|
|
2651
|
+
}
|
|
2652
|
+
}
|
|
2653
|
+
};
|
|
2654
|
+
checkMatch(currentState, pos);
|
|
2655
|
+
let i = pos;
|
|
2656
|
+
while (i < endPos) {
|
|
2657
|
+
const r = input.step(i);
|
|
2658
|
+
const rune = r >> 3;
|
|
2659
|
+
const width = r & 7;
|
|
2660
|
+
if (width === 0) break;
|
|
2661
|
+
currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
|
|
2662
|
+
if (currentState === null) return null; // Bailout to NFA
|
|
2663
|
+
|
|
2664
|
+
i += width;
|
|
2665
|
+
checkMatch(currentState, i);
|
|
2666
|
+
if (currentState.nfaStates.length === 0) {
|
|
2667
|
+
if (anchor !== RE2Flags.UNANCHORED) break;
|
|
2668
|
+
}
|
|
2669
|
+
}
|
|
2670
|
+
return Array.from(matches).sort((a, b) => a - b);
|
|
2671
|
+
}
|
|
2672
|
+
}
|
|
2673
|
+
|
|
2674
|
+
const VISITED_BITS = 32;
|
|
2675
|
+
const MAX_BACKTRACK_PROG = 500;
|
|
2676
|
+
const INITIAL_JOB_CAPACITY = 256; // Starting size for the job stack arrays
|
|
2677
|
+
const MAX_BACKTRACK_VECTOR = 256 * 1024; // 32 KB limit for the visited bit-mask
|
|
2678
|
+
|
|
2679
|
+
class BitState {
|
|
2680
|
+
constructor() {
|
|
2681
|
+
this.end = 0;
|
|
2682
|
+
this.cap = new Int32Array(0);
|
|
2683
|
+
this.matchcap = new Int32Array(0);
|
|
2684
|
+
this.ncap = 0;
|
|
2685
|
+
|
|
2686
|
+
// Parallel arrays acting as the backtrack job stack
|
|
2687
|
+
this.jobPc = new Int32Array(INITIAL_JOB_CAPACITY);
|
|
2688
|
+
this.jobArg = new Uint8Array(INITIAL_JOB_CAPACITY);
|
|
2689
|
+
this.jobPos = new Int32Array(INITIAL_JOB_CAPACITY);
|
|
2690
|
+
this.jobLen = 0;
|
|
2691
|
+
this.visited = new Uint32Array(0);
|
|
2692
|
+
}
|
|
2693
|
+
reset(prog, end, ncap) {
|
|
2694
|
+
this.end = end;
|
|
2695
|
+
this.jobLen = 0;
|
|
2696
|
+
this.ncap = ncap;
|
|
2697
|
+
|
|
2698
|
+
// Bitwise shift (>>> 5) instead of Math.floor( / 32)
|
|
2699
|
+
const visitedSize = prog.numInst() * (end + 1) + VISITED_BITS - 1 >>> 5;
|
|
2700
|
+
if (this.visited.length < visitedSize) {
|
|
2701
|
+
this.visited = new Uint32Array(Math.floor(MAX_BACKTRACK_VECTOR / VISITED_BITS));
|
|
2702
|
+
} else {
|
|
2703
|
+
this.visited.fill(0, 0, visitedSize);
|
|
2704
|
+
}
|
|
2705
|
+
if (this.cap.length < ncap) {
|
|
2706
|
+
// Must explicitly fill with -1 as Int32Array defaults to 0
|
|
2707
|
+
this.cap = new Int32Array(ncap).fill(-1);
|
|
2708
|
+
} else {
|
|
2709
|
+
this.cap.fill(-1, 0, ncap);
|
|
2710
|
+
}
|
|
2711
|
+
if (this.matchcap.length < ncap) {
|
|
2712
|
+
this.matchcap = new Int32Array(ncap).fill(-1);
|
|
2713
|
+
} else {
|
|
2714
|
+
this.matchcap.fill(-1, 0, ncap);
|
|
2715
|
+
}
|
|
2716
|
+
}
|
|
2717
|
+
shouldVisit(pc, pos) {
|
|
2718
|
+
const n = pc * (this.end + 1) + pos;
|
|
2719
|
+
const idx = n >>> 5; // Equivalent to Math.floor(n / 32)
|
|
2720
|
+
const mask = 1 << (n & 31); // Equivalent to n % 32
|
|
2721
|
+
|
|
2722
|
+
if ((this.visited[idx] & mask) !== 0) {
|
|
2723
|
+
return false;
|
|
2724
|
+
}
|
|
2725
|
+
this.visited[idx] |= mask;
|
|
2726
|
+
return true;
|
|
2727
|
+
}
|
|
2728
|
+
push(re2, pc, pos, arg) {
|
|
2729
|
+
if (re2.prog.getInst(pc).op !== Inst.FAIL && (arg || this.shouldVisit(pc, pos))) {
|
|
2730
|
+
if (this.jobLen >= this.jobPc.length) {
|
|
2731
|
+
const newSize = this.jobPc.length * 2;
|
|
2732
|
+
const newPc = new Int32Array(newSize);
|
|
2733
|
+
newPc.set(this.jobPc);
|
|
2734
|
+
this.jobPc = newPc;
|
|
2735
|
+
const newArg = new Uint8Array(newSize);
|
|
2736
|
+
newArg.set(this.jobArg);
|
|
2737
|
+
this.jobArg = newArg;
|
|
2738
|
+
const newPos = new Int32Array(newSize);
|
|
2739
|
+
newPos.set(this.jobPos);
|
|
2740
|
+
this.jobPos = newPos;
|
|
2741
|
+
}
|
|
2742
|
+
this.jobPc[this.jobLen] = pc;
|
|
2743
|
+
this.jobArg[this.jobLen] = arg ? 1 : 0;
|
|
2744
|
+
this.jobPos[this.jobLen] = pos;
|
|
2745
|
+
this.jobLen++;
|
|
2746
|
+
}
|
|
2747
|
+
}
|
|
2748
|
+
tryBacktrack(re2, input, pc, pos, anchor) {
|
|
2749
|
+
const longest = re2.longest;
|
|
2750
|
+
this.push(re2, pc, pos, false);
|
|
2751
|
+
while (this.jobLen > 0) {
|
|
2752
|
+
this.jobLen--;
|
|
2753
|
+
let currentPc = this.jobPc[this.jobLen];
|
|
2754
|
+
let arg = this.jobArg[this.jobLen] === 1;
|
|
2755
|
+
let currentPos = this.jobPos[this.jobLen];
|
|
2756
|
+
let skipShouldVisit = true;
|
|
2757
|
+
while (true) {
|
|
2758
|
+
if (!skipShouldVisit) {
|
|
2759
|
+
if (!this.shouldVisit(currentPc, currentPos)) {
|
|
2760
|
+
break;
|
|
2761
|
+
}
|
|
2762
|
+
}
|
|
2763
|
+
skipShouldVisit = false;
|
|
2764
|
+
const inst = re2.prog.getInst(currentPc);
|
|
2765
|
+
switch (inst.op) {
|
|
2766
|
+
case Inst.FAIL:
|
|
2767
|
+
{
|
|
2768
|
+
throw new RE2JSInternalException('unexpected InstFail');
|
|
2769
|
+
}
|
|
2770
|
+
case Inst.ALT:
|
|
2771
|
+
{
|
|
2772
|
+
if (arg) {
|
|
2773
|
+
arg = false;
|
|
2774
|
+
currentPc = inst.arg;
|
|
2775
|
+
continue;
|
|
2776
|
+
} else {
|
|
2777
|
+
this.push(re2, currentPc, currentPos, true);
|
|
2778
|
+
currentPc = inst.out;
|
|
2779
|
+
continue;
|
|
2780
|
+
}
|
|
2781
|
+
}
|
|
2782
|
+
case Inst.ALT_MATCH:
|
|
2783
|
+
{
|
|
2784
|
+
const outInst = re2.prog.getInst(inst.out);
|
|
2785
|
+
if (Inst.isRuneOp(outInst.op)) {
|
|
2786
|
+
this.push(re2, inst.arg, currentPos, false);
|
|
2787
|
+
currentPc = inst.out;
|
|
2788
|
+
continue;
|
|
2789
|
+
}
|
|
2790
|
+
this.push(re2, inst.out, this.end, false);
|
|
2791
|
+
currentPc = inst.arg;
|
|
2792
|
+
continue;
|
|
2793
|
+
}
|
|
2794
|
+
case Inst.RUNE:
|
|
2795
|
+
{
|
|
2796
|
+
const r = input.step(currentPos);
|
|
2797
|
+
if (r === MachineInputBase.EOF()) break;
|
|
2798
|
+
if (!inst.matchRune(r >> 3)) break;
|
|
2799
|
+
currentPos += r & 7;
|
|
2800
|
+
currentPc = inst.out;
|
|
2801
|
+
continue;
|
|
2802
|
+
}
|
|
2803
|
+
case Inst.RUNE1:
|
|
2804
|
+
{
|
|
2805
|
+
const r = input.step(currentPos);
|
|
2806
|
+
if (r === MachineInputBase.EOF()) break;
|
|
2807
|
+
if (r >> 3 !== inst.runes[0]) break;
|
|
2808
|
+
currentPos += r & 7;
|
|
2809
|
+
currentPc = inst.out;
|
|
2810
|
+
continue;
|
|
2811
|
+
}
|
|
2812
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
2813
|
+
{
|
|
2814
|
+
const r = input.step(currentPos);
|
|
2815
|
+
if (r === MachineInputBase.EOF()) break;
|
|
2816
|
+
if (r >> 3 === 10) break;
|
|
2817
|
+
currentPos += r & 7;
|
|
2818
|
+
currentPc = inst.out;
|
|
2819
|
+
continue;
|
|
2820
|
+
}
|
|
2821
|
+
case Inst.RUNE_ANY:
|
|
2822
|
+
{
|
|
2823
|
+
const r = input.step(currentPos);
|
|
2824
|
+
if (r === MachineInputBase.EOF()) break;
|
|
2825
|
+
currentPos += r & 7;
|
|
2826
|
+
currentPc = inst.out;
|
|
2827
|
+
continue;
|
|
2828
|
+
}
|
|
2829
|
+
case Inst.CAPTURE:
|
|
2830
|
+
{
|
|
2831
|
+
if (arg) {
|
|
2832
|
+
this.cap[inst.arg] = currentPos;
|
|
2833
|
+
break;
|
|
2834
|
+
} else {
|
|
2835
|
+
if (inst.arg < this.ncap) {
|
|
2836
|
+
this.push(re2, currentPc, this.cap[inst.arg], true);
|
|
2837
|
+
this.cap[inst.arg] = currentPos;
|
|
2838
|
+
}
|
|
2839
|
+
currentPc = inst.out;
|
|
2840
|
+
continue;
|
|
2841
|
+
}
|
|
2842
|
+
}
|
|
2843
|
+
case Inst.EMPTY_WIDTH:
|
|
2844
|
+
{
|
|
2845
|
+
const flag = input.context(currentPos);
|
|
2846
|
+
if ((inst.arg & ~flag) !== 0) break;
|
|
2847
|
+
currentPc = inst.out;
|
|
2848
|
+
continue;
|
|
2849
|
+
}
|
|
2850
|
+
case Inst.NOP:
|
|
2851
|
+
{
|
|
2852
|
+
currentPc = inst.out;
|
|
2853
|
+
continue;
|
|
2854
|
+
}
|
|
2855
|
+
case Inst.MATCH:
|
|
2856
|
+
{
|
|
2857
|
+
if (anchor === RE2Flags.ANCHOR_BOTH && currentPos !== this.end) {
|
|
2858
|
+
break;
|
|
2859
|
+
}
|
|
2860
|
+
if (this.ncap === 0) return true;
|
|
2861
|
+
if (this.ncap > 1) {
|
|
2862
|
+
this.cap[1] = currentPos;
|
|
2863
|
+
}
|
|
2864
|
+
const old = this.matchcap[1];
|
|
2865
|
+
if (old === -1 || longest && currentPos > 0 && currentPos > old) {
|
|
2866
|
+
this.matchcap.set(this.cap);
|
|
2867
|
+
}
|
|
2868
|
+
if (!longest) return true;
|
|
2869
|
+
if (currentPos === this.end) return true;
|
|
2870
|
+
break;
|
|
2871
|
+
}
|
|
2872
|
+
default:
|
|
2873
|
+
{
|
|
2874
|
+
throw new RE2JSInternalException('bad inst');
|
|
2875
|
+
}
|
|
2876
|
+
}
|
|
2877
|
+
break;
|
|
2878
|
+
}
|
|
2879
|
+
}
|
|
2880
|
+
return longest && this.matchcap.length > 1 && this.matchcap[1] >= 0;
|
|
2881
|
+
}
|
|
2882
|
+
}
|
|
2883
|
+
const bitStatePool = [];
|
|
2884
|
+
class Backtracker {
|
|
2885
|
+
static shouldBacktrack(prog) {
|
|
2886
|
+
return prog.numInst() <= MAX_BACKTRACK_PROG;
|
|
2887
|
+
}
|
|
2888
|
+
static maxBitStateLen(prog) {
|
|
2889
|
+
if (!Backtracker.shouldBacktrack(prog)) {
|
|
2890
|
+
return 0;
|
|
2891
|
+
}
|
|
2892
|
+
return Math.floor(MAX_BACKTRACK_VECTOR / prog.numInst());
|
|
2893
|
+
}
|
|
2894
|
+
static execute(re2, input, pos, anchor, ncap) {
|
|
2895
|
+
const startCond = re2.cond;
|
|
2896
|
+
if (startCond === Utils.EMPTY_ALL) {
|
|
2897
|
+
return null;
|
|
2898
|
+
}
|
|
2899
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2900
|
+
return null;
|
|
2901
|
+
}
|
|
2902
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) {
|
|
2903
|
+
return null;
|
|
2904
|
+
}
|
|
2905
|
+
const b = bitStatePool.length > 0 ? bitStatePool.pop() : new BitState();
|
|
2906
|
+
const end = input.endPos();
|
|
2907
|
+
b.reset(re2.prog, end, ncap);
|
|
2908
|
+
let matched = false;
|
|
2909
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 || anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2910
|
+
if (b.ncap > 0) {
|
|
2911
|
+
b.cap[0] = pos;
|
|
2912
|
+
}
|
|
2913
|
+
if (b.tryBacktrack(re2, input, re2.prog.start, pos, anchor)) {
|
|
2914
|
+
matched = true;
|
|
2915
|
+
}
|
|
2916
|
+
} else {
|
|
2917
|
+
let width = -1;
|
|
2918
|
+
for (; pos <= end && width !== 0; pos += width) {
|
|
2919
|
+
if (re2.prefix.length > 0) {
|
|
2920
|
+
const advance = input.index(re2, pos);
|
|
2921
|
+
if (advance < 0) {
|
|
2922
|
+
break;
|
|
2923
|
+
}
|
|
2924
|
+
pos += advance;
|
|
2925
|
+
}
|
|
2926
|
+
if (b.ncap > 0) {
|
|
2927
|
+
b.cap[0] = pos;
|
|
2928
|
+
}
|
|
2929
|
+
if (b.tryBacktrack(re2, input, re2.prog.start, pos, anchor)) {
|
|
2930
|
+
matched = true;
|
|
2931
|
+
break;
|
|
2932
|
+
}
|
|
2933
|
+
const r = input.step(pos);
|
|
2934
|
+
width = r === MachineInputBase.EOF() ? 0 : r & 7;
|
|
2935
|
+
}
|
|
2936
|
+
}
|
|
2937
|
+
if (!matched) {
|
|
2938
|
+
bitStatePool.push(b);
|
|
2939
|
+
return null;
|
|
2940
|
+
}
|
|
2941
|
+
|
|
2942
|
+
// Must slice so we don't accidentally leak trailing arrays from previously recycled typed arrays
|
|
2943
|
+
const result = ncap === 0 ? [] : Array.from(b.matchcap.subarray(0, ncap));
|
|
2944
|
+
bitStatePool.push(b);
|
|
2945
|
+
return result;
|
|
2946
|
+
}
|
|
2947
|
+
}
|
|
2948
|
+
|
|
2949
|
+
class QueueOnePass {
|
|
2950
|
+
constructor(size) {
|
|
2951
|
+
this.sparse = new Uint32Array(size);
|
|
2952
|
+
this.dense = new Uint32Array(size);
|
|
2953
|
+
this.size = 0;
|
|
2954
|
+
this.nextIndex = 0;
|
|
2955
|
+
}
|
|
2956
|
+
empty() {
|
|
2957
|
+
return this.nextIndex >= this.size;
|
|
2958
|
+
}
|
|
2959
|
+
next() {
|
|
2960
|
+
return this.dense[this.nextIndex++];
|
|
2961
|
+
}
|
|
2962
|
+
clear() {
|
|
2963
|
+
this.size = 0;
|
|
2964
|
+
this.nextIndex = 0;
|
|
2965
|
+
}
|
|
2966
|
+
contains(u) {
|
|
2967
|
+
return u < this.sparse.length && this.sparse[u] < this.size && this.dense[this.sparse[u]] === u;
|
|
2968
|
+
}
|
|
2969
|
+
insert(u) {
|
|
2970
|
+
if (!this.contains(u)) this.insertNew(u);
|
|
2971
|
+
}
|
|
2972
|
+
insertNew(u) {
|
|
2973
|
+
if (u >= this.sparse.length) return;
|
|
2974
|
+
this.sparse[u] = this.size;
|
|
2975
|
+
this.dense[this.size] = u;
|
|
2976
|
+
this.size++;
|
|
2977
|
+
}
|
|
2978
|
+
}
|
|
2979
|
+
const mergeRuneSets = (leftRunes, rightRunes, leftPC, rightPC) => {
|
|
2980
|
+
const leftLen = leftRunes.length;
|
|
2981
|
+
const rightLen = rightRunes.length;
|
|
2982
|
+
let lx = 0,
|
|
2983
|
+
rx = 0;
|
|
2984
|
+
const merged = [];
|
|
2985
|
+
const next = [];
|
|
2986
|
+
let ok = true;
|
|
2987
|
+
let ix = -1;
|
|
2988
|
+
const extend = isLeft => {
|
|
2989
|
+
const newArray = isLeft ? leftRunes : rightRunes;
|
|
2990
|
+
const low = isLeft ? lx : rx;
|
|
2991
|
+
const pc = isLeft ? leftPC : rightPC;
|
|
2992
|
+
if (ix > 0 && newArray[low] <= merged[ix]) return false;
|
|
2993
|
+
merged.push(newArray[low], newArray[low + 1]);
|
|
2994
|
+
if (isLeft) lx += 2;else rx += 2;
|
|
2995
|
+
ix += 2;
|
|
2996
|
+
next.push(pc);
|
|
2997
|
+
return true;
|
|
2998
|
+
};
|
|
2999
|
+
while (lx < leftLen || rx < rightLen) {
|
|
3000
|
+
if (rx >= rightLen) {
|
|
3001
|
+
ok = extend(true);
|
|
3002
|
+
} else if (lx >= leftLen) {
|
|
3003
|
+
ok = extend(false);
|
|
3004
|
+
} else if (rightRunes[rx] < leftRunes[lx]) {
|
|
3005
|
+
ok = extend(false);
|
|
3006
|
+
} else {
|
|
3007
|
+
ok = extend(true);
|
|
3008
|
+
}
|
|
3009
|
+
if (!ok) return null;
|
|
3010
|
+
}
|
|
3011
|
+
return {
|
|
3012
|
+
merged,
|
|
3013
|
+
next
|
|
3014
|
+
};
|
|
3015
|
+
};
|
|
3016
|
+
class OnePassProg {
|
|
3017
|
+
constructor(prog) {
|
|
3018
|
+
this.start = prog.start;
|
|
3019
|
+
this.numCap = prog.numCap;
|
|
3020
|
+
this.inst = new Array(prog.inst.length);
|
|
3021
|
+
for (let i = 0; i < prog.inst.length; i++) {
|
|
3022
|
+
const orig = prog.inst[i];
|
|
3023
|
+
const inst = new Inst(orig.op);
|
|
3024
|
+
inst.out = orig.out;
|
|
3025
|
+
inst.arg = orig.arg;
|
|
3026
|
+
inst.runes = orig.runes ? orig.runes.slice() : [];
|
|
3027
|
+
inst.next = null;
|
|
3028
|
+
this.inst[i] = inst;
|
|
3029
|
+
}
|
|
3030
|
+
}
|
|
3031
|
+
}
|
|
3032
|
+
const onePassCopy = prog => {
|
|
3033
|
+
const p = new OnePassProg(prog);
|
|
3034
|
+
|
|
3035
|
+
// Rewrites one or more common Prog constructs that enable some otherwise
|
|
3036
|
+
// non-onepass Progs to be onepass.
|
|
3037
|
+
for (let pc = 0; pc < p.inst.length; pc++) {
|
|
3038
|
+
const inst = p.inst[pc];
|
|
3039
|
+
if (inst.op !== Inst.ALT && inst.op !== Inst.ALT_MATCH) continue;
|
|
3040
|
+
let pAOther = 'out';
|
|
3041
|
+
let pAAlt = 'arg';
|
|
3042
|
+
let instAlt = p.inst[inst[pAAlt]];
|
|
3043
|
+
if (instAlt.op !== Inst.ALT && instAlt.op !== Inst.ALT_MATCH) {
|
|
3044
|
+
pAOther = 'arg';
|
|
3045
|
+
pAAlt = 'out';
|
|
3046
|
+
instAlt = p.inst[inst[pAAlt]];
|
|
3047
|
+
if (instAlt.op !== Inst.ALT && instAlt.op !== Inst.ALT_MATCH) continue;
|
|
3048
|
+
}
|
|
3049
|
+
const instOther = p.inst[inst[pAOther]];
|
|
3050
|
+
if (instOther.op === Inst.ALT || instOther.op === Inst.ALT_MATCH) continue;
|
|
3051
|
+
let pBAlt = 'out';
|
|
3052
|
+
let pBOther = 'arg';
|
|
3053
|
+
let patch = false;
|
|
3054
|
+
if (instAlt.out === pc) {
|
|
3055
|
+
patch = true;
|
|
3056
|
+
} else if (instAlt.arg === pc) {
|
|
3057
|
+
patch = true;
|
|
3058
|
+
pBAlt = 'arg';
|
|
3059
|
+
pBOther = 'out';
|
|
3060
|
+
}
|
|
3061
|
+
if (patch) instAlt[pBAlt] = inst[pAOther];
|
|
3062
|
+
if (inst[pAOther] === instAlt[pBAlt]) inst[pAAlt] = instAlt[pBOther];
|
|
3063
|
+
}
|
|
3064
|
+
return p;
|
|
3065
|
+
};
|
|
3066
|
+
const makeOnePass = p => {
|
|
3067
|
+
if (p.inst.length >= 1000) return null;
|
|
3068
|
+
const instQueue = new QueueOnePass(p.inst.length);
|
|
3069
|
+
const visitQueue = new QueueOnePass(p.inst.length);
|
|
3070
|
+
const onePassRunes = new Array(p.inst.length);
|
|
3071
|
+
const m = new Array(p.inst.length).fill(false);
|
|
3072
|
+
const check = pc => {
|
|
3073
|
+
let ok = true;
|
|
3074
|
+
const inst = p.inst[pc];
|
|
3075
|
+
if (visitQueue.contains(pc)) return true;
|
|
3076
|
+
visitQueue.insert(pc);
|
|
3077
|
+
switch (inst.op) {
|
|
3078
|
+
case Inst.ALT:
|
|
3079
|
+
case Inst.ALT_MATCH:
|
|
3080
|
+
{
|
|
3081
|
+
ok = check(inst.out) && check(inst.arg);
|
|
3082
|
+
let matchOut = m[inst.out];
|
|
3083
|
+
let matchArg = m[inst.arg];
|
|
3084
|
+
if (matchOut && matchArg) return false;
|
|
3085
|
+
if (matchArg) {
|
|
3086
|
+
const tempOut = inst.out;
|
|
3087
|
+
inst.out = inst.arg;
|
|
3088
|
+
inst.arg = tempOut;
|
|
3089
|
+
const tempMatch = matchOut;
|
|
3090
|
+
matchOut = matchArg;
|
|
3091
|
+
matchArg = tempMatch;
|
|
3092
|
+
}
|
|
3093
|
+
if (matchOut) {
|
|
3094
|
+
m[pc] = true;
|
|
3095
|
+
inst.op = Inst.ALT_MATCH;
|
|
3096
|
+
}
|
|
3097
|
+
const leftRunes = onePassRunes[inst.out] || [];
|
|
3098
|
+
const rightRunes = onePassRunes[inst.arg] || [];
|
|
3099
|
+
const mergeRes = mergeRuneSets(leftRunes, rightRunes, inst.out, inst.arg);
|
|
3100
|
+
if (!mergeRes) return false;
|
|
3101
|
+
onePassRunes[pc] = mergeRes.merged;
|
|
3102
|
+
inst.next = new Uint32Array(mergeRes.next);
|
|
3103
|
+
break;
|
|
3104
|
+
}
|
|
3105
|
+
case Inst.CAPTURE:
|
|
3106
|
+
case Inst.EMPTY_WIDTH:
|
|
3107
|
+
case Inst.NOP:
|
|
3108
|
+
{
|
|
3109
|
+
ok = check(inst.out);
|
|
3110
|
+
m[pc] = m[inst.out];
|
|
3111
|
+
onePassRunes[pc] = onePassRunes[inst.out] ? onePassRunes[inst.out].slice() : [];
|
|
3112
|
+
inst.next = new Uint32Array(Math.floor(onePassRunes[pc].length / 2) + 1).fill(inst.out);
|
|
3113
|
+
break;
|
|
3114
|
+
}
|
|
3115
|
+
case Inst.MATCH:
|
|
3116
|
+
case Inst.FAIL:
|
|
3117
|
+
{
|
|
3118
|
+
m[pc] = inst.op === Inst.MATCH;
|
|
3119
|
+
break;
|
|
3120
|
+
}
|
|
3121
|
+
case Inst.RUNE:
|
|
3122
|
+
{
|
|
3123
|
+
m[pc] = false;
|
|
3124
|
+
if (inst.next && inst.next.length > 0) break;
|
|
3125
|
+
instQueue.insert(inst.out);
|
|
3126
|
+
if (!inst.runes || inst.runes.length === 0) {
|
|
3127
|
+
onePassRunes[pc] = [];
|
|
3128
|
+
inst.next = new Uint32Array([inst.out]);
|
|
3129
|
+
break;
|
|
3130
|
+
}
|
|
3131
|
+
let runes = [];
|
|
3132
|
+
if (inst.runes.length === 1 && (inst.arg & RE2Flags.FOLD_CASE) !== 0) {
|
|
3133
|
+
const r0 = inst.runes[0];
|
|
3134
|
+
runes.push(r0, r0);
|
|
3135
|
+
for (let r1 = Unicode.simpleFold(r0); r1 !== r0; r1 = Unicode.simpleFold(r1)) {
|
|
3136
|
+
runes.push(r1, r1);
|
|
3137
|
+
}
|
|
3138
|
+
runes.sort((a, b) => a - b);
|
|
3139
|
+
} else {
|
|
3140
|
+
runes.push(...inst.runes);
|
|
3141
|
+
}
|
|
3142
|
+
onePassRunes[pc] = runes;
|
|
3143
|
+
inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
|
|
3144
|
+
inst.op = Inst.RUNE;
|
|
3145
|
+
break;
|
|
3146
|
+
}
|
|
3147
|
+
case Inst.RUNE1:
|
|
3148
|
+
{
|
|
3149
|
+
m[pc] = false;
|
|
3150
|
+
if (inst.next && inst.next.length > 0) break;
|
|
3151
|
+
instQueue.insert(inst.out);
|
|
3152
|
+
let runes = [];
|
|
3153
|
+
if ((inst.arg & RE2Flags.FOLD_CASE) !== 0) {
|
|
3154
|
+
const r0 = inst.runes[0];
|
|
3155
|
+
runes.push(r0, r0);
|
|
3156
|
+
for (let r1 = Unicode.simpleFold(r0); r1 !== r0; r1 = Unicode.simpleFold(r1)) {
|
|
3157
|
+
runes.push(r1, r1);
|
|
3158
|
+
}
|
|
3159
|
+
runes.sort((a, b) => a - b);
|
|
3160
|
+
} else {
|
|
3161
|
+
runes.push(inst.runes[0], inst.runes[0]);
|
|
3162
|
+
}
|
|
3163
|
+
onePassRunes[pc] = runes;
|
|
3164
|
+
inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
|
|
3165
|
+
inst.op = Inst.RUNE;
|
|
3166
|
+
break;
|
|
3167
|
+
}
|
|
3168
|
+
case Inst.RUNE_ANY:
|
|
3169
|
+
{
|
|
3170
|
+
m[pc] = false;
|
|
3171
|
+
if (inst.next && inst.next.length > 0) break;
|
|
3172
|
+
instQueue.insert(inst.out);
|
|
3173
|
+
onePassRunes[pc] = [0, Unicode.MAX_RUNE];
|
|
3174
|
+
inst.next = new Uint32Array([inst.out]);
|
|
3175
|
+
break;
|
|
3176
|
+
}
|
|
3177
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
3178
|
+
{
|
|
3179
|
+
m[pc] = false;
|
|
3180
|
+
if (inst.next && inst.next.length > 0) break;
|
|
3181
|
+
instQueue.insert(inst.out);
|
|
3182
|
+
onePassRunes[pc] = [0, 9, 11, Unicode.MAX_RUNE]; // \n is 10
|
|
3183
|
+
inst.next = new Uint32Array(Math.floor(onePassRunes[pc].length / 2) + 1).fill(inst.out);
|
|
3184
|
+
break;
|
|
3185
|
+
}
|
|
3186
|
+
}
|
|
3187
|
+
return ok;
|
|
3188
|
+
};
|
|
3189
|
+
instQueue.clear();
|
|
3190
|
+
instQueue.insert(p.start);
|
|
3191
|
+
while (!instQueue.empty()) {
|
|
3192
|
+
visitQueue.clear();
|
|
3193
|
+
const pc = instQueue.next();
|
|
3194
|
+
if (!check(pc)) return null;
|
|
3195
|
+
}
|
|
3196
|
+
for (let i = 0; i < p.inst.length; i++) {
|
|
3197
|
+
if (onePassRunes[i]) p.inst[i].runes = onePassRunes[i];
|
|
3198
|
+
}
|
|
3199
|
+
return p;
|
|
3200
|
+
};
|
|
3201
|
+
const cleanupOnePass = (p, original) => {
|
|
3202
|
+
for (let ix = 0; ix < original.inst.length; ix++) {
|
|
3203
|
+
const instOriginal = original.inst[ix];
|
|
3204
|
+
switch (instOriginal.op) {
|
|
3205
|
+
case Inst.ALT:
|
|
3206
|
+
case Inst.ALT_MATCH:
|
|
3207
|
+
case Inst.RUNE:
|
|
3208
|
+
break;
|
|
3209
|
+
case Inst.CAPTURE:
|
|
3210
|
+
case Inst.EMPTY_WIDTH:
|
|
3211
|
+
case Inst.NOP:
|
|
3212
|
+
case Inst.MATCH:
|
|
3213
|
+
case Inst.FAIL:
|
|
3214
|
+
p.inst[ix].next = null;
|
|
3215
|
+
break;
|
|
3216
|
+
case Inst.RUNE1:
|
|
3217
|
+
case Inst.RUNE_ANY:
|
|
3218
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
3219
|
+
p.inst[ix].next = null;
|
|
3220
|
+
p.inst[ix].op = instOriginal.op;
|
|
3221
|
+
p.inst[ix].runes = instOriginal.runes ? instOriginal.runes.slice() : [];
|
|
3222
|
+
break;
|
|
3223
|
+
}
|
|
3224
|
+
}
|
|
3225
|
+
};
|
|
3226
|
+
class OnePass {
|
|
3227
|
+
static compile(prog) {
|
|
3228
|
+
if (prog.start === 0) return null;
|
|
3229
|
+
const startInst = prog.inst[prog.start];
|
|
3230
|
+
// onepass regexps must be strictly anchored
|
|
3231
|
+
if (startInst.op !== Inst.EMPTY_WIDTH || (startInst.arg & Utils.EMPTY_BEGIN_TEXT) === 0) {
|
|
3232
|
+
return null;
|
|
3233
|
+
}
|
|
3234
|
+
let hasAlt = false;
|
|
3235
|
+
for (let i = 0; i < prog.inst.length; i++) {
|
|
3236
|
+
if (prog.inst[i].op === Inst.ALT || prog.inst[i].op === Inst.ALT_MATCH) {
|
|
3237
|
+
hasAlt = true;
|
|
3238
|
+
break;
|
|
2343
3239
|
}
|
|
2344
3240
|
}
|
|
2345
|
-
|
|
2346
|
-
|
|
3241
|
+
for (let i = 0; i < prog.inst.length; i++) {
|
|
3242
|
+
const inst = prog.inst[i];
|
|
3243
|
+
const opOut = prog.inst[inst.out].op;
|
|
3244
|
+
switch (inst.op) {
|
|
3245
|
+
case Inst.ALT:
|
|
3246
|
+
case Inst.ALT_MATCH:
|
|
3247
|
+
if (opOut === Inst.MATCH || prog.inst[inst.arg].op === Inst.MATCH) {
|
|
3248
|
+
return null;
|
|
3249
|
+
}
|
|
3250
|
+
break;
|
|
3251
|
+
case Inst.EMPTY_WIDTH:
|
|
3252
|
+
if (opOut === Inst.MATCH) {
|
|
3253
|
+
if ((inst.arg & Utils.EMPTY_END_TEXT) === Utils.EMPTY_END_TEXT) {
|
|
3254
|
+
continue;
|
|
3255
|
+
}
|
|
3256
|
+
return null;
|
|
3257
|
+
}
|
|
3258
|
+
break;
|
|
3259
|
+
default:
|
|
3260
|
+
if (opOut === Inst.MATCH && hasAlt) {
|
|
3261
|
+
return null;
|
|
3262
|
+
}
|
|
3263
|
+
break;
|
|
3264
|
+
}
|
|
2347
3265
|
}
|
|
2348
|
-
|
|
2349
|
-
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
state.nextAscii[charCode] = nextState;
|
|
2353
|
-
} else {
|
|
2354
|
-
const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
|
|
2355
|
-
state.nextMap.set(key, nextState);
|
|
3266
|
+
let p = onePassCopy(prog);
|
|
3267
|
+
p = makeOnePass(p);
|
|
3268
|
+
if (p !== null) {
|
|
3269
|
+
cleanupOnePass(p, prog);
|
|
2356
3270
|
}
|
|
2357
|
-
return
|
|
3271
|
+
return p;
|
|
2358
3272
|
}
|
|
2359
|
-
|
|
2360
|
-
|
|
2361
|
-
|
|
2362
|
-
if (
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
let
|
|
2370
|
-
let
|
|
2371
|
-
|
|
2372
|
-
|
|
2373
|
-
|
|
2374
|
-
|
|
2375
|
-
|
|
3273
|
+
static next(inst, r) {
|
|
3274
|
+
const nextIdx = inst.matchRunePos(r);
|
|
3275
|
+
if (nextIdx >= 0) return inst.next[nextIdx];
|
|
3276
|
+
if (inst.op === Inst.ALT_MATCH) return inst.out;
|
|
3277
|
+
return 0; // fail
|
|
3278
|
+
}
|
|
3279
|
+
static execute(re2, input, pos, anchor, ncap) {
|
|
3280
|
+
const onepass = re2.onepass;
|
|
3281
|
+
if (!onepass) return null;
|
|
3282
|
+
const matchcap = new Int32Array(ncap).fill(-1);
|
|
3283
|
+
let matched = false;
|
|
3284
|
+
let r = input.step(pos);
|
|
3285
|
+
let rune = r >> 3;
|
|
3286
|
+
let width = r & 7;
|
|
3287
|
+
let r1 = MachineInputBase.EOF();
|
|
3288
|
+
let rune1 = -1;
|
|
3289
|
+
let width1 = 0;
|
|
3290
|
+
if (r !== MachineInputBase.EOF()) {
|
|
3291
|
+
r1 = input.step(pos + width);
|
|
3292
|
+
if (r1 !== MachineInputBase.EOF()) {
|
|
3293
|
+
rune1 = r1 >> 3;
|
|
3294
|
+
width1 = r1 & 7;
|
|
2376
3295
|
}
|
|
2377
3296
|
}
|
|
2378
|
-
let
|
|
2379
|
-
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
3297
|
+
let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
|
|
3298
|
+
let pc = onepass.start;
|
|
3299
|
+
let inst;
|
|
3300
|
+
while (true) {
|
|
3301
|
+
inst = onepass.inst[pc];
|
|
3302
|
+
pc = inst.out;
|
|
3303
|
+
switch (inst.op) {
|
|
3304
|
+
case Inst.MATCH:
|
|
3305
|
+
{
|
|
3306
|
+
matched = true;
|
|
3307
|
+
if (matchcap.length > 0) {
|
|
3308
|
+
matchcap[0] = 0;
|
|
3309
|
+
matchcap[1] = pos;
|
|
3310
|
+
}
|
|
3311
|
+
return ncap === 0 ? [] : Array.from(matchcap);
|
|
3312
|
+
}
|
|
3313
|
+
case Inst.RUNE:
|
|
3314
|
+
if (!inst.matchRune(rune)) return null;
|
|
3315
|
+
break;
|
|
3316
|
+
case Inst.RUNE1:
|
|
3317
|
+
if (rune !== inst.runes[0]) return null;
|
|
3318
|
+
break;
|
|
3319
|
+
case Inst.RUNE_ANY:
|
|
3320
|
+
break;
|
|
3321
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
3322
|
+
if (rune === 10) return null;
|
|
3323
|
+
break;
|
|
3324
|
+
case Inst.ALT:
|
|
3325
|
+
case Inst.ALT_MATCH:
|
|
3326
|
+
pc = OnePass.next(inst, rune);
|
|
3327
|
+
continue;
|
|
3328
|
+
case Inst.FAIL:
|
|
3329
|
+
return null;
|
|
3330
|
+
case Inst.NOP:
|
|
3331
|
+
continue;
|
|
3332
|
+
case Inst.EMPTY_WIDTH:
|
|
3333
|
+
if ((inst.arg & ~flag) !== 0) return null;
|
|
3334
|
+
continue;
|
|
3335
|
+
case Inst.CAPTURE:
|
|
3336
|
+
if (inst.arg < matchcap.length) {
|
|
3337
|
+
matchcap[inst.arg] = pos;
|
|
3338
|
+
}
|
|
3339
|
+
continue;
|
|
3340
|
+
default:
|
|
3341
|
+
throw new RE2JSInternalException('bad inst');
|
|
2387
3342
|
}
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
3343
|
+
if (width === 0) break;
|
|
3344
|
+
flag = Utils.emptyOpContext(rune, rune1);
|
|
3345
|
+
pos += width;
|
|
3346
|
+
rune = rune1;
|
|
3347
|
+
width = width1;
|
|
3348
|
+
if (rune !== -1) {
|
|
3349
|
+
r1 = input.step(pos + width);
|
|
3350
|
+
if (r1 !== MachineInputBase.EOF()) {
|
|
3351
|
+
rune1 = r1 >> 3;
|
|
3352
|
+
width1 = r1 & 7;
|
|
2395
3353
|
} else {
|
|
2396
|
-
|
|
3354
|
+
rune1 = -1;
|
|
3355
|
+
width1 = 0;
|
|
2397
3356
|
}
|
|
2398
3357
|
}
|
|
2399
|
-
|
|
2400
|
-
// If we hit a dead end, and anchored, fail early
|
|
2401
|
-
if (currentState.nfaStates.length === 0) {
|
|
2402
|
-
if (anchor !== RE2Flags.UNANCHORED) return false;
|
|
2403
|
-
}
|
|
2404
|
-
i += width;
|
|
2405
3358
|
}
|
|
2406
|
-
return
|
|
3359
|
+
if (!matched) return null;
|
|
3360
|
+
return ncap === 0 ? [] : Array.from(matchcap);
|
|
2407
3361
|
}
|
|
2408
3362
|
}
|
|
2409
3363
|
|
|
@@ -2488,7 +3442,7 @@
|
|
|
2488
3442
|
this.max = 0; // max for REPEAT
|
|
2489
3443
|
this.cap = 0; // capturing index, for CAPTURE
|
|
2490
3444
|
this.name = null; // capturing name, for CAPTURE
|
|
2491
|
-
this.namedGroups =
|
|
3445
|
+
this.namedGroups = Object.create(null); // map of group name -> capturing index
|
|
2492
3446
|
}
|
|
2493
3447
|
reinit() {
|
|
2494
3448
|
this.flags = 0;
|
|
@@ -2498,7 +3452,7 @@
|
|
|
2498
3452
|
this.min = 0;
|
|
2499
3453
|
this.max = 0;
|
|
2500
3454
|
this.name = null;
|
|
2501
|
-
this.namedGroups =
|
|
3455
|
+
this.namedGroups = Object.create(null);
|
|
2502
3456
|
}
|
|
2503
3457
|
toString() {
|
|
2504
3458
|
return this.appendTo();
|
|
@@ -2758,6 +3712,188 @@
|
|
|
2758
3712
|
}
|
|
2759
3713
|
}
|
|
2760
3714
|
|
|
3715
|
+
/**
 * One node of a literal-requirement tree.
 *
 * A node is either a leaf (`NONE`: no requirement, `EXACT`: a literal string)
 * or a combinator (`AND` / `OR`) over the child nodes held in `subs`.
 */
class Prefilter {
  static Type = {
    NONE: 0,
    EXACT: 1,
    AND: 2,
    OR: 3
  };

  /**
   * @param {number} type - one of the `Prefilter.Type` values.
   */
  constructor(type) {
    this.type = type;
    this.subs = []; // child nodes, used by AND / OR
    this.str = ''; // literal text, used by EXACT
    this.bytes = null; // byte form of `str`; left null here and filled in by the tree builder
  }

  /**
   * Evaluates this node against `input`.
   *
   * EXACT nodes delegate to `input.hasString(this, pos)`; AND requires every
   * child to hold, OR requires at least one. NONE and any unrecognised type
   * impose no requirement and therefore return true.
   *
   * @param {object} input - object exposing `hasString(prefilter, pos)`.
   * @param {number} pos - position forwarded unchanged to `input.hasString`.
   * @returns {boolean}
   */
  eval(input, pos) {
    const { EXACT, AND, OR } = Prefilter.Type;
    const holds = (child) => child.eval(input, pos);

    if (this.type === EXACT) return input.hasString(this, pos);
    if (this.type === AND) return this.subs.every(holds);
    if (this.type === OR) return this.subs.some(holds);
    return true;
  }
}
|
|
3749
|
+
/**
 * Derives a `Prefilter` tree from a parsed regular expression and reduces it
 * to its smallest equivalent form.
 *
 * The tree describes literal strings that the regexp requires; whenever
 * nothing can be said about a sub-expression a NONE node ("no requirement")
 * is produced, so the result is always a conservative approximation.
 */
class PrefilterTree {
  /**
   * Builds and simplifies the prefilter for `re`.
   *
   * @param {Regexp|null|undefined} re - parsed (and ideally simplified) regexp.
   * @returns {Prefilter}
   */
  static build(re) {
    return PrefilterTree.simplify(PrefilterTree.fromRegexp(re));
  }

  /**
   * Translates a regexp node into an unsimplified prefilter node.
   *
   * @param {Regexp|null|undefined} re
   * @returns {Prefilter}
   */
  static fromRegexp(re) {
    const { NONE, EXACT, AND, OR } = Prefilter.Type;
    if (!re) return new Prefilter(NONE);

    switch (re.op) {
      case Regexp.Op.LITERAL: {
        // Case-folded literals are skipped for simplicity: several spellings
        // would have to be accepted, so no requirement is recorded.
        if (re.runes.length === 0 || (re.flags & RE2Flags.FOLD_CASE) !== 0) {
          return new Prefilter(NONE);
        }
        const pf = new Prefilter(EXACT);
        let str = '';
        for (const rune of re.runes) {
          str += String.fromCodePoint(rune);
        }
        pf.str = str;
        pf.bytes = Utils.stringToUtf8ByteArray(pf.str);
        return pf;
      }
      case Regexp.Op.CAPTURE:
      case Regexp.Op.PLUS:
        // The operand occurs at least once, so its requirements carry over.
        return PrefilterTree.fromRegexp(re.subs[0]);
      case Regexp.Op.REPEAT:
        // x{0,...} may match without x, so only min >= 1 yields a requirement.
        return re.min >= 1 ? PrefilterTree.fromRegexp(re.subs[0]) : new Prefilter(NONE);
      case Regexp.Op.CONCAT:
        return PrefilterTree.#combine(AND, re.subs);
      case Regexp.Op.ALTERNATE:
        return PrefilterTree.#combine(OR, re.subs);
      default:
        // NO_MATCH, EMPTY_MATCH, anchors, word boundaries, character classes,
        // any-char operators, STAR, QUEST, ...: no literal requirement.
        return new Prefilter(NONE);
    }
  }

  /**
   * Reduces a prefilter tree:
   *  - NONE children are dropped from AND, and turn a whole OR into NONE;
   *  - nested nodes of the same kind are flattened into their parent;
   *  - repeated EXACT children are de-duplicated (for both AND and OR);
   *  - a combinator left with zero children becomes NONE, and one left with
   *    a single child is replaced by that child (also after de-duplication).
   *
   * AND / OR nodes may be mutated in place (`subs` is reassigned).
   *
   * @param {Prefilter} pf
   * @returns {Prefilter} the simplified node (possibly `pf` itself).
   */
  static simplify(pf) {
    const { NONE, AND, OR } = Prefilter.Type;
    if (pf.type !== AND && pf.type !== OR) return pf;

    const flattened = [];
    for (const sub of pf.subs) {
      const s = PrefilterTree.simplify(sub);
      if (s.type === NONE) {
        // If any branch of an OR has no requirements, the whole OR has none.
        if (pf.type === OR) return new Prefilter(NONE);
        continue; // an AND simply ignores requirement-free parts
      }
      if (s.type === pf.type) {
        flattened.push(...s.subs);
      } else {
        flattened.push(s);
      }
    }

    const unique = PrefilterTree.#dedupeExact(flattened);
    if (unique.length === 0) return new Prefilter(NONE);
    // Checked after de-duplication so that e.g. OR[abc, abc] becomes EXACT abc.
    if (unique.length === 1) return unique[0];
    pf.subs = unique;
    return pf;
  }

  // Builds an AND/OR node whose children are the prefilters of `subs`.
  static #combine(type, subs) {
    const pf = new Prefilter(type);
    for (const sub of subs) {
      pf.subs.push(PrefilterTree.fromRegexp(sub));
    }
    return pf;
  }

  // Keeps the first EXACT node for each distinct string; other nodes pass through.
  static #dedupeExact(subs) {
    const seen = new Set();
    return subs.filter((sub) => {
      if (sub.type !== Prefilter.Type.EXACT) return true;
      if (seen.has(sub.str)) return false;
      seen.add(sub.str);
      return true;
    });
  }
}
|
|
3875
|
+
|
|
3876
|
+
/**
 * A list of instruction slots that still have to be patched with a target.
 *
 * Both ends of the list are recorded (`head` and `tail`) so that two lists
 * can be joined in O(1) during compilation.
 *
 * Entries are encoded integers rather than object references:
 *  - `l >> 1` is the index of the instruction in the program;
 *  - `(l & 1) === 0` means the instruction's `.out` field is to be patched;
 *  - `(l & 1) === 1` means the instruction's `.arg` field is to be patched;
 *  - `0` denotes the empty list.
 *
 * @see https://swtch.com/~rsc/regexp/regexp1.html
 */
class PatchList {
  /**
   * @param {number} head - Encoded pointer to the first entry of the list.
   * @param {number} tail - Encoded pointer to the last entry of the list.
   */
  constructor(head = 0, tail = 0) {
    Object.assign(this, { head, tail });
  }
}
|
|
3896
|
+
|
|
2761
3897
|
/**
|
|
2762
3898
|
* A Prog is a compiled regular expression program.
|
|
2763
3899
|
*/
|
|
@@ -2859,39 +3995,30 @@
|
|
|
2859
3995
|
return i.arg;
|
|
2860
3996
|
}
|
|
2861
3997
|
patch(l, val) {
|
|
2862
|
-
|
|
2863
|
-
|
|
2864
|
-
|
|
2865
|
-
|
|
3998
|
+
let head = l.head;
|
|
3999
|
+
while (head !== 0) {
|
|
4000
|
+
const i = this.inst[head >> 1];
|
|
4001
|
+
if ((head & 1) === 0) {
|
|
4002
|
+
head = i.out;
|
|
2866
4003
|
i.out = val;
|
|
2867
4004
|
} else {
|
|
2868
|
-
|
|
4005
|
+
head = i.arg;
|
|
2869
4006
|
i.arg = val;
|
|
2870
4007
|
}
|
|
2871
4008
|
}
|
|
2872
4009
|
}
|
|
2873
4010
|
append(l1, l2) {
|
|
2874
|
-
if (l1 === 0)
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
|
|
2878
|
-
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
for (;;) {
|
|
2882
|
-
const next = this.next(last);
|
|
2883
|
-
if (next === 0) {
|
|
2884
|
-
break;
|
|
2885
|
-
}
|
|
2886
|
-
last = next;
|
|
2887
|
-
}
|
|
2888
|
-
const i = this.inst[last >> 1];
|
|
2889
|
-
if ((last & 1) === 0) {
|
|
2890
|
-
i.out = l2;
|
|
4011
|
+
if (l1.head === 0) return l2;
|
|
4012
|
+
if (l2.head === 0) return l1;
|
|
4013
|
+
|
|
4014
|
+
// We know exactly where the tail is
|
|
4015
|
+
const i = this.inst[l1.tail >> 1];
|
|
4016
|
+
if ((l1.tail & 1) === 0) {
|
|
4017
|
+
i.out = l2.head;
|
|
2891
4018
|
} else {
|
|
2892
|
-
i.arg = l2;
|
|
4019
|
+
i.arg = l2.head;
|
|
2893
4020
|
}
|
|
2894
|
-
return l1;
|
|
4021
|
+
return new PatchList(l1.head, l2.tail);
|
|
2895
4022
|
}
|
|
2896
4023
|
/**
|
|
2897
4024
|
*
|
|
@@ -2920,7 +4047,7 @@
|
|
|
2920
4047
|
* @class
|
|
2921
4048
|
*/
|
|
2922
4049
|
class Frag {
|
|
2923
|
-
constructor(i = 0, out =
|
|
4050
|
+
constructor(i = 0, out = new PatchList(), nullable = false) {
|
|
2924
4051
|
this.i = i; // an instruction address (pc).
|
|
2925
4052
|
this.out = out; // a patch list; see explanation in Prog.js
|
|
2926
4053
|
this.nullable = nullable; // whether the fragment can match the empty string
|
|
@@ -2945,6 +4072,33 @@
|
|
|
2945
4072
|
c.prog.start = f.i;
|
|
2946
4073
|
return c.prog;
|
|
2947
4074
|
}
|
|
4075
|
+
static compileSet(regexps) {
|
|
4076
|
+
const c = new Compiler();
|
|
4077
|
+
if (regexps.length === 0) {
|
|
4078
|
+
c.prog.start = c.newInst(Inst.FAIL).i;
|
|
4079
|
+
return c.prog;
|
|
4080
|
+
}
|
|
4081
|
+
let starts = [];
|
|
4082
|
+
for (let i = 0; i < regexps.length; i++) {
|
|
4083
|
+
const f = c.compile(regexps[i]);
|
|
4084
|
+
const m = c.newInst(Inst.MATCH);
|
|
4085
|
+
c.prog.getInst(m.i).arg = i; // Store the regex index
|
|
4086
|
+
c.prog.patch(f.out, m.i);
|
|
4087
|
+
starts.push(f.i);
|
|
4088
|
+
}
|
|
4089
|
+
|
|
4090
|
+
// Link starts together via ALT
|
|
4091
|
+
let start = starts[0];
|
|
4092
|
+
for (let i = 1; i < starts.length; i++) {
|
|
4093
|
+
const f = c.newInst(Inst.ALT);
|
|
4094
|
+
const inst = c.prog.getInst(f.i);
|
|
4095
|
+
inst.out = start;
|
|
4096
|
+
inst.arg = starts[i];
|
|
4097
|
+
start = f.i;
|
|
4098
|
+
}
|
|
4099
|
+
c.prog.start = start;
|
|
4100
|
+
return c.prog;
|
|
4101
|
+
}
|
|
2948
4102
|
constructor() {
|
|
2949
4103
|
this.prog = new Prog();
|
|
2950
4104
|
this.newInst(Inst.FAIL);
|
|
@@ -2957,7 +4111,7 @@
|
|
|
2957
4111
|
// Returns a no-op fragment. Sometimes unavoidable.
|
|
2958
4112
|
nop() {
|
|
2959
4113
|
const f = this.newInst(Inst.NOP);
|
|
2960
|
-
f.out = f.i << 1;
|
|
4114
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
2961
4115
|
return f;
|
|
2962
4116
|
}
|
|
2963
4117
|
fail() {
|
|
@@ -2968,7 +4122,7 @@
|
|
|
2968
4122
|
// Given a fragment a, returns a fragment with capturing parens around a.
|
|
2969
4123
|
cap(arg) {
|
|
2970
4124
|
const f = this.newInst(Inst.CAPTURE);
|
|
2971
|
-
f.out = f.i << 1;
|
|
4125
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
2972
4126
|
this.prog.getInst(f.i).arg = arg;
|
|
2973
4127
|
if (this.prog.numCap < arg + 1) {
|
|
2974
4128
|
this.prog.numCap = arg + 1;
|
|
@@ -3016,10 +4170,10 @@
|
|
|
3016
4170
|
const i = this.prog.getInst(f.i);
|
|
3017
4171
|
if (nongreedy) {
|
|
3018
4172
|
i.arg = f1.i;
|
|
3019
|
-
f.out = f.i << 1;
|
|
4173
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3020
4174
|
} else {
|
|
3021
4175
|
i.out = f1.i;
|
|
3022
|
-
f.out = f.i << 1 | 1;
|
|
4176
|
+
f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
|
|
3023
4177
|
}
|
|
3024
4178
|
this.prog.patch(f1.out, f.i);
|
|
3025
4179
|
return f;
|
|
@@ -3031,10 +4185,10 @@
|
|
|
3031
4185
|
const i = this.prog.getInst(f.i);
|
|
3032
4186
|
if (nongreedy) {
|
|
3033
4187
|
i.arg = f1.i;
|
|
3034
|
-
f.out = f.i << 1;
|
|
4188
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3035
4189
|
} else {
|
|
3036
4190
|
i.out = f1.i;
|
|
3037
|
-
f.out = f.i << 1 | 1;
|
|
4191
|
+
f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
|
|
3038
4192
|
}
|
|
3039
4193
|
f.out = this.prog.append(f.out, f1.out);
|
|
3040
4194
|
return f;
|
|
@@ -3057,7 +4211,7 @@
|
|
|
3057
4211
|
empty(op) {
|
|
3058
4212
|
const f = this.newInst(Inst.EMPTY_WIDTH);
|
|
3059
4213
|
this.prog.getInst(f.i).arg = op;
|
|
3060
|
-
f.out = f.i << 1;
|
|
4214
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3061
4215
|
return f;
|
|
3062
4216
|
}
|
|
3063
4217
|
|
|
@@ -3072,7 +4226,7 @@
|
|
|
3072
4226
|
flags &= -2;
|
|
3073
4227
|
}
|
|
3074
4228
|
i.arg = flags;
|
|
3075
|
-
f.out = f.i << 1;
|
|
4229
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3076
4230
|
if ((flags & RE2Flags.FOLD_CASE) === 0 && runes.length === 1 || runes.length === 2 && runes[0] === runes[1]) {
|
|
3077
4231
|
i.op = Inst.RUNE1;
|
|
3078
4232
|
} else if (runes.length === 2 && runes[0] === 0 && runes[1] === Unicode.MAX_RUNE) {
|
|
@@ -3177,23 +4331,92 @@
|
|
|
3177
4331
|
}
|
|
3178
4332
|
switch (re.op) {
|
|
3179
4333
|
case Regexp.Op.CAPTURE:
|
|
4334
|
+
{
|
|
4335
|
+
const sub = Simplify.simplify(re.subs[0]);
|
|
4336
|
+
if (sub !== re.subs[0]) {
|
|
4337
|
+
const nre = Regexp.fromRegexp(re);
|
|
4338
|
+
nre.runes = [];
|
|
4339
|
+
nre.subs = [sub];
|
|
4340
|
+
return nre;
|
|
4341
|
+
}
|
|
4342
|
+
return re;
|
|
4343
|
+
}
|
|
3180
4344
|
case Regexp.Op.CONCAT:
|
|
3181
4345
|
case Regexp.Op.ALTERNATE:
|
|
3182
4346
|
{
|
|
3183
|
-
|
|
4347
|
+
const newSubs = [];
|
|
4348
|
+
let changed = false;
|
|
3184
4349
|
for (let i = 0; i < re.subs.length; i++) {
|
|
3185
4350
|
const sub = re.subs[i];
|
|
3186
4351
|
const nsub = Simplify.simplify(sub);
|
|
3187
|
-
if (
|
|
3188
|
-
|
|
3189
|
-
|
|
3190
|
-
|
|
4352
|
+
if (nsub !== sub) {
|
|
4353
|
+
changed = true;
|
|
4354
|
+
}
|
|
4355
|
+
if (re.op === Regexp.Op.CONCAT) {
|
|
4356
|
+
// If any part of a CONCAT is mathematically impossible,
|
|
4357
|
+
// the entire CONCAT sequence becomes impossible.
|
|
4358
|
+
if (nsub.op === Regexp.Op.NO_MATCH) {
|
|
4359
|
+
return new Regexp(Regexp.Op.NO_MATCH);
|
|
4360
|
+
}
|
|
4361
|
+
// Drop empty 0-width match nodes entirely from sequences
|
|
4362
|
+
if (nsub.op === Regexp.Op.EMPTY_MATCH) {
|
|
4363
|
+
changed = true;
|
|
4364
|
+
continue;
|
|
4365
|
+
}
|
|
4366
|
+
// Flatten nested concatenations
|
|
4367
|
+
if (nsub.op === Regexp.Op.CONCAT) {
|
|
4368
|
+
changed = true;
|
|
4369
|
+
newSubs.push(...nsub.subs);
|
|
4370
|
+
continue;
|
|
4371
|
+
}
|
|
4372
|
+
} else if (re.op === Regexp.Op.ALTERNATE) {
|
|
4373
|
+
// Drop impossible branches from alternations
|
|
4374
|
+
if (nsub.op === Regexp.Op.NO_MATCH) {
|
|
4375
|
+
changed = true;
|
|
4376
|
+
continue;
|
|
4377
|
+
}
|
|
4378
|
+
// Flatten nested alternations
|
|
4379
|
+
if (nsub.op === Regexp.Op.ALTERNATE) {
|
|
4380
|
+
changed = true;
|
|
4381
|
+
newSubs.push(...nsub.subs);
|
|
4382
|
+
continue;
|
|
4383
|
+
}
|
|
3191
4384
|
}
|
|
3192
|
-
|
|
3193
|
-
|
|
4385
|
+
newSubs.push(nsub);
|
|
4386
|
+
}
|
|
4387
|
+
if (changed) {
|
|
4388
|
+
// If we filtered out all nodes, return the mathematically correct fallback
|
|
4389
|
+
if (newSubs.length === 0) {
|
|
4390
|
+
return new Regexp(re.op === Regexp.Op.CONCAT ? Regexp.Op.EMPTY_MATCH : Regexp.Op.NO_MATCH);
|
|
4391
|
+
}
|
|
4392
|
+
// If only 1 node remains, we don't need a CONCAT/ALT container at all
|
|
4393
|
+
if (newSubs.length === 1) {
|
|
4394
|
+
return newSubs[0];
|
|
3194
4395
|
}
|
|
4396
|
+
const nre = Regexp.fromRegexp(re);
|
|
4397
|
+
nre.runes = [];
|
|
4398
|
+
nre.subs = newSubs;
|
|
4399
|
+
return nre;
|
|
4400
|
+
}
|
|
4401
|
+
return re;
|
|
4402
|
+
}
|
|
4403
|
+
case Regexp.Op.CHAR_CLASS:
|
|
4404
|
+
{
|
|
4405
|
+
if (re.runes === null) return re;
|
|
4406
|
+
|
|
4407
|
+
// Empty character classes match nothing.
|
|
4408
|
+
if (re.runes.length === 0) {
|
|
4409
|
+
return new Regexp(Regexp.Op.NO_MATCH);
|
|
3195
4410
|
}
|
|
3196
|
-
|
|
4411
|
+
// Full character classes match everything.
|
|
4412
|
+
if (re.runes.length === 2 && re.runes[0] === 0 && re.runes[1] === Unicode.MAX_RUNE) {
|
|
4413
|
+
return new Regexp(Regexp.Op.ANY_CHAR);
|
|
4414
|
+
}
|
|
4415
|
+
// Standard catch-all except newline
|
|
4416
|
+
if (re.runes.length === 4 && re.runes[0] === 0 && re.runes[1] === Codepoint.CODES.get('\n') - 1 && re.runes[2] === Codepoint.CODES.get('\n') + 1 && re.runes[3] === Unicode.MAX_RUNE) {
|
|
4417
|
+
return new Regexp(Regexp.Op.ANY_CHAR_NOT_NL);
|
|
4418
|
+
}
|
|
4419
|
+
return re;
|
|
3197
4420
|
}
|
|
3198
4421
|
case Regexp.Op.STAR:
|
|
3199
4422
|
case Regexp.Op.PLUS:
|
|
@@ -3230,7 +4453,9 @@
|
|
|
3230
4453
|
}
|
|
3231
4454
|
subs.push(Simplify.simplify1(Regexp.Op.PLUS, re.flags, sub, null));
|
|
3232
4455
|
nre.subs = subs.slice(0);
|
|
3233
|
-
|
|
4456
|
+
|
|
4457
|
+
// Ensure newly created CONCAT is properly flattened
|
|
4458
|
+
return Simplify.simplify(nre);
|
|
3234
4459
|
}
|
|
3235
4460
|
// Special case x{0} handled above.
|
|
3236
4461
|
|
|
@@ -3268,7 +4493,8 @@
|
|
|
3268
4493
|
if (prefixSubs !== null) {
|
|
3269
4494
|
const prefix = new Regexp(Regexp.Op.CONCAT);
|
|
3270
4495
|
prefix.subs = prefixSubs.slice(0);
|
|
3271
|
-
|
|
4496
|
+
// Ensure newly created CONCAT is properly flattened
|
|
4497
|
+
return Simplify.simplify(prefix);
|
|
3272
4498
|
}
|
|
3273
4499
|
|
|
3274
4500
|
// Some degenerate case like min > max or min < max < 0.
|
|
@@ -3301,6 +4527,13 @@
|
|
|
3301
4527
|
return sub;
|
|
3302
4528
|
}
|
|
3303
4529
|
|
|
4530
|
+
// Handle impossible targets gracefully.
|
|
4531
|
+
// e.g. Trying to match "NO_MATCH" zero or one times (QUEST) or zero or more times (STAR) evaluates to EMPTY_MATCH.
|
|
4532
|
+
if (sub.op === Regexp.Op.NO_MATCH) {
|
|
4533
|
+
if (op === Regexp.Op.PLUS) return sub; // 1+ times is impossible
|
|
4534
|
+
return new Regexp(Regexp.Op.EMPTY_MATCH);
|
|
4535
|
+
}
|
|
4536
|
+
|
|
3304
4537
|
// The operators are idempotent if the flags match.
|
|
3305
4538
|
if (op === sub.op && (flags & RE2Flags.NON_GREEDY) === (sub.flags & RE2Flags.NON_GREEDY)) {
|
|
3306
4539
|
return sub;
|
|
@@ -3308,10 +4541,10 @@
|
|
|
3308
4541
|
if (re !== null && re.op === op && (re.flags & RE2Flags.NON_GREEDY) === (flags & RE2Flags.NON_GREEDY) && sub === re.subs[0]) {
|
|
3309
4542
|
return re;
|
|
3310
4543
|
}
|
|
3311
|
-
|
|
3312
|
-
|
|
3313
|
-
|
|
3314
|
-
return
|
|
4544
|
+
const nre = new Regexp(op);
|
|
4545
|
+
nre.flags = flags;
|
|
4546
|
+
nre.subs = [sub];
|
|
4547
|
+
return nre;
|
|
3315
4548
|
}
|
|
3316
4549
|
}
|
|
3317
4550
|
|
|
@@ -3657,16 +4890,6 @@
|
|
|
3657
4890
|
}
|
|
3658
4891
|
}
|
|
3659
4892
|
|
|
3660
|
-
class Pair {
|
|
3661
|
-
static of(first, second) {
|
|
3662
|
-
return new Pair(first, second);
|
|
3663
|
-
}
|
|
3664
|
-
constructor(first, second) {
|
|
3665
|
-
this.first = first;
|
|
3666
|
-
this.second = second;
|
|
3667
|
-
}
|
|
3668
|
-
}
|
|
3669
|
-
|
|
3670
4893
|
// StringIterator: a stream of runes with an opaque cursor, permitting
|
|
3671
4894
|
// rewinding. The units of the cursor are not specified beyond the
|
|
3672
4895
|
// fact that ASCII characters are single width. (Cursor positions
|
|
@@ -3813,18 +5036,59 @@
|
|
|
3813
5036
|
// stride).
|
|
3814
5037
|
static ANY_TABLE = new UnicodeRangeTable(new Uint32Array([0, Unicode.MAX_RUNE, 1]));
|
|
3815
5038
|
|
|
5039
|
+
// Ascii tables
|
|
5040
|
+
static ASCII_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1]));
|
|
5041
|
+
static ASCII_FOLD_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1, 0x017f, 0x017f, 1,
|
|
5042
|
+
// Old English long s (ſ), folds to S/s.
|
|
5043
|
+
0x212a, 0x212a, 1 // Kelvin K, folds to K/k.
|
|
5044
|
+
]));
|
|
5045
|
+
|
|
3816
5046
|
// unicodeTable() returns the Unicode RangeTable identified by name
|
|
3817
5047
|
// and the table of additional fold-equivalent code points.
|
|
3818
5048
|
// Returns null if |name| does not identify a Unicode character range.
|
|
3819
5049
|
static unicodeTable(name) {
|
|
3820
5050
|
if (name === 'Any') {
|
|
3821
|
-
return
|
|
5051
|
+
return {
|
|
5052
|
+
tab: Parser.ANY_TABLE,
|
|
5053
|
+
fold: Parser.ANY_TABLE,
|
|
5054
|
+
sign: 1
|
|
5055
|
+
};
|
|
5056
|
+
}
|
|
5057
|
+
if (name === 'Ascii') {
|
|
5058
|
+
return {
|
|
5059
|
+
tab: Parser.ASCII_TABLE,
|
|
5060
|
+
fold: Parser.ASCII_FOLD_TABLE,
|
|
5061
|
+
sign: 1
|
|
5062
|
+
};
|
|
5063
|
+
}
|
|
5064
|
+
if (name === 'Assigned') {
|
|
5065
|
+
// Assigned is the mathematical inversion of Cn (Unassigned)
|
|
5066
|
+
return {
|
|
5067
|
+
tab: UnicodeTables.CATEGORIES.get('Cn'),
|
|
5068
|
+
fold: UnicodeTables.CATEGORIES.get('Cn'),
|
|
5069
|
+
sign: -1
|
|
5070
|
+
};
|
|
5071
|
+
}
|
|
5072
|
+
if (name === 'Lc') {
|
|
5073
|
+
return {
|
|
5074
|
+
tab: UnicodeTables.CATEGORIES.get('LC'),
|
|
5075
|
+
fold: UnicodeTables.FOLD_CATEGORIES.get('LC'),
|
|
5076
|
+
sign: 1
|
|
5077
|
+
};
|
|
3822
5078
|
}
|
|
3823
5079
|
if (UnicodeTables.CATEGORIES.has(name)) {
|
|
3824
|
-
return
|
|
5080
|
+
return {
|
|
5081
|
+
tab: UnicodeTables.CATEGORIES.get(name),
|
|
5082
|
+
fold: UnicodeTables.FOLD_CATEGORIES.get(name),
|
|
5083
|
+
sign: 1
|
|
5084
|
+
};
|
|
3825
5085
|
}
|
|
3826
5086
|
if (UnicodeTables.SCRIPTS.has(name)) {
|
|
3827
|
-
return
|
|
5087
|
+
return {
|
|
5088
|
+
tab: UnicodeTables.SCRIPTS.get(name),
|
|
5089
|
+
fold: UnicodeTables.FOLD_SCRIPT.get(name),
|
|
5090
|
+
sign: 1
|
|
5091
|
+
};
|
|
3828
5092
|
}
|
|
3829
5093
|
return null;
|
|
3830
5094
|
}
|
|
@@ -4133,7 +5397,7 @@
|
|
|
4133
5397
|
this.flags = flags;
|
|
4134
5398
|
// number of capturing groups seen
|
|
4135
5399
|
this.numCap = 0;
|
|
4136
|
-
this.namedGroups =
|
|
5400
|
+
this.namedGroups = Object.create(null);
|
|
4137
5401
|
// Stack of parsed expressions.
|
|
4138
5402
|
this.stack = [];
|
|
4139
5403
|
this.free = null;
|
|
@@ -4977,9 +6241,11 @@
|
|
|
4977
6241
|
const i = lit.indexOf('\\E');
|
|
4978
6242
|
if (i >= 0) {
|
|
4979
6243
|
lit = lit.substring(0, i);
|
|
6244
|
+
t.skipString(lit);
|
|
6245
|
+
t.skipString('\\E');
|
|
6246
|
+
} else {
|
|
6247
|
+
t.skipString(lit);
|
|
4980
6248
|
}
|
|
4981
|
-
t.skipString(lit);
|
|
4982
|
-
t.skipString('\\E');
|
|
4983
6249
|
let j = 0;
|
|
4984
6250
|
while (j < lit.length) {
|
|
4985
6251
|
const codepoint = lit.codePointAt(j);
|
|
@@ -4995,6 +6261,9 @@
|
|
|
4995
6261
|
t.rewindTo(savedPos);
|
|
4996
6262
|
break;
|
|
4997
6263
|
}
|
|
6264
|
+
} else {
|
|
6265
|
+
// Unconditionally rewind if PERL_X is off, or if string ended abruptly
|
|
6266
|
+
t.rewindTo(savedPos);
|
|
4998
6267
|
}
|
|
4999
6268
|
const re = this.newRegexp(Regexp.Op.CHAR_CLASS);
|
|
5000
6269
|
re.flags = this.flags;
|
|
@@ -5320,8 +6589,11 @@
|
|
|
5320
6589
|
if (pair === null) {
|
|
5321
6590
|
throw new RE2JSSyntaxException(Parser.ERR_INVALID_CHAR_RANGE, t.from(startPos));
|
|
5322
6591
|
}
|
|
5323
|
-
|
|
5324
|
-
|
|
6592
|
+
if (pair.sign < 0) {
|
|
6593
|
+
sign = 0 - sign;
|
|
6594
|
+
}
|
|
6595
|
+
const tab = pair.tab;
|
|
6596
|
+
const fold = pair.fold; // fold-equivalent table
|
|
5325
6597
|
// Variation of CharClass.appendGroup() for tables.
|
|
5326
6598
|
if ((this.flags & RE2Flags.FOLD_CASE) === 0 || fold === null) {
|
|
5327
6599
|
cc.appendTableWithSign(tab, sign);
|
|
@@ -5465,6 +6737,7 @@
|
|
|
5465
6737
|
res.prefixUTF8 = re2.prefixUTF8;
|
|
5466
6738
|
res.prefixComplete = re2.prefixComplete;
|
|
5467
6739
|
res.prefixRune = re2.prefixRune;
|
|
6740
|
+
res.prefilter = re2.prefilter;
|
|
5468
6741
|
return res;
|
|
5469
6742
|
}
|
|
5470
6743
|
|
|
@@ -5507,8 +6780,10 @@
|
|
|
5507
6780
|
let re = Parser.parse(expr, mode);
|
|
5508
6781
|
const maxCap = re.maxCap();
|
|
5509
6782
|
re = Simplify.simplify(re);
|
|
6783
|
+
const prefilter = PrefilterTree.build(re);
|
|
5510
6784
|
const prog = Compiler.compileRegexp(re);
|
|
5511
6785
|
const re2 = new RE2(expr, prog, maxCap, longest);
|
|
6786
|
+
re2.prefilter = prefilter.type === Prefilter.Type.NONE ? null : prefilter;
|
|
5512
6787
|
const [prefixCompl, prefixStr] = prog.prefix();
|
|
5513
6788
|
re2.prefixComplete = prefixCompl;
|
|
5514
6789
|
re2.prefix = prefixStr;
|
|
@@ -5540,12 +6815,78 @@
|
|
|
5540
6815
|
this.prefixComplete = false; // true if prefix is the entire regexp
|
|
5541
6816
|
this.prefixRune = 0; // first rune in prefix
|
|
5542
6817
|
this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
|
|
5543
|
-
this.dfa = new DFA(prog); //
|
|
6818
|
+
this.dfa = new DFA(this.prog); // initialize Lazy DFA
|
|
6819
|
+
this.onepass = OnePass.compile(this.prog); // compile OnePass
|
|
6820
|
+
this.prefilter = null;
|
|
6821
|
+
}
|
|
6822
|
+
matchPrefixComplete(input, pos, anchor, ncap) {
|
|
6823
|
+
// If strictly anchored, execution must start at 0
|
|
6824
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
6825
|
+
return null;
|
|
6826
|
+
}
|
|
6827
|
+
let matchStart = -1;
|
|
6828
|
+
let matchEnd = -1;
|
|
6829
|
+
const pLen = input.prefixLength(this);
|
|
6830
|
+
if (anchor === RE2Flags.UNANCHORED) {
|
|
6831
|
+
const idx = input.index(this, pos);
|
|
6832
|
+
if (idx < 0) return null;
|
|
6833
|
+
matchStart = pos + idx;
|
|
6834
|
+
matchEnd = matchStart + pLen;
|
|
6835
|
+
} else if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
6836
|
+
if (input.endPos() !== pLen) return null;
|
|
6837
|
+
const idx = input.index(this, 0);
|
|
6838
|
+
if (idx !== 0) return null;
|
|
6839
|
+
matchStart = 0;
|
|
6840
|
+
matchEnd = pLen;
|
|
6841
|
+
} else if (anchor === RE2Flags.ANCHOR_START) {
|
|
6842
|
+
const idx = input.index(this, 0);
|
|
6843
|
+
if (idx !== 0) return null;
|
|
6844
|
+
matchStart = 0;
|
|
6845
|
+
matchEnd = pLen;
|
|
6846
|
+
}
|
|
6847
|
+
if (matchStart < 0) return null;
|
|
6848
|
+
|
|
6849
|
+
// If captures are requested (e.g. findSubmatch instead of test), populate bounds
|
|
6850
|
+
if (ncap > 0) {
|
|
6851
|
+
const matchcap = new Int32Array(ncap).fill(-1);
|
|
6852
|
+
matchcap[0] = matchStart;
|
|
6853
|
+
matchcap[1] = matchEnd;
|
|
6854
|
+
return Array.from(matchcap);
|
|
6855
|
+
}
|
|
6856
|
+
return []; // Matched successfully, but no capture data requested
|
|
5544
6857
|
}
|
|
5545
6858
|
executeEngine(input, pos, anchor, ncap) {
|
|
6859
|
+
// LITERAL FAST PATH
|
|
6860
|
+
// If the entire regex is just a literal string (and no nested capture boundaries are requested),
|
|
6861
|
+
// bypass all state machines and execute via V8's blisteringly fast native indexOf
|
|
6862
|
+
if (this.prefixComplete && (ncap === 0 || this.numSubexp === 0)) {
|
|
6863
|
+
return this.matchPrefixComplete(input, pos, anchor, ncap);
|
|
6864
|
+
}
|
|
6865
|
+
|
|
6866
|
+
// PREFILTER FAST PATH
|
|
6867
|
+
// If the unanchored query requires specific literal strings (e.g. "a.*b"),
|
|
6868
|
+
// verify those strings exist using high-speed JS string searches before waking up the state engines.
|
|
6869
|
+
if (this.prefilter !== null && anchor === RE2Flags.UNANCHORED) {
|
|
6870
|
+
if (!this.prefilter.eval(input, pos)) {
|
|
6871
|
+
return null;
|
|
6872
|
+
}
|
|
6873
|
+
}
|
|
6874
|
+
|
|
6875
|
+
// FAST PATH: OnePass DFA engine.
|
|
6876
|
+
// If compiled successfully, it perfectly supports capture groups
|
|
6877
|
+
// and is blisteringly fast since it skips thread queues completely.
|
|
6878
|
+
if (this.onepass !== null) {
|
|
6879
|
+
return OnePass.execute(this, input, pos, anchor, ncap);
|
|
6880
|
+
}
|
|
6881
|
+
|
|
5546
6882
|
// If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
|
|
5547
6883
|
// We must use the NFA.
|
|
5548
6884
|
if (ncap > 0) {
|
|
6885
|
+
// Backtracker bit-state execution bounds check
|
|
6886
|
+
if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
|
|
6887
|
+
return Backtracker.execute(this, input, pos, anchor, ncap);
|
|
6888
|
+
}
|
|
6889
|
+
// NFA execution
|
|
5549
6890
|
return this.doExecuteNFA(input, pos, anchor, ncap);
|
|
5550
6891
|
}
|
|
5551
6892
|
const dfaResult = this.dfa.match(input, pos, anchor);
|
|
@@ -5554,6 +6895,11 @@
|
|
|
5554
6895
|
return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
|
|
5555
6896
|
}
|
|
5556
6897
|
|
|
6898
|
+
// Backtracker bit-state execution bounds check
|
|
6899
|
+
if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
|
|
6900
|
+
return Backtracker.execute(this, input, pos, anchor, ncap);
|
|
6901
|
+
}
|
|
6902
|
+
|
|
5557
6903
|
// Fallback to NFA
|
|
5558
6904
|
return this.doExecuteNFA(input, pos, anchor, ncap);
|
|
5559
6905
|
}
|
|
@@ -6134,6 +7480,50 @@
|
|
|
6134
7480
|
}
|
|
6135
7481
|
}
|
|
6136
7482
|
|
|
7483
|
+
/**
 * A collection of patterns compiled into one program and matched together.
 *
 * Patterns are registered with `add()`; the program is compiled explicitly
 * via `compile()` or implicitly on the first `match()`. Once compiled, no
 * further patterns can be added.
 */
class RE2Set {
  /**
   * @param {number} anchor - anchor mode handed to the matching engines.
   * @param {number} flags - parser flags used for every added pattern.
   */
  constructor(anchor = RE2Flags.UNANCHORED, flags = RE2Flags.PERL) {
    this.anchor = anchor;
    this.flags = flags;
    this.regexps = []; // parsed and simplified patterns, in insertion order
    this.prog = null; // combined program; non-null once compiled
    this.dfa = null;
    this.dummyRe2 = null; // RE2-like descriptor handed to the NFA fallback
  }

  /**
   * Parses and simplifies `pattern` and appends it to the set.
   *
   * @param {string} pattern
   * @returns {number} index of the pattern within the set.
   * @throws {RE2JSCompileException} if the set has already been compiled.
   */
  add(pattern) {
    if (this.prog) {
      throw new RE2JSCompileException('Cannot add patterns after compile');
    }
    const parsed = Parser.parse(pattern, this.flags);
    // push() returns the new length, i.e. the new element's index plus one.
    return this.regexps.push(Simplify.simplify(parsed)) - 1;
  }

  /**
   * Compiles the registered patterns; a no-op when already compiled.
   */
  compile() {
    if (this.prog) return;

    const prog = Compiler.compileSet(this.regexps);
    this.prog = prog;
    this.dfa = new DFA(prog);
    this.dummyRe2 = {
      prog,
      cond: prog.startCond(),
      prefix: '',
      prefixRune: 0,
      longest: false
    };
  }

  /**
   * Matches `input` against the set, compiling first if necessary.
   *
   * @param {string|Array} input - an array is read through
   *   `MachineInput.fromUTF8`, anything else through `MachineInput.fromUTF16`.
   * @returns {*} the DFA's `matchSet` result when it is not null, otherwise
   *   the NFA machine's `matchSet` result.
   */
  match(input) {
    if (!this.prog) this.compile();

    const machineInput = Array.isArray(input)
      ? MachineInput.fromUTF8(input)
      : MachineInput.fromUTF16(input);

    // Fast path: try the DFA first.
    const fastResult = this.dfa.matchSet(machineInput, 0, this.anchor);
    if (fastResult !== null) return fastResult;

    // Safe fallback: a null DFA result (boundaries such as \b, or massive
    // state explosions) is handled by the NFA.
    const nfa = Machine.fromRE2(this.dummyRe2);
    nfa.init(0);
    return nfa.matchSet(machineInput, 0, this.anchor);
  }
}
|
|
7526
|
+
|
|
6137
7527
|
/**
|
|
6138
7528
|
* Transform JS regex string to RE2 regex string
|
|
6139
7529
|
*/
|
|
@@ -6216,7 +7606,8 @@
|
|
|
6216
7606
|
default:
|
|
6217
7607
|
{
|
|
6218
7608
|
result += '\\';
|
|
6219
|
-
let
|
|
7609
|
+
let cp = data.codePointAt(i + 1);
|
|
7610
|
+
let symSize = Utils.charCount(cp);
|
|
6220
7611
|
result += data.substring(i + 1, i + 1 + symSize);
|
|
6221
7612
|
i += symSize + 1;
|
|
6222
7613
|
continue;
|
|
@@ -6236,7 +7627,8 @@
|
|
|
6236
7627
|
continue;
|
|
6237
7628
|
}
|
|
6238
7629
|
}
|
|
6239
|
-
let
|
|
7630
|
+
let cp = data.codePointAt(i);
|
|
7631
|
+
let symSize = Utils.charCount(cp);
|
|
6240
7632
|
result += data.substring(i, i + symSize);
|
|
6241
7633
|
i += symSize;
|
|
6242
7634
|
}
|
|
@@ -6599,7 +7991,9 @@
|
|
|
6599
7991
|
exports.RE2JSException = RE2JSException;
|
|
6600
7992
|
exports.RE2JSFlagsException = RE2JSFlagsException;
|
|
6601
7993
|
exports.RE2JSGroupException = RE2JSGroupException;
|
|
7994
|
+
exports.RE2JSInternalException = RE2JSInternalException;
|
|
6602
7995
|
exports.RE2JSSyntaxException = RE2JSSyntaxException;
|
|
7996
|
+
exports.RE2Set = RE2Set;
|
|
6603
7997
|
|
|
6604
7998
|
}));
|
|
6605
7999
|
//# sourceMappingURL=index.umd.js.map
|