re2js 2.0.2 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +111 -30
- package/build/index.cjs.cjs +1557 -163
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +71 -0
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +1556 -164
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +1557 -163
- package/build/index.umd.js.map +1 -1
- package/package.json +2 -2
package/build/index.cjs.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.0
|
|
5
|
+
* @version v2.1.0
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -56,6 +56,23 @@ class RE2Flags {
|
|
|
56
56
|
/**
|
|
57
57
|
* Various constants and helper for unicode codepoints.
|
|
58
58
|
*/
|
|
59
|
+
const ASCII_SIZE = 128;
|
|
60
|
+
const ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
|
|
61
|
+
const ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
|
|
62
|
+
for (let i = 0; i < ASCII_SIZE; i++) {
|
|
63
|
+
if (i >= 97 && i <= 122) {
|
|
64
|
+
// a-z
|
|
65
|
+
ASCII_TO_UPPER[i] = i - 32;
|
|
66
|
+
} else {
|
|
67
|
+
ASCII_TO_UPPER[i] = i;
|
|
68
|
+
}
|
|
69
|
+
if (i >= 65 && i <= 90) {
|
|
70
|
+
// A-Z
|
|
71
|
+
ASCII_TO_LOWER[i] = i + 32;
|
|
72
|
+
} else {
|
|
73
|
+
ASCII_TO_LOWER[i] = i;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
59
76
|
class Codepoint {
|
|
60
77
|
// codePointAt(0)
|
|
61
78
|
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
@@ -63,6 +80,7 @@ class Codepoint {
|
|
|
63
80
|
// convert unicode codepoint to upper case codepoint
|
|
64
81
|
// return same codepoint, if cannot do it (or codepoint not have upper variation)
|
|
65
82
|
static toUpperCase(codepoint) {
|
|
83
|
+
if (codepoint < ASCII_SIZE) return ASCII_TO_UPPER[codepoint];
|
|
66
84
|
const s = String.fromCodePoint(codepoint).toUpperCase();
|
|
67
85
|
if (s.length > 1) {
|
|
68
86
|
return codepoint;
|
|
@@ -77,6 +95,7 @@ class Codepoint {
|
|
|
77
95
|
// convert unicode codepoint to lower case codepoint
|
|
78
96
|
// return same codepoint, if cannot do it (or codepoint not have lower variation)
|
|
79
97
|
static toLowerCase(codepoint) {
|
|
98
|
+
if (codepoint < ASCII_SIZE) return ASCII_TO_LOWER[codepoint];
|
|
80
99
|
const s = String.fromCodePoint(codepoint).toLowerCase();
|
|
81
100
|
if (s.length > 1) {
|
|
82
101
|
return codepoint;
|
|
@@ -199,9 +218,11 @@ class UnicodeTables {
|
|
|
199
218
|
C: () => new UnicodeRangeTable(decodeRanges('AfBgDgBBOrWrWBHHBCBICCVuMuMnBBBzBBBE4B4BBGBcDBHQBXhGhGxBBB8BBBmDNB8BBByBBBQddBCCMEBgBHBsCiFiFJBBDBBXIICCBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBPMMBEB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKMMBDBbEByBPBDBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBjoIBvLBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCB-FCBHBBHBBHBBECBIIIBLBDBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIB-BGGBLBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMBxhBPBXJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBF-6DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBrCHBxDUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQB7
BEBCvOBrB1GBsBDBC-OBIFFDQQmGBBRoBBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEvq3VBxHvw-FB', false)),
|
|
200
219
|
Cc: () => new UnicodeRangeTable(decodeRanges('AfgDgB', true)),
|
|
201
220
|
Cf: () => new UnicodeRangeTable(decodeRanges('tFzqBzqBBEBXhGhGyBhMhMBxCxCs5D9-B9-BBDBbEByBEBCJBw03B6H6HBBBimEQQj7IPBhjiBDBwmFHBn0rYffB+CB', false)),
|
|
221
|
+
Cn: () => new UnicodeRangeTable(decodeRanges('4bBBHDBICCVuMuMnBBBzBBBE4B4BBGBcDBHKBvI9B9BBmDmDBMB8BBByBBBQddBCCMEBgBDDBDBuHJJBDDBXXICCBBBFBBKBBDBBFHBCDBDGGBaaBEEHDBDBBXIIDGDBCCGDBDBBECBCGBFCCBFBSJBEKKEXXIDDGBBLIEBCCBNBFBBNGBIEEJBBDBBXIIDGGBKKBDDBEEBFBEDBDGGBTTBIBDHHBBBEFFBBBDCCDCBDCBECBNDBGCBEFFBCCBEBCNBWEBOEEYRRBKKEFFBFBDEEBCCBFFBLLBFBXEEYLLGBBKEEFGBDFBDFFBLLELBOEE0BEEHDBRBBbEETCBZKKCBBICBCDBHCCJFBLBBELB7BDBekBBDCCGZZCYYBGGCIILBBFfBpClBlBBCBoBlBlBQOOBjBBnGCCBDBCBB6LFFBIICFFBqBqBFBBiBFFBIICFFBQQ6BFFBkCkCBhBhBBBBbFB3CBBHBB+UCB6CGBXIBZIBVLBOEEDLB-CBBLFBLFBbFB6CGBsBEBnCJBgBNNBCBNDBCCBrBBBGKBtBDBbFBMCB-BBBiCeeBMMBEBLFBPBBgBwBBuCnFnFBGB9BCBQCB-BEBsBBBMHBsBEB3QBBHBBnBBBHBBJGCgBBB2BQQPBBHUUBEEKmDmDNBBcOOBBBiBOBiBOBtEDB7UVBMUB14BBBhB+K+KBDBuBCCBDBCBB5BGBDNBZIBI4BI-DhBBb6C6CBKB3GZBxC3C3CBoDoDBDBsB-C-C3CIBxBuzcuzcBBB4BIB9KTB5FHBvGBBDCCJUB8BCBLFB5BHBnCHBNFB1DKBfCBvCMMBCBiB4B4BBHBPBBLBBoDXBdJBHBBHBBHIBIII9BDB-DBBLFBl9KLBYDByBDBvzIBBrDlBBILBGEBbGGCGD+DPB+NBB3BGBCfBrBFB0BUUFDBGoEoEBCC-FCBHBBHBBHBBECBIIIBIBGBBNbbUDDQBBPhBB8DEBEDBuBCB5COOBBBCuBBvBhEBeCByBOBdDBlBIBfEBsBEBfmBmBBCBPpBB-EBBLFBlBDBlBDBpBHB1BKBNQQIDDMQQIDDBBB1BLB4JIBXJBJXBHrBrBKkCBHBBCtBtBDCBCBBYpCpCBGBKvBBUDDBDBiBCBcEBC-BB5BDBVBBzBDDBDBJEEeBBEDBLGBKGBhCfBoBDBNIB3BCBeBBcEBbGBFLBIvCBqC2BB0BMB0BGBvBHBLFBnBCBeHBDvGBgBrBrBEBBDPBE2BBtBHBrBVBblBBdTBYIBvCDBlBIBlCJBCBBaGBLFB2BTTBGBoBIBhDVVBJBTwBwBB8BBICCFQQMFB8BEBLFBFJJBDDBXXIDDGLLBDDBEEBCCBEBCEBIBBICBGKBLCCBCCnBLLCBBCFFLDDBGBDcB9CGGBcBpCHBLlFB3BBBnBhBBmCKBLFBOSB7BFBLFBVbBcBBQDBY4FB9BjDB0CLBJBBCBBJDDfDDBNNBHBLlCBJBBvBBBMaBpCHB0CMBqCGBL1FBjBNBLFBKuBuBPJBeCBhBBBXPPBnCBIDDtBCBCDDKHBLFBHDDmBDDHGBL1JBaGBSqBqBBBBe0CBCOBzBMB8clDBwDGGBJBlGryCBkDMB3iBJB88DEBoS41GB7Bl2BB6RGBgBLLBCByCLLBEBfBBHJBnCJBLIIWEBUvNB7BlGB8CkDBsCDB6BGBS-BBGKBDNB5-FHB3mBoBBLm3IBFIIDkJkJBNBCcBEBBCNBFHBtMjoCBsDEBOCBKGBLBBJ76DB7HFB1NrCBvBBBYIB1D7BB3HJBoBBBjGUBnC5DBVLBVLB4CIBamEB2CoCoCDBBCBBDBBFNNCIIiCFFBJJIddFGGCCBI1K1KBlJlJB-V-VBNBGQQBuiBBgBFBH0GBISSBIIDGGBDB-BgBBCvDBuBCBPBBLDBD-JBgBQB7BEBCvOBrB1GBsBDBC-OBIFFDQQmGBBRo
BBtCDBLDBDwYBlCrCB+BhGBFccDCCBCCLFFCCCBEBCDBCECEDDCBBCICDCCBFFIKFCLLSEBEGGSzBBDtIBtBDBlDLBQBBQQQmBJBvF3BBeMBtBDBKGBDNBH5EB5eDBSCBOCB4DDBgDFBNDBCOBNDB5BHBLFBpBHBfBBNDBD9BB1KLBPBBOCBLEB5BGBQBBMFBKGB0EnDnDBkgBBh3pBfB7hEFB-GBBj0FNBypHOBvThtCB-QhvBBs6EEBhjEwi3VBCdBhD-DBxHvw-BB---BBB---BBB', false)),
|
|
202
222
|
Co: () => new UnicodeRangeTable(decodeRanges('gg4B-nGh4hc9--BD9--B', true)),
|
|
203
223
|
Cs: () => new UnicodeRangeTable(decodeRanges('gg2B--B', true)),
|
|
204
224
|
L: () => new UnicodeRangeTable(decodeRanges('hCZBHZBwBLLFGGBVBCeBCpOBFLBPEBICCiEEBCBBDDBCHHCCBCCCBSBCyCBCqEBJlFBClBBDHHBnBBoCaBFDBuBqBBkBBBCiDBCQQBIIBLLBBBDRRCdBe4CBMZZBfBKBBFGGBUBFKKEYYBXBIKBGXBCFBSpBB7B1BBETTIJBQPBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNGB7BBBCCCBDBCXBCCCBIBCBBKDDBDBCWWBCBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNSSBkBBCGGDqBBCsKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPBR1CBDFBErTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBkBFFkC4CBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBzC+C+CBtBBSHB3BdBOBBLrBBbjBBqBCBLjBBDKBGqBBDCBqBDBCFBCBBEGGB+FBhC1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGB1DOORMBmDFFDJBCEEBDBHGCBCBCKBDDBGEBF1B1BB8zC8zCBjHBHDBEBBNlBBCGGD3BBIRRBVBKGBCGBCGBCGBCGBCGBCGBCGBxC2O2OBrBrBBDBGBBF1CBHCBC5CBCDBGqBBC9CBSfBxBPBhQ-tGBhCs0VBkCtBBDsIBEPBLBBVuBBReBDlCByBIBDmDBDiCBDBBCCCBGBWPBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBdRRBDBCJBLEBCoBBYCBCHBVWBEEEBwBBCEEBDDBDBDCCZCBDKBICBNFBDFBDFBKGBCGBCqBBCNBHyDBej9KBNWBFwBBloItLBDpDBnBGBNEBGCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBlBZBHZBM4CBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBxB9EBTjBBFjBBFnBBJzBBNKBCOBCGBCBBCKBCOBCGBCBBEzBBN2JBKVBLHBZFBCpBBCIBmCFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4ByBBOyBBOjBBnBbBKWB7HpBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB1D-BBgBHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBBRDBgBUBGSBxNsBB0G-BBhEqCBGjCjCBLBhCBBCddB2-FBJ1mBBqBJBo3IDBCGBCBBCiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBn7F0CBCmCBCBBDDDBDDB
CBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BeBHFBmI9BBzEsBBLGBRiKiKBcBTrBBlPbBlHdBDwPwPBFBCDBCBBCOBCkGB8BjCBI1lB1lBBCBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
|
|
225
|
+
LC: () => new UnicodeRangeTable(decodeRanges('hCZBHZB7BLLBVBCeBCiGBCDBFvGBCaBhGDBDBBECBCHHCCBCCCBSBCyCBCqEBJlFBClBBKoBB44ClBBCGGDqBBDCBhV1CBDFBjkCKBGqBBDCBhCrBBgCMBChBBmD1IBDFBDlBBDFBDHBCGCBdBD0BBCGBCEEBBBCGBEDBDFBFMBGCBCGBmIFFDJBCEEBDBHGCBCBCFBFDDBCBGEBF1B1BB8zC8zCB6DBDmDBHDBEBBNlBBCGGzoetBBTbBnEtCBCWBEDBC9BBDBBCCCBGBZBBE2Z2ZBpBBGIBIvCBh6TGBNEBqgBZBHZBmlBvCBhDjBBFjBB1DKBCOBCGBCBBCKBCOBCGBCBBk2ByBBOyBB+CVBLVB74C-BBhrV-BBhsZ0CBCmCBCBBDDDBDDBCBCLBCCCBFBCgCBCDBDHBCGBCbBCDBCEBCEEBFBCzKBDYBCYBCeBCYBCeBCYBCeBCYBCeBCYBCHB15BJBCTBHFB2uCjCB', false)),
|
|
205
226
|
Ll: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBC2BCBQCBuBCDECBBBDCCDEEBFFDEEBBBDDDCCCDCCBCCDEECDDBDDBBBHGDCOCBSCBDDCEEC4BCBFBDDDBCCFICBjCBCaBiGCCEEEBBBTccBhBBCBBECBCWCBDBCGDB0B0BBuBBCgBCK0BCDMCBgDCxBoBBo6CqBBDCB5XFBjkCIBC2D2DBqBBgCMBChBBnD0ECBHBCgDCBHBJFBLHBJHBJFBLHBJHBJNBDHBJHBJHBJEBCBBHEEBBBCBBJDBDBBJHBLCBCBBzIEEBEEcKFDBBJDBF2B2Bs1CvBBCEEBGCFCCBCCBEBGiDCBIICFFNlBBCGG0oesBCUaCoEMCBBBC+BCBGBCCCDICFCCDCCBBBCSCGGGCMCFCCDEECICbEE2ZqBBGIBIvCBh6TGBNEBqhBZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfB7sZZBbGBCRBbZBbDBCCCBFBCKBbZBbZBbZBbZBbZBbZBbZBbZBbbBdYBCFBbYBCFBbYBCFBbYBCFBbYBCFBC15B15BBIBCTBHFB4vChBB', false)),
|
|
206
227
|
Lm: () => new UnicodeRangeTable(decodeRanges('wVRBFLBPEBICCmEGG-OnHnHlFBBuIBBFgBgBKEEhFoFoF1mBgEgE2R72B72BsDkTkTxOFBvF+BBOjBjBBjBByVOORMBg-CBByHgGgG2OsBsBBDBGiDiDB+C+CBBB34bjnBjnBBEBvIzDzDdBB6DIBxCYYqDCBEBB2OXXqEtDtDWBBoDDBKngVngVuBBBh-BFBCpBBCIB0sBhBhBxuXDB9PCBpBBBnRMBhCBBCtgQtgQBCBCGBCBByhM9BBqGGBudgjBgjB', false)),
|
|
207
228
|
Lo: () => new UnicodeRangeTable(decodeRanges('qFQQhIFFBCBxG8Z8ZBZBFDBuBfBCJBkBBBCiDBCZZBLLBBBDRRCdBe4CBMZZBfBWVBrBYBIKBGXBCFBSoBB8B1BBETTIJBROBFHBDBBDVBCGBCEEBCBERROBBCCBPBBLJJBEBFBBDVBCGBCBBCBBCBBgBDBCUUBBBRIBCCBCVBCGBCBBCEBETTQBBYMMBGBDBBDVBCGBCBBCEBEffBCCBBBQSSCFBECBCDBEBBCCCBEEBEEBBBELBX1B1BBGBCCBCWBCPBEbbBBBDDDBffFHBCCBCWBCJBCEBEgBgBBCCBQQBSSBHBCCBCoBBDRRGCBJCBZFBGRBEXBCIBCDDBFB7BvBBCBBNFB8BBBCCCBDBCXBCCCBIBCBBKDDBDBYDBhBgCgCBGBCjBBcEB0DqBBVRRBEBFDBEEEBIIBBBFMBNyDyDBnKBCDBDGBCCCBCBDoBBCDBDgBBCDBDGBCCCBCBDOBC4BBCDBDiCBmBPByDrTBDQBCZBGqCBHHBIRBOSBPRBPMBCCBQzBBpBkCkCBhBBC0BBIEBDhBBCGGBkCBLeByBdBDEBMrBBFZB3BWBK0BBxFuBBSHB3BdBOBBLrBBbjBBqBCBLdByDDBCFBCBBE7hB7hBBCB4-C3BBZWBKGBCGBCGBCGBCGBCGBCGBCGBoR2B2BF1CBJCCB4CBFGGBpBBC9CBSfBxBPBhQ-tGBhC0wUBC2jBBkCnBBJrIBFPBLBBjCyByBBkCBqFoDoDEGBCCBCDBCWBezBBPxBB-BFBECCBMMBaBLWBacBIuBBuBEBDIBLEBCoBBYCBCHBVPBCFBEEEBwBBCEEBDDBDBDCCZBBEKBIPPBEBDFBDFBKGBCGByEiBBej9KBNWBFwBBloItLBDpDBkCCCBIBCMBCEBCCCBCCBCCBqDBiBqLBT-BBD1BBpBLB1DEBCmEBqDJBCsBBDeBEFBDFBDFBDCBkBLBCZBCSBCBBCOBDNBjB6DBmMcBEwBBwBfBOTBCHBHlBBLdBDjBBFHBhEtCBjDnBBJzBB9CzBBN2JBKVBLHB5EFBDCCBqBBCBBEDDBVBLWBKeBiCSBCBBLVBLZBnC3BBHBBhCQQBCBCCBCcBrBcBEcBkBHBCbBc1BBLVBLSBORBvDoCB4FjBBnBDBCxJxJBoBBHBBRCB8BcBLJJBUBrBRBvBUBcWBN0BB6BBBDOOBrBBhBYBbjBBeDDJiBBENNBuBBPDBWCCkBRBCYBUBBgCGBCCCBCBCOBCJBIuBBnBHBDBBDVBCGBCBBCEBETTNEBfJBCDDClBBCaaCtBtBBzBBTDBVCBfvBBVBBC5F5FBtBBqBDBlBvBBV8B8BBpBBOoCoCBZBmBGB6FrBB0GHBDDDBGBCBBCXBQCC-CHBDmBBRCCdLLBmBBIWWMtBBUTTBnCBoGgBBgBIBCkBBSyByBBcBxDGBCBBClBBWaaBEBCBBCfBPoKoKBRBQCCBLBChBB9DwCwCB4cBnHjGBtyCgDBQvhBBSFBa68DBGmSB61GdBj3B4RBIeBSuCBSdBTvBB0BUBGSB0NnBB2MqCBGwFwFB2-FBJ1mBBqBJB43IiJBQeeBBBDPPBCBJrMBloCqDBGMBEIBIJBxzI2P2PBrBBiBiKiKBcBTrBBlPaBmHdBDwPwPBFBCDBCBBCOBCkGB8pBDBCaBCBBCDDCJBCDBCCCHFFCECBBBCBBCDDCICBCCDDBCGBCDBCDBCCCBIBCQBGCBCEBCQBlqE-2pBBhB5hEBH9GBDh0FBPwpHBQtTBjtC9QBjvBq6EBG-iEB', false)),
|
|
@@ -410,8 +431,11 @@ class UnicodeTables {
|
|
|
410
431
|
Zanabazar_Square: () => new UnicodeRangeTable(decodeRanges('gwmCnC', true))
|
|
411
432
|
});
|
|
412
433
|
static FOLD_CATEGORIES = new LazyMap({
|
|
413
|
-
|
|
414
|
-
|
|
434
|
+
C: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
|
|
435
|
+
Cn: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false)),
|
|
436
|
+
L: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
|
|
437
|
+
LC: () => new UnicodeRangeTable(decodeRanges('latkpBtkpBCAB', false)),
|
|
438
|
+
Ll: () => new UnicodeRangeTable(decodeRanges('hCZBmDWBCGBiBuBCEECDOCDuBCBECEBBCCCBCCBBBDDBCBBCCBEBBCBBCECBCCDCCBCCBBBCCCBEEIBBCBBCBBCOCDQCDBBCCCBBBC4BCIBBCBBDCCBCBCGC3HrBrBCEEJHHCCBCCCBCCBPBCIBkBJJCUCGDDCBBDyBBxBgBCK2BCBMCD+CCDlBBq6ClBBCGGzW1CB0kCHHBpBBDCBhK0ECKgDCKHBJFBLHBJHBJFBMGCJHBZHBJHBJHBJEBMEBMDBNEBMEBqJEEBHHxC9zC9zCBuBBxBCCBBBDGCBCBCDDJCBCgDCJCCFuqeuqeCqBCUaCoEMCE8BCLECBICFCCDCCEUCBDBCEBCOCBCBCCCBEECKCZs5Vs5VBYBmmBnBBpEjBB9EKBCOBCGBCBBr3ByBB+EVB75CfBhsVfBh1ehBB', false)),
|
|
415
439
|
Lt: () => new UnicodeRangeTable(decodeRanges('kOCCBCCBCClBCCtsHHBJHBJHBMQQwBAB', false)),
|
|
416
440
|
Lu: () => new UnicodeRangeTable(decodeRanges('hDZB7BqBqBBWBCHBCuBCEECDOCDsBCDECBBBDCCDEEGDDECBDDDCCCDFFDEECDDECCGBBCBBCBBCOCBSCDBBCEECkBCEQCJDDBCCFICBEBCBBCCCBEEBCCBCBCEBDCCBDDIDDCBBEFBGLLBnFnFsBCCEEEBBBvBDBCdBCBBECBCWCBDBCGD1BvBBCgBCK0BCDMCBgDCyBlBBq6CqBBDCB5XFBjkCIBCvHvHERRzD0ECGGGC8CCBHBJFBLHBJHBJFBMGCJHBJNBzBBBNSSBPPBEEpL2B2Bs1CvBBCEEBGCHDDLiDCJCCFNNBkBBCGG0oesBCUaCoEMCE8BCLCCDICFFFCBBDSCMOCFCCDEEGECb9a9advCBi8UZBumBnBBpEjBB8EKBCOBCGBCBBk4ByBB+DVB75CfBhsVfBj1ehBB', false)),
|
|
417
441
|
M: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
|
|
@@ -420,7 +444,9 @@ class UnicodeTables {
|
|
|
420
444
|
static FOLD_SCRIPT = new LazyMap({
|
|
421
445
|
Common: () => new UnicodeRangeTable(decodeRanges('8cgBgB', false)),
|
|
422
446
|
Greek: () => new UnicodeRangeTable(decodeRanges('1FwUwU', false)),
|
|
423
|
-
Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false))
|
|
447
|
+
Inherited: () => new UnicodeRangeTable(decodeRanges('5cgBgBlgHAB', false)),
|
|
448
|
+
Latin: () => new UnicodeRangeTable(decodeRanges('y+pBCC', false)),
|
|
449
|
+
Unknown: () => new UnicodeRangeTable(decodeRanges('z+pBCC', false))
|
|
424
450
|
});
|
|
425
451
|
}
|
|
426
452
|
|
|
@@ -674,7 +700,7 @@ class Utils {
|
|
|
674
700
|
|
|
675
701
|
// Returns the array of runes in the specified Java UTF-16 string.
|
|
676
702
|
static stringToRunes(str) {
|
|
677
|
-
return String(str)
|
|
703
|
+
return Array.from(String(str)).map(s => s.codePointAt(0));
|
|
678
704
|
}
|
|
679
705
|
|
|
680
706
|
// Returns the Java UTF-16 string containing the single rune |r|.
|
|
@@ -945,6 +971,14 @@ class MachineInputBase {
|
|
|
945
971
|
endPos() {
|
|
946
972
|
return this.end;
|
|
947
973
|
}
|
|
974
|
+
hasString() {
|
|
975
|
+
return false;
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
// Helper for the exact-literal fast-path execution router
|
|
979
|
+
prefixLength() {
|
|
980
|
+
return 0;
|
|
981
|
+
}
|
|
948
982
|
}
|
|
949
983
|
|
|
950
984
|
// An implementation of MachineInput for UTF-8 byte arrays.
|
|
@@ -956,6 +990,14 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
956
990
|
this.start = start;
|
|
957
991
|
this.end = end;
|
|
958
992
|
}
|
|
993
|
+
hasString(prefilter, pos) {
|
|
994
|
+
const target = prefilter.bytes;
|
|
995
|
+
if (target.length === 0) return true;
|
|
996
|
+
|
|
997
|
+
// Reuse the high-speed indexOf method already implemented below
|
|
998
|
+
const idx = this.indexOf(this.bytes, target, this.start + pos);
|
|
999
|
+
return idx !== -1 && idx <= this.end - target.length;
|
|
1000
|
+
}
|
|
959
1001
|
|
|
960
1002
|
// Returns the rune at the specified index; the units are
|
|
961
1003
|
// unspecified, but could be UTF-8 byte, UTF-16 char, or rune
|
|
@@ -1032,10 +1074,10 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1032
1074
|
indexOf(source, target, fromIndex = 0) {
|
|
1033
1075
|
let targetLength = target.length;
|
|
1034
1076
|
if (targetLength === 0) {
|
|
1035
|
-
return -1;
|
|
1077
|
+
return fromIndex <= this.end ? fromIndex : -1;
|
|
1036
1078
|
}
|
|
1037
|
-
let
|
|
1038
|
-
for (let i = fromIndex; i <=
|
|
1079
|
+
let limit = this.end - targetLength;
|
|
1080
|
+
for (let i = fromIndex; i <= limit; i++) {
|
|
1039
1081
|
for (let j = 0; j < targetLength; j++) {
|
|
1040
1082
|
if (source[i + j] !== target[j]) {
|
|
1041
1083
|
break;
|
|
@@ -1046,6 +1088,9 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1046
1088
|
}
|
|
1047
1089
|
return -1;
|
|
1048
1090
|
}
|
|
1091
|
+
prefixLength(re2) {
|
|
1092
|
+
return re2.prefixUTF8.length;
|
|
1093
|
+
}
|
|
1049
1094
|
}
|
|
1050
1095
|
|
|
1051
1096
|
// |pos| and |width| are in JS "char" units.
|
|
@@ -1056,6 +1101,10 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1056
1101
|
this.start = start;
|
|
1057
1102
|
this.end = end;
|
|
1058
1103
|
}
|
|
1104
|
+
hasString(prefilter, pos) {
|
|
1105
|
+
const idx = this.charSequence.indexOf(prefilter.str, this.start + pos);
|
|
1106
|
+
return idx !== -1 && idx <= this.end - prefilter.str.length;
|
|
1107
|
+
}
|
|
1059
1108
|
|
|
1060
1109
|
// Returns the rune at the specified index; the units are
|
|
1061
1110
|
// unspecified, but could be UTF-8 byte, UTF-16 char, or rune
|
|
@@ -1101,6 +1150,9 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1101
1150
|
const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
|
|
1102
1151
|
return Utils.emptyOpContext(r1, r2);
|
|
1103
1152
|
}
|
|
1153
|
+
prefixLength(re2) {
|
|
1154
|
+
return re2.prefix.length;
|
|
1155
|
+
}
|
|
1104
1156
|
}
|
|
1105
1157
|
class MachineInput {
|
|
1106
1158
|
static fromUTF8(bytes, start = 0, end = bytes.length) {
|
|
@@ -1191,6 +1243,17 @@ class RE2JSFlagsException extends RE2JSException {
|
|
|
1191
1243
|
}
|
|
1192
1244
|
}
|
|
1193
1245
|
|
|
1246
|
+
/**
|
|
1247
|
+
* An exception thrown for internal engine errors, such as corrupted bytecodes.
|
|
1248
|
+
*/
|
|
1249
|
+
class RE2JSInternalException extends RE2JSException {
|
|
1250
|
+
/** @param {string} message */
|
|
1251
|
+
constructor(message) {
|
|
1252
|
+
super(message);
|
|
1253
|
+
this.name = 'RE2JSInternalException';
|
|
1254
|
+
}
|
|
1255
|
+
}
|
|
1256
|
+
|
|
1194
1257
|
/**
|
|
1195
1258
|
* A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
|
|
1196
1259
|
*
|
|
@@ -1393,6 +1456,23 @@ class Matcher {
|
|
|
1393
1456
|
}
|
|
1394
1457
|
return this.substring(start, end);
|
|
1395
1458
|
}
|
|
1459
|
+
|
|
1460
|
+
/**
|
|
1461
|
+
* Returns a dictionary map of all named capturing groups and their matched values.
|
|
1462
|
+
* If a group was not matched, its value will be `null`.
|
|
1463
|
+
* @returns {Record<string, string|null>}
|
|
1464
|
+
*/
|
|
1465
|
+
getNamedGroups() {
|
|
1466
|
+
if (!this.hasMatch) {
|
|
1467
|
+
throw new RE2JSGroupException('perhaps no match attempted');
|
|
1468
|
+
}
|
|
1469
|
+
const result = {};
|
|
1470
|
+
for (const name of Object.keys(this.namedGroups)) {
|
|
1471
|
+
result[name] = this.group(name);
|
|
1472
|
+
}
|
|
1473
|
+
return result;
|
|
1474
|
+
}
|
|
1475
|
+
|
|
1396
1476
|
/**
|
|
1397
1477
|
* Returns the number of subgroups in this pattern.
|
|
1398
1478
|
*
|
|
@@ -1817,16 +1897,20 @@ class Inst {
|
|
|
1817
1897
|
}
|
|
1818
1898
|
return r === r0;
|
|
1819
1899
|
}
|
|
1820
|
-
|
|
1821
|
-
//
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1900
|
+
const len = this.runes.length;
|
|
1901
|
+
// If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
|
|
1902
|
+
if (len === 2 || len === 4 || len === 6 || len === 8) {
|
|
1903
|
+
for (let j = 0; j < len; j += 2) {
|
|
1904
|
+
if (r < this.runes[j]) {
|
|
1905
|
+
return false;
|
|
1906
|
+
}
|
|
1907
|
+
if (r <= this.runes[j + 1]) {
|
|
1908
|
+
return true;
|
|
1909
|
+
}
|
|
1828
1910
|
}
|
|
1911
|
+
return false; // Stop here
|
|
1829
1912
|
}
|
|
1913
|
+
|
|
1830
1914
|
// Otherwise binary search.
|
|
1831
1915
|
let lo = 0;
|
|
1832
1916
|
let hi = this.runes.length / 2 | 0;
|
|
@@ -1844,6 +1928,40 @@ class Inst {
|
|
|
1844
1928
|
}
|
|
1845
1929
|
return false;
|
|
1846
1930
|
}
|
|
1931
|
+
|
|
1932
|
+
// matchRunePos checks whether the instruction matches (and consumes) r.
|
|
1933
|
+
// If so, it returns the index of the matching rune pair.
|
|
1934
|
+
// If not, it returns -1.
|
|
1935
|
+
matchRunePos(r) {
|
|
1936
|
+
if (this.runes.length === 1) {
|
|
1937
|
+
const r0 = this.runes[0];
|
|
1938
|
+
if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
|
|
1939
|
+
return Unicode.equalsIgnoreCase(r0, r) ? 0 : -1;
|
|
1940
|
+
}
|
|
1941
|
+
return r === r0 ? 0 : -1;
|
|
1942
|
+
}
|
|
1943
|
+
const len = this.runes.length;
|
|
1944
|
+
if (len === 2 || len === 4 || len === 6 || len === 8) {
|
|
1945
|
+
for (let j = 0; j < len; j += 2) {
|
|
1946
|
+
if (r < this.runes[j]) return -1;
|
|
1947
|
+
if (r <= this.runes[j + 1]) return Math.floor(j / 2);
|
|
1948
|
+
}
|
|
1949
|
+
return -1;
|
|
1950
|
+
}
|
|
1951
|
+
let lo = 0;
|
|
1952
|
+
let hi = Math.floor(len / 2);
|
|
1953
|
+
while (lo < hi) {
|
|
1954
|
+
const m = lo + hi >> 1;
|
|
1955
|
+
const c = this.runes[2 * m];
|
|
1956
|
+
if (c <= r) {
|
|
1957
|
+
if (r <= this.runes[2 * m + 1]) return m;
|
|
1958
|
+
lo = m + 1;
|
|
1959
|
+
} else {
|
|
1960
|
+
hi = m;
|
|
1961
|
+
}
|
|
1962
|
+
}
|
|
1963
|
+
return -1;
|
|
1964
|
+
}
|
|
1847
1965
|
/**
|
|
1848
1966
|
*
|
|
1849
1967
|
* @returns {string}
|
|
@@ -1859,7 +1977,7 @@ class Inst {
|
|
|
1859
1977
|
case Inst.EMPTY_WIDTH:
|
|
1860
1978
|
return `empty ${this.arg} -> ${this.out}`;
|
|
1861
1979
|
case Inst.MATCH:
|
|
1862
|
-
return
|
|
1980
|
+
return `match${this.arg !== 0 ? ` ${this.arg}` : ''}`;
|
|
1863
1981
|
case Inst.FAIL:
|
|
1864
1982
|
return 'fail';
|
|
1865
1983
|
case Inst.NOP:
|
|
@@ -1885,7 +2003,7 @@ class Inst {
|
|
|
1885
2003
|
class Thread {
|
|
1886
2004
|
constructor() {
|
|
1887
2005
|
this.inst = null;
|
|
1888
|
-
this.cap =
|
|
2006
|
+
this.cap = null; // Initialized to Int32Array later
|
|
1889
2007
|
}
|
|
1890
2008
|
}
|
|
1891
2009
|
|
|
@@ -1913,9 +2031,11 @@ class Queue {
|
|
|
1913
2031
|
return j;
|
|
1914
2032
|
}
|
|
1915
2033
|
clear() {
|
|
1916
|
-
|
|
1917
|
-
this.
|
|
1918
|
-
|
|
2034
|
+
// Prevent memory leaks by nulling out used object references
|
|
2035
|
+
for (let i = 0; i < this.size; i++) {
|
|
2036
|
+
this.denseThreads[i] = null;
|
|
2037
|
+
}
|
|
2038
|
+
// The sparse set logic safely ignores stale integers in Typed Arrays.
|
|
1919
2039
|
this.size = 0;
|
|
1920
2040
|
}
|
|
1921
2041
|
toString() {
|
|
@@ -1944,7 +2064,8 @@ class Machine {
|
|
|
1944
2064
|
m.pool = [];
|
|
1945
2065
|
m.poolSize = 0;
|
|
1946
2066
|
m.matched = false;
|
|
1947
|
-
|
|
2067
|
+
// Use Int32Array instead of standard JS array
|
|
2068
|
+
m.matchcap = new Int32Array(m.prog.numCap < 2 ? 2 : m.prog.numCap);
|
|
1948
2069
|
m.ncap = 0;
|
|
1949
2070
|
return m;
|
|
1950
2071
|
}
|
|
@@ -1958,27 +2079,30 @@ class Machine {
|
|
|
1958
2079
|
if (ncap > this.matchcap.length) {
|
|
1959
2080
|
this.initNewCap(ncap);
|
|
1960
2081
|
} else {
|
|
1961
|
-
this.resetCap(
|
|
2082
|
+
this.resetCap();
|
|
1962
2083
|
}
|
|
1963
2084
|
}
|
|
1964
|
-
|
|
2085
|
+
|
|
2086
|
+
// Wipes existing typed array memory without reallocating
|
|
2087
|
+
resetCap() {
|
|
1965
2088
|
for (let i = 0; i < this.poolSize; i++) {
|
|
1966
2089
|
const t = this.pool[i];
|
|
1967
|
-
t.cap
|
|
2090
|
+
t.cap.fill(0);
|
|
1968
2091
|
}
|
|
1969
2092
|
}
|
|
1970
2093
|
initNewCap(ncap) {
|
|
1971
2094
|
for (let i = 0; i < this.poolSize; i++) {
|
|
1972
2095
|
const t = this.pool[i];
|
|
1973
|
-
t.cap =
|
|
2096
|
+
t.cap = new Int32Array(ncap);
|
|
1974
2097
|
}
|
|
1975
|
-
this.matchcap =
|
|
2098
|
+
this.matchcap = new Int32Array(ncap);
|
|
1976
2099
|
}
|
|
1977
2100
|
submatches() {
|
|
1978
2101
|
if (this.ncap === 0) {
|
|
1979
2102
|
return Utils.emptyInts();
|
|
1980
2103
|
}
|
|
1981
|
-
|
|
2104
|
+
// Use subarray() to create a zero-allocation view before converting
|
|
2105
|
+
return Array.from(this.matchcap.subarray(0, this.ncap));
|
|
1982
2106
|
}
|
|
1983
2107
|
|
|
1984
2108
|
// alloc() allocates a new thread with the given instruction.
|
|
@@ -1990,6 +2114,7 @@ class Machine {
|
|
|
1990
2114
|
t = this.pool[this.poolSize];
|
|
1991
2115
|
} else {
|
|
1992
2116
|
t = new Thread();
|
|
2117
|
+
t.cap = new Int32Array(this.matchcap.length);
|
|
1993
2118
|
}
|
|
1994
2119
|
t.inst = inst;
|
|
1995
2120
|
return t;
|
|
@@ -2019,7 +2144,7 @@ class Machine {
|
|
|
2019
2144
|
return false;
|
|
2020
2145
|
}
|
|
2021
2146
|
this.matched = false;
|
|
2022
|
-
this.matchcap
|
|
2147
|
+
this.matchcap.fill(-1);
|
|
2023
2148
|
let runq = this.q0;
|
|
2024
2149
|
let nextq = this.q1;
|
|
2025
2150
|
let r = input.step(pos);
|
|
@@ -2090,6 +2215,85 @@ class Machine {
|
|
|
2090
2215
|
this.freeQueue(nextq);
|
|
2091
2216
|
return this.matched;
|
|
2092
2217
|
}
|
|
2218
|
+
matchSet(input, pos, anchor) {
|
|
2219
|
+
const startCond = this.re2.cond;
|
|
2220
|
+
if (startCond === Utils.EMPTY_ALL) return [];
|
|
2221
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2222
|
+
return [];
|
|
2223
|
+
}
|
|
2224
|
+
let runq = this.q0;
|
|
2225
|
+
let nextq = this.q1;
|
|
2226
|
+
let r = input.step(pos);
|
|
2227
|
+
let rune = r >> 3;
|
|
2228
|
+
let width = r & 7;
|
|
2229
|
+
let rune1 = -1;
|
|
2230
|
+
let width1 = 0;
|
|
2231
|
+
if (r !== MachineInputBase.EOF()) {
|
|
2232
|
+
r = input.step(pos + width);
|
|
2233
|
+
rune1 = r >> 3;
|
|
2234
|
+
width1 = r & 7;
|
|
2235
|
+
}
|
|
2236
|
+
let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
|
|
2237
|
+
const matches = new Set();
|
|
2238
|
+
while (true) {
|
|
2239
|
+
if (runq.isEmpty()) {
|
|
2240
|
+
if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
|
|
2241
|
+
}
|
|
2242
|
+
if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
|
|
2243
|
+
this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
|
|
2244
|
+
}
|
|
2245
|
+
const nextPos = pos + width;
|
|
2246
|
+
flag = input.context(nextPos);
|
|
2247
|
+
for (let j = 0; j < runq.size; j++) {
|
|
2248
|
+
let t = runq.denseThreads[j];
|
|
2249
|
+
if (t === null) continue;
|
|
2250
|
+
const i = t.inst;
|
|
2251
|
+
let add = false;
|
|
2252
|
+
switch (i.op) {
|
|
2253
|
+
case Inst.MATCH:
|
|
2254
|
+
if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) break;
|
|
2255
|
+
matches.add(i.arg); // Record the matched Set ID
|
|
2256
|
+
break;
|
|
2257
|
+
case Inst.RUNE:
|
|
2258
|
+
add = i.matchRune(rune);
|
|
2259
|
+
break;
|
|
2260
|
+
case Inst.RUNE1:
|
|
2261
|
+
add = rune === i.runes[0];
|
|
2262
|
+
break;
|
|
2263
|
+
case Inst.RUNE_ANY:
|
|
2264
|
+
add = true;
|
|
2265
|
+
break;
|
|
2266
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
2267
|
+
add = rune !== Codepoint.CODES.get('\n');
|
|
2268
|
+
break;
|
|
2269
|
+
default:
|
|
2270
|
+
throw new RE2JSInternalException('bad inst');
|
|
2271
|
+
}
|
|
2272
|
+
if (add) {
|
|
2273
|
+
t = this.add(nextq, i.out, nextPos, t.cap, flag, t);
|
|
2274
|
+
}
|
|
2275
|
+
if (t !== null) {
|
|
2276
|
+
this.freeThread(t);
|
|
2277
|
+
runq.denseThreads[j] = null;
|
|
2278
|
+
}
|
|
2279
|
+
}
|
|
2280
|
+
runq.clear();
|
|
2281
|
+
if (width === 0) break;
|
|
2282
|
+
pos += width;
|
|
2283
|
+
rune = rune1;
|
|
2284
|
+
width = width1;
|
|
2285
|
+
if (rune !== -1) {
|
|
2286
|
+
r = input.step(pos + width);
|
|
2287
|
+
rune1 = r >> 3;
|
|
2288
|
+
width1 = r & 7;
|
|
2289
|
+
}
|
|
2290
|
+
const tmpq = runq;
|
|
2291
|
+
runq = nextq;
|
|
2292
|
+
nextq = tmpq;
|
|
2293
|
+
}
|
|
2294
|
+
this.freeQueue(nextq);
|
|
2295
|
+
return Array.from(matches).sort((a, b) => a - b);
|
|
2296
|
+
}
|
|
2093
2297
|
step(runq, nextq, pos, nextPos, c, nextCond, anchor, atEnd) {
|
|
2094
2298
|
const longest = this.re2.longest;
|
|
2095
2299
|
for (let j = 0; j < runq.size; j++) {
|
|
@@ -2110,7 +2314,9 @@ class Machine {
|
|
|
2110
2314
|
}
|
|
2111
2315
|
if (this.ncap > 0 && (!longest || !this.matched || this.matchcap[1] < pos)) {
|
|
2112
2316
|
t.cap[1] = pos;
|
|
2113
|
-
|
|
2317
|
+
// Using subarray creates a fast view, avoiding a full array copy
|
|
2318
|
+
// until the submatches are finalized at the very end.
|
|
2319
|
+
this.matchcap.set(t.cap.subarray(0, this.ncap));
|
|
2114
2320
|
}
|
|
2115
2321
|
if (!longest) {
|
|
2116
2322
|
this.freeQueue(runq, j + 1);
|
|
@@ -2130,7 +2336,7 @@ class Machine {
|
|
|
2130
2336
|
add = c !== Codepoint.CODES.get('\n');
|
|
2131
2337
|
break;
|
|
2132
2338
|
default:
|
|
2133
|
-
throw new
|
|
2339
|
+
throw new RE2JSInternalException('bad inst');
|
|
2134
2340
|
}
|
|
2135
2341
|
if (add) {
|
|
2136
2342
|
t = this.add(nextq, i.out, nextPos, t.cap, nextCond, t);
|
|
@@ -2188,6 +2394,7 @@ class Machine {
|
|
|
2188
2394
|
t.inst = inst;
|
|
2189
2395
|
}
|
|
2190
2396
|
if (this.ncap > 0 && t.cap !== cap) {
|
|
2397
|
+
// Direct assignment utilizing Typed Array performance
|
|
2191
2398
|
for (let c = 0; c < this.ncap; c++) {
|
|
2192
2399
|
t.cap[c] = cap[c];
|
|
2193
2400
|
}
|
|
@@ -2222,20 +2429,23 @@ const arraysEqual = (a, b) => {
|
|
|
2222
2429
|
return true;
|
|
2223
2430
|
};
|
|
2224
2431
|
/**
 * A single lazily-built DFA state: the set of NFA instruction PCs it stands
 * for, whether that set contains a MATCH, and its cached outgoing transitions.
 */
class DFAState {
  /**
   * @param {Int32Array} nfaStates - Instruction PCs represented by this state.
   * @param {boolean} isMatch - Whether the state is accepting.
   * @param {number[]} [matchIDs=[]] - Which Set patterns matched in this state.
   */
  constructor(nfaStates, isMatch, matchIDs = []) {
    const asciiSlots = Unicode.MAX_ASCII + 1;

    this.nfaStates = nfaStates;
    this.isMatch = isMatch;
    this.matchIDs = matchIDs;
    // Flat per-code-point table for ASCII transitions; null means "not cached yet".
    this.nextAscii = Array.from({ length: asciiSlots }, () => null);
    // Transition cache (numeric key -> DFAState) for everything the table does not cover.
    this.nextMap = new Map();
  }
}
|
|
2232
2440
|
class DFA {
|
|
2441
|
+
static MAX_CACHE_CLEARS = 5;
|
|
2233
2442
|
constructor(prog) {
|
|
2234
2443
|
this.prog = prog;
|
|
2235
2444
|
this.stateCache = new Map(); // hash(number) -> DFAState[]
|
|
2236
2445
|
this.stateCount = 0; // Tracks total states for memory limits
|
|
2237
2446
|
this.startState = null;
|
|
2238
2447
|
this.stateLimit = 10000; // Prevent memory explosion (ReDoS protection)
|
|
2448
|
+
this.cacheClears = 0; // Track thrashing
|
|
2239
2449
|
this.failed = false; // mark if DFA cannot work with provided prog
|
|
2240
2450
|
}
|
|
2241
2451
|
|
|
@@ -2244,6 +2454,7 @@ class DFA {
|
|
|
2244
2454
|
const closure = new Set();
|
|
2245
2455
|
const stack = [...pcs];
|
|
2246
2456
|
let isMatch = false;
|
|
2457
|
+
const matchIDs = [];
|
|
2247
2458
|
while (stack.length > 0) {
|
|
2248
2459
|
const pc = stack.pop();
|
|
2249
2460
|
if (closure.has(pc)) continue;
|
|
@@ -2252,6 +2463,7 @@ class DFA {
|
|
|
2252
2463
|
switch (inst.op) {
|
|
2253
2464
|
case Inst.MATCH:
|
|
2254
2465
|
isMatch = true;
|
|
2466
|
+
if (!matchIDs.includes(inst.arg)) matchIDs.push(inst.arg);
|
|
2255
2467
|
break;
|
|
2256
2468
|
case Inst.ALT:
|
|
2257
2469
|
case Inst.ALT_MATCH:
|
|
@@ -2269,9 +2481,11 @@ class DFA {
|
|
|
2269
2481
|
}
|
|
2270
2482
|
}
|
|
2271
2483
|
const sortedPCs = Int32Array.from(closure).sort();
|
|
2484
|
+
matchIDs.sort((a, b) => a - b);
|
|
2272
2485
|
return {
|
|
2273
2486
|
pcs: sortedPCs,
|
|
2274
|
-
isMatch
|
|
2487
|
+
isMatch,
|
|
2488
|
+
matchIDs
|
|
2275
2489
|
};
|
|
2276
2490
|
}
|
|
2277
2491
|
|
|
@@ -2297,6 +2511,8 @@ class DFA {
|
|
|
2297
2511
|
bucket = [];
|
|
2298
2512
|
this.stateCache.set(hash, bucket);
|
|
2299
2513
|
}
|
|
2514
|
+
|
|
2515
|
+
// DFA already failed once - exit
|
|
2300
2516
|
if (this.failed) return null;
|
|
2301
2517
|
|
|
2302
2518
|
// Safety: prevent memory exhaustion from state explosion
|
|
@@ -2305,12 +2521,18 @@ class DFA {
|
|
|
2305
2521
|
this.stateCache.clear();
|
|
2306
2522
|
this.stateCount = 0;
|
|
2307
2523
|
this.startState = null;
|
|
2308
|
-
this.
|
|
2524
|
+
this.cacheClears++;
|
|
2525
|
+
|
|
2526
|
+
// If this regex causes continuous cache thrashing, permanently fall back to NFA
|
|
2527
|
+
// to avoid spending CPU cycles constantly rebuilding the DFA tree.
|
|
2528
|
+
if (this.cacheClears >= DFA.MAX_CACHE_CLEARS) {
|
|
2529
|
+
this.failed = true;
|
|
2530
|
+
}
|
|
2309
2531
|
return null;
|
|
2310
2532
|
}
|
|
2311
2533
|
|
|
2312
2534
|
// State not found, create it and add to bucket
|
|
2313
|
-
const state = new DFAState(sortedPCs, closureResult.isMatch);
|
|
2535
|
+
const state = new DFAState(sortedPCs, closureResult.isMatch, closureResult.matchIDs);
|
|
2314
2536
|
bucket.push(state);
|
|
2315
2537
|
this.stateCount++;
|
|
2316
2538
|
return state;
|
|
@@ -2330,76 +2552,808 @@ class DFA {
|
|
|
2330
2552
|
return state.nextMap.get(key);
|
|
2331
2553
|
}
|
|
2332
2554
|
}
|
|
2333
|
-
const nextPCs = [];
|
|
2334
|
-
for (let i = 0; i < state.nfaStates.length; i++) {
|
|
2335
|
-
const pc = state.nfaStates[i];
|
|
2336
|
-
const inst = this.prog.getInst(pc);
|
|
2337
|
-
if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
|
|
2338
|
-
nextPCs.push(inst.out);
|
|
2555
|
+
const nextPCs = [];
|
|
2556
|
+
for (let i = 0; i < state.nfaStates.length; i++) {
|
|
2557
|
+
const pc = state.nfaStates[i];
|
|
2558
|
+
const inst = this.prog.getInst(pc);
|
|
2559
|
+
if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
|
|
2560
|
+
nextPCs.push(inst.out);
|
|
2561
|
+
}
|
|
2562
|
+
}
|
|
2563
|
+
if (anchor === RE2Flags.UNANCHORED) {
|
|
2564
|
+
nextPCs.push(this.prog.start);
|
|
2565
|
+
}
|
|
2566
|
+
const nextState = this.getState(nextPCs);
|
|
2567
|
+
|
|
2568
|
+
// Cache the result
|
|
2569
|
+
if (anchor === RE2Flags.UNANCHORED && charCode <= Unicode.MAX_ASCII) {
|
|
2570
|
+
state.nextAscii[charCode] = nextState;
|
|
2571
|
+
} else {
|
|
2572
|
+
const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
|
|
2573
|
+
state.nextMap.set(key, nextState);
|
|
2574
|
+
}
|
|
2575
|
+
return nextState;
|
|
2576
|
+
}
|
|
2577
|
+
|
|
2578
|
+
// The hot loop: Execute the Lazy DFA
|
|
2579
|
+
match(input, pos, anchor) {
|
|
2580
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2581
|
+
return false;
|
|
2582
|
+
}
|
|
2583
|
+
if (!this.startState) {
|
|
2584
|
+
this.startState = this.getState([this.prog.start]);
|
|
2585
|
+
if (!this.startState) return null; // Fallback to NFA
|
|
2586
|
+
}
|
|
2587
|
+
let endPos = input.endPos();
|
|
2588
|
+
let currentState = this.startState;
|
|
2589
|
+
if (currentState.isMatch) {
|
|
2590
|
+
if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2591
|
+
if (pos === endPos) return true;
|
|
2592
|
+
} else {
|
|
2593
|
+
return true;
|
|
2594
|
+
}
|
|
2595
|
+
}
|
|
2596
|
+
let i = pos;
|
|
2597
|
+
while (i < endPos) {
|
|
2598
|
+
const r = input.step(i);
|
|
2599
|
+
const rune = r >> 3;
|
|
2600
|
+
const width = r & 7;
|
|
2601
|
+
|
|
2602
|
+
// prevent infinite loop on EOF
|
|
2603
|
+
if (width === 0) {
|
|
2604
|
+
break;
|
|
2605
|
+
}
|
|
2606
|
+
currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
|
|
2607
|
+
|
|
2608
|
+
// If we hit an unrecoverable DFA error or bailout, signal fallback
|
|
2609
|
+
if (currentState === null) return null;
|
|
2610
|
+
if (currentState.isMatch) {
|
|
2611
|
+
if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2612
|
+
if (i + width === endPos) return true;
|
|
2613
|
+
} else {
|
|
2614
|
+
return true;
|
|
2615
|
+
}
|
|
2616
|
+
}
|
|
2617
|
+
|
|
2618
|
+
// If we hit a dead end, and anchored, fail early
|
|
2619
|
+
if (currentState.nfaStates.length === 0) {
|
|
2620
|
+
if (anchor !== RE2Flags.UNANCHORED) return false;
|
|
2621
|
+
}
|
|
2622
|
+
i += width;
|
|
2623
|
+
}
|
|
2624
|
+
return false;
|
|
2625
|
+
}
|
|
2626
|
+
|
|
2627
|
+
// The hot loop for evaluating Multi-Pattern Sets
|
|
2628
|
+
matchSet(input, pos, anchor) {
|
|
2629
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
2630
|
+
return [];
|
|
2631
|
+
}
|
|
2632
|
+
if (!this.startState) {
|
|
2633
|
+
this.startState = this.getState([this.prog.start]);
|
|
2634
|
+
if (!this.startState) return null; // Fallback to NFA
|
|
2635
|
+
}
|
|
2636
|
+
let endPos = input.endPos();
|
|
2637
|
+
let currentState = this.startState;
|
|
2638
|
+
const matches = new Set();
|
|
2639
|
+
const checkMatch = (state, currentPos) => {
|
|
2640
|
+
if (state.isMatch) {
|
|
2641
|
+
if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
2642
|
+
if (currentPos === endPos) {
|
|
2643
|
+
state.matchIDs.forEach(id => matches.add(id));
|
|
2644
|
+
}
|
|
2645
|
+
} else {
|
|
2646
|
+
state.matchIDs.forEach(id => matches.add(id));
|
|
2647
|
+
}
|
|
2648
|
+
}
|
|
2649
|
+
};
|
|
2650
|
+
checkMatch(currentState, pos);
|
|
2651
|
+
let i = pos;
|
|
2652
|
+
while (i < endPos) {
|
|
2653
|
+
const r = input.step(i);
|
|
2654
|
+
const rune = r >> 3;
|
|
2655
|
+
const width = r & 7;
|
|
2656
|
+
if (width === 0) break;
|
|
2657
|
+
currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
|
|
2658
|
+
if (currentState === null) return null; // Bailout to NFA
|
|
2659
|
+
|
|
2660
|
+
i += width;
|
|
2661
|
+
checkMatch(currentState, i);
|
|
2662
|
+
if (currentState.nfaStates.length === 0) {
|
|
2663
|
+
if (anchor !== RE2Flags.UNANCHORED) break;
|
|
2664
|
+
}
|
|
2665
|
+
}
|
|
2666
|
+
return Array.from(matches).sort((a, b) => a - b);
|
|
2667
|
+
}
|
|
2668
|
+
}
|
|
2669
|
+
|
|
2670
|
+
// Number of bits in one word of the visited bit-set.
const VISITED_BITS = 32;
// Programs with more instructions than this are not handed to the backtracker.
const MAX_BACKTRACK_PROG = 500;
// Starting size for the job stack arrays
const INITIAL_JOB_CAPACITY = 256;
// Default size of the visited bit-set, in bits (256 Ki bits = 32 KiB).
const MAX_BACKTRACK_VECTOR = 256 * 1024;

/**
 * Reusable working state for the bit-state backtracker: capture registers,
 * an explicit job stack (three parallel typed arrays) and a bit-set that
 * records which (pc, pos) pairs have already been explored.
 */
class BitState {
  constructor() {
    this.end = 0;
    this.cap = new Int32Array(0);
    this.matchcap = new Int32Array(0);
    this.ncap = 0;

    // Parallel arrays acting as the backtrack job stack
    this.jobPc = new Int32Array(INITIAL_JOB_CAPACITY);
    this.jobArg = new Uint8Array(INITIAL_JOB_CAPACITY);
    this.jobPos = new Int32Array(INITIAL_JOB_CAPACITY);
    this.jobLen = 0;
    this.visited = new Uint32Array(0);
  }

  /**
   * Prepares this state for a new search.
   *
   * @param {object} prog - Compiled program; only `numInst()` is used here.
   * @param {number} end - End position of the input.
   * @param {number} ncap - Number of capture slots the caller wants.
   */
  reset(prog, end, ncap) {
    this.end = end;
    this.jobLen = 0;
    this.ncap = ncap;

    // One bit per (instruction, position) pair, rounded up to whole 32-bit words.
    const visitedSize = (prog.numInst() * (end + 1) + VISITED_BITS - 1) >>> 5;
    if (this.visited.length < visitedSize) {
      // Never allocate fewer words than this search needs: writes past the end
      // of a typed array are silently dropped, which would disable the
      // visited check instead of failing loudly.
      const defaultSize = Math.floor(MAX_BACKTRACK_VECTOR / VISITED_BITS);
      this.visited = new Uint32Array(Math.max(visitedSize, defaultSize));
    } else {
      this.visited.fill(0, 0, visitedSize);
    }

    if (this.cap.length < ncap) {
      // Must explicitly fill with -1 as Int32Array defaults to 0
      this.cap = new Int32Array(ncap).fill(-1);
    } else {
      this.cap.fill(-1, 0, ncap);
    }
    if (this.matchcap.length < ncap) {
      this.matchcap = new Int32Array(ncap).fill(-1);
    } else {
      this.matchcap.fill(-1, 0, ncap);
    }
  }

  /**
   * Marks (pc, pos) as visited.
   *
   * @param {number} pc - Instruction index.
   * @param {number} pos - Input position.
   * @returns {boolean} true the first time the pair is seen, false afterwards.
   */
  shouldVisit(pc, pos) {
    const n = pc * (this.end + 1) + pos;
    const idx = n >>> 5; // Equivalent to Math.floor(n / 32)
    const mask = 1 << (n & 31); // Bit n % 32 of that word

    if ((this.visited[idx] & mask) !== 0) {
      return false;
    }
    this.visited[idx] |= mask;
    return true;
  }

  /**
   * Pushes a job onto the stack. FAIL instructions are never pushed; jobs
   * with `arg` set bypass the visited check.
   *
   * @param {object} re2 - Regex object; `re2.prog.getInst(pc)` is consulted.
   * @param {number} pc - Instruction index.
   * @param {number} pos - Input position (or a saved capture value when `arg` is set).
   * @param {boolean} arg - Marks the job as the "second half" of an ALT or CAPTURE.
   */
  push(re2, pc, pos, arg) {
    if (re2.prog.getInst(pc).op !== Inst.FAIL && (arg || this.shouldVisit(pc, pos))) {
      if (this.jobLen >= this.jobPc.length) {
        // Stack is full: double all three parallel arrays.
        const newSize = this.jobPc.length * 2;
        const newPc = new Int32Array(newSize);
        newPc.set(this.jobPc);
        this.jobPc = newPc;
        const newArg = new Uint8Array(newSize);
        newArg.set(this.jobArg);
        this.jobArg = newArg;
        const newPos = new Int32Array(newSize);
        newPos.set(this.jobPos);
        this.jobPos = newPos;
      }
      this.jobPc[this.jobLen] = pc;
      this.jobArg[this.jobLen] = arg ? 1 : 0;
      this.jobPos[this.jobLen] = pos;
      this.jobLen++;
    }
  }

  /**
   * Attempts a match starting at instruction `pc` and input position `pos`.
   * On success with `ncap > 0` the capture positions are left in `matchcap`.
   *
   * @param {object} re2 - Regex object (`longest`, `prog`).
   * @param {object} input - Machine input (`step`, `context`).
   * @param {number} pc - Instruction to start from.
   * @param {number} pos - Input position to start from.
   * @param {number} anchor - One of the RE2Flags anchor constants.
   * @returns {boolean} Whether a match was found.
   * @throws {RE2JSInternalException} On a FAIL or unknown instruction.
   */
  tryBacktrack(re2, input, pc, pos, anchor) {
    const longest = re2.longest;
    this.push(re2, pc, pos, false);

    while (this.jobLen > 0) {
      this.jobLen--;
      let currentPc = this.jobPc[this.jobLen];
      let arg = this.jobArg[this.jobLen] === 1;
      let currentPos = this.jobPos[this.jobLen];

      // A job popped from the stack was already checked by push(); only
      // instructions reached by following `out` edges need a fresh check.
      let skipShouldVisit = true;

      while (true) {
        if (!skipShouldVisit) {
          if (!this.shouldVisit(currentPc, currentPos)) {
            break;
          }
        }
        skipShouldVisit = false;

        const inst = re2.prog.getInst(currentPc);
        switch (inst.op) {
          case Inst.FAIL: {
            throw new RE2JSInternalException('unexpected InstFail');
          }
          case Inst.ALT: {
            if (arg) {
              // Second visit: the first branch is exhausted, take the other one.
              arg = false;
              currentPc = inst.arg;
              continue;
            } else {
              // First visit: remember to come back, then follow `out`.
              this.push(re2, currentPc, currentPos, true);
              currentPc = inst.out;
              continue;
            }
          }
          case Inst.ALT_MATCH: {
            const outInst = re2.prog.getInst(inst.out);
            if (Inst.isRuneOp(outInst.op)) {
              this.push(re2, inst.arg, currentPos, false);
              currentPc = inst.out;
              continue;
            }
            this.push(re2, inst.out, this.end, false);
            currentPc = inst.arg;
            continue;
          }
          case Inst.RUNE: {
            const r = input.step(currentPos);
            if (r === MachineInputBase.EOF()) break;
            if (!inst.matchRune(r >> 3)) break;
            currentPos += r & 7;
            currentPc = inst.out;
            continue;
          }
          case Inst.RUNE1: {
            const r = input.step(currentPos);
            if (r === MachineInputBase.EOF()) break;
            if (r >> 3 !== inst.runes[0]) break;
            currentPos += r & 7;
            currentPc = inst.out;
            continue;
          }
          case Inst.RUNE_ANY_NOT_NL: {
            const r = input.step(currentPos);
            if (r === MachineInputBase.EOF()) break;
            if (r >> 3 === 10) break; // 10 is '\n'
            currentPos += r & 7;
            currentPc = inst.out;
            continue;
          }
          case Inst.RUNE_ANY: {
            const r = input.step(currentPos);
            if (r === MachineInputBase.EOF()) break;
            currentPos += r & 7;
            currentPc = inst.out;
            continue;
          }
          case Inst.CAPTURE: {
            if (arg) {
              // Undo job: currentPos carries the previous value of this slot.
              this.cap[inst.arg] = currentPos;
              break;
            } else {
              if (inst.arg < this.ncap) {
                // Save the old value so it is restored when we backtrack past here.
                this.push(re2, currentPc, this.cap[inst.arg], true);
                this.cap[inst.arg] = currentPos;
              }
              currentPc = inst.out;
              continue;
            }
          }
          case Inst.EMPTY_WIDTH: {
            const flag = input.context(currentPos);
            if ((inst.arg & ~flag) !== 0) break;
            currentPc = inst.out;
            continue;
          }
          case Inst.NOP: {
            currentPc = inst.out;
            continue;
          }
          case Inst.MATCH: {
            if (anchor === RE2Flags.ANCHOR_BOTH && currentPos !== this.end) {
              break;
            }
            // Caller only wants a yes/no answer.
            if (this.ncap === 0) return true;

            if (this.ncap > 1) {
              this.cap[1] = currentPos;
            }
            const old = this.matchcap[1];
            if (old === -1 || (longest && currentPos > 0 && currentPos > old)) {
              // Copy only the slots in use for this search.
              this.matchcap.set(this.cap.subarray(0, this.ncap));
            }

            // First match wins unless leftmost-longest semantics are requested.
            if (!longest) return true;
            // Cannot do better than consuming the whole input.
            if (currentPos === this.end) return true;
            break;
          }
          default: {
            throw new RE2JSInternalException('bad inst');
          }
        }
        break;
      }
    }

    // Only slots below ncap belong to this search. matchcap may be longer and
    // still hold values from an earlier use of this (pooled) object, so the
    // check must be on ncap rather than on matchcap.length.
    return longest && this.ncap > 1 && this.matchcap[1] >= 0;
  }
}
|
|
2879
|
+
// Idle BitState objects, kept so their typed arrays can be reused between searches.
const bitStatePool = [];

/**
 * Entry points for the bit-state backtracking matcher.
 */
class Backtracker {
  /**
   * @param {object} prog - Compiled program.
   * @returns {boolean} Whether the program has at most MAX_BACKTRACK_PROG instructions.
   */
  static shouldBacktrack(prog) {
    return !(prog.numInst() > MAX_BACKTRACK_PROG);
  }

  /**
   * @param {object} prog - Compiled program.
   * @returns {number} MAX_BACKTRACK_VECTOR divided by the instruction count
   *   (rounded down), or 0 when the program should not be backtracked at all.
   */
  static maxBitStateLen(prog) {
    return Backtracker.shouldBacktrack(prog) ? Math.floor(MAX_BACKTRACK_VECTOR / prog.numInst()) : 0;
  }

  /**
   * Runs the backtracker on `input`.
   *
   * @param {object} re2 - Regex object (`cond`, `prog`, `prefix`, `longest`).
   * @param {object} input - Machine input (`endPos`, `step`, `index`, `context`).
   * @param {number} pos - Position to start searching from.
   * @param {number} anchor - One of the RE2Flags anchor constants.
   * @param {number} ncap - Number of capture slots to report.
   * @returns {?number[]} Capture positions (empty array when `ncap` is 0), or
   *   null when there is no match.
   */
  static execute(re2, input, pos, anchor, ncap) {
    const startCond = re2.cond;
    if (startCond === Utils.EMPTY_ALL) {
      return null;
    }

    const anchoredAtStart = anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH;
    const needsBeginText = (startCond & Utils.EMPTY_BEGIN_TEXT) !== 0;
    // Either kind of start anchoring rules out a search that begins past offset 0.
    if (pos !== 0 && (anchoredAtStart || needsBeginText)) {
      return null;
    }

    const state = bitStatePool.length > 0 ? bitStatePool.pop() : new BitState();
    const end = input.endPos();
    state.reset(re2.prog, end, ncap);

    // One backtracking attempt with the match start recorded in cap[0].
    const attemptAt = (at) => {
      if (state.ncap > 0) {
        state.cap[0] = at;
      }
      return state.tryBacktrack(re2, input, re2.prog.start, at, anchor);
    };

    let matched = false;
    if (needsBeginText || anchoredAtStart) {
      if (attemptAt(pos)) {
        matched = true;
      }
    } else {
      // Unanchored: try every start position, including the empty string at `end`.
      let width = -1;
      while (pos <= end && width !== 0) {
        if (re2.prefix.length > 0) {
          const advance = input.index(re2, pos);
          if (advance < 0) {
            break;
          }
          pos += advance;
        }
        if (attemptAt(pos)) {
          matched = true;
          break;
        }
        const r = input.step(pos);
        width = r === MachineInputBase.EOF() ? 0 : r & 7;
        pos += width;
      }
    }

    let result = null;
    if (matched) {
      // Copy out only the requested slots: a recycled matchcap may be longer.
      result = ncap === 0 ? [] : Array.from(state.matchcap.subarray(0, ncap));
    }
    bitStatePool.push(state);
    return result;
  }
}
|
|
2944
|
+
|
|
2945
|
+
/**
 * Sparse-set based queue of instruction indices: membership test and insertion
 * are constant time, and `next()` walks the members in insertion order.
 */
class QueueOnePass {
  /**
   * @param {number} size - Exclusive upper bound for the values that can be stored.
   */
  constructor(size) {
    this.sparse = new Uint32Array(size);
    this.dense = new Uint32Array(size);
    this.size = 0;
    this.nextIndex = 0;
  }

  /** @returns {boolean} true once every inserted value has been handed out by next(). */
  empty() {
    return this.size <= this.nextIndex;
  }

  /** @returns {number} The next value in insertion order; advances the read cursor. */
  next() {
    const value = this.dense[this.nextIndex];
    this.nextIndex += 1;
    return value;
  }

  /** Forgets all members and rewinds the read cursor. */
  clear() {
    this.nextIndex = 0;
    this.size = 0;
  }

  /**
   * @param {number} u - Candidate value.
   * @returns {boolean} Whether `u` is currently a member.
   */
  contains(u) {
    if (!(u < this.sparse.length)) {
      return false;
    }
    // sparse[u] is only trusted if it points at a live dense slot that holds u.
    const slot = this.sparse[u];
    return slot < this.size && this.dense[slot] === u;
  }

  /**
   * Adds `u` unless it is already a member.
   * @param {number} u - Value to add.
   */
  insert(u) {
    if (this.contains(u)) {
      return;
    }
    this.insertNew(u);
  }

  /**
   * Appends `u` without a membership check; out-of-range values are ignored.
   * @param {number} u - Value to add.
   */
  insertNew(u) {
    if (u >= this.sparse.length) {
      return;
    }
    const slot = this.size;
    this.sparse[u] = slot;
    this.dense[slot] = u;
    this.size = slot + 1;
  }
}
|
|
2975
|
+
/**
 * Merges two lists of rune ranges (flat `[lo, hi, lo, hi, ...]` arrays) into
 * one, recording for every merged range which program counter it came from.
 *
 * @param {number[]} leftRunes - Ranges of the left branch.
 * @param {number[]} rightRunes - Ranges of the right branch.
 * @param {number} leftPC - Target recorded for ranges taken from `leftRunes`.
 * @param {number} rightPC - Target recorded for ranges taken from `rightRunes`.
 * @returns {?{merged: number[], next: number[]}} The merged ranges with their
 *   targets, or null when a range starts at or before the end of the range
 *   merged just before it.
 */
const mergeRuneSets = (leftRunes, rightRunes, leftPC, rightPC) => {
  const leftLen = leftRunes.length;
  const rightLen = rightRunes.length;
  const merged = [];
  const next = [];
  let li = 0;
  let ri = 0;

  while (li < leftLen || ri < rightLen) {
    // Take from the right only when it has ranges left and either the left is
    // exhausted or the right range starts strictly earlier; ties go left.
    const takeRight = ri < rightLen && (li >= leftLen || rightRunes[ri] < leftRunes[li]);
    const source = takeRight ? rightRunes : leftRunes;
    const at = takeRight ? ri : li;

    // The new range must start after the end of the last merged range.
    if (merged.length > 0 && source[at] <= merged[merged.length - 1]) {
      return null;
    }

    merged.push(source[at], source[at + 1]);
    next.push(takeRight ? rightPC : leftPC);
    if (takeRight) {
      ri += 2;
    } else {
      li += 2;
    }
  }

  return { merged, next };
};
|
|
3012
|
+
/**
 * Private, mutable copy of a compiled program used while building the
 * one-pass matcher. Each instruction additionally gets a `next` field.
 */
class OnePassProg {
  /**
   * @param {object} prog - Source program (`start`, `numCap`, `inst`).
   */
  constructor(prog) {
    this.start = prog.start;
    this.numCap = prog.numCap;
    this.inst = new Array(prog.inst.length);

    let index = 0;
    while (index < prog.inst.length) {
      const source = prog.inst[index];
      const clone = new Inst(source.op);
      clone.out = source.out;
      clone.arg = source.arg;
      // Copy the rune list so later edits never touch the source program.
      clone.runes = source.runes ? source.runes.slice() : [];
      clone.next = null;
      this.inst[index] = clone;
      index += 1;
    }
  }
}
|
|
3028
|
+
/**
 * Copies `prog` into a OnePassProg and rewrites a few common ALT
 * constructs so that some otherwise non-onepass programs become onepass.
 *
 * @param {object} prog - Source program; it is not modified.
 * @returns {OnePassProg} The rewritten copy.
 */
const onePassCopy = prog => {
  const copy = new OnePassProg(prog);
  const isAlt = (inst) => inst.op === Inst.ALT || inst.op === Inst.ALT_MATCH;

  for (let pc = 0; pc < copy.inst.length; pc++) {
    const inst = copy.inst[pc];
    if (!isAlt(inst)) continue;

    // Find which leg of this ALT leads to another ALT; `arg` is tried first.
    let altKey = 'arg';
    let otherKey = 'out';
    let altInst = copy.inst[inst[altKey]];
    if (!isAlt(altInst)) {
      altKey = 'out';
      otherKey = 'arg';
      altInst = copy.inst[inst[altKey]];
      if (!isAlt(altInst)) continue;
    }

    // Both legs pointing at ALTs is not handled.
    if (isAlt(copy.inst[inst[otherKey]])) continue;

    // Does the inner ALT loop straight back to this one, and through which leg?
    const backViaArg = altInst.out !== pc && altInst.arg === pc;
    const loopsBack = altInst.out === pc || backViaArg;
    const backKey = backViaArg ? 'arg' : 'out';
    const forwardKey = backViaArg ? 'out' : 'arg';

    // Replace the back-edge with this ALT's non-ALT target.
    if (loopsBack) {
      altInst[backKey] = inst[otherKey];
    }
    // Both ALTs now share a target: skip the inner ALT from here.
    if (inst[otherKey] === altInst[backKey]) {
      inst[altKey] = altInst[forwardKey];
    }
  }

  return copy;
};
|
|
3062
|
+
/**
 * Tries to turn `p` into a one-pass program by attaching a `next` dispatch
 * table to its instructions. `p` is modified in place.
 *
 * @param {OnePassProg} p - Program copy to convert.
 * @returns {?OnePassProg} `p` on success; null when the program has 1000 or
 *   more instructions or when some instruction fails the check.
 */
const makeOnePass = p => {
  const total = p.inst.length;
  if (total >= 1000) return null;

  const instQueue = new QueueOnePass(total);
  const visitQueue = new QueueOnePass(total);
  const onePassRunes = new Array(total);
  // m[pc] is true when a MATCH is reachable from pc without consuming input.
  const m = new Array(total).fill(false);

  // Dispatch table with one slot per rune range plus one, all pointing at `target`.
  const fanOut = (runes, target) => new Uint32Array(Math.floor(runes.length / 2) + 1).fill(target);

  // Sorted single-rune ranges for r0 and every rune in its simple case-fold orbit.
  const foldOrbit = r0 => {
    const pairs = [r0, r0];
    for (let r1 = Unicode.simpleFold(r0); r1 !== r0; r1 = Unicode.simpleFold(r1)) {
      pairs.push(r1, r1);
    }
    pairs.sort((a, b) => a - b);
    return pairs;
  };

  const alreadyBuilt = inst => inst.next && inst.next.length > 0;
  const foldsCase = inst => (inst.arg & RE2Flags.FOLD_CASE) !== 0;

  const check = pc => {
    const inst = p.inst[pc];
    if (visitQueue.contains(pc)) return true;
    visitQueue.insert(pc);

    let ok = true;
    switch (inst.op) {
      case Inst.ALT:
      case Inst.ALT_MATCH: {
        ok = check(inst.out) && check(inst.arg);
        const viaOut = m[inst.out];
        const viaArg = m[inst.arg];
        // Both legs reach MATCH without input.
        if (viaOut && viaArg) return false;
        if (viaArg) {
          // Keep the leg that matches on empty input in `out`.
          const previousOut = inst.out;
          inst.out = inst.arg;
          inst.arg = previousOut;
        }
        if (viaOut || viaArg) {
          m[pc] = true;
          inst.op = Inst.ALT_MATCH;
        }
        // Build the dispatch table from the two legs.
        const mergeRes = mergeRuneSets(
          onePassRunes[inst.out] || [],
          onePassRunes[inst.arg] || [],
          inst.out,
          inst.arg,
        );
        if (!mergeRes) return false;
        onePassRunes[pc] = mergeRes.merged;
        inst.next = new Uint32Array(mergeRes.next);
        break;
      }
      case Inst.CAPTURE:
      case Inst.EMPTY_WIDTH:
      case Inst.NOP: {
        ok = check(inst.out);
        m[pc] = m[inst.out];
        // Hand the successor's rune ranges back through this instruction.
        const inherited = onePassRunes[inst.out];
        onePassRunes[pc] = inherited ? inherited.slice() : [];
        inst.next = fanOut(onePassRunes[pc], inst.out);
        break;
      }
      case Inst.MATCH:
      case Inst.FAIL: {
        m[pc] = inst.op === Inst.MATCH;
        break;
      }
      case Inst.RUNE: {
        m[pc] = false;
        if (alreadyBuilt(inst)) break;
        instQueue.insert(inst.out);
        if (!inst.runes || inst.runes.length === 0) {
          onePassRunes[pc] = [];
          inst.next = new Uint32Array([inst.out]);
          break;
        }
        const runes = inst.runes.length === 1 && foldsCase(inst) ? foldOrbit(inst.runes[0]) : [...inst.runes];
        onePassRunes[pc] = runes;
        inst.next = fanOut(runes, inst.out);
        inst.op = Inst.RUNE;
        break;
      }
      case Inst.RUNE1: {
        m[pc] = false;
        if (alreadyBuilt(inst)) break;
        instQueue.insert(inst.out);
        const runes = foldsCase(inst) ? foldOrbit(inst.runes[0]) : [inst.runes[0], inst.runes[0]];
        onePassRunes[pc] = runes;
        inst.next = fanOut(runes, inst.out);
        // Now represented as an explicit range list, so it becomes a RUNE.
        inst.op = Inst.RUNE;
        break;
      }
      case Inst.RUNE_ANY: {
        m[pc] = false;
        if (alreadyBuilt(inst)) break;
        instQueue.insert(inst.out);
        onePassRunes[pc] = [0, Unicode.MAX_RUNE];
        inst.next = new Uint32Array([inst.out]);
        break;
      }
      case Inst.RUNE_ANY_NOT_NL: {
        m[pc] = false;
        if (alreadyBuilt(inst)) break;
        instQueue.insert(inst.out);
        onePassRunes[pc] = [0, 9, 11, Unicode.MAX_RUNE]; // \n is 10
        inst.next = fanOut(onePassRunes[pc], inst.out);
        break;
      }
    }
    return ok;
  };

  instQueue.clear();
  instQueue.insert(p.start);
  while (!instQueue.empty()) {
    visitQueue.clear();
    if (!check(instQueue.next())) return null;
  }

  // Install the computed range lists on the instructions that got one.
  onePassRunes.forEach((runes, pc) => {
    if (runes) p.inst[pc].runes = runes;
  });
  return p;
};
|
|
3197
|
+
/**
 * Undoes the parts of the one-pass rewrite that are not needed at match
 * time, using `original` to decide what each instruction used to be.
 *
 * @param {OnePassProg} p - One-pass program to clean up (modified in place).
 * @param {object} original - The program `p` was copied from.
 */
const cleanupOnePass = (p, original) => {
  for (let ix = 0; ix < original.inst.length; ix++) {
    const source = original.inst[ix];
    const op = source.op;

    if (op === Inst.ALT || op === Inst.ALT_MATCH || op === Inst.RUNE) {
      // These keep their dispatch table and rewritten runes.
      continue;
    }

    if (
      op === Inst.CAPTURE ||
      op === Inst.EMPTY_WIDTH ||
      op === Inst.NOP ||
      op === Inst.MATCH ||
      op === Inst.FAIL
    ) {
      p.inst[ix].next = null;
      continue;
    }

    if (op === Inst.RUNE1 || op === Inst.RUNE_ANY || op === Inst.RUNE_ANY_NOT_NL) {
      // Restore the original opcode and rune list of these single-rune ops.
      const target = p.inst[ix];
      target.next = null;
      target.op = op;
      target.runes = source.runes ? source.runes.slice() : [];
    }
  }
};
|
|
3222
|
+
class OnePass {
|
|
3223
|
+
static compile(prog) {
|
|
3224
|
+
if (prog.start === 0) return null;
|
|
3225
|
+
const startInst = prog.inst[prog.start];
|
|
3226
|
+
// onepass regexps must be strictly anchored
|
|
3227
|
+
if (startInst.op !== Inst.EMPTY_WIDTH || (startInst.arg & Utils.EMPTY_BEGIN_TEXT) === 0) {
|
|
3228
|
+
return null;
|
|
3229
|
+
}
|
|
3230
|
+
let hasAlt = false;
|
|
3231
|
+
for (let i = 0; i < prog.inst.length; i++) {
|
|
3232
|
+
if (prog.inst[i].op === Inst.ALT || prog.inst[i].op === Inst.ALT_MATCH) {
|
|
3233
|
+
hasAlt = true;
|
|
3234
|
+
break;
|
|
2339
3235
|
}
|
|
2340
3236
|
}
|
|
2341
|
-
|
|
2342
|
-
|
|
3237
|
+
for (let i = 0; i < prog.inst.length; i++) {
|
|
3238
|
+
const inst = prog.inst[i];
|
|
3239
|
+
const opOut = prog.inst[inst.out].op;
|
|
3240
|
+
switch (inst.op) {
|
|
3241
|
+
case Inst.ALT:
|
|
3242
|
+
case Inst.ALT_MATCH:
|
|
3243
|
+
if (opOut === Inst.MATCH || prog.inst[inst.arg].op === Inst.MATCH) {
|
|
3244
|
+
return null;
|
|
3245
|
+
}
|
|
3246
|
+
break;
|
|
3247
|
+
case Inst.EMPTY_WIDTH:
|
|
3248
|
+
if (opOut === Inst.MATCH) {
|
|
3249
|
+
if ((inst.arg & Utils.EMPTY_END_TEXT) === Utils.EMPTY_END_TEXT) {
|
|
3250
|
+
continue;
|
|
3251
|
+
}
|
|
3252
|
+
return null;
|
|
3253
|
+
}
|
|
3254
|
+
break;
|
|
3255
|
+
default:
|
|
3256
|
+
if (opOut === Inst.MATCH && hasAlt) {
|
|
3257
|
+
return null;
|
|
3258
|
+
}
|
|
3259
|
+
break;
|
|
3260
|
+
}
|
|
2343
3261
|
}
|
|
2344
|
-
|
|
2345
|
-
|
|
2346
|
-
|
|
2347
|
-
|
|
2348
|
-
state.nextAscii[charCode] = nextState;
|
|
2349
|
-
} else {
|
|
2350
|
-
const key = charCode + (anchor === RE2Flags.UNANCHORED ? 0 : Unicode.MAX_RUNE + 1);
|
|
2351
|
-
state.nextMap.set(key, nextState);
|
|
3262
|
+
let p = onePassCopy(prog);
|
|
3263
|
+
p = makeOnePass(p);
|
|
3264
|
+
if (p !== null) {
|
|
3265
|
+
cleanupOnePass(p, prog);
|
|
2352
3266
|
}
|
|
2353
|
-
return
|
|
3267
|
+
return p;
|
|
2354
3268
|
}
|
|
2355
|
-
|
|
2356
|
-
|
|
2357
|
-
|
|
2358
|
-
if (
|
|
2359
|
-
|
|
2360
|
-
|
|
2361
|
-
|
|
2362
|
-
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
let
|
|
2366
|
-
let
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2370
|
-
|
|
2371
|
-
|
|
3269
|
+
static next(inst, r) {
|
|
3270
|
+
const nextIdx = inst.matchRunePos(r);
|
|
3271
|
+
if (nextIdx >= 0) return inst.next[nextIdx];
|
|
3272
|
+
if (inst.op === Inst.ALT_MATCH) return inst.out;
|
|
3273
|
+
return 0; // fail
|
|
3274
|
+
}
|
|
3275
|
+
static execute(re2, input, pos, anchor, ncap) {
|
|
3276
|
+
const onepass = re2.onepass;
|
|
3277
|
+
if (!onepass) return null;
|
|
3278
|
+
const matchcap = new Int32Array(ncap).fill(-1);
|
|
3279
|
+
let matched = false;
|
|
3280
|
+
let r = input.step(pos);
|
|
3281
|
+
let rune = r >> 3;
|
|
3282
|
+
let width = r & 7;
|
|
3283
|
+
let r1 = MachineInputBase.EOF();
|
|
3284
|
+
let rune1 = -1;
|
|
3285
|
+
let width1 = 0;
|
|
3286
|
+
if (r !== MachineInputBase.EOF()) {
|
|
3287
|
+
r1 = input.step(pos + width);
|
|
3288
|
+
if (r1 !== MachineInputBase.EOF()) {
|
|
3289
|
+
rune1 = r1 >> 3;
|
|
3290
|
+
width1 = r1 & 7;
|
|
2372
3291
|
}
|
|
2373
3292
|
}
|
|
2374
|
-
let
|
|
2375
|
-
|
|
2376
|
-
|
|
2377
|
-
|
|
2378
|
-
|
|
2379
|
-
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
|
|
3293
|
+
let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
|
|
3294
|
+
let pc = onepass.start;
|
|
3295
|
+
let inst;
|
|
3296
|
+
while (true) {
|
|
3297
|
+
inst = onepass.inst[pc];
|
|
3298
|
+
pc = inst.out;
|
|
3299
|
+
switch (inst.op) {
|
|
3300
|
+
case Inst.MATCH:
|
|
3301
|
+
{
|
|
3302
|
+
matched = true;
|
|
3303
|
+
if (matchcap.length > 0) {
|
|
3304
|
+
matchcap[0] = 0;
|
|
3305
|
+
matchcap[1] = pos;
|
|
3306
|
+
}
|
|
3307
|
+
return ncap === 0 ? [] : Array.from(matchcap);
|
|
3308
|
+
}
|
|
3309
|
+
case Inst.RUNE:
|
|
3310
|
+
if (!inst.matchRune(rune)) return null;
|
|
3311
|
+
break;
|
|
3312
|
+
case Inst.RUNE1:
|
|
3313
|
+
if (rune !== inst.runes[0]) return null;
|
|
3314
|
+
break;
|
|
3315
|
+
case Inst.RUNE_ANY:
|
|
3316
|
+
break;
|
|
3317
|
+
case Inst.RUNE_ANY_NOT_NL:
|
|
3318
|
+
if (rune === 10) return null;
|
|
3319
|
+
break;
|
|
3320
|
+
case Inst.ALT:
|
|
3321
|
+
case Inst.ALT_MATCH:
|
|
3322
|
+
pc = OnePass.next(inst, rune);
|
|
3323
|
+
continue;
|
|
3324
|
+
case Inst.FAIL:
|
|
3325
|
+
return null;
|
|
3326
|
+
case Inst.NOP:
|
|
3327
|
+
continue;
|
|
3328
|
+
case Inst.EMPTY_WIDTH:
|
|
3329
|
+
if ((inst.arg & ~flag) !== 0) return null;
|
|
3330
|
+
continue;
|
|
3331
|
+
case Inst.CAPTURE:
|
|
3332
|
+
if (inst.arg < matchcap.length) {
|
|
3333
|
+
matchcap[inst.arg] = pos;
|
|
3334
|
+
}
|
|
3335
|
+
continue;
|
|
3336
|
+
default:
|
|
3337
|
+
throw new RE2JSInternalException('bad inst');
|
|
2383
3338
|
}
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
3339
|
+
if (width === 0) break;
|
|
3340
|
+
flag = Utils.emptyOpContext(rune, rune1);
|
|
3341
|
+
pos += width;
|
|
3342
|
+
rune = rune1;
|
|
3343
|
+
width = width1;
|
|
3344
|
+
if (rune !== -1) {
|
|
3345
|
+
r1 = input.step(pos + width);
|
|
3346
|
+
if (r1 !== MachineInputBase.EOF()) {
|
|
3347
|
+
rune1 = r1 >> 3;
|
|
3348
|
+
width1 = r1 & 7;
|
|
2391
3349
|
} else {
|
|
2392
|
-
|
|
3350
|
+
rune1 = -1;
|
|
3351
|
+
width1 = 0;
|
|
2393
3352
|
}
|
|
2394
3353
|
}
|
|
2395
|
-
|
|
2396
|
-
// If we hit a dead end, and anchored, fail early
|
|
2397
|
-
if (currentState.nfaStates.length === 0) {
|
|
2398
|
-
if (anchor !== RE2Flags.UNANCHORED) return false;
|
|
2399
|
-
}
|
|
2400
|
-
i += width;
|
|
2401
3354
|
}
|
|
2402
|
-
return
|
|
3355
|
+
if (!matched) return null;
|
|
3356
|
+
return ncap === 0 ? [] : Array.from(matchcap);
|
|
2403
3357
|
}
|
|
2404
3358
|
}
|
|
2405
3359
|
|
|
@@ -2484,7 +3438,7 @@ class Regexp {
|
|
|
2484
3438
|
this.max = 0; // max for REPEAT
|
|
2485
3439
|
this.cap = 0; // capturing index, for CAPTURE
|
|
2486
3440
|
this.name = null; // capturing name, for CAPTURE
|
|
2487
|
-
this.namedGroups =
|
|
3441
|
+
this.namedGroups = Object.create(null); // map of group name -> capturing index
|
|
2488
3442
|
}
|
|
2489
3443
|
reinit() {
|
|
2490
3444
|
this.flags = 0;
|
|
@@ -2494,7 +3448,7 @@ class Regexp {
|
|
|
2494
3448
|
this.min = 0;
|
|
2495
3449
|
this.max = 0;
|
|
2496
3450
|
this.name = null;
|
|
2497
|
-
this.namedGroups =
|
|
3451
|
+
this.namedGroups = Object.create(null);
|
|
2498
3452
|
}
|
|
2499
3453
|
toString() {
|
|
2500
3454
|
return this.appendTo();
|
|
@@ -2754,6 +3708,188 @@ class Regexp {
|
|
|
2754
3708
|
}
|
|
2755
3709
|
}
|
|
2756
3710
|
|
|
3711
|
+
class Prefilter {
|
|
3712
|
+
static Type = {
|
|
3713
|
+
NONE: 0,
|
|
3714
|
+
EXACT: 1,
|
|
3715
|
+
AND: 2,
|
|
3716
|
+
OR: 3
|
|
3717
|
+
};
|
|
3718
|
+
constructor(type) {
|
|
3719
|
+
this.type = type;
|
|
3720
|
+
this.subs = [];
|
|
3721
|
+
this.str = '';
|
|
3722
|
+
this.bytes = null;
|
|
3723
|
+
}
|
|
3724
|
+
eval(input, pos) {
|
|
3725
|
+
switch (this.type) {
|
|
3726
|
+
case Prefilter.Type.NONE:
|
|
3727
|
+
return true;
|
|
3728
|
+
case Prefilter.Type.EXACT:
|
|
3729
|
+
return input.hasString(this, pos);
|
|
3730
|
+
case Prefilter.Type.AND:
|
|
3731
|
+
for (let i = 0; i < this.subs.length; i++) {
|
|
3732
|
+
if (!this.subs[i].eval(input, pos)) return false;
|
|
3733
|
+
}
|
|
3734
|
+
return true;
|
|
3735
|
+
case Prefilter.Type.OR:
|
|
3736
|
+
for (let i = 0; i < this.subs.length; i++) {
|
|
3737
|
+
if (this.subs[i].eval(input, pos)) return true;
|
|
3738
|
+
}
|
|
3739
|
+
return false;
|
|
3740
|
+
default:
|
|
3741
|
+
return true;
|
|
3742
|
+
}
|
|
3743
|
+
}
|
|
3744
|
+
}
|
|
3745
|
+
class PrefilterTree {
|
|
3746
|
+
static build(re) {
|
|
3747
|
+
const pf = PrefilterTree.fromRegexp(re);
|
|
3748
|
+
return PrefilterTree.simplify(pf);
|
|
3749
|
+
}
|
|
3750
|
+
static fromRegexp(re) {
|
|
3751
|
+
if (!re) return new Prefilter(Prefilter.Type.NONE);
|
|
3752
|
+
switch (re.op) {
|
|
3753
|
+
case Regexp.Op.NO_MATCH:
|
|
3754
|
+
case Regexp.Op.EMPTY_MATCH:
|
|
3755
|
+
case Regexp.Op.BEGIN_LINE:
|
|
3756
|
+
case Regexp.Op.END_LINE:
|
|
3757
|
+
case Regexp.Op.BEGIN_TEXT:
|
|
3758
|
+
case Regexp.Op.END_TEXT:
|
|
3759
|
+
case Regexp.Op.WORD_BOUNDARY:
|
|
3760
|
+
case Regexp.Op.NO_WORD_BOUNDARY:
|
|
3761
|
+
case Regexp.Op.CHAR_CLASS:
|
|
3762
|
+
case Regexp.Op.ANY_CHAR_NOT_NL:
|
|
3763
|
+
case Regexp.Op.ANY_CHAR:
|
|
3764
|
+
{
|
|
3765
|
+
return new Prefilter(Prefilter.Type.NONE);
|
|
3766
|
+
}
|
|
3767
|
+
case Regexp.Op.LITERAL:
|
|
3768
|
+
{
|
|
3769
|
+
if (re.runes.length === 0 || (re.flags & RE2Flags.FOLD_CASE) !== 0) {
|
|
3770
|
+
// Skip case-folded literals for simplicity
|
|
3771
|
+
return new Prefilter(Prefilter.Type.NONE);
|
|
3772
|
+
}
|
|
3773
|
+
const pf = new Prefilter(Prefilter.Type.EXACT);
|
|
3774
|
+
let str = '';
|
|
3775
|
+
for (let i = 0; i < re.runes.length; i++) {
|
|
3776
|
+
str += String.fromCodePoint(re.runes[i]);
|
|
3777
|
+
}
|
|
3778
|
+
pf.str = str;
|
|
3779
|
+
pf.bytes = Utils.stringToUtf8ByteArray(pf.str);
|
|
3780
|
+
return pf;
|
|
3781
|
+
}
|
|
3782
|
+
case Regexp.Op.CAPTURE:
|
|
3783
|
+
case Regexp.Op.PLUS:
|
|
3784
|
+
{
|
|
3785
|
+
return PrefilterTree.fromRegexp(re.subs[0]);
|
|
3786
|
+
}
|
|
3787
|
+
case Regexp.Op.REPEAT:
|
|
3788
|
+
{
|
|
3789
|
+
if (re.min >= 1) {
|
|
3790
|
+
return PrefilterTree.fromRegexp(re.subs[0]);
|
|
3791
|
+
}
|
|
3792
|
+
return new Prefilter(Prefilter.Type.NONE);
|
|
3793
|
+
}
|
|
3794
|
+
case Regexp.Op.CONCAT:
|
|
3795
|
+
{
|
|
3796
|
+
const pf = new Prefilter(Prefilter.Type.AND);
|
|
3797
|
+
for (const sub of re.subs) {
|
|
3798
|
+
pf.subs.push(PrefilterTree.fromRegexp(sub));
|
|
3799
|
+
}
|
|
3800
|
+
return pf;
|
|
3801
|
+
}
|
|
3802
|
+
case Regexp.Op.ALTERNATE:
|
|
3803
|
+
{
|
|
3804
|
+
const pf = new Prefilter(Prefilter.Type.OR);
|
|
3805
|
+
for (const sub of re.subs) {
|
|
3806
|
+
pf.subs.push(PrefilterTree.fromRegexp(sub));
|
|
3807
|
+
}
|
|
3808
|
+
return pf;
|
|
3809
|
+
}
|
|
3810
|
+
default:
|
|
3811
|
+
return new Prefilter(Prefilter.Type.NONE);
|
|
3812
|
+
}
|
|
3813
|
+
}
|
|
3814
|
+
static simplify(pf) {
|
|
3815
|
+
if (pf.type === Prefilter.Type.EXACT || pf.type === Prefilter.Type.NONE) {
|
|
3816
|
+
return pf;
|
|
3817
|
+
}
|
|
3818
|
+
if (pf.type === Prefilter.Type.AND) {
|
|
3819
|
+
const newSubs = [];
|
|
3820
|
+
for (const sub of pf.subs) {
|
|
3821
|
+
const s = PrefilterTree.simplify(sub);
|
|
3822
|
+
if (s.type !== Prefilter.Type.NONE) {
|
|
3823
|
+
if (s.type === Prefilter.Type.AND) {
|
|
3824
|
+
newSubs.push(...s.subs);
|
|
3825
|
+
} else {
|
|
3826
|
+
newSubs.push(s);
|
|
3827
|
+
}
|
|
3828
|
+
}
|
|
3829
|
+
}
|
|
3830
|
+
if (newSubs.length === 0) return new Prefilter(Prefilter.Type.NONE);
|
|
3831
|
+
if (newSubs.length === 1) return newSubs[0];
|
|
3832
|
+
pf.subs = newSubs;
|
|
3833
|
+
return pf;
|
|
3834
|
+
}
|
|
3835
|
+
if (pf.type === Prefilter.Type.OR) {
|
|
3836
|
+
const newSubs = [];
|
|
3837
|
+
for (const sub of pf.subs) {
|
|
3838
|
+
const s = PrefilterTree.simplify(sub);
|
|
3839
|
+
if (s.type === Prefilter.Type.NONE) {
|
|
3840
|
+
// If any branch of an OR has no requirements, the whole OR has no requirements
|
|
3841
|
+
return new Prefilter(Prefilter.Type.NONE);
|
|
3842
|
+
}
|
|
3843
|
+
if (s.type === Prefilter.Type.OR) {
|
|
3844
|
+
newSubs.push(...s.subs);
|
|
3845
|
+
} else {
|
|
3846
|
+
newSubs.push(s);
|
|
3847
|
+
}
|
|
3848
|
+
}
|
|
3849
|
+
if (newSubs.length === 0) return new Prefilter(Prefilter.Type.NONE);
|
|
3850
|
+
if (newSubs.length === 1) return newSubs[0];
|
|
3851
|
+
|
|
3852
|
+
// De-duplicate EXACT branches
|
|
3853
|
+
const seen = new Set();
|
|
3854
|
+
const uniqueSubs = [];
|
|
3855
|
+
for (const sub of newSubs) {
|
|
3856
|
+
if (sub.type === Prefilter.Type.EXACT) {
|
|
3857
|
+
if (!seen.has(sub.str)) {
|
|
3858
|
+
seen.add(sub.str);
|
|
3859
|
+
uniqueSubs.push(sub);
|
|
3860
|
+
}
|
|
3861
|
+
} else {
|
|
3862
|
+
uniqueSubs.push(sub);
|
|
3863
|
+
}
|
|
3864
|
+
}
|
|
3865
|
+
pf.subs = uniqueSubs;
|
|
3866
|
+
return pf;
|
|
3867
|
+
}
|
|
3868
|
+
return pf;
|
|
3869
|
+
}
|
|
3870
|
+
}
|
|
3871
|
+
|
|
3872
|
+
/**
|
|
3873
|
+
* A list of instruction pointers waiting to be patched.
|
|
3874
|
+
* Tracks both `head` and `tail` to allow O(1) appending during compilation.
|
|
3875
|
+
* * Values are encoded integers, not standard memory pointers:
|
|
3876
|
+
* - Program instruction index: `l >> 1`
|
|
3877
|
+
* - Patch `.out` field if: `(l & 1) === 0`
|
|
3878
|
+
* - Patch `.arg` field if: `(l & 1) === 1`
|
|
3879
|
+
* - `0` denotes an empty list.
|
|
3880
|
+
* * @see https://swtch.com/~rsc/regexp/regexp1.html
|
|
3881
|
+
*/
|
|
3882
|
+
class PatchList {
|
|
3883
|
+
/**
|
|
3884
|
+
* @param {number} head - Encoded pointer to the start of the patch list.
|
|
3885
|
+
* @param {number} tail - Encoded pointer to the end of the patch list.
|
|
3886
|
+
*/
|
|
3887
|
+
constructor(head = 0, tail = 0) {
|
|
3888
|
+
this.head = head;
|
|
3889
|
+
this.tail = tail;
|
|
3890
|
+
}
|
|
3891
|
+
}
|
|
3892
|
+
|
|
2757
3893
|
/**
|
|
2758
3894
|
* A Prog is a compiled regular expression program.
|
|
2759
3895
|
*/
|
|
@@ -2855,39 +3991,30 @@ class Prog {
|
|
|
2855
3991
|
return i.arg;
|
|
2856
3992
|
}
|
|
2857
3993
|
patch(l, val) {
|
|
2858
|
-
|
|
2859
|
-
|
|
2860
|
-
|
|
2861
|
-
|
|
3994
|
+
let head = l.head;
|
|
3995
|
+
while (head !== 0) {
|
|
3996
|
+
const i = this.inst[head >> 1];
|
|
3997
|
+
if ((head & 1) === 0) {
|
|
3998
|
+
head = i.out;
|
|
2862
3999
|
i.out = val;
|
|
2863
4000
|
} else {
|
|
2864
|
-
|
|
4001
|
+
head = i.arg;
|
|
2865
4002
|
i.arg = val;
|
|
2866
4003
|
}
|
|
2867
4004
|
}
|
|
2868
4005
|
}
|
|
2869
4006
|
append(l1, l2) {
|
|
2870
|
-
if (l1 === 0)
|
|
2871
|
-
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
for (;;) {
|
|
2878
|
-
const next = this.next(last);
|
|
2879
|
-
if (next === 0) {
|
|
2880
|
-
break;
|
|
2881
|
-
}
|
|
2882
|
-
last = next;
|
|
2883
|
-
}
|
|
2884
|
-
const i = this.inst[last >> 1];
|
|
2885
|
-
if ((last & 1) === 0) {
|
|
2886
|
-
i.out = l2;
|
|
4007
|
+
if (l1.head === 0) return l2;
|
|
4008
|
+
if (l2.head === 0) return l1;
|
|
4009
|
+
|
|
4010
|
+
// We know exactly where the tail is
|
|
4011
|
+
const i = this.inst[l1.tail >> 1];
|
|
4012
|
+
if ((l1.tail & 1) === 0) {
|
|
4013
|
+
i.out = l2.head;
|
|
2887
4014
|
} else {
|
|
2888
|
-
i.arg = l2;
|
|
4015
|
+
i.arg = l2.head;
|
|
2889
4016
|
}
|
|
2890
|
-
return l1;
|
|
4017
|
+
return new PatchList(l1.head, l2.tail);
|
|
2891
4018
|
}
|
|
2892
4019
|
/**
|
|
2893
4020
|
*
|
|
@@ -2916,7 +4043,7 @@ class Prog {
|
|
|
2916
4043
|
* @class
|
|
2917
4044
|
*/
|
|
2918
4045
|
class Frag {
|
|
2919
|
-
constructor(i = 0, out =
|
|
4046
|
+
constructor(i = 0, out = new PatchList(), nullable = false) {
|
|
2920
4047
|
this.i = i; // an instruction address (pc).
|
|
2921
4048
|
this.out = out; // a patch list; see explanation in Prog.js
|
|
2922
4049
|
this.nullable = nullable; // whether the fragment can match the empty string
|
|
@@ -2941,6 +4068,33 @@ class Compiler {
|
|
|
2941
4068
|
c.prog.start = f.i;
|
|
2942
4069
|
return c.prog;
|
|
2943
4070
|
}
|
|
4071
|
+
static compileSet(regexps) {
|
|
4072
|
+
const c = new Compiler();
|
|
4073
|
+
if (regexps.length === 0) {
|
|
4074
|
+
c.prog.start = c.newInst(Inst.FAIL).i;
|
|
4075
|
+
return c.prog;
|
|
4076
|
+
}
|
|
4077
|
+
let starts = [];
|
|
4078
|
+
for (let i = 0; i < regexps.length; i++) {
|
|
4079
|
+
const f = c.compile(regexps[i]);
|
|
4080
|
+
const m = c.newInst(Inst.MATCH);
|
|
4081
|
+
c.prog.getInst(m.i).arg = i; // Store the regex index
|
|
4082
|
+
c.prog.patch(f.out, m.i);
|
|
4083
|
+
starts.push(f.i);
|
|
4084
|
+
}
|
|
4085
|
+
|
|
4086
|
+
// Link starts together via ALT
|
|
4087
|
+
let start = starts[0];
|
|
4088
|
+
for (let i = 1; i < starts.length; i++) {
|
|
4089
|
+
const f = c.newInst(Inst.ALT);
|
|
4090
|
+
const inst = c.prog.getInst(f.i);
|
|
4091
|
+
inst.out = start;
|
|
4092
|
+
inst.arg = starts[i];
|
|
4093
|
+
start = f.i;
|
|
4094
|
+
}
|
|
4095
|
+
c.prog.start = start;
|
|
4096
|
+
return c.prog;
|
|
4097
|
+
}
|
|
2944
4098
|
constructor() {
|
|
2945
4099
|
this.prog = new Prog();
|
|
2946
4100
|
this.newInst(Inst.FAIL);
|
|
@@ -2953,7 +4107,7 @@ class Compiler {
|
|
|
2953
4107
|
// Returns a no-op fragment. Sometimes unavoidable.
|
|
2954
4108
|
nop() {
|
|
2955
4109
|
const f = this.newInst(Inst.NOP);
|
|
2956
|
-
f.out = f.i << 1;
|
|
4110
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
2957
4111
|
return f;
|
|
2958
4112
|
}
|
|
2959
4113
|
fail() {
|
|
@@ -2964,7 +4118,7 @@ class Compiler {
|
|
|
2964
4118
|
// Given a fragment a, returns a fragment with capturing parens around a.
|
|
2965
4119
|
cap(arg) {
|
|
2966
4120
|
const f = this.newInst(Inst.CAPTURE);
|
|
2967
|
-
f.out = f.i << 1;
|
|
4121
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
2968
4122
|
this.prog.getInst(f.i).arg = arg;
|
|
2969
4123
|
if (this.prog.numCap < arg + 1) {
|
|
2970
4124
|
this.prog.numCap = arg + 1;
|
|
@@ -3012,10 +4166,10 @@ class Compiler {
|
|
|
3012
4166
|
const i = this.prog.getInst(f.i);
|
|
3013
4167
|
if (nongreedy) {
|
|
3014
4168
|
i.arg = f1.i;
|
|
3015
|
-
f.out = f.i << 1;
|
|
4169
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3016
4170
|
} else {
|
|
3017
4171
|
i.out = f1.i;
|
|
3018
|
-
f.out = f.i << 1 | 1;
|
|
4172
|
+
f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
|
|
3019
4173
|
}
|
|
3020
4174
|
this.prog.patch(f1.out, f.i);
|
|
3021
4175
|
return f;
|
|
@@ -3027,10 +4181,10 @@ class Compiler {
|
|
|
3027
4181
|
const i = this.prog.getInst(f.i);
|
|
3028
4182
|
if (nongreedy) {
|
|
3029
4183
|
i.arg = f1.i;
|
|
3030
|
-
f.out = f.i << 1;
|
|
4184
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3031
4185
|
} else {
|
|
3032
4186
|
i.out = f1.i;
|
|
3033
|
-
f.out = f.i << 1 | 1;
|
|
4187
|
+
f.out = new PatchList(f.i << 1 | 1, f.i << 1 | 1);
|
|
3034
4188
|
}
|
|
3035
4189
|
f.out = this.prog.append(f.out, f1.out);
|
|
3036
4190
|
return f;
|
|
@@ -3053,7 +4207,7 @@ class Compiler {
|
|
|
3053
4207
|
empty(op) {
|
|
3054
4208
|
const f = this.newInst(Inst.EMPTY_WIDTH);
|
|
3055
4209
|
this.prog.getInst(f.i).arg = op;
|
|
3056
|
-
f.out = f.i << 1;
|
|
4210
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3057
4211
|
return f;
|
|
3058
4212
|
}
|
|
3059
4213
|
|
|
@@ -3068,7 +4222,7 @@ class Compiler {
|
|
|
3068
4222
|
flags &= -2;
|
|
3069
4223
|
}
|
|
3070
4224
|
i.arg = flags;
|
|
3071
|
-
f.out = f.i << 1;
|
|
4225
|
+
f.out = new PatchList(f.i << 1, f.i << 1);
|
|
3072
4226
|
if ((flags & RE2Flags.FOLD_CASE) === 0 && runes.length === 1 || runes.length === 2 && runes[0] === runes[1]) {
|
|
3073
4227
|
i.op = Inst.RUNE1;
|
|
3074
4228
|
} else if (runes.length === 2 && runes[0] === 0 && runes[1] === Unicode.MAX_RUNE) {
|
|
@@ -3173,23 +4327,92 @@ class Simplify {
|
|
|
3173
4327
|
}
|
|
3174
4328
|
switch (re.op) {
|
|
3175
4329
|
case Regexp.Op.CAPTURE:
|
|
4330
|
+
{
|
|
4331
|
+
const sub = Simplify.simplify(re.subs[0]);
|
|
4332
|
+
if (sub !== re.subs[0]) {
|
|
4333
|
+
const nre = Regexp.fromRegexp(re);
|
|
4334
|
+
nre.runes = [];
|
|
4335
|
+
nre.subs = [sub];
|
|
4336
|
+
return nre;
|
|
4337
|
+
}
|
|
4338
|
+
return re;
|
|
4339
|
+
}
|
|
3176
4340
|
case Regexp.Op.CONCAT:
|
|
3177
4341
|
case Regexp.Op.ALTERNATE:
|
|
3178
4342
|
{
|
|
3179
|
-
|
|
4343
|
+
const newSubs = [];
|
|
4344
|
+
let changed = false;
|
|
3180
4345
|
for (let i = 0; i < re.subs.length; i++) {
|
|
3181
4346
|
const sub = re.subs[i];
|
|
3182
4347
|
const nsub = Simplify.simplify(sub);
|
|
3183
|
-
if (
|
|
3184
|
-
|
|
3185
|
-
|
|
3186
|
-
|
|
4348
|
+
if (nsub !== sub) {
|
|
4349
|
+
changed = true;
|
|
4350
|
+
}
|
|
4351
|
+
if (re.op === Regexp.Op.CONCAT) {
|
|
4352
|
+
// If any part of a CONCAT is mathematically impossible,
|
|
4353
|
+
// the entire CONCAT sequence becomes impossible.
|
|
4354
|
+
if (nsub.op === Regexp.Op.NO_MATCH) {
|
|
4355
|
+
return new Regexp(Regexp.Op.NO_MATCH);
|
|
4356
|
+
}
|
|
4357
|
+
// Drop empty 0-width match nodes entirely from sequences
|
|
4358
|
+
if (nsub.op === Regexp.Op.EMPTY_MATCH) {
|
|
4359
|
+
changed = true;
|
|
4360
|
+
continue;
|
|
4361
|
+
}
|
|
4362
|
+
// Flatten nested concatenations
|
|
4363
|
+
if (nsub.op === Regexp.Op.CONCAT) {
|
|
4364
|
+
changed = true;
|
|
4365
|
+
newSubs.push(...nsub.subs);
|
|
4366
|
+
continue;
|
|
4367
|
+
}
|
|
4368
|
+
} else if (re.op === Regexp.Op.ALTERNATE) {
|
|
4369
|
+
// Drop impossible branches from alternations
|
|
4370
|
+
if (nsub.op === Regexp.Op.NO_MATCH) {
|
|
4371
|
+
changed = true;
|
|
4372
|
+
continue;
|
|
4373
|
+
}
|
|
4374
|
+
// Flatten nested alternations
|
|
4375
|
+
if (nsub.op === Regexp.Op.ALTERNATE) {
|
|
4376
|
+
changed = true;
|
|
4377
|
+
newSubs.push(...nsub.subs);
|
|
4378
|
+
continue;
|
|
4379
|
+
}
|
|
3187
4380
|
}
|
|
3188
|
-
|
|
3189
|
-
|
|
4381
|
+
newSubs.push(nsub);
|
|
4382
|
+
}
|
|
4383
|
+
if (changed) {
|
|
4384
|
+
// If we filtered out all nodes, return the mathematically correct fallback
|
|
4385
|
+
if (newSubs.length === 0) {
|
|
4386
|
+
return new Regexp(re.op === Regexp.Op.CONCAT ? Regexp.Op.EMPTY_MATCH : Regexp.Op.NO_MATCH);
|
|
4387
|
+
}
|
|
4388
|
+
// If only 1 node remains, we don't need a CONCAT/ALT container at all
|
|
4389
|
+
if (newSubs.length === 1) {
|
|
4390
|
+
return newSubs[0];
|
|
3190
4391
|
}
|
|
4392
|
+
const nre = Regexp.fromRegexp(re);
|
|
4393
|
+
nre.runes = [];
|
|
4394
|
+
nre.subs = newSubs;
|
|
4395
|
+
return nre;
|
|
4396
|
+
}
|
|
4397
|
+
return re;
|
|
4398
|
+
}
|
|
4399
|
+
case Regexp.Op.CHAR_CLASS:
|
|
4400
|
+
{
|
|
4401
|
+
if (re.runes === null) return re;
|
|
4402
|
+
|
|
4403
|
+
// Empty character classes match nothing.
|
|
4404
|
+
if (re.runes.length === 0) {
|
|
4405
|
+
return new Regexp(Regexp.Op.NO_MATCH);
|
|
3191
4406
|
}
|
|
3192
|
-
|
|
4407
|
+
// Full character classes match everything.
|
|
4408
|
+
if (re.runes.length === 2 && re.runes[0] === 0 && re.runes[1] === Unicode.MAX_RUNE) {
|
|
4409
|
+
return new Regexp(Regexp.Op.ANY_CHAR);
|
|
4410
|
+
}
|
|
4411
|
+
// Standard catch-all except newline
|
|
4412
|
+
if (re.runes.length === 4 && re.runes[0] === 0 && re.runes[1] === Codepoint.CODES.get('\n') - 1 && re.runes[2] === Codepoint.CODES.get('\n') + 1 && re.runes[3] === Unicode.MAX_RUNE) {
|
|
4413
|
+
return new Regexp(Regexp.Op.ANY_CHAR_NOT_NL);
|
|
4414
|
+
}
|
|
4415
|
+
return re;
|
|
3193
4416
|
}
|
|
3194
4417
|
case Regexp.Op.STAR:
|
|
3195
4418
|
case Regexp.Op.PLUS:
|
|
@@ -3226,7 +4449,9 @@ class Simplify {
|
|
|
3226
4449
|
}
|
|
3227
4450
|
subs.push(Simplify.simplify1(Regexp.Op.PLUS, re.flags, sub, null));
|
|
3228
4451
|
nre.subs = subs.slice(0);
|
|
3229
|
-
|
|
4452
|
+
|
|
4453
|
+
// Ensure newly created CONCAT is properly flattened
|
|
4454
|
+
return Simplify.simplify(nre);
|
|
3230
4455
|
}
|
|
3231
4456
|
// Special case x{0} handled above.
|
|
3232
4457
|
|
|
@@ -3264,7 +4489,8 @@ class Simplify {
|
|
|
3264
4489
|
if (prefixSubs !== null) {
|
|
3265
4490
|
const prefix = new Regexp(Regexp.Op.CONCAT);
|
|
3266
4491
|
prefix.subs = prefixSubs.slice(0);
|
|
3267
|
-
|
|
4492
|
+
// Ensure newly created CONCAT is properly flattened
|
|
4493
|
+
return Simplify.simplify(prefix);
|
|
3268
4494
|
}
|
|
3269
4495
|
|
|
3270
4496
|
// Some degenerate case like min > max or min < max < 0.
|
|
@@ -3297,6 +4523,13 @@ class Simplify {
|
|
|
3297
4523
|
return sub;
|
|
3298
4524
|
}
|
|
3299
4525
|
|
|
4526
|
+
// Handle impossible targets gracefully.
|
|
4527
|
+
// e.g. Trying to match "NO_MATCH" 0 or 1 times (QUEST/STAR) evaluates to EMPTY_MATCH.
|
|
4528
|
+
if (sub.op === Regexp.Op.NO_MATCH) {
|
|
4529
|
+
if (op === Regexp.Op.PLUS) return sub; // 1+ times is impossible
|
|
4530
|
+
return new Regexp(Regexp.Op.EMPTY_MATCH);
|
|
4531
|
+
}
|
|
4532
|
+
|
|
3300
4533
|
// The operators are idempotent if the flags match.
|
|
3301
4534
|
if (op === sub.op && (flags & RE2Flags.NON_GREEDY) === (sub.flags & RE2Flags.NON_GREEDY)) {
|
|
3302
4535
|
return sub;
|
|
@@ -3304,10 +4537,10 @@ class Simplify {
|
|
|
3304
4537
|
if (re !== null && re.op === op && (re.flags & RE2Flags.NON_GREEDY) === (flags & RE2Flags.NON_GREEDY) && sub === re.subs[0]) {
|
|
3305
4538
|
return re;
|
|
3306
4539
|
}
|
|
3307
|
-
|
|
3308
|
-
|
|
3309
|
-
|
|
3310
|
-
return
|
|
4540
|
+
const nre = new Regexp(op);
|
|
4541
|
+
nre.flags = flags;
|
|
4542
|
+
nre.subs = [sub];
|
|
4543
|
+
return nre;
|
|
3311
4544
|
}
|
|
3312
4545
|
}
|
|
3313
4546
|
|
|
@@ -3653,16 +4886,6 @@ class CharClass {
|
|
|
3653
4886
|
}
|
|
3654
4887
|
}
|
|
3655
4888
|
|
|
3656
|
-
class Pair {
|
|
3657
|
-
static of(first, second) {
|
|
3658
|
-
return new Pair(first, second);
|
|
3659
|
-
}
|
|
3660
|
-
constructor(first, second) {
|
|
3661
|
-
this.first = first;
|
|
3662
|
-
this.second = second;
|
|
3663
|
-
}
|
|
3664
|
-
}
|
|
3665
|
-
|
|
3666
4889
|
// StringIterator: a stream of runes with an opaque cursor, permitting
|
|
3667
4890
|
// rewinding. The units of the cursor are not specified beyond the
|
|
3668
4891
|
// fact that ASCII characters are single width. (Cursor positions
|
|
@@ -3809,18 +5032,59 @@ class Parser {
|
|
|
3809
5032
|
// stride).
|
|
3810
5033
|
static ANY_TABLE = new UnicodeRangeTable(new Uint32Array([0, Unicode.MAX_RUNE, 1]));
|
|
3811
5034
|
|
|
5035
|
+
// Ascii tables
|
|
5036
|
+
static ASCII_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1]));
|
|
5037
|
+
static ASCII_FOLD_TABLE = new UnicodeRangeTable(new Uint32Array([0, 0x7f, 1, 0x017f, 0x017f, 1,
|
|
5038
|
+
// Old English long s (ſ), folds to S/s.
|
|
5039
|
+
0x212a, 0x212a, 1 // Kelvin K, folds to K/k.
|
|
5040
|
+
]));
|
|
5041
|
+
|
|
3812
5042
|
// unicodeTable() returns the Unicode RangeTable identified by name
|
|
3813
5043
|
// and the table of additional fold-equivalent code points.
|
|
3814
5044
|
// Returns null if |name| does not identify a Unicode character range.
|
|
3815
5045
|
static unicodeTable(name) {
|
|
3816
5046
|
if (name === 'Any') {
|
|
3817
|
-
return
|
|
5047
|
+
return {
|
|
5048
|
+
tab: Parser.ANY_TABLE,
|
|
5049
|
+
fold: Parser.ANY_TABLE,
|
|
5050
|
+
sign: 1
|
|
5051
|
+
};
|
|
5052
|
+
}
|
|
5053
|
+
if (name === 'Ascii') {
|
|
5054
|
+
return {
|
|
5055
|
+
tab: Parser.ASCII_TABLE,
|
|
5056
|
+
fold: Parser.ASCII_FOLD_TABLE,
|
|
5057
|
+
sign: 1
|
|
5058
|
+
};
|
|
5059
|
+
}
|
|
5060
|
+
if (name === 'Assigned') {
|
|
5061
|
+
// Assigned is the mathematical inversion of Cn (Unassigned)
|
|
5062
|
+
return {
|
|
5063
|
+
tab: UnicodeTables.CATEGORIES.get('Cn'),
|
|
5064
|
+
fold: UnicodeTables.CATEGORIES.get('Cn'),
|
|
5065
|
+
sign: -1
|
|
5066
|
+
};
|
|
5067
|
+
}
|
|
5068
|
+
if (name === 'Lc') {
|
|
5069
|
+
return {
|
|
5070
|
+
tab: UnicodeTables.CATEGORIES.get('LC'),
|
|
5071
|
+
fold: UnicodeTables.FOLD_CATEGORIES.get('LC'),
|
|
5072
|
+
sign: 1
|
|
5073
|
+
};
|
|
3818
5074
|
}
|
|
3819
5075
|
if (UnicodeTables.CATEGORIES.has(name)) {
|
|
3820
|
-
return
|
|
5076
|
+
return {
|
|
5077
|
+
tab: UnicodeTables.CATEGORIES.get(name),
|
|
5078
|
+
fold: UnicodeTables.FOLD_CATEGORIES.get(name),
|
|
5079
|
+
sign: 1
|
|
5080
|
+
};
|
|
3821
5081
|
}
|
|
3822
5082
|
if (UnicodeTables.SCRIPTS.has(name)) {
|
|
3823
|
-
return
|
|
5083
|
+
return {
|
|
5084
|
+
tab: UnicodeTables.SCRIPTS.get(name),
|
|
5085
|
+
fold: UnicodeTables.FOLD_SCRIPT.get(name),
|
|
5086
|
+
sign: 1
|
|
5087
|
+
};
|
|
3824
5088
|
}
|
|
3825
5089
|
return null;
|
|
3826
5090
|
}
|
|
@@ -4129,7 +5393,7 @@ class Parser {
|
|
|
4129
5393
|
this.flags = flags;
|
|
4130
5394
|
// number of capturing groups seen
|
|
4131
5395
|
this.numCap = 0;
|
|
4132
|
-
this.namedGroups =
|
|
5396
|
+
this.namedGroups = Object.create(null);
|
|
4133
5397
|
// Stack of parsed expressions.
|
|
4134
5398
|
this.stack = [];
|
|
4135
5399
|
this.free = null;
|
|
@@ -4973,9 +6237,11 @@ class Parser {
|
|
|
4973
6237
|
const i = lit.indexOf('\\E');
|
|
4974
6238
|
if (i >= 0) {
|
|
4975
6239
|
lit = lit.substring(0, i);
|
|
6240
|
+
t.skipString(lit);
|
|
6241
|
+
t.skipString('\\E');
|
|
6242
|
+
} else {
|
|
6243
|
+
t.skipString(lit);
|
|
4976
6244
|
}
|
|
4977
|
-
t.skipString(lit);
|
|
4978
|
-
t.skipString('\\E');
|
|
4979
6245
|
let j = 0;
|
|
4980
6246
|
while (j < lit.length) {
|
|
4981
6247
|
const codepoint = lit.codePointAt(j);
|
|
@@ -4991,6 +6257,9 @@ class Parser {
|
|
|
4991
6257
|
t.rewindTo(savedPos);
|
|
4992
6258
|
break;
|
|
4993
6259
|
}
|
|
6260
|
+
} else {
|
|
6261
|
+
// Unconditionally rewind if PERL_X is off, or if string ended abruptly
|
|
6262
|
+
t.rewindTo(savedPos);
|
|
4994
6263
|
}
|
|
4995
6264
|
const re = this.newRegexp(Regexp.Op.CHAR_CLASS);
|
|
4996
6265
|
re.flags = this.flags;
|
|
@@ -5316,8 +6585,11 @@ class Parser {
|
|
|
5316
6585
|
if (pair === null) {
|
|
5317
6586
|
throw new RE2JSSyntaxException(Parser.ERR_INVALID_CHAR_RANGE, t.from(startPos));
|
|
5318
6587
|
}
|
|
5319
|
-
|
|
5320
|
-
|
|
6588
|
+
if (pair.sign < 0) {
|
|
6589
|
+
sign = 0 - sign;
|
|
6590
|
+
}
|
|
6591
|
+
const tab = pair.tab;
|
|
6592
|
+
const fold = pair.fold; // fold-equivalent table
|
|
5321
6593
|
// Variation of CharClass.appendGroup() for tables.
|
|
5322
6594
|
if ((this.flags & RE2Flags.FOLD_CASE) === 0 || fold === null) {
|
|
5323
6595
|
cc.appendTableWithSign(tab, sign);
|
|
@@ -5461,6 +6733,7 @@ class RE2 {
|
|
|
5461
6733
|
res.prefixUTF8 = re2.prefixUTF8;
|
|
5462
6734
|
res.prefixComplete = re2.prefixComplete;
|
|
5463
6735
|
res.prefixRune = re2.prefixRune;
|
|
6736
|
+
res.prefilter = re2.prefilter;
|
|
5464
6737
|
return res;
|
|
5465
6738
|
}
|
|
5466
6739
|
|
|
@@ -5503,8 +6776,10 @@ class RE2 {
|
|
|
5503
6776
|
let re = Parser.parse(expr, mode);
|
|
5504
6777
|
const maxCap = re.maxCap();
|
|
5505
6778
|
re = Simplify.simplify(re);
|
|
6779
|
+
const prefilter = PrefilterTree.build(re);
|
|
5506
6780
|
const prog = Compiler.compileRegexp(re);
|
|
5507
6781
|
const re2 = new RE2(expr, prog, maxCap, longest);
|
|
6782
|
+
re2.prefilter = prefilter.type === Prefilter.Type.NONE ? null : prefilter;
|
|
5508
6783
|
const [prefixCompl, prefixStr] = prog.prefix();
|
|
5509
6784
|
re2.prefixComplete = prefixCompl;
|
|
5510
6785
|
re2.prefix = prefixStr;
|
|
@@ -5536,12 +6811,78 @@ class RE2 {
|
|
|
5536
6811
|
this.prefixComplete = false; // true if prefix is the entire regexp
|
|
5537
6812
|
this.prefixRune = 0; // first rune in prefix
|
|
5538
6813
|
this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
|
|
5539
|
-
this.dfa = new DFA(prog); //
|
|
6814
|
+
this.dfa = new DFA(this.prog); // initialize Lazy DFA
|
|
6815
|
+
this.onepass = OnePass.compile(this.prog); // compile OnePass
|
|
6816
|
+
this.prefilter = null;
|
|
6817
|
+
}
|
|
6818
|
+
matchPrefixComplete(input, pos, anchor, ncap) {
|
|
6819
|
+
// If strictly anchored, execution must start at 0
|
|
6820
|
+
if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
|
|
6821
|
+
return null;
|
|
6822
|
+
}
|
|
6823
|
+
let matchStart = -1;
|
|
6824
|
+
let matchEnd = -1;
|
|
6825
|
+
const pLen = input.prefixLength(this);
|
|
6826
|
+
if (anchor === RE2Flags.UNANCHORED) {
|
|
6827
|
+
const idx = input.index(this, pos);
|
|
6828
|
+
if (idx < 0) return null;
|
|
6829
|
+
matchStart = pos + idx;
|
|
6830
|
+
matchEnd = matchStart + pLen;
|
|
6831
|
+
} else if (anchor === RE2Flags.ANCHOR_BOTH) {
|
|
6832
|
+
if (input.endPos() !== pLen) return null;
|
|
6833
|
+
const idx = input.index(this, 0);
|
|
6834
|
+
if (idx !== 0) return null;
|
|
6835
|
+
matchStart = 0;
|
|
6836
|
+
matchEnd = pLen;
|
|
6837
|
+
} else if (anchor === RE2Flags.ANCHOR_START) {
|
|
6838
|
+
const idx = input.index(this, 0);
|
|
6839
|
+
if (idx !== 0) return null;
|
|
6840
|
+
matchStart = 0;
|
|
6841
|
+
matchEnd = pLen;
|
|
6842
|
+
}
|
|
6843
|
+
if (matchStart < 0) return null;
|
|
6844
|
+
|
|
6845
|
+
// If captures are requested (e.g. findSubmatch instead of test), populate bounds
|
|
6846
|
+
if (ncap > 0) {
|
|
6847
|
+
const matchcap = new Int32Array(ncap).fill(-1);
|
|
6848
|
+
matchcap[0] = matchStart;
|
|
6849
|
+
matchcap[1] = matchEnd;
|
|
6850
|
+
return Array.from(matchcap);
|
|
6851
|
+
}
|
|
6852
|
+
return []; // Matched successfully, but no capture data requested
|
|
5540
6853
|
}
|
|
5541
6854
|
executeEngine(input, pos, anchor, ncap) {
|
|
6855
|
+
// LITERAL FAST PATH
|
|
6856
|
+
// If the entire regex is just a literal string (and no nested capture boundaries are requested),
|
|
6857
|
+
// bypass all state machines and execute via V8's blistering fast native indexOf
|
|
6858
|
+
if (this.prefixComplete && (ncap === 0 || this.numSubexp === 0)) {
|
|
6859
|
+
return this.matchPrefixComplete(input, pos, anchor, ncap);
|
|
6860
|
+
}
|
|
6861
|
+
|
|
6862
|
+
// PREFILTER FAST PATH
|
|
6863
|
+
// If the unanchored query requires specific literal strings (e.g. "a.*b"),
|
|
6864
|
+
// verify those strings exist using high-speed JS string searches before waking up the state engines.
|
|
6865
|
+
if (this.prefilter !== null && anchor === RE2Flags.UNANCHORED) {
|
|
6866
|
+
if (!this.prefilter.eval(input, pos)) {
|
|
6867
|
+
return null;
|
|
6868
|
+
}
|
|
6869
|
+
}
|
|
6870
|
+
|
|
6871
|
+
// FAST PATH: OnePass DFA engine.
|
|
6872
|
+
// If compiled successfully, it perfectly supports capture groups
|
|
6873
|
+
// and is blisteringly fast since it skips thread queues completely.
|
|
6874
|
+
if (this.onepass !== null) {
|
|
6875
|
+
return OnePass.execute(this, input, pos, anchor, ncap);
|
|
6876
|
+
}
|
|
6877
|
+
|
|
5542
6878
|
// If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
|
|
5543
6879
|
// We must use the NFA.
|
|
5544
6880
|
if (ncap > 0) {
|
|
6881
|
+
// Backtracker bit-state execution bounds check
|
|
6882
|
+
if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
|
|
6883
|
+
return Backtracker.execute(this, input, pos, anchor, ncap);
|
|
6884
|
+
}
|
|
6885
|
+
// NFA execution
|
|
5545
6886
|
return this.doExecuteNFA(input, pos, anchor, ncap);
|
|
5546
6887
|
}
|
|
5547
6888
|
const dfaResult = this.dfa.match(input, pos, anchor);
|
|
@@ -5550,6 +6891,11 @@ class RE2 {
|
|
|
5550
6891
|
return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
|
|
5551
6892
|
}
|
|
5552
6893
|
|
|
6894
|
+
// Backtracker bit-state execution bounds check
|
|
6895
|
+
if (input.endPos() <= Backtracker.maxBitStateLen(this.prog)) {
|
|
6896
|
+
return Backtracker.execute(this, input, pos, anchor, ncap);
|
|
6897
|
+
}
|
|
6898
|
+
|
|
5553
6899
|
// Fallback to NFA
|
|
5554
6900
|
return this.doExecuteNFA(input, pos, anchor, ncap);
|
|
5555
6901
|
}
|
|
@@ -6130,6 +7476,50 @@ class RE2 {
|
|
|
6130
7476
|
}
|
|
6131
7477
|
}
|
|
6132
7478
|
|
|
7479
|
+
class RE2Set {
|
|
7480
|
+
constructor(anchor = RE2Flags.UNANCHORED, flags = RE2Flags.PERL) {
|
|
7481
|
+
this.anchor = anchor;
|
|
7482
|
+
this.flags = flags;
|
|
7483
|
+
this.regexps = [];
|
|
7484
|
+
this.prog = null;
|
|
7485
|
+
this.dfa = null;
|
|
7486
|
+
this.dummyRe2 = null;
|
|
7487
|
+
}
|
|
7488
|
+
add(pattern) {
|
|
7489
|
+
if (this.prog) {
|
|
7490
|
+
throw new RE2JSCompileException('Cannot add patterns after compile');
|
|
7491
|
+
}
|
|
7492
|
+
const re = Parser.parse(pattern, this.flags);
|
|
7493
|
+
this.regexps.push(Simplify.simplify(re));
|
|
7494
|
+
return this.regexps.length - 1;
|
|
7495
|
+
}
|
|
7496
|
+
compile() {
|
|
7497
|
+
if (this.prog) return;
|
|
7498
|
+
this.prog = Compiler.compileSet(this.regexps);
|
|
7499
|
+
this.dfa = new DFA(this.prog);
|
|
7500
|
+
this.dummyRe2 = {
|
|
7501
|
+
prog: this.prog,
|
|
7502
|
+
cond: this.prog.startCond(),
|
|
7503
|
+
prefix: '',
|
|
7504
|
+
prefixRune: 0,
|
|
7505
|
+
longest: false
|
|
7506
|
+
};
|
|
7507
|
+
}
|
|
7508
|
+
match(input) {
|
|
7509
|
+
if (!this.prog) this.compile();
|
|
7510
|
+
const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
7511
|
+
|
|
7512
|
+
// Fast path: Try the blistering fast DFA
|
|
7513
|
+
const dfaResult = this.dfa.matchSet(machineInput, 0, this.anchor);
|
|
7514
|
+
if (dfaResult !== null) return dfaResult;
|
|
7515
|
+
|
|
7516
|
+
// Safe Fallback: Handle boundaries (\b) or massive state explosions via NFA
|
|
7517
|
+
const machine = Machine.fromRE2(this.dummyRe2);
|
|
7518
|
+
machine.init(0);
|
|
7519
|
+
return machine.matchSet(machineInput, 0, this.anchor);
|
|
7520
|
+
}
|
|
7521
|
+
}
|
|
7522
|
+
|
|
6133
7523
|
/**
|
|
6134
7524
|
* Transform JS regex string to RE2 regex string
|
|
6135
7525
|
*/
|
|
@@ -6212,7 +7602,8 @@ class TranslateRegExpString {
|
|
|
6212
7602
|
default:
|
|
6213
7603
|
{
|
|
6214
7604
|
result += '\\';
|
|
6215
|
-
let
|
|
7605
|
+
let cp = data.codePointAt(i + 1);
|
|
7606
|
+
let symSize = Utils.charCount(cp);
|
|
6216
7607
|
result += data.substring(i + 1, i + 1 + symSize);
|
|
6217
7608
|
i += symSize + 1;
|
|
6218
7609
|
continue;
|
|
@@ -6232,7 +7623,8 @@ class TranslateRegExpString {
|
|
|
6232
7623
|
continue;
|
|
6233
7624
|
}
|
|
6234
7625
|
}
|
|
6235
|
-
let
|
|
7626
|
+
let cp = data.codePointAt(i);
|
|
7627
|
+
let symSize = Utils.charCount(cp);
|
|
6236
7628
|
result += data.substring(i, i + symSize);
|
|
6237
7629
|
i += symSize;
|
|
6238
7630
|
}
|
|
@@ -6595,5 +7987,7 @@ exports.RE2JSCompileException = RE2JSCompileException;
|
|
|
6595
7987
|
exports.RE2JSException = RE2JSException;
|
|
6596
7988
|
exports.RE2JSFlagsException = RE2JSFlagsException;
|
|
6597
7989
|
exports.RE2JSGroupException = RE2JSGroupException;
|
|
7990
|
+
exports.RE2JSInternalException = RE2JSInternalException;
|
|
6598
7991
|
exports.RE2JSSyntaxException = RE2JSSyntaxException;
|
|
7992
|
+
exports.RE2Set = RE2Set;
|
|
6599
7993
|
//# sourceMappingURL=index.cjs.cjs.map
|