re2js 1.2.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.cjs.cjs +206 -16
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +5 -0
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +206 -17
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +206 -16
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/build/index.cjs.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v1.2.3
|
|
5
|
+
* @version v1.3.0
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -923,6 +923,17 @@ class RE2JSFlagsException extends RE2JSException {
|
|
|
923
923
|
}
|
|
924
924
|
}
|
|
925
925
|
|
|
926
|
+
/**
 * Exception raised by the DFA engine.
 *
 * Carries the same shape as every other RE2JSException; only `name` differs so
 * callers can tell it apart (by `instanceof` or by `name`).
 */
class RE2JSDfaMemoryException extends RE2JSException {
  /**
   * @param {string} message human-readable description of the failure
   */
  constructor(message) {
    super(message);
    this.name = 'RE2JSDfaMemoryException';
  }
}
|
|
936
|
+
|
|
926
937
|
/**
|
|
927
938
|
* A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
|
|
928
939
|
*
|
|
@@ -2112,6 +2123,160 @@ class Machine {
|
|
|
2112
2123
|
}
|
|
2113
2124
|
}
|
|
2114
2125
|
|
|
2126
|
+
/**
 * One node of the lazily built DFA: a set of NFA program counters together
 * with the transitions discovered so far out of that set.
 */
class DFAState {
  /**
   * @param {string} id comma-joined list of the NFA pcs (e.g. "1,4,7"); used as the cache key
   * @param {ArrayLike<number>} nfaStates the NFA instruction pcs this state stands for
   * @param {boolean} isMatch whether the set contains a matching instruction
   */
  constructor(id, nfaStates, isMatch) {
    const asciiSlots = Unicode.MAX_ASCII + 1;

    this.id = id;
    this.nfaStates = nfaStates;
    this.isMatch = isMatch;
    // Direct-indexed transition table for ASCII runes; a `null` slot means
    // "not computed yet".
    this.nextAscii = Array.from({ length: asciiSlots }, () => null);
    // Transitions that do not go through the ASCII table, keyed by number.
    this.nextMap = new Map();
  }
}
|
|
2135
|
+
/**
 * Lazily built DFA over the instructions of a compiled program.
 *
 * A DFA state is a set of NFA program counters. States are created on demand
 * while input is consumed and memoised in `stateCache`, so each distinct set
 * is built once. The DFA only answers "does the input match?"; it does not
 * report positions or captures.
 *
 * `null` is used throughout as the "cannot handle this program, use the NFA"
 * signal: it is produced when an EMPTY_WIDTH instruction is reachable.
 */
class DFA {
  /**
   * @param {Object} prog compiled program; this class reads `prog.start` and
   *   calls `prog.getInst(pc)`
   */
  constructor(prog) {
    this.prog = prog;
    this.stateCache = new Map(); // id -> DFAState
    this.startState = null; // built on the first call to match()
    // Maximum number of states kept in `stateCache`. Creating a state beyond
    // this limit throws RE2JSDfaMemoryException instead of growing further.
    this.stateLimit = 10000;
  }

  /**
   * Follows empty transitions (ALT, ALT_MATCH, NOP, CAPTURE) from `pcs` and
   * collects every instruction reachable without consuming a character.
   *
   * @param {Iterable<number>} pcs seed program counters
   * @returns {{pcs: Int32Array, isMatch: boolean}|null} the reachable pcs in
   *   ascending order plus whether a MATCH instruction is among them, or
   *   `null` when an EMPTY_WIDTH instruction is reachable (not supported here)
   */
  computeClosure(pcs) {
    const closure = new Set();
    const stack = [...pcs];
    let isMatch = false;

    while (stack.length > 0) {
      const pc = stack.pop();
      if (closure.has(pc)) {
        continue;
      }
      closure.add(pc);

      const inst = this.prog.getInst(pc);
      switch (inst.op) {
        case Inst.MATCH:
          isMatch = true;
          break;
        case Inst.ALT:
        case Inst.ALT_MATCH:
          stack.push(inst.out);
          stack.push(inst.arg);
          break;
        case Inst.NOP:
        case Inst.CAPTURE:
          stack.push(inst.out);
          break;
        case Inst.EMPTY_WIDTH:
          // Empty-width assertions are not modelled by this DFA; bail out so
          // the caller can fall back to the NFA.
          return null;
      }
    }

    // Typed-array sort is numeric, so equal sets always yield the same order
    // (and therefore the same cache id).
    const sortedPCs = Int32Array.from(closure).sort();
    return { pcs: sortedPCs, isMatch };
  }

  /**
   * Returns the DFA state for the closure of `pcs`, creating and caching it
   * if it has not been seen before.
   *
   * @param {Iterable<number>} pcs seed program counters
   * @returns {DFAState|null} the state, or `null` when the closure bails out
   * @throws {RE2JSDfaMemoryException} when a new state would push the cache
   *   past `stateLimit`
   */
  getState(pcs) {
    const closureResult = this.computeClosure(pcs);
    if (!closureResult) {
      return null; // bailout: NFA required
    }

    const id = closureResult.pcs.join(',');
    const cached = this.stateCache.get(id);
    if (cached !== undefined) {
      return cached;
    }

    // `>=` so the cache never holds more than `stateLimit` states; a plain
    // `>` would admit one state too many before throwing.
    if (this.stateCache.size >= this.stateLimit) {
      throw new RE2JSDfaMemoryException('dfa error: Out of memory exception');
    }

    const state = new DFAState(id, closureResult.pcs, closureResult.isMatch);
    this.stateCache.set(id, state);
    return state;
  }

  /**
   * Computes (or looks up) the state reached from `state` on `charCode`.
   *
   * Unanchored ASCII transitions are cached in `state.nextAscii`; everything
   * else goes to `state.nextMap`, where anchored transitions are offset by
   * `Unicode.MAX_RUNE + 1` so they cannot collide with unanchored ones.
   *
   * @param {DFAState} state current state
   * @param {number} charCode rune being consumed
   * @param {number} anchor one of the RE2Flags anchor values
   * @returns {DFAState|null} next state, or `null` on bailout
   * @throws {RE2JSDfaMemoryException} propagated from getState()
   */
  step(state, charCode, anchor) {
    const unanchored = anchor === RE2Flags.UNANCHORED;
    const useAsciiTable = unanchored && charCode <= Unicode.MAX_ASCII;
    const key = unanchored ? charCode : charCode + Unicode.MAX_RUNE + 1;

    if (useAsciiTable) {
      const known = state.nextAscii[charCode];
      if (known !== null) {
        return known;
      }
    } else if (state.nextMap.has(key)) {
      return state.nextMap.get(key);
    }

    const nextPCs = [];
    for (let i = 0; i < state.nfaStates.length; i++) {
      const inst = this.prog.getInst(state.nfaStates[i]);
      if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
        nextPCs.push(inst.out);
      }
    }
    if (unanchored) {
      // An unanchored search may begin a fresh attempt at every position.
      nextPCs.push(this.prog.start);
    }

    const nextState = this.getState(nextPCs);

    if (useAsciiTable) {
      state.nextAscii[charCode] = nextState;
    } else {
      state.nextMap.set(key, nextState);
    }
    return nextState;
  }

  /**
   * Runs the DFA over `input` starting at `pos`.
   *
   * @param {Object} input machine input; this method calls `input.endPos()`
   *   and `input.step(i)`, decoding the latter as `(rune << 3) | width`
   * @param {number} pos position to start from
   * @param {number} anchor one of the RE2Flags anchor values
   * @returns {boolean|null} `true`/`false` for match/no match, or `null` when
   *   the DFA cannot decide and the caller must fall back to the NFA
   * @throws {RE2JSDfaMemoryException} propagated from getState()
   */
  match(input, pos, anchor) {
    if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
      return false;
    }

    if (!this.startState) {
      this.startState = this.getState([this.prog.start]);
      if (!this.startState) {
        return null; // fallback to NFA
      }
    }

    const endPos = input.endPos();
    let currentState = this.startState;

    // The start state may already accept (empty match).
    if (currentState.isMatch) {
      if (anchor === RE2Flags.ANCHOR_BOTH) {
        if (pos === endPos) {
          return true;
        }
      } else {
        return true;
      }
    }

    let i = pos;
    while (i < endPos) {
      const r = input.step(i);
      const rune = r >> 3;
      const width = r & 7;

      currentState = this.step(currentState, rune, anchor);
      if (currentState === null) {
        return null; // bailout: signal fallback
      }

      if (currentState.isMatch) {
        if (anchor === RE2Flags.ANCHOR_BOTH) {
          // Anchored at both ends: only a match ending at the end counts.
          if (i + width === endPos) {
            return true;
          }
        } else {
          return true;
        }
      }

      // Anchored searches cannot recover from an empty state set.
      if (currentState.nfaStates.length === 0 && anchor !== RE2Flags.UNANCHORED) {
        return false;
      }

      i += width;
    }
    return false;
  }
}
|
|
2279
|
+
|
|
2115
2280
|
/**
|
|
2116
2281
|
* Regular expression abstract syntax tree. Produced by parser, used by compiler. NB, this
|
|
2117
2282
|
* corresponds to {@code syntax.regexp} in the Go implementation; Go's {@code regexp} is called
|
|
@@ -5244,6 +5409,30 @@ class RE2 {
|
|
|
5244
5409
|
this.prefixComplete = false; // true if prefix is the entire regexp
|
|
5245
5410
|
this.prefixRune = 0; // first rune in prefix
|
|
5246
5411
|
this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
|
|
5412
|
+
this.dfa = new DFA(prog); // Initialize the Lazy DFA
|
|
5413
|
+
}
|
|
5414
|
+
executeEngine(input, pos, anchor, ncap) {
|
|
5415
|
+
// If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
|
|
5416
|
+
// We must use the NFA.
|
|
5417
|
+
if (ncap > 0) {
|
|
5418
|
+
return this.doExecuteNFA(input, pos, anchor, ncap);
|
|
5419
|
+
}
|
|
5420
|
+
try {
|
|
5421
|
+
const dfaResult = this.dfa.match(input, pos, anchor);
|
|
5422
|
+
if (dfaResult !== null) {
|
|
5423
|
+
// DFA succeeded (returned true or false)
|
|
5424
|
+
return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
|
|
5425
|
+
}
|
|
5426
|
+
} catch (e) {
|
|
5427
|
+
if (e instanceof RE2JSDfaMemoryException) {
|
|
5428
|
+
this.dfa = new DFA(this.prog); // flush cache
|
|
5429
|
+
} else {
|
|
5430
|
+
throw e;
|
|
5431
|
+
}
|
|
5432
|
+
}
|
|
5433
|
+
|
|
5434
|
+
// Fallback to NFA
|
|
5435
|
+
return this.doExecuteNFA(input, pos, anchor, ncap);
|
|
5247
5436
|
}
|
|
5248
5437
|
|
|
5249
5438
|
/**
|
|
@@ -5328,10 +5517,10 @@ class RE2 {
|
|
|
5328
5517
|
return this.expr;
|
|
5329
5518
|
}
|
|
5330
5519
|
|
|
5331
|
-
//
|
|
5520
|
+
// doExecuteNFA() finds the leftmost match in the input and returns
|
|
5332
5521
|
// the position of its subexpressions.
|
|
5333
5522
|
// Derived from exec.go.
|
|
5334
|
-
|
|
5523
|
+
doExecuteNFA(input, pos, anchor, ncap) {
|
|
5335
5524
|
let m = this.get();
|
|
5336
5525
|
// The Treiber stack cannot reuse nodes, unless the node to be reused has only ever been at
|
|
5337
5526
|
// the bottom of the stack (i.e., next == null).
|
|
@@ -5349,7 +5538,7 @@ class RE2 {
|
|
|
5349
5538
|
return cap;
|
|
5350
5539
|
}
|
|
5351
5540
|
match(s) {
|
|
5352
|
-
return this.
|
|
5541
|
+
return this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 0) !== null;
|
|
5353
5542
|
}
|
|
5354
5543
|
|
|
5355
5544
|
/**
|
|
@@ -5377,7 +5566,7 @@ class RE2 {
|
|
|
5377
5566
|
return [false, null];
|
|
5378
5567
|
}
|
|
5379
5568
|
const machineInput = input.isUTF16Encoding() ? MachineInput.fromUTF16(input.asCharSequence(), 0, end) : MachineInput.fromUTF8(input.asBytes(), 0, end);
|
|
5380
|
-
const groupMatch = this.
|
|
5569
|
+
const groupMatch = this.executeEngine(machineInput, start, anchor, 2 * ngroup);
|
|
5381
5570
|
if (groupMatch === null) {
|
|
5382
5571
|
return [false, null];
|
|
5383
5572
|
}
|
|
@@ -5389,7 +5578,7 @@ class RE2 {
|
|
|
5389
5578
|
*/
|
|
5390
5579
|
// This is visible for testing.
|
|
5391
5580
|
matchUTF8(b) {
|
|
5392
|
-
return this.
|
|
5581
|
+
return this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 0) !== null;
|
|
5393
5582
|
}
|
|
5394
5583
|
|
|
5395
5584
|
/**
|
|
@@ -5426,7 +5615,7 @@ class RE2 {
|
|
|
5426
5615
|
const input = MachineInput.fromUTF16(src);
|
|
5427
5616
|
let numReplaces = 0;
|
|
5428
5617
|
while (searchPos <= src.length) {
|
|
5429
|
-
const a = this.
|
|
5618
|
+
const a = this.executeEngine(input, searchPos, RE2Flags.UNANCHORED, 2);
|
|
5430
5619
|
if (a === null || a.length === 0) {
|
|
5431
5620
|
break;
|
|
5432
5621
|
}
|
|
@@ -5484,7 +5673,7 @@ class RE2 {
|
|
|
5484
5673
|
let i = 0;
|
|
5485
5674
|
let prevMatchEnd = -1;
|
|
5486
5675
|
while (i < n && pos <= end) {
|
|
5487
|
-
const matches = this.
|
|
5676
|
+
const matches = this.executeEngine(input, pos, RE2Flags.UNANCHORED, this.prog.numCap);
|
|
5488
5677
|
if (matches === null || matches.length === 0) {
|
|
5489
5678
|
break;
|
|
5490
5679
|
}
|
|
@@ -5555,7 +5744,7 @@ class RE2 {
|
|
|
5555
5744
|
*/
|
|
5556
5745
|
// This is visible for testing.
|
|
5557
5746
|
findUTF8(b) {
|
|
5558
|
-
const a = this.
|
|
5747
|
+
const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
|
|
5559
5748
|
if (a === null) {
|
|
5560
5749
|
return null;
|
|
5561
5750
|
}
|
|
@@ -5570,7 +5759,7 @@ class RE2 {
|
|
|
5570
5759
|
*/
|
|
5571
5760
|
// This is visible for testing.
|
|
5572
5761
|
findUTF8Index(b) {
|
|
5573
|
-
const a = this.
|
|
5762
|
+
const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
|
|
5574
5763
|
if (a === null) {
|
|
5575
5764
|
return null;
|
|
5576
5765
|
}
|
|
@@ -5587,7 +5776,7 @@ class RE2 {
|
|
|
5587
5776
|
*/
|
|
5588
5777
|
// This is visible for testing.
|
|
5589
5778
|
find(s) {
|
|
5590
|
-
const a = this.
|
|
5779
|
+
const a = this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
|
|
5591
5780
|
if (a === null) {
|
|
5592
5781
|
return '';
|
|
5593
5782
|
}
|
|
@@ -5603,7 +5792,7 @@ class RE2 {
|
|
|
5603
5792
|
*/
|
|
5604
5793
|
// This is visible for testing.
|
|
5605
5794
|
findIndex(s) {
|
|
5606
|
-
return this.
|
|
5795
|
+
return this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
|
|
5607
5796
|
}
|
|
5608
5797
|
|
|
5609
5798
|
/**
|
|
@@ -5615,7 +5804,7 @@ class RE2 {
|
|
|
5615
5804
|
*/
|
|
5616
5805
|
// This is visible for testing.
|
|
5617
5806
|
findUTF8Submatch(b) {
|
|
5618
|
-
const a = this.
|
|
5807
|
+
const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap);
|
|
5619
5808
|
if (a === null) {
|
|
5620
5809
|
return null;
|
|
5621
5810
|
}
|
|
@@ -5637,7 +5826,7 @@ class RE2 {
|
|
|
5637
5826
|
*/
|
|
5638
5827
|
// This is visible for testing.
|
|
5639
5828
|
findUTF8SubmatchIndex(b) {
|
|
5640
|
-
return this.pad(this.
|
|
5829
|
+
return this.pad(this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap));
|
|
5641
5830
|
}
|
|
5642
5831
|
|
|
5643
5832
|
/**
|
|
@@ -5649,7 +5838,7 @@ class RE2 {
|
|
|
5649
5838
|
*/
|
|
5650
5839
|
// This is visible for testing.
|
|
5651
5840
|
findSubmatch(s) {
|
|
5652
|
-
const a = this.
|
|
5841
|
+
const a = this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap);
|
|
5653
5842
|
if (a === null) {
|
|
5654
5843
|
return null;
|
|
5655
5844
|
}
|
|
@@ -5671,7 +5860,7 @@ class RE2 {
|
|
|
5671
5860
|
*/
|
|
5672
5861
|
// This is visible for testing.
|
|
5673
5862
|
findSubmatchIndex(s) {
|
|
5674
|
-
return this.pad(this.
|
|
5863
|
+
return this.pad(this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap));
|
|
5675
5864
|
}
|
|
5676
5865
|
|
|
5677
5866
|
/**
|
|
@@ -6244,6 +6433,7 @@ class RE2JS {
|
|
|
6244
6433
|
exports.Matcher = Matcher;
|
|
6245
6434
|
exports.RE2JS = RE2JS;
|
|
6246
6435
|
exports.RE2JSCompileException = RE2JSCompileException;
|
|
6436
|
+
exports.RE2JSDfaMemoryException = RE2JSDfaMemoryException;
|
|
6247
6437
|
exports.RE2JSException = RE2JSException;
|
|
6248
6438
|
exports.RE2JSFlagsException = RE2JSFlagsException;
|
|
6249
6439
|
exports.RE2JSGroupException = RE2JSGroupException;
|