re2js 1.2.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.cjs.cjs +206 -16
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +5 -0
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +206 -17
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +206 -16
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v1.2.3
|
|
5
|
+
* @version v1.3.0
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -927,6 +927,17 @@
|
|
|
927
927
|
}
|
|
928
928
|
}
|
|
929
929
|
|
|
930
|
+
/**
 * Signals that the lazy DFA refused to allocate another state because its
 * state cache reached the configured limit.
 *
 * The regexp engine catches this exception, discards the DFA cache and
 * re-runs the search on the NFA, so it acts as an internal fallback signal.
 */
class RE2JSDfaMemoryException extends RE2JSException {
  /**
   * @param {string} message human-readable description of the failure
   */
  constructor(message) {
    super(message);
    // Overrides whatever name the base exception assigned.
    this.name = 'RE2JSDfaMemoryException';
  }
}
|
|
940
|
+
|
|
930
941
|
/**
|
|
931
942
|
* A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
|
|
932
943
|
*
|
|
@@ -2116,6 +2127,160 @@
|
|
|
2116
2127
|
}
|
|
2117
2128
|
}
|
|
2118
2129
|
|
|
2130
|
+
/**
 * One node of the lazily built DFA: a set of NFA program counters together
 * with the transitions that have been discovered out of it so far.
 */
class DFAState {
  /**
   * @param {string} id comma-joined list of the NFA program counters (e.g. "1,4,7")
   * @param {ArrayLike<number>} nfaStates instruction PCs that make up this state
   * @param {boolean} isMatch whether the state contains a MATCH instruction
   */
  constructor(id, nfaStates, isMatch) {
    const asciiSlots = Unicode.MAX_ASCII + 1;

    this.id = id;
    this.nfaStates = nfaStates;
    this.isMatch = isMatch;
    // Dense table indexed by ASCII code, used for unanchored lookups;
    // a `null` slot means the transition has not been computed yet.
    this.nextAscii = Array.from({ length: asciiSlots }, () => null);
    // Sparse cache for every other (char, anchor) combination -> DFAState.
    this.nextMap = new Map();
  }
}
|
|
2139
|
+
/**
 * Lazy DFA built on demand over the NFA instructions of a compiled program.
 *
 * Every DFA state is a set of NFA program counters. States and transitions are
 * created the first time they are needed and cached afterwards. The DFA only
 * answers "does it match?"; it does not track capture positions.
 *
 * `match` returns `null` (instead of a boolean) when the program contains an
 * instruction the DFA does not handle, so that the caller can fall back to the
 * NFA.
 */
class DFA {
  /**
   * @param {object} prog compiled program; must expose `start` and `getInst(pc)`
   * @param {number} [stateLimit=10000] maximum number of cached DFA states;
   *   creating a state beyond this limit throws RE2JSDfaMemoryException
   */
  constructor(prog, stateLimit = 10000) {
    this.prog = prog;
    this.stateCache = new Map(); // id -> DFAState
    this.startState = null;
    this.stateLimit = stateLimit; // Prevent memory explosion (ReDoS protection), like RE2 max_mem
  }

  /**
   * Follows the non-consuming instructions (ALT, ALT_MATCH, NOP, CAPTURE)
   * reachable from `pcs` and collects every program counter visited.
   *
   * @param {Iterable<number>} pcs seed program counters
   * @returns {{pcs: Int32Array, isMatch: boolean}|null} the visited PCs in
   *   ascending order plus whether a MATCH instruction was reached, or `null`
   *   when an EMPTY_WIDTH instruction is encountered (not handled by the DFA)
   */
  computeClosure(pcs) {
    const closure = new Set();
    const stack = [...pcs];
    let isMatch = false;

    while (stack.length > 0) {
      const pc = stack.pop();
      if (closure.has(pc)) {
        continue;
      }
      closure.add(pc);

      const inst = this.prog.getInst(pc);
      switch (inst.op) {
        case Inst.MATCH:
          isMatch = true;
          break;
        case Inst.ALT:
        case Inst.ALT_MATCH:
          stack.push(inst.out);
          stack.push(inst.arg);
          break;
        case Inst.NOP:
        case Inst.CAPTURE:
          stack.push(inst.out);
          break;
        // Bailing out on empty-width assertions to keep the DFA simple;
        // the caller is expected to fall back to the NFA.
        case Inst.EMPTY_WIDTH:
          return null;
        default:
          break;
      }
    }

    // Typed-array sort is numeric, so the order (and therefore the state id)
    // is canonical regardless of the traversal order.
    return {
      pcs: Int32Array.from(closure).sort(),
      isMatch,
    };
  }

  /**
   * Returns the cached DFA state for the closure of `pcs`, creating it if needed.
   *
   * @param {Iterable<number>} pcs seed program counters
   * @returns {DFAState|null} the state, or `null` when the closure bailed out
   * @throws {RE2JSDfaMemoryException} when a new state is required but the
   *   cache already holds `stateLimit` states
   */
  getState(pcs) {
    const closureResult = this.computeClosure(pcs);
    if (!closureResult) {
      return null; // Bailout to NFA required
    }

    const id = closureResult.pcs.join(',');
    const cached = this.stateCache.get(id);
    if (cached !== undefined) {
      return cached;
    }

    // Safety: never let the cache grow past `stateLimit` entries.
    if (this.stateCache.size >= this.stateLimit) {
      throw new RE2JSDfaMemoryException('dfa error: Out of memory exception');
    }

    const state = new DFAState(id, closureResult.pcs, closureResult.isMatch);
    this.stateCache.set(id, state);
    return state;
  }

  /**
   * Computes (or fetches from cache) the state reached from `state` on `charCode`.
   *
   * @param {DFAState} state current state
   * @param {number} charCode rune being consumed
   * @param {number} anchor one of the RE2Flags anchor constants
   * @returns {DFAState|null} next state, or `null` when the DFA has to bail out
   */
  step(state, charCode, anchor) {
    const isUnanchored = anchor === RE2Flags.UNANCHORED;
    // ASCII fast path: unanchored transitions on ASCII runes live in a flat array.
    const useAsciiTable = isUnanchored && charCode <= Unicode.MAX_ASCII;
    // Anchored transitions are shifted past the rune range so they never
    // collide with unanchored ones in the shared map.
    const key = charCode + (isUnanchored ? 0 : Unicode.MAX_RUNE + 1);

    if (useAsciiTable) {
      const next = state.nextAscii[charCode];
      if (next !== null) {
        return next;
      }
    } else {
      const next = state.nextMap.get(key);
      if (next !== undefined) {
        return next;
      }
    }

    const nextPCs = [];
    for (let i = 0; i < state.nfaStates.length; i++) {
      const inst = this.prog.getInst(state.nfaStates[i]);
      if (Inst.isRuneOp(inst.op) && inst.matchRune(charCode)) {
        nextPCs.push(inst.out);
      }
    }
    // An unanchored search may begin a fresh attempt at every position.
    if (isUnanchored) {
      nextPCs.push(this.prog.start);
    }
    const nextState = this.getState(nextPCs);

    // Cache the result
    if (useAsciiTable) {
      state.nextAscii[charCode] = nextState;
    } else {
      state.nextMap.set(key, nextState);
    }
    return nextState;
  }

  /**
   * Runs the lazy DFA over `input` starting at `pos`.
   *
   * @param {object} input machine input exposing `endPos()` and `step(i)`;
   *   `step` is decoded here as `(rune << 3) | width`
   * @param {number} pos position to start at
   * @param {number} anchor one of the RE2Flags anchor constants
   * @returns {boolean|null} `true`/`false` for match/no match, or `null` when
   *   the DFA cannot handle the program and the caller should use the NFA
   * @throws {RE2JSDfaMemoryException} when the state cache limit is reached
   */
  match(input, pos, anchor) {
    const isAnchoredAtStart =
      anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH;
    if (isAnchoredAtStart && pos !== 0) {
      return false;
    }

    if (!this.startState) {
      this.startState = this.getState([this.prog.start]);
      if (!this.startState) {
        return null; // Fallback to NFA
      }
    }

    const endPos = input.endPos();
    let currentState = this.startState;
    if (currentState.isMatch) {
      // With ANCHOR_BOTH an empty match only counts on empty remaining input.
      if (anchor !== RE2Flags.ANCHOR_BOTH || pos === endPos) {
        return true;
      }
    }

    let i = pos;
    while (i < endPos) {
      const r = input.step(i);
      const rune = r >> 3;
      const width = r & 7;
      currentState = this.step(currentState, rune, anchor);

      // Bailout: signal the caller to fall back
      if (currentState === null) {
        return null;
      }
      if (currentState.isMatch) {
        // With ANCHOR_BOTH the match must end exactly at the end of the input.
        if (anchor !== RE2Flags.ANCHOR_BOTH || i + width === endPos) {
          return true;
        }
      }

      // Dead end while anchored: no later position can rescue the match.
      if (currentState.nfaStates.length === 0 && anchor !== RE2Flags.UNANCHORED) {
        return false;
      }
      i += width;
    }
    return false;
  }
}
|
|
2283
|
+
|
|
2119
2284
|
/**
|
|
2120
2285
|
* Regular expression abstract syntax tree. Produced by parser, used by compiler. NB, this
|
|
2121
2286
|
* corresponds to {@code syntax.regexp} in the Go implementation; Go's {@code regexp} is called
|
|
@@ -5248,6 +5413,30 @@
|
|
|
5248
5413
|
this.prefixComplete = false; // true if prefix is the entire regexp
|
|
5249
5414
|
this.prefixRune = 0; // first rune in prefix
|
|
5250
5415
|
this.pooled = new AtomicReference(); // Cache of machines for running regexp. Forms a Treiber stack.
|
|
5416
|
+
this.dfa = new DFA(prog); // Initialize the Lazy DFA
|
|
5417
|
+
}
|
|
5418
|
+
executeEngine(input, pos, anchor, ncap) {
|
|
5419
|
+
// If the user wants capturing groups (ncap > 0), the DFA mathematically CANNOT do it.
|
|
5420
|
+
// We must use the NFA.
|
|
5421
|
+
if (ncap > 0) {
|
|
5422
|
+
return this.doExecuteNFA(input, pos, anchor, ncap);
|
|
5423
|
+
}
|
|
5424
|
+
try {
|
|
5425
|
+
const dfaResult = this.dfa.match(input, pos, anchor);
|
|
5426
|
+
if (dfaResult !== null) {
|
|
5427
|
+
// DFA succeeded (returned true or false)
|
|
5428
|
+
return dfaResult ? [] : null; // Return empty array to signify "matched but no captures"
|
|
5429
|
+
}
|
|
5430
|
+
} catch (e) {
|
|
5431
|
+
if (e instanceof RE2JSDfaMemoryException) {
|
|
5432
|
+
this.dfa = new DFA(this.prog); // flush cache
|
|
5433
|
+
} else {
|
|
5434
|
+
throw e;
|
|
5435
|
+
}
|
|
5436
|
+
}
|
|
5437
|
+
|
|
5438
|
+
// Fallback to NFA
|
|
5439
|
+
return this.doExecuteNFA(input, pos, anchor, ncap);
|
|
5251
5440
|
}
|
|
5252
5441
|
|
|
5253
5442
|
/**
|
|
@@ -5332,10 +5521,10 @@
|
|
|
5332
5521
|
return this.expr;
|
|
5333
5522
|
}
|
|
5334
5523
|
|
|
5335
|
-
//
|
|
5524
|
+
// doExecuteNFA() finds the leftmost match in the input and returns
|
|
5336
5525
|
// the position of its subexpressions.
|
|
5337
5526
|
// Derived from exec.go.
|
|
5338
|
-
|
|
5527
|
+
doExecuteNFA(input, pos, anchor, ncap) {
|
|
5339
5528
|
let m = this.get();
|
|
5340
5529
|
// The Treiber stack cannot reuse nodes, unless the node to be reused has only ever been at
|
|
5341
5530
|
// the bottom of the stack (i.e., next == null).
|
|
@@ -5353,7 +5542,7 @@
|
|
|
5353
5542
|
return cap;
|
|
5354
5543
|
}
|
|
5355
5544
|
match(s) {
|
|
5356
|
-
return this.
|
|
5545
|
+
return this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 0) !== null;
|
|
5357
5546
|
}
|
|
5358
5547
|
|
|
5359
5548
|
/**
|
|
@@ -5381,7 +5570,7 @@
|
|
|
5381
5570
|
return [false, null];
|
|
5382
5571
|
}
|
|
5383
5572
|
const machineInput = input.isUTF16Encoding() ? MachineInput.fromUTF16(input.asCharSequence(), 0, end) : MachineInput.fromUTF8(input.asBytes(), 0, end);
|
|
5384
|
-
const groupMatch = this.
|
|
5573
|
+
const groupMatch = this.executeEngine(machineInput, start, anchor, 2 * ngroup);
|
|
5385
5574
|
if (groupMatch === null) {
|
|
5386
5575
|
return [false, null];
|
|
5387
5576
|
}
|
|
@@ -5393,7 +5582,7 @@
|
|
|
5393
5582
|
*/
|
|
5394
5583
|
// This is visible for testing.
|
|
5395
5584
|
matchUTF8(b) {
|
|
5396
|
-
return this.
|
|
5585
|
+
return this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 0) !== null;
|
|
5397
5586
|
}
|
|
5398
5587
|
|
|
5399
5588
|
/**
|
|
@@ -5430,7 +5619,7 @@
|
|
|
5430
5619
|
const input = MachineInput.fromUTF16(src);
|
|
5431
5620
|
let numReplaces = 0;
|
|
5432
5621
|
while (searchPos <= src.length) {
|
|
5433
|
-
const a = this.
|
|
5622
|
+
const a = this.executeEngine(input, searchPos, RE2Flags.UNANCHORED, 2);
|
|
5434
5623
|
if (a === null || a.length === 0) {
|
|
5435
5624
|
break;
|
|
5436
5625
|
}
|
|
@@ -5488,7 +5677,7 @@
|
|
|
5488
5677
|
let i = 0;
|
|
5489
5678
|
let prevMatchEnd = -1;
|
|
5490
5679
|
while (i < n && pos <= end) {
|
|
5491
|
-
const matches = this.
|
|
5680
|
+
const matches = this.executeEngine(input, pos, RE2Flags.UNANCHORED, this.prog.numCap);
|
|
5492
5681
|
if (matches === null || matches.length === 0) {
|
|
5493
5682
|
break;
|
|
5494
5683
|
}
|
|
@@ -5559,7 +5748,7 @@
|
|
|
5559
5748
|
*/
|
|
5560
5749
|
// This is visible for testing.
|
|
5561
5750
|
findUTF8(b) {
|
|
5562
|
-
const a = this.
|
|
5751
|
+
const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
|
|
5563
5752
|
if (a === null) {
|
|
5564
5753
|
return null;
|
|
5565
5754
|
}
|
|
@@ -5574,7 +5763,7 @@
|
|
|
5574
5763
|
*/
|
|
5575
5764
|
// This is visible for testing.
|
|
5576
5765
|
findUTF8Index(b) {
|
|
5577
|
-
const a = this.
|
|
5766
|
+
const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, 2);
|
|
5578
5767
|
if (a === null) {
|
|
5579
5768
|
return null;
|
|
5580
5769
|
}
|
|
@@ -5591,7 +5780,7 @@
|
|
|
5591
5780
|
*/
|
|
5592
5781
|
// This is visible for testing.
|
|
5593
5782
|
find(s) {
|
|
5594
|
-
const a = this.
|
|
5783
|
+
const a = this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
|
|
5595
5784
|
if (a === null) {
|
|
5596
5785
|
return '';
|
|
5597
5786
|
}
|
|
@@ -5607,7 +5796,7 @@
|
|
|
5607
5796
|
*/
|
|
5608
5797
|
// This is visible for testing.
|
|
5609
5798
|
findIndex(s) {
|
|
5610
|
-
return this.
|
|
5799
|
+
return this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, 2);
|
|
5611
5800
|
}
|
|
5612
5801
|
|
|
5613
5802
|
/**
|
|
@@ -5619,7 +5808,7 @@
|
|
|
5619
5808
|
*/
|
|
5620
5809
|
// This is visible for testing.
|
|
5621
5810
|
findUTF8Submatch(b) {
|
|
5622
|
-
const a = this.
|
|
5811
|
+
const a = this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap);
|
|
5623
5812
|
if (a === null) {
|
|
5624
5813
|
return null;
|
|
5625
5814
|
}
|
|
@@ -5641,7 +5830,7 @@
|
|
|
5641
5830
|
*/
|
|
5642
5831
|
// This is visible for testing.
|
|
5643
5832
|
findUTF8SubmatchIndex(b) {
|
|
5644
|
-
return this.pad(this.
|
|
5833
|
+
return this.pad(this.executeEngine(MachineInput.fromUTF8(b), 0, RE2Flags.UNANCHORED, this.prog.numCap));
|
|
5645
5834
|
}
|
|
5646
5835
|
|
|
5647
5836
|
/**
|
|
@@ -5653,7 +5842,7 @@
|
|
|
5653
5842
|
*/
|
|
5654
5843
|
// This is visible for testing.
|
|
5655
5844
|
findSubmatch(s) {
|
|
5656
|
-
const a = this.
|
|
5845
|
+
const a = this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap);
|
|
5657
5846
|
if (a === null) {
|
|
5658
5847
|
return null;
|
|
5659
5848
|
}
|
|
@@ -5675,7 +5864,7 @@
|
|
|
5675
5864
|
*/
|
|
5676
5865
|
// This is visible for testing.
|
|
5677
5866
|
findSubmatchIndex(s) {
|
|
5678
|
-
return this.pad(this.
|
|
5867
|
+
return this.pad(this.executeEngine(MachineInput.fromUTF16(s), 0, RE2Flags.UNANCHORED, this.prog.numCap));
|
|
5679
5868
|
}
|
|
5680
5869
|
|
|
5681
5870
|
/**
|
|
@@ -6248,6 +6437,7 @@
|
|
|
6248
6437
|
exports.Matcher = Matcher;
|
|
6249
6438
|
exports.RE2JS = RE2JS;
|
|
6250
6439
|
exports.RE2JSCompileException = RE2JSCompileException;
|
|
6440
|
+
exports.RE2JSDfaMemoryException = RE2JSDfaMemoryException;
|
|
6251
6441
|
exports.RE2JSException = RE2JSException;
|
|
6252
6442
|
exports.RE2JSFlagsException = RE2JSFlagsException;
|
|
6253
6443
|
exports.RE2JSGroupException = RE2JSGroupException;
|