@bufbuild/re2 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +30 -0
- package/dist/cjs/CharClass.d.ts +30 -0
- package/dist/cjs/CharClass.js +284 -0
- package/dist/cjs/CharGroup.d.ts +8 -0
- package/dist/cjs/CharGroup.js +83 -0
- package/dist/cjs/Codepoint.d.ts +3 -0
- package/dist/cjs/Codepoint.js +62 -0
- package/dist/cjs/Compiler.d.ts +40 -0
- package/dist/cjs/Compiler.js +262 -0
- package/dist/cjs/DFA.d.ts +36 -0
- package/dist/cjs/DFA.js +350 -0
- package/dist/cjs/Inst.d.ts +26 -0
- package/dist/cjs/Inst.js +86 -0
- package/dist/cjs/MachineInput.d.ts +17 -0
- package/dist/cjs/MachineInput.js +72 -0
- package/dist/cjs/Parser.d.ts +111 -0
- package/dist/cjs/Parser.js +1538 -0
- package/dist/cjs/Prefilter.d.ts +19 -0
- package/dist/cjs/Prefilter.js +163 -0
- package/dist/cjs/Prog.d.ts +39 -0
- package/dist/cjs/Prog.js +154 -0
- package/dist/cjs/RE2.d.ts +27 -0
- package/dist/cjs/RE2.js +221 -0
- package/dist/cjs/RE2Flags.d.ts +16 -0
- package/dist/cjs/RE2Flags.js +58 -0
- package/dist/cjs/Regexp.d.ts +43 -0
- package/dist/cjs/Regexp.js +98 -0
- package/dist/cjs/Simplify.d.ts +3 -0
- package/dist/cjs/Simplify.js +230 -0
- package/dist/cjs/Unicode.d.ts +17 -0
- package/dist/cjs/Unicode.js +165 -0
- package/dist/cjs/UnicodeRangeTable.d.ts +12 -0
- package/dist/cjs/UnicodeRangeTable.js +31 -0
- package/dist/cjs/UnicodeTables.d.ts +29 -0
- package/dist/cjs/UnicodeTables.js +571 -0
- package/dist/cjs/Utils.d.ts +22 -0
- package/dist/cjs/Utils.js +119 -0
- package/dist/cjs/__fixtures__/find.d.ts +9 -0
- package/dist/cjs/__fixtures__/find.js +115 -0
- package/dist/cjs/chars.d.ts +2 -0
- package/dist/cjs/chars.js +19 -0
- package/dist/cjs/exceptions.d.ts +55 -0
- package/dist/cjs/exceptions.js +94 -0
- package/dist/cjs/index.d.ts +102 -0
- package/dist/cjs/index.js +173 -0
- package/dist/cjs/package.json +1 -0
- package/dist/cjs/testParser.d.ts +3 -0
- package/dist/cjs/testParser.js +143 -0
- package/dist/esm/CharClass.d.ts +30 -0
- package/dist/esm/CharClass.js +281 -0
- package/dist/esm/CharGroup.d.ts +8 -0
- package/dist/esm/CharGroup.js +78 -0
- package/dist/esm/Codepoint.d.ts +3 -0
- package/dist/esm/Codepoint.js +59 -0
- package/dist/esm/Compiler.d.ts +40 -0
- package/dist/esm/Compiler.js +259 -0
- package/dist/esm/DFA.d.ts +36 -0
- package/dist/esm/DFA.js +347 -0
- package/dist/esm/Inst.d.ts +26 -0
- package/dist/esm/Inst.js +83 -0
- package/dist/esm/MachineInput.d.ts +17 -0
- package/dist/esm/MachineInput.js +68 -0
- package/dist/esm/Parser.d.ts +111 -0
- package/dist/esm/Parser.js +1535 -0
- package/dist/esm/Prefilter.d.ts +19 -0
- package/dist/esm/Prefilter.js +159 -0
- package/dist/esm/Prog.d.ts +39 -0
- package/dist/esm/Prog.js +150 -0
- package/dist/esm/RE2.d.ts +27 -0
- package/dist/esm/RE2.js +218 -0
- package/dist/esm/RE2Flags.d.ts +16 -0
- package/dist/esm/RE2Flags.js +41 -0
- package/dist/esm/Regexp.d.ts +43 -0
- package/dist/esm/Regexp.js +94 -0
- package/dist/esm/Simplify.d.ts +3 -0
- package/dist/esm/Simplify.js +228 -0
- package/dist/esm/Unicode.d.ts +17 -0
- package/dist/esm/Unicode.js +150 -0
- package/dist/esm/UnicodeRangeTable.d.ts +12 -0
- package/dist/esm/UnicodeRangeTable.js +28 -0
- package/dist/esm/UnicodeTables.d.ts +29 -0
- package/dist/esm/UnicodeTables.js +568 -0
- package/dist/esm/Utils.d.ts +22 -0
- package/dist/esm/Utils.js +103 -0
- package/dist/esm/__fixtures__/find.d.ts +9 -0
- package/dist/esm/__fixtures__/find.js +112 -0
- package/dist/esm/chars.d.ts +2 -0
- package/dist/esm/chars.js +14 -0
- package/dist/esm/exceptions.d.ts +55 -0
- package/dist/esm/exceptions.js +86 -0
- package/dist/esm/index.d.ts +102 -0
- package/dist/esm/index.js +163 -0
- package/dist/esm/testParser.d.ts +3 -0
- package/dist/esm/testParser.js +138 -0
- package/package.json +49 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { Regexp } from "./Regexp.js";
|
|
2
|
+
import type { MachineUTF16Input } from "./MachineInput.js";
|
|
3
|
+
/**
 * A node of a prefilter tree.
 *
 * `RE2.executeEngine` evaluates the tree before running the matching engines
 * on unanchored searches and gives up early when `eval` returns `false`.
 */
declare class Prefilter {
    /** Numeric tags for the node kinds; `type` holds one of these values. */
    static Type: {
        NONE: number;
        EXACT: number;
        AND: number;
        OR: number;
    };
    /** Kind of this node (one of `Prefilter.Type`). */
    type: number;
    /** Child nodes; combined by `AND` / `OR` nodes. */
    subs: Prefilter[];
    /** Literal text attached to an `EXACT` node; empty by default. */
    str: string;
    /** Creates a node of the given kind with no children and an empty `str`. */
    constructor(type: number);
    /**
     * Evaluates the node against `input` for a search starting at `pos`.
     * Returns `false` only when the node rules a match out.
     */
    eval(input: MachineUTF16Input, pos: number): boolean;
}
|
|
16
|
+
/** Factory for prefilter trees. */
declare const PrefilterTree: {
    /** Builds the simplified prefilter tree for the parsed regexp `re`. */
    build: (re: Regexp) => Prefilter;
};
|
|
19
|
+
export { Prefilter, PrefilterTree };
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.PrefilterTree = exports.Prefilter = void 0;
|
|
4
|
+
const Regexp_js_1 = require("./Regexp.js");
|
|
5
|
+
const RE2Flags_js_1 = require("./RE2Flags.js");
|
|
6
|
+
/**
 * A node of a prefilter tree: a cheap necessary condition that is checked
 * against the input before the real matching engines run.
 */
class Prefilter {
    type;
    subs;
    str;
    /** Numeric tags for the node kinds. */
    static Type = { NONE: 0, EXACT: 1, AND: 2, OR: 3 };
    /**
     * @param type One of `Prefilter.Type`; the node starts with no children
     *   and an empty literal.
     */
    constructor(type) {
        this.type = type;
        this.subs = [];
        this.str = "";
    }
    /**
     * Evaluates this node for a search in `input` starting at `pos`.
     *
     * - EXACT delegates to `input.hasString(this, pos)`; the node itself is
     *   passed (presumably the input reads `str` from it — confirm against
     *   MachineInput).
     * - AND requires every child to hold (an empty AND holds).
     * - OR requires at least one child to hold (an empty OR does not).
     * - NONE and any unrecognised kind impose no requirement.
     */
    eval(input, pos) {
        const { EXACT, AND, OR } = Prefilter.Type;
        if (this.type === EXACT) {
            return input.hasString(this, pos);
        }
        if (this.type === AND) {
            return this.subs.every((child) => child.eval(input, pos));
        }
        if (this.type === OR) {
            return this.subs.some((child) => child.eval(input, pos));
        }
        return true;
    }
}
|
|
39
|
+
exports.Prefilter = Prefilter;
|
|
40
|
+
/**
 * Derives an (unsimplified) prefilter tree from a parsed regexp node.
 *
 * Only constructs that guarantee literal text contribute a requirement:
 * - LITERAL (non-empty, not case-folded) becomes an EXACT node;
 * - CAPTURE, PLUS and REPEAT with `min >= 1` inherit their operand's filter;
 * - CONCAT becomes AND, ALTERNATE becomes OR over the operands' filters.
 * Everything else (anchors, classes, `.`, empty/no match, and any other
 * operator) yields NONE, i.e. "no requirement".
 */
const fromRegexp = (re) => {
    const noRequirement = () => new Prefilter(Prefilter.Type.NONE);
    if (!re) {
        return noRequirement();
    }
    const { Op } = Regexp_js_1.Regexp;
    // Builds an AND/OR node whose children are the filters of re's operands.
    const combine = (type) => {
        const node = new Prefilter(type);
        for (const operand of re.subs) {
            node.subs.push(fromRegexp(operand));
        }
        return node;
    };
    switch (re.op) {
        case Op.LITERAL: {
            // Skip case-folded literals for simplicity
            if (re.runes.length === 0 || (re.flags & RE2Flags_js_1.FOLD_CASE) !== 0) {
                return noRequirement();
            }
            const node = new Prefilter(Prefilter.Type.EXACT);
            node.str = Array.from(re.runes, (rune) => String.fromCodePoint(rune)).join("");
            return node;
        }
        case Op.CAPTURE:
        case Op.PLUS:
            return fromRegexp(re.subs[0]);
        case Op.REPEAT:
            // A repeat that may occur zero times guarantees nothing.
            return re.min >= 1 ? fromRegexp(re.subs[0]) : noRequirement();
        case Op.CONCAT:
            return combine(Prefilter.Type.AND);
        case Op.ALTERNATE:
            return combine(Prefilter.Type.OR);
        default:
            return noRequirement();
    }
};
|
|
98
|
+
/**
 * Simplifies a prefilter tree bottom-up and returns the simplified root.
 *
 * - EXACT, NONE and unrecognised nodes are returned untouched.
 * - AND: NONE children are dropped and nested ANDs are flattened.
 * - OR: a NONE child makes the whole OR a NONE (one unconstrained branch
 *   means no requirement at all); nested ORs are flattened and EXACT
 *   children with the same text are de-duplicated, keeping the first.
 * - A combinator left with no children becomes NONE, and one left with a
 *   single child is replaced by that child. For OR this collapse happens
 *   after de-duplication, so `a|a` yields the bare literal rather than a
 *   one-child OR.
 *
 * The node passed in may be mutated: its `subs` array is replaced when the
 * node itself is kept.
 */
const simplify = (pf) => {
    const { NONE, EXACT, AND, OR } = Prefilter.Type;
    // Shared tail for AND/OR once the surviving children are known.
    const finish = (children) => {
        if (children.length === 0) {
            return new Prefilter(NONE);
        }
        if (children.length === 1) {
            return children[0];
        }
        pf.subs = children;
        return pf;
    };
    if (pf.type === AND) {
        const children = [];
        for (const sub of pf.subs) {
            const s = simplify(sub);
            if (s.type === NONE) {
                continue; // no requirement: contributes nothing to a conjunction
            }
            if (s.type === AND) {
                children.push(...s.subs);
            }
            else {
                children.push(s);
            }
        }
        return finish(children);
    }
    if (pf.type === OR) {
        const seenLiterals = new Set();
        const children = [];
        // Appends a branch unless it is an EXACT literal we already have.
        const addBranch = (branch) => {
            if (branch.type === EXACT) {
                if (seenLiterals.has(branch.str)) {
                    return;
                }
                seenLiterals.add(branch.str);
            }
            children.push(branch);
        };
        for (const sub of pf.subs) {
            const s = simplify(sub);
            if (s.type === NONE) {
                // If any branch of an OR has no requirements, the whole OR has no requirements
                return new Prefilter(NONE);
            }
            if (s.type === OR) {
                for (const branch of s.subs) {
                    addBranch(branch);
                }
            }
            else {
                addBranch(s);
            }
        }
        return finish(children);
    }
    return pf;
};
|
|
160
|
+
/** Public entry point: builds the simplified prefilter tree for a regexp. */
const PrefilterTree = {
    build: (re) => {
        const unsimplified = fromRegexp(re);
        return simplify(unsimplified);
    },
};
|
|
163
|
+
exports.PrefilterTree = PrefilterTree;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { Inst } from "./Inst.js";
|
|
2
|
+
/**
 * A list of instruction pointers waiting to be patched.
 * Tracks both `head` and `tail` to allow O(1) appending during compilation.
 *
 * Values are encoded integers, not standard memory pointers:
 * - Program instruction index: `l >> 1`
 * - Patch `.out` field if: `(l & 1) === 0`
 * - Patch `.arg` field if: `(l & 1) === 1`
 * - `0` denotes an empty list.
 *
 * @see https://swtch.com/~rsc/regexp/regexp1.html
 */
declare class PatchList {
    /** Encoded pointer to the start of the patch list (`0` when empty). */
    head: number;
    /** Encoded pointer to the end of the patch list. */
    tail: number;
    /**
     * @param head - Encoded pointer to the start of the patch list.
     * @param tail - Encoded pointer to the end of the patch list.
     */
    constructor(head?: number, tail?: number);
}
|
|
21
|
+
/**
 * A Prog is a compiled regular expression program.
 */
declare class Prog {
    /** The program's instructions, indexed by pc. */
    inst: Inst[];
    /** Index of the start instruction. */
    start: number;
    /** Capture count; starts at 2 for the implicit whole-match group. */
    numCap: number;
    constructor();
    /** Returns the instruction at `pc`. */
    getInst(pc: number): Inst;
    /** Returns the number of instructions in this program. */
    numInst(): number;
    /** Appends a new instruction with operator `op`. */
    addInst(op: number): void;
    /** Follows no-op and capture instructions from `pc` and returns the first other instruction. */
    skipNop(pc: number): Inst;
    /**
     * Returns `[complete, prefix]`: the literal string every match must start
     * with, and whether that prefix is the entire match.
     */
    prefix(): [boolean, string];
    /**
     * Returns the leading empty-width conditions that must hold in any match,
     * or -1 if no match is possible.
     */
    startCond(): number;
    /** Returns the patch-list entry that follows the encoded pointer `l`. */
    next(l: number): number;
    /** Writes `val` into every slot on patch list `l`. */
    patch(l: PatchList, val: number): void;
    /** Concatenates two patch lists and returns the combined list. */
    append(l1: PatchList, l2: PatchList): PatchList;
}
|
|
39
|
+
export { Prog, PatchList };
|
package/dist/cjs/Prog.js
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.PatchList = exports.Prog = void 0;
|
|
4
|
+
const RE2Flags_js_1 = require("./RE2Flags.js");
|
|
5
|
+
const Inst_js_1 = require("./Inst.js");
|
|
6
|
+
/**
 * A list of instruction pointers waiting to be patched.
 * Tracks both `head` and `tail` to allow O(1) appending during compilation.
 *
 * Values are encoded integers, not standard memory pointers:
 * - Program instruction index: `l >> 1`
 * - Patch `.out` field if: `(l & 1) === 0`
 * - Patch `.arg` field if: `(l & 1) === 1`
 * - `0` denotes an empty list.
 *
 * @see https://swtch.com/~rsc/regexp/regexp1.html
 */
class PatchList {
    head;
    tail;
    /**
     * Both pointers default to `0`, which produces the empty list.
     *
     * @param {number} head - Encoded pointer to the start of the patch list.
     * @param {number} tail - Encoded pointer to the end of the patch list.
     */
    constructor(head = 0, tail = 0) {
        this.head = head;
        this.tail = tail;
    }
}
|
|
28
|
+
exports.PatchList = PatchList;
|
|
29
|
+
/**
 * A Prog is a compiled regular expression program.
 */
class Prog {
    inst;
    start;
    numCap;
    constructor() {
        this.inst = [];
        // Index of the start instruction.
        this.start = 0;
        // Number of CAPTURE insts in re; 2 => implicit ( and ) for whole match $0.
        this.numCap = 2;
    }
    /**
     * Returns the instruction at the specified pc.
     * Precondition: pc > 0 && pc < numInst().
     */
    getInst(pc) {
        return this.inst[pc];
    }
    /** Returns the number of instructions in this program. */
    numInst() {
        return this.inst.length;
    }
    /**
     * Adds a new instruction to this program, with operator `op`; its pc is
     * the value `numInst()` had before the call.
     */
    addInst(op) {
        this.inst.push(new Inst_js_1.Inst(op));
    }
    /**
     * Follows any no-op or capturing instructions starting at `pc` and
     * returns the first instruction that is neither.
     */
    skipNop(pc) {
        const { Inst } = Inst_js_1;
        let current = this.inst[pc];
        while (current.op === Inst.NOP || current.op === Inst.CAPTURE) {
            current = this.inst[current.out];
        }
        return current;
    }
    /**
     * Returns `[complete, prefix]`: `prefix` is a literal string that all
     * matches for the regexp must start with, and `complete` is true if the
     * prefix is the entire match.
     */
    prefix() {
        const { Inst } = Inst_js_1;
        let literal = "";
        let current = this.skipNop(this.start);
        // Consume consecutive single-rune, case-sensitive instructions.
        while (Inst.isRuneOp(current.op) &&
            current.runes.length === 1 &&
            (current.arg & RE2Flags_js_1.FOLD_CASE) === 0) {
            literal += String.fromCodePoint(current.runes[0]);
            current = this.skipNop(current.out);
        }
        return [current.op === Inst.MATCH, literal];
    }
    /**
     * Returns the leading empty-width conditions that must be true in any
     * match. It returns -1 (all bits set) if no matches are possible.
     */
    startCond() {
        const { Inst } = Inst_js_1;
        let conditions = 0;
        let current = this.inst[this.start];
        for (;;) {
            if (current.op === Inst.EMPTY_WIDTH) {
                conditions |= current.arg;
            }
            else if (current.op === Inst.FAIL) {
                return -1;
            }
            else if (current.op !== Inst.CAPTURE && current.op !== Inst.NOP) {
                // First instruction that is not a leading assertion/no-op.
                return conditions;
            }
            current = this.inst[current.out];
        }
    }
    // --- Patch list ---
    // A patchlist is a list of instruction pointers that need to be filled in
    // (patched). Because the pointers haven't been filled in yet, we can reuse
    // their storage to hold the list. See
    // http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
    // These aren't really pointers: they're integers. A value l denotes
    // inst[l >> 1].out when (l & 1) === 0, or .arg when (l & 1) === 1.
    // l === 0 denotes the empty list, okay because we start every program with
    // a fail instruction, so we'll never want to point at its output link.
    /** Returns the list entry stored in the slot that `l` denotes. */
    next(l) {
        const target = this.inst[l >> 1];
        return (l & 1) === 0 ? target.out : target.arg;
    }
    /** Walks patch list `l` from its head and stores `val` in every slot. */
    patch(l, val) {
        let cursor = l.head;
        while (cursor !== 0) {
            const target = this.inst[cursor >> 1];
            const slot = (cursor & 1) === 0 ? "out" : "arg";
            // Read the link to the next entry before overwriting the slot.
            cursor = target[slot];
            target[slot] = val;
        }
    }
    /**
     * Joins `l2` onto the end of `l1`. If either list is empty the other is
     * returned as is; otherwise `l1`'s tail slot is linked to `l2`'s head.
     */
    append(l1, l2) {
        if (l1.head === 0) {
            return l2;
        }
        if (l2.head === 0) {
            return l1;
        }
        // We know exactly where the tail is
        const last = this.inst[l1.tail >> 1];
        const slot = (l1.tail & 1) === 0 ? "out" : "arg";
        last[slot] = l2.head;
        return new PatchList(l1.head, l2.tail);
    }
}
|
|
154
|
+
exports.Prog = Prog;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { type MachineUTF16Input } from "./MachineInput.js";
|
|
2
|
+
import { DFA } from "./DFA.js";
|
|
3
|
+
import { Prefilter } from "./Prefilter.js";
|
|
4
|
+
import type { Prog } from "./Prog.js";
|
|
5
|
+
/**
 * A compiled regular expression together with the data its matching
 * engines need.
 */
declare class RE2 {
    /** The pattern text this instance was constructed from. */
    expr: string;
    /** The compiled program. */
    prog: Prog;
    /** Number of capturing groups (the parsed regexp's `maxCap()`). */
    numSubexp: number;
    /** Leading empty-width conditions of the program (`prog.startCond()`). */
    cond: number;
    /** Literal prefix every match must start with (`prog.prefix()`); may be empty. */
    prefix: string;
    /** True when `prefix` is the entire match. */
    prefixComplete: boolean;
    /** First code point of `prefix`, or 0 when the prefix is empty. */
    prefixRune: number;
    /** DFA engine built over `prog`. */
    dfa: DFA;
    /** Prefilter tree, or `null` when the pattern yields no requirement. */
    prefilter: Prefilter | null;
    /** Named groups as reported by the parsed regexp. */
    namedGroups: Map<string, number>;
    /** Compiles `expr` using the `PERL` flag set. */
    static compile(expr: string): RE2;
    /** Compiles `expr` with the given parser flags. */
    static compileImpl(expr: string, mode: number): RE2;
    /** Parses, simplifies and compiles `expr` with the given parser flags. */
    constructor(expr: string, mode: number);
    /**
     * Literal-only fast path: locates `prefix` in `input` honouring `anchor`.
     * Returns `null` on failure, `[]` on success when `ncap` is 0, otherwise
     * `ncap` slots with the match bounds in slots 0 and 1 and -1 elsewhere.
     */
    matchPrefixComplete(input: MachineUTF16Input, pos: number, anchor: number, ncap: number): number[] | null;
    /**
     * Runs the match: literal fast path, then prefilter, DFA and NFA
     * fallback. Returns `null` when there is no match.
     */
    executeEngine(input: MachineUTF16Input, pos: number, anchor: number, ncap: number): number[] | null;
    /** Boolean-only NFA simulation used when the DFA cannot decide. */
    _nfaFallback(input: MachineUTF16Input, pos: number, anchor: number): boolean;
    /** Returns `numSubexp`. */
    numberOfCapturingGroups(): number;
    /** Currently a no-op. */
    reset(): void;
    /** Returns the original pattern text. */
    toString(): string;
    /** Reports whether `s` contains a match (unanchored search from index 0). */
    match(s: string): boolean;
}
|
|
27
|
+
export { RE2 };
|
package/dist/cjs/RE2.js
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.RE2 = void 0;
|
|
4
|
+
const RE2Flags_js_1 = require("./RE2Flags.js");
|
|
5
|
+
const MachineInput_js_1 = require("./MachineInput.js");
|
|
6
|
+
const DFA_js_1 = require("./DFA.js");
|
|
7
|
+
const Inst_js_1 = require("./Inst.js");
|
|
8
|
+
const Prefilter_js_1 = require("./Prefilter.js");
|
|
9
|
+
const Compiler_js_1 = require("./Compiler.js");
|
|
10
|
+
const Simplify_js_1 = require("./Simplify.js");
|
|
11
|
+
const Parser_js_1 = require("./Parser.js");
|
|
12
|
+
const Utils_js_1 = require("./Utils.js");
|
|
13
|
+
/**
 * A compiled regular expression: the program, the facts derived from it
 * (literal prefix, start conditions, prefilter) and the engines that run it.
 */
class RE2 {
    expr;
    prog;
    numSubexp;
    cond;
    prefix;
    prefixComplete;
    prefixRune;
    dfa;
    prefilter;
    namedGroups;
    /** Compiles `expr` using the PERL flag set. */
    static compile(expr) {
        return RE2.compileImpl(expr, RE2Flags_js_1.PERL);
    }
    /** Compiles `expr` with the parser flags in `mode`. */
    static compileImpl(expr, mode) {
        return new RE2(expr, mode);
    }
    /**
     * Parses and simplifies `expr`, builds its prefilter, compiles it and
     * derives the literal prefix, start conditions and DFA.
     *
     * @param expr Pattern source text.
     * @param mode Parser flags.
     */
    constructor(expr, mode) {
        let re = Parser_js_1.Parser.parse(expr, mode);
        re = (0, Simplify_js_1.simplify)(re);
        const prefilter = Prefilter_js_1.PrefilterTree.build(re);
        const prog = Compiler_js_1.Compiler.compileRegexp(re);
        // A NONE root filters nothing, so store null and skip evaluation.
        this.prefilter = prefilter.type === Prefilter_js_1.Prefilter.Type.NONE ? null : prefilter;
        const [prefixCompl, prefixStr] = prog.prefix();
        this.prefixComplete = prefixCompl;
        this.prefix = prefixStr;
        this.prefixRune = 0;
        if (this.prefix.length > 0) {
            const cp = this.prefix.codePointAt(0);
            if (cp === undefined) {
                throw new Error("RE2: prefix has no code point");
            }
            this.prefixRune = cp;
        }
        this.namedGroups = re.namedGroups;
        this.expr = expr;
        this.prog = prog;
        this.numSubexp = re.maxCap();
        this.cond = prog.startCond();
        this.dfa = new DFA_js_1.DFA(this.prog);
    }
    /**
     * Fast path for patterns whose literal prefix is the whole match: finds
     * the prefix in `input` without running an engine.
     *
     * @param input Input to search; `index`, `prefixLength` and `endPos` are used.
     * @param pos Position to search from; anchored searches require 0.
     * @param anchor UNANCHORED, ANCHOR_START or ANCHOR_BOTH.
     * @param ncap Number of capture slots requested.
     * @returns `null` if there is no match; `[]` if matched and `ncap` is 0;
     *   otherwise `ncap` slots with the match start/end in slots 0 and 1 and
     *   -1 everywhere else.
     */
    matchPrefixComplete(input, pos, anchor, ncap) {
        const { UNANCHORED, ANCHOR_START, ANCHOR_BOTH } = RE2Flags_js_1;
        const anchoredAtStart = anchor === ANCHOR_START || anchor === ANCHOR_BOTH;
        if (anchoredAtStart && pos !== 0) {
            return null;
        }
        const pLen = input.prefixLength(this);
        let matchStart = -1;
        if (anchor === UNANCHORED) {
            // index() reports the offset relative to pos.
            const idx = input.index(this, pos);
            if (idx < 0) {
                return null;
            }
            matchStart = pos + idx;
        }
        else if (anchoredAtStart) {
            // ANCHOR_BOTH also needs the prefix to span the whole input.
            if (anchor === ANCHOR_BOTH && input.endPos() !== pLen) {
                return null;
            }
            if (input.index(this, 0) !== 0) {
                return null;
            }
            matchStart = 0;
        }
        if (matchStart < 0) {
            return null;
        }
        const matchEnd = matchStart + pLen;
        // If captures are requested (e.g. findSubmatch instead of test), populate bounds
        if (ncap > 0) {
            const matchcap = new Int32Array(ncap).fill(-1);
            matchcap[0] = matchStart;
            matchcap[1] = matchEnd;
            return Array.from(matchcap);
        }
        return []; // Matched successfully, but no capture data requested
    }
    /**
     * Runs the match, trying the cheapest strategy first: the literal fast
     * path, then the prefilter (unanchored searches only), then the DFA, and
     * finally the NFA fallback when the DFA returns `null`.
     *
     * @returns `null` for no match. On a match, the fast path returns what
     *   `matchPrefixComplete` returns; the DFA/NFA paths return `[]`.
     */
    executeEngine(input, pos, anchor, ncap) {
        if (this.prefixComplete && (ncap === 0 || this.numSubexp === 0)) {
            return this.matchPrefixComplete(input, pos, anchor, ncap);
        }
        if (this.prefilter !== null && anchor === RE2Flags_js_1.UNANCHORED) {
            if (!this.prefilter.eval(input, pos)) {
                return null;
            }
        }
        const dfaResult = this.dfa.match(input, pos, anchor);
        if (dfaResult !== null) {
            return dfaResult ? [] : null;
        }
        // Minimal NFA fallback for DFA state explosion
        return this._nfaFallback(input, pos, anchor) ? [] : null;
    }
    /**
     * Minimal boolean-only NFA for when the DFA bails due to state explosion.
     * No captures, no thread pools — just two sets of NFA states swapped each
     * step.
     *
     * Each position is decoded once, and all closures computed for one step
     * share a single `visited` set: they use the same empty-width context,
     * so a pc explored for one thread never needs exploring again that step.
     *
     * @param input Input to scan; `step(i)` is read as `(rune << 3) | width`.
     * @param pos Position to start matching at.
     * @param anchor UNANCHORED, ANCHOR_START or ANCHOR_BOTH.
     * @returns Whether the program matches.
     */
    _nfaFallback(input, pos, anchor) {
        const { Inst } = Inst_js_1;
        const { UNANCHORED, ANCHOR_BOTH } = RE2Flags_js_1;
        const prog = this.prog;
        const endPos = input.endPos();
        const instCount = prog.numInst();
        // Adds the closure of `pc` to `set`: ALT/NOP/CAPTURE are followed,
        // EMPTY_WIDTH is followed only if `context` satisfies it, and every
        // other instruction is recorded as a thread.
        const addState = (set, visited, pc, context) => {
            if (pc < 0 || pc >= instCount || visited.has(pc)) {
                return;
            }
            visited.add(pc);
            const inst = prog.getInst(pc);
            switch (inst.op) {
                case Inst.ALT:
                case Inst.ALT_MATCH:
                    addState(set, visited, inst.out, context);
                    addState(set, visited, inst.arg, context);
                    break;
                case Inst.NOP:
                case Inst.CAPTURE:
                    addState(set, visited, inst.out, context);
                    break;
                case Inst.EMPTY_WIDTH:
                    if ((inst.arg & ~context) === 0) {
                        addState(set, visited, inst.out, context);
                    }
                    break;
                default:
                    set.add(pc);
                    break;
            }
        };
        const containsMatch = (set) => {
            for (const pc of set) {
                if (prog.getInst(pc).op === Inst.MATCH) {
                    return true;
                }
            }
            return false;
        };
        let current = new Set();
        let next = new Set();
        // prevRune: the rune immediately before `pos`. See DFA.match for rationale.
        let prevRune = -1;
        if (pos > 0) {
            const r = input.step(pos - 1) >> 3;
            if (r >= 0) {
                prevRune = r;
            }
        }
        for (let i = pos; i <= endPos; i++) {
            let rune = -1;
            let width = 0;
            if (i < endPos) {
                const packed = input.step(i);
                rune = packed >> 3;
                width = packed & 7;
            }
            // Seed the start state at the first position. For unanchored
            // searches, later positions are seeded at the end of the
            // previous step with the same context.
            if (i === pos) {
                addState(current, new Set(), prog.start, (0, Utils_js_1.emptyOpContext)(prevRune, rune));
            }
            // Check for matches before consuming.
            // For UNANCHORED/ANCHOR_START, a MATCH at any position succeeds.
            // For ANCHOR_BOTH, we must consume the entire input — intermediate
            // matches are skipped; only the final post-loop check accepts MATCH.
            if (anchor !== ANCHOR_BOTH && containsMatch(current)) {
                return true;
            }
            if (i >= endPos || width === 0) {
                break;
            }
            // Step: consume the current character.
            const nextPos = i + width;
            const nextRune = nextPos < endPos ? input.step(nextPos) >> 3 : -1;
            const nextContext = (0, Utils_js_1.emptyOpContext)(rune, nextRune);
            const visited = new Set();
            next.clear();
            for (const pc of current) {
                const inst = prog.getInst(pc);
                if (Inst.isRuneOp(inst.op) && inst.matchRune(rune)) {
                    addState(next, visited, inst.out, nextContext);
                }
            }
            // For unanchored, a match may also begin at the next position.
            if (anchor === UNANCHORED) {
                addState(next, visited, prog.start, nextContext);
            }
            prevRune = rune;
            [current, next] = [next, current];
            // Anchored searches never gain new threads, so an empty set is final.
            if (current.size === 0 && anchor !== UNANCHORED) {
                return false;
            }
            i += width - 1; // loop increments by 1, but we advanced by width
        }
        // Final check for match after processing all input
        const endContext = (0, Utils_js_1.emptyOpContext)(prevRune, -1);
        const finalVisited = new Set();
        const finalSet = new Set();
        for (const pc of current) {
            addState(finalSet, finalVisited, pc, endContext);
        }
        return containsMatch(finalSet);
    }
    /** Returns the number of capturing groups in the pattern. */
    numberOfCapturingGroups() {
        return this.numSubexp;
    }
    /** Retained for API compatibility; does nothing. */
    reset() {
        // No-op: machine pool removed
    }
    /** Returns the pattern text this instance was built from. */
    toString() {
        return this.expr;
    }
    /** Reports whether `s` contains a match (unanchored, from index 0). */
    match(s) {
        return this.executeEngine((0, MachineInput_js_1.fromUTF16)(s), 0, RE2Flags_js_1.UNANCHORED, 0) !== null;
    }
}
|
|
221
|
+
exports.RE2 = RE2;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
// Parser/regexp flag bits. Each literal below is a distinct single-bit value,
// so flags can be OR-ed into one mask and tested with `&` (as the
// `& FOLD_CASE` checks in Prefilter.js and Prog.js do).
declare const FOLD_CASE = 1;
declare const LITERAL = 2;
declare const CLASS_NL = 4;
declare const DOT_NL = 8;
declare const ONE_LINE = 16;
declare const NON_GREEDY = 32;
declare const PERL_X = 64;
declare const UNICODE_GROUPS = 128;
declare const WAS_DOLLAR = 256;
// Declared only as `number`; the concrete values live in RE2Flags.js
// (presumably combinations of the bits above — confirm there).
declare const MATCH_NL: number;
// Flag set passed to the parser by `RE2.compile`.
declare const PERL: number;
declare const POSIX = 0;
// Anchor modes compared against the `anchor` argument of the RE2 matching
// methods.
declare const UNANCHORED = 0;
declare const ANCHOR_START = 1;
declare const ANCHOR_BOTH = 2;
|
|
16
|
+
export { UNANCHORED, ANCHOR_BOTH, NON_GREEDY, FOLD_CASE, LITERAL, ONE_LINE, WAS_DOLLAR, DOT_NL, UNICODE_GROUPS, CLASS_NL, PERL, ANCHOR_START, POSIX, MATCH_NL, PERL_X, };
|