@bufbuild/re2 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +30 -0
- package/dist/cjs/CharClass.d.ts +30 -0
- package/dist/cjs/CharClass.js +284 -0
- package/dist/cjs/CharGroup.d.ts +8 -0
- package/dist/cjs/CharGroup.js +83 -0
- package/dist/cjs/Codepoint.d.ts +3 -0
- package/dist/cjs/Codepoint.js +62 -0
- package/dist/cjs/Compiler.d.ts +40 -0
- package/dist/cjs/Compiler.js +262 -0
- package/dist/cjs/DFA.d.ts +36 -0
- package/dist/cjs/DFA.js +350 -0
- package/dist/cjs/Inst.d.ts +26 -0
- package/dist/cjs/Inst.js +86 -0
- package/dist/cjs/MachineInput.d.ts +17 -0
- package/dist/cjs/MachineInput.js +72 -0
- package/dist/cjs/Parser.d.ts +111 -0
- package/dist/cjs/Parser.js +1538 -0
- package/dist/cjs/Prefilter.d.ts +19 -0
- package/dist/cjs/Prefilter.js +163 -0
- package/dist/cjs/Prog.d.ts +39 -0
- package/dist/cjs/Prog.js +154 -0
- package/dist/cjs/RE2.d.ts +27 -0
- package/dist/cjs/RE2.js +221 -0
- package/dist/cjs/RE2Flags.d.ts +16 -0
- package/dist/cjs/RE2Flags.js +58 -0
- package/dist/cjs/Regexp.d.ts +43 -0
- package/dist/cjs/Regexp.js +98 -0
- package/dist/cjs/Simplify.d.ts +3 -0
- package/dist/cjs/Simplify.js +230 -0
- package/dist/cjs/Unicode.d.ts +17 -0
- package/dist/cjs/Unicode.js +165 -0
- package/dist/cjs/UnicodeRangeTable.d.ts +12 -0
- package/dist/cjs/UnicodeRangeTable.js +31 -0
- package/dist/cjs/UnicodeTables.d.ts +29 -0
- package/dist/cjs/UnicodeTables.js +571 -0
- package/dist/cjs/Utils.d.ts +22 -0
- package/dist/cjs/Utils.js +119 -0
- package/dist/cjs/__fixtures__/find.d.ts +9 -0
- package/dist/cjs/__fixtures__/find.js +115 -0
- package/dist/cjs/chars.d.ts +2 -0
- package/dist/cjs/chars.js +19 -0
- package/dist/cjs/exceptions.d.ts +55 -0
- package/dist/cjs/exceptions.js +94 -0
- package/dist/cjs/index.d.ts +102 -0
- package/dist/cjs/index.js +173 -0
- package/dist/cjs/package.json +1 -0
- package/dist/cjs/testParser.d.ts +3 -0
- package/dist/cjs/testParser.js +143 -0
- package/dist/esm/CharClass.d.ts +30 -0
- package/dist/esm/CharClass.js +281 -0
- package/dist/esm/CharGroup.d.ts +8 -0
- package/dist/esm/CharGroup.js +78 -0
- package/dist/esm/Codepoint.d.ts +3 -0
- package/dist/esm/Codepoint.js +59 -0
- package/dist/esm/Compiler.d.ts +40 -0
- package/dist/esm/Compiler.js +259 -0
- package/dist/esm/DFA.d.ts +36 -0
- package/dist/esm/DFA.js +347 -0
- package/dist/esm/Inst.d.ts +26 -0
- package/dist/esm/Inst.js +83 -0
- package/dist/esm/MachineInput.d.ts +17 -0
- package/dist/esm/MachineInput.js +68 -0
- package/dist/esm/Parser.d.ts +111 -0
- package/dist/esm/Parser.js +1535 -0
- package/dist/esm/Prefilter.d.ts +19 -0
- package/dist/esm/Prefilter.js +159 -0
- package/dist/esm/Prog.d.ts +39 -0
- package/dist/esm/Prog.js +150 -0
- package/dist/esm/RE2.d.ts +27 -0
- package/dist/esm/RE2.js +218 -0
- package/dist/esm/RE2Flags.d.ts +16 -0
- package/dist/esm/RE2Flags.js +41 -0
- package/dist/esm/Regexp.d.ts +43 -0
- package/dist/esm/Regexp.js +94 -0
- package/dist/esm/Simplify.d.ts +3 -0
- package/dist/esm/Simplify.js +228 -0
- package/dist/esm/Unicode.d.ts +17 -0
- package/dist/esm/Unicode.js +150 -0
- package/dist/esm/UnicodeRangeTable.d.ts +12 -0
- package/dist/esm/UnicodeRangeTable.js +28 -0
- package/dist/esm/UnicodeTables.d.ts +29 -0
- package/dist/esm/UnicodeTables.js +568 -0
- package/dist/esm/Utils.d.ts +22 -0
- package/dist/esm/Utils.js +103 -0
- package/dist/esm/__fixtures__/find.d.ts +9 -0
- package/dist/esm/__fixtures__/find.js +112 -0
- package/dist/esm/chars.d.ts +2 -0
- package/dist/esm/chars.js +14 -0
- package/dist/esm/exceptions.d.ts +55 -0
- package/dist/esm/exceptions.js +86 -0
- package/dist/esm/index.d.ts +102 -0
- package/dist/esm/index.js +163 -0
- package/dist/esm/testParser.d.ts +3 -0
- package/dist/esm/testParser.js +138 -0
- package/package.json +49 -0
package/dist/cjs/Inst.js
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.Inst = void 0;
|
|
4
|
+
const RE2Flags_js_1 = require("./RE2Flags.js");
|
|
5
|
+
const Unicode_js_1 = require("./Unicode.js");
|
|
6
|
+
/**
 * One instruction of the regular expression virtual machine.
 *
 * @see http://swtch.com/~rsc/regexp/regexp2.html
 */
class Inst {
    // Opcodes. RUNE through RUNE_ANY_NOT_NL are numbered contiguously so that
    // isRuneOp() can classify an opcode with a simple range test.
    static ALT = 1;
    static ALT_MATCH = 2;
    static CAPTURE = 3;
    static EMPTY_WIDTH = 4;
    static FAIL = 5;
    static MATCH = 6;
    static NOP = 7;
    static RUNE = 8;
    static RUNE1 = 9;
    static RUNE_ANY = 10;
    static RUNE_ANY_NOT_NL = 11;
    // Opcode handed to the constructor.
    op;
    // Used by every opcode except MATCH and FAIL.
    out;
    // Used by ALT, ALT_MATCH, CAPTURE and EMPTY_WIDTH; matchRune() also tests
    // its FOLD_CASE bit.
    arg;
    // A single entry is an exact rune to match; any other length is a flat
    // list of [lo, hi] pairs in which hi is *inclusive*.
    runes;
    /**
     * Reports whether `op` lies in the rune opcode range
     * (RUNE .. RUNE_ANY_NOT_NL, both inclusive).
     */
    static isRuneOp(op) {
        return op >= Inst.RUNE && op <= Inst.RUNE_ANY_NOT_NL;
    }
    /**
     * Creates an instruction with the given opcode; `out` and `arg` start at
     * 0 and `runes` starts empty.
     */
    constructor(op) {
        this.op = op;
        this.out = 0;
        this.arg = 0;
        this.runes = [];
    }
    /**
     * Returns true if this instruction matches (and consumes) the rune `r`.
     * It should only be called when `op` is a rune op.
     */
    matchRune(r) {
        const ranges = this.runes;
        const count = ranges.length;
        // A one-element list comes from a literal string, not a char class.
        if (count === 1) {
            const literal = ranges[0];
            // For a case-insensitive pattern the two runes are compared with
            // Unicode case folding. Folding may loop when executed, so the
            // pattern rune |literal| is the one passed first, rather than
            // the input rune |r|, to reduce the chance of that happening.
            const foldCase = (this.arg & RE2Flags_js_1.FOLD_CASE) !== 0;
            return foldCase
                ? (0, Unicode_js_1.equalsIgnoreCase)(literal, r)
                : r === literal;
        }
        switch (count) {
            case 2:
            case 4:
            case 6:
            case 8:
                // Up to four ranges: scan them in order and never fall
                // through to the binary search below.
                for (let i = 0; i < count; i += 2) {
                    if (r < ranges[i]) {
                        break;
                    }
                    if (r <= ranges[i + 1]) {
                        return true;
                    }
                }
                return false;
            default:
                break;
        }
        // Everything else: binary search over the [lo, hi] pairs.
        let low = 0;
        let high = (count / 2) | 0;
        while (low < high) {
            const mid = (low + high) >> 1;
            const base = 2 * mid;
            if (!(ranges[base] <= r)) {
                // r sorts before this pair; keep looking in the lower half.
                high = mid;
                continue;
            }
            if (r <= ranges[base + 1]) {
                return true;
            }
            low = mid + 1;
        }
        return false;
    }
}
|
|
86
|
+
exports.Inst = Inst;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { Prefilter } from "./Prefilter.js";
|
|
2
|
+
import type { RE2 } from "./RE2.js";
|
|
3
|
+
/**
 * Matcher input backed by a UTF-16 string. Positions passed to the methods
 * are offsets relative to `start`.
 */
declare class MachineUTF16Input {
    /** Sentinel value that `step` returns once the position reaches `end`. */
    static EOF(): number;

    /** The underlying string. */
    charSequence: string;
    /** Offset into `charSequence` that relative positions are added to. */
    start: number;
    /** Offset into `charSequence` at which `step` reports end of input. */
    end: number;

    /**
     * @param charSequence - The string to read from.
     * @param start - Optional start offset.
     * @param end - Optional end offset.
     */
    constructor(charSequence: string, start?: number, end?: number);

    /** Returns `end`. */
    endPos(): number;
    /** Returns the length of `re2`'s prefix string. */
    prefixLength(re2: RE2): number;
    /**
     * Searches for `re2`'s prefix at or after `pos`.
     *
     * @returns The distance from `pos` to the hit, or a negative number when
     * the prefix is not found.
     */
    index(re2: RE2, pos: number): number;
    /**
     * Reports whether the prefilter's string occurs at or after `pos` and
     * ends no later than `end`.
     */
    hasString(prefilter: Prefilter, pos: number): boolean;
    /**
     * Decodes the rune at `pos`.
     *
     * @returns `(rune << 3) | width`, with `width` being 1 or 2 UTF-16 code
     * units, or `EOF()` at the end of the input.
     */
    step(pos: number): number;
    /** Returns the empty-width context flags for the position `pos`. */
    context(pos: number): number;
}
|
|
16
|
+
/**
 * Builds a `MachineUTF16Input` for `charSequence`.
 *
 * @param charSequence - The string to read from.
 * @param start - Optional start offset into `charSequence`.
 * @param end - Optional end offset into `charSequence`.
 */
declare function fromUTF16(charSequence: string, start?: number, end?: number): MachineUTF16Input;
|
|
17
|
+
export { fromUTF16, MachineUTF16Input };
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.MachineUTF16Input = void 0;
|
|
4
|
+
exports.fromUTF16 = fromUTF16;
|
|
5
|
+
const Utils_js_1 = require("./Utils.js");
|
|
6
|
+
const Unicode_js_1 = require("./Unicode.js");
|
|
7
|
+
/**
 * Matcher input backed by a UTF-16 string. Every `pos` argument is an offset
 * relative to `start`.
 */
class MachineUTF16Input {
    charSequence;
    start;
    end;
    /**
     * @param charSequence the string to read from
     * @param start offset that relative positions are added to (default 0)
     * @param end offset at which step() reports end of input (default: the
     * string length)
     */
    constructor(charSequence, start = 0, end = charSequence.length) {
        this.charSequence = charSequence;
        this.start = start;
        this.end = end;
    }
    /** Sentinel returned by step() at the end of the input: (-1 << 3). */
    static EOF() {
        return -8;
    }
    /** Returns the end offset. */
    endPos() {
        return this.end;
    }
    /**
     * Reports whether prefilter.str occurs at or after `pos` and finishes no
     * later than `end`.
     */
    hasString(prefilter, pos) {
        const needle = prefilter.str;
        const found = this.charSequence.indexOf(needle, this.start + pos);
        if (found === -1) {
            return false;
        }
        return found <= this.end - needle.length;
    }
    /**
     * Decodes the rune at `pos`. The result is (rune << 3) | width, where
     * width is the number of UTF-16 code units consumed (1 or 2); EOF() is
     * returned once the position reaches `end`.
     */
    step(pos) {
        const at = this.start + pos;
        if (at >= this.end) {
            return MachineUTF16Input.EOF();
        }
        const text = this.charSequence;
        const lead = text.charCodeAt(at);
        // Anything that is not a high surrogate, or a high surrogate sitting
        // on the last code unit before `end`, is a single-unit rune.
        const singleUnit = lead < Unicode_js_1.MIN_HIGH_SURROGATE ||
            lead > Unicode_js_1.MAX_HIGH_SURROGATE ||
            at + 1 >= this.end;
        if (!singleUnit) {
            const trail = text.charCodeAt(at + 1);
            if (trail >= Unicode_js_1.MIN_LOW_SURROGATE && trail <= Unicode_js_1.MAX_LOW_SURROGATE) {
                // Combine the surrogate pair into one code point by hand.
                const highBits = (lead - Unicode_js_1.MIN_HIGH_SURROGATE) * 0x400;
                const lowBits = trail - Unicode_js_1.MIN_LOW_SURROGATE;
                const rune = highBits + lowBits + Unicode_js_1.MIN_SUPPLEMENTARY_CODE_POINT;
                return (rune << 3) | 2;
            }
        }
        // Single code unit, or an unpaired high surrogate passed through.
        return (lead << 3) | 1;
    }
    /**
     * Looks for re2.prefix at or after `pos`. Returns the distance from
     * `pos` to the hit, or the negative indexOf result when there is none.
     */
    index(re2, pos) {
        const from = this.start + pos;
        const hit = this.charSequence.indexOf(re2.prefix, from);
        if (hit < 0) {
            return hit;
        }
        return hit - from;
    }
    /**
     * Returns the emptyOpContext() flags for `pos`, computed from the code
     * points read just before and at that position (-1 where there is none).
     */
    context(pos) {
        const text = this.charSequence;
        const at = this.start + pos;
        let before = -1;
        if (at > 0 && at <= text.length) {
            before = text.codePointAt(at - 1);
        }
        let after = -1;
        if (at < text.length) {
            after = text.codePointAt(at);
        }
        if (before === undefined || after === undefined) {
            throw new Error("invalid state");
        }
        return (0, Utils_js_1.emptyOpContext)(before, after);
    }
    /** Returns the length of re2.prefix. */
    prefixLength(re2) {
        return re2.prefix.length;
    }
}
|
|
69
|
+
exports.MachineUTF16Input = MachineUTF16Input;
|
|
70
|
+
/**
 * Convenience factory: wraps `charSequence` in a MachineUTF16Input.
 * `start` defaults to 0 and `end` defaults to the length of the string.
 */
function fromUTF16(charSequence, start = 0, end = charSequence.length) {
    const input = new MachineUTF16Input(charSequence, start, end);
    return input;
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { UnicodeRangeTable } from "./UnicodeRangeTable.js";
|
|
2
|
+
import { CharClass } from "./CharClass.js";
|
|
3
|
+
import { Regexp } from "./Regexp.js";
|
|
4
|
+
/**
 * Cursor over a string, consumed by the static and instance parsing methods
 * of `Parser`.
 *
 * NOTE(review): only the declaration is visible here; the member notes below
 * are inferred from names and signatures. Confirm them against the
 * implementation before relying on them.
 */
declare class StringIterator {
    // --- state ---
    str: string;
    position: number;

    constructor(str: string);

    // --- cursor control ---
    /** Presumably returns the current position. */
    pos(): number;
    /** Presumably moves the cursor back to `pos`. */
    rewindTo(pos: number): void;
    /** Presumably advances the cursor by `n`. */
    skip(n: number): void;
    /** Presumably advances the cursor past `s`. */
    skipString(s: string): void;

    // --- reading ---
    /** Presumably reports whether any input remains. */
    more(): boolean;
    /** Presumably returns the next rune without consuming it. */
    peek(): number;
    /** Presumably returns the next rune and consumes it. */
    pop(): number;
    /** Presumably reports whether the remaining input starts with `s`. */
    lookingAt(s: string): boolean;

    // --- slicing ---
    /** Presumably returns the unread remainder of the string. */
    rest(): string;
    /** Presumably returns the text between `beforePos` and the cursor. */
    from(beforePos: number): string;
    toString(): string;
}
|
|
20
|
+
/**
 * A parser of regular expression patterns.
 *
 * The only public entry point is {@link Parser.parse}.
 *
 * NOTE(review): only the declaration is visible here; the section headings
 * below are inferred from member names. Confirm them against the
 * implementation.
 */
declare class Parser {
    // --- entry point ---

    /**
     * Parses the regular expression `pattern` with the mode flags `flags`.
     *
     * @param pattern - The regular expression source text.
     * @param flags - The mode flags to parse with.
     */
    static parse(pattern: string, flags: number): Regexp;

    // --- error strings (presumably reported when parsing fails) ---
    static ERR_INVALID_CHAR_RANGE: string;
    static ERR_INVALID_ESCAPE: string;
    static ERR_INVALID_NAMED_CAPTURE: string;
    static ERR_INVALID_PERL_OP: string;
    static ERR_INVALID_REPEAT_OP: string;
    static ERR_INVALID_REPEAT_SIZE: string;
    static ERR_MISSING_BRACKET: string;
    static ERR_MISSING_PAREN: string;
    static ERR_MISSING_REPEAT_ARGUMENT: string;
    static ERR_TRAILING_BACKSLASH: string;
    static ERR_DUPLICATE_NAMED_CAPTURE: string;
    static ERR_UNEXPECTED_PAREN: string;
    static ERR_NESTING_DEPTH: string;
    static ERR_LARGE: string;
    static ERR_BAD_EXPRESSION: string;

    // --- limits (presumably enforced by the check* methods) ---
    static MAX_HEIGHT: number;
    static MAX_SIZE: number;
    static MAX_RUNES: number;

    // --- shared range tables ---
    static ANY_TABLE: UnicodeRangeTable;
    static ASCII_TABLE: UnicodeRangeTable;
    static ASCII_FOLD_TABLE: UnicodeRangeTable;

    // --- static helpers ---
    static unicodeTable(name: string): {
        tab: UnicodeRangeTable | null;
        fold: UnicodeRangeTable | null;
        sign: number;
    } | null;
    static minFoldRune(r: number): number;
    static literalRegexp(s: string, flags: number): Regexp;
    static parseRepeat(t: StringIterator): number;
    static parseInt(t: StringIterator): number;
    static parseEscape(t: StringIterator): number;
    static parseClassChar(t: StringIterator, wholeClassPos: number): number;
    static isValidCaptureName(name: string): boolean;
    static isCharClass(re: Regexp): boolean;
    static matchRune(re: Regexp, r: number): boolean;
    static mergeCharClass(dst: Regexp, src: Regexp): void;
    static concatRunes(x: number[], y: number[]): number[];

    // --- per-parse state ---
    wholeRegexp: string;
    flags: number;
    numCap: number;
    namedGroups: Map<string, number>;
    stack: Regexp[];
    free: Regexp | null;
    numRegexp: number;
    numRunes: number;
    repeats: number;
    height: Map<Regexp, number> | null;
    size: Map<Regexp, number> | null;

    /**
     * @param wholeRegexp - The complete pattern text.
     * @param flags - Optional mode flags.
     */
    constructor(wholeRegexp: string, flags?: number);

    // --- node allocation and limit checks ---
    newRegexp(op: number): Regexp;
    reuse(re: Regexp): void;
    checkLimits(re: Regexp): void;
    checkSize(re: Regexp): void;
    calcSize(re: Regexp, force?: boolean): number;
    checkHeight(re: Regexp): void;
    calcHeight(re: Regexp, force?: boolean): number;

    // --- parse stack ---
    pop(): Regexp | undefined;
    popToPseudo(): Regexp[];
    push(re: Regexp): Regexp | null;

    // --- node construction ---
    maybeConcat(r: number, flags: number): boolean;
    newLiteral(r: number, flags: number): Regexp;
    literal(r: number): void;
    op(op: number): Regexp | null;
    repeat(op: number, min: number, max: number, beforePos: number, t: StringIterator, lastRepeatPos: number): void;
    repeatIsValid(re: Regexp, n: number): boolean;
    concat(): Regexp | null;
    alternate(): Regexp | null;
    cleanAlt(re: Regexp): void;
    collapse(subs: Regexp[], op: number): Regexp;

    // --- parsing of individual constructs ---
    parseInternal(): Regexp;
    parsePerlFlags(t: StringIterator): void;
    parseVerticalBar(): void;
    swapVerticalBar(): boolean;
    parseRightParen(): void;
    parsePerlClassEscape(t: StringIterator, cc: CharClass): boolean;
    parseNamedClass(t: StringIterator, cc: CharClass): boolean;
    parseUnicodeClass(t: StringIterator, cc: CharClass): boolean;
    parseClass(t: StringIterator): void;
}
|
|
111
|
+
export { Parser };
|