@bufbuild/re2 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +30 -0
- package/dist/cjs/CharClass.d.ts +30 -0
- package/dist/cjs/CharClass.js +284 -0
- package/dist/cjs/CharGroup.d.ts +8 -0
- package/dist/cjs/CharGroup.js +83 -0
- package/dist/cjs/Codepoint.d.ts +3 -0
- package/dist/cjs/Codepoint.js +62 -0
- package/dist/cjs/Compiler.d.ts +40 -0
- package/dist/cjs/Compiler.js +262 -0
- package/dist/cjs/DFA.d.ts +36 -0
- package/dist/cjs/DFA.js +350 -0
- package/dist/cjs/Inst.d.ts +26 -0
- package/dist/cjs/Inst.js +86 -0
- package/dist/cjs/MachineInput.d.ts +17 -0
- package/dist/cjs/MachineInput.js +72 -0
- package/dist/cjs/Parser.d.ts +111 -0
- package/dist/cjs/Parser.js +1538 -0
- package/dist/cjs/Prefilter.d.ts +19 -0
- package/dist/cjs/Prefilter.js +163 -0
- package/dist/cjs/Prog.d.ts +39 -0
- package/dist/cjs/Prog.js +154 -0
- package/dist/cjs/RE2.d.ts +27 -0
- package/dist/cjs/RE2.js +221 -0
- package/dist/cjs/RE2Flags.d.ts +16 -0
- package/dist/cjs/RE2Flags.js +58 -0
- package/dist/cjs/Regexp.d.ts +43 -0
- package/dist/cjs/Regexp.js +98 -0
- package/dist/cjs/Simplify.d.ts +3 -0
- package/dist/cjs/Simplify.js +230 -0
- package/dist/cjs/Unicode.d.ts +17 -0
- package/dist/cjs/Unicode.js +165 -0
- package/dist/cjs/UnicodeRangeTable.d.ts +12 -0
- package/dist/cjs/UnicodeRangeTable.js +31 -0
- package/dist/cjs/UnicodeTables.d.ts +29 -0
- package/dist/cjs/UnicodeTables.js +571 -0
- package/dist/cjs/Utils.d.ts +22 -0
- package/dist/cjs/Utils.js +119 -0
- package/dist/cjs/__fixtures__/find.d.ts +9 -0
- package/dist/cjs/__fixtures__/find.js +115 -0
- package/dist/cjs/chars.d.ts +2 -0
- package/dist/cjs/chars.js +19 -0
- package/dist/cjs/exceptions.d.ts +55 -0
- package/dist/cjs/exceptions.js +94 -0
- package/dist/cjs/index.d.ts +102 -0
- package/dist/cjs/index.js +173 -0
- package/dist/cjs/package.json +1 -0
- package/dist/cjs/testParser.d.ts +3 -0
- package/dist/cjs/testParser.js +143 -0
- package/dist/esm/CharClass.d.ts +30 -0
- package/dist/esm/CharClass.js +281 -0
- package/dist/esm/CharGroup.d.ts +8 -0
- package/dist/esm/CharGroup.js +78 -0
- package/dist/esm/Codepoint.d.ts +3 -0
- package/dist/esm/Codepoint.js +59 -0
- package/dist/esm/Compiler.d.ts +40 -0
- package/dist/esm/Compiler.js +259 -0
- package/dist/esm/DFA.d.ts +36 -0
- package/dist/esm/DFA.js +347 -0
- package/dist/esm/Inst.d.ts +26 -0
- package/dist/esm/Inst.js +83 -0
- package/dist/esm/MachineInput.d.ts +17 -0
- package/dist/esm/MachineInput.js +68 -0
- package/dist/esm/Parser.d.ts +111 -0
- package/dist/esm/Parser.js +1535 -0
- package/dist/esm/Prefilter.d.ts +19 -0
- package/dist/esm/Prefilter.js +159 -0
- package/dist/esm/Prog.d.ts +39 -0
- package/dist/esm/Prog.js +150 -0
- package/dist/esm/RE2.d.ts +27 -0
- package/dist/esm/RE2.js +218 -0
- package/dist/esm/RE2Flags.d.ts +16 -0
- package/dist/esm/RE2Flags.js +41 -0
- package/dist/esm/Regexp.d.ts +43 -0
- package/dist/esm/Regexp.js +94 -0
- package/dist/esm/Simplify.d.ts +3 -0
- package/dist/esm/Simplify.js +228 -0
- package/dist/esm/Unicode.d.ts +17 -0
- package/dist/esm/Unicode.js +150 -0
- package/dist/esm/UnicodeRangeTable.d.ts +12 -0
- package/dist/esm/UnicodeRangeTable.js +28 -0
- package/dist/esm/UnicodeTables.d.ts +29 -0
- package/dist/esm/UnicodeTables.js +568 -0
- package/dist/esm/Utils.d.ts +22 -0
- package/dist/esm/Utils.js +103 -0
- package/dist/esm/__fixtures__/find.d.ts +9 -0
- package/dist/esm/__fixtures__/find.js +112 -0
- package/dist/esm/chars.d.ts +2 -0
- package/dist/esm/chars.js +14 -0
- package/dist/esm/exceptions.d.ts +55 -0
- package/dist/esm/exceptions.js +86 -0
- package/dist/esm/index.d.ts +102 -0
- package/dist/esm/index.js +163 -0
- package/dist/esm/testParser.d.ts +3 -0
- package/dist/esm/testParser.js +138 -0
- package/package.json +49 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
 * One fixture row: a pattern, an input text and the expected matches.
 *
 * `matches` holds one array per expected match; each array is an equally
 * sized run taken from the flat list of numbers passed to the constructor.
 */
class Test {
    pat;
    text;
    matches;
    /**
     * @param {string} pat - pattern source
     * @param {string} text - input text
     * @param {number} n - number of expected matches; when not positive,
     *   `matches` stays empty and `x` is ignored
     * @param {...number} x - flat list of numbers, split into `n` equal runs
     * @throws {Error} "invalid build entry" if `x` cannot be split evenly
     */
    constructor(pat, text, n, ...x) {
        this.pat = pat;
        this.text = text;
        this.matches = [];
        if (n > 0) {
            // The previous guard (`j > x.length` after each slice) could never
            // fire because the run length was floor(x.length / n); a malformed
            // row was silently truncated. Reject uneven rows up front instead.
            if (x.length % n !== 0) {
                throw new Error("invalid build entry");
            }
            const runLength = x.length / n;
            for (let i = 0; i < n; i++) {
                const start = i * runLength;
                this.matches.push(x.slice(start, start + runLength));
            }
        }
    }
    /**
     * @returns {string} one-line summary of the row, with `matches` as JSON
     */
    toString() {
        return `pat=${this.pat} text=${this.text} len=${this.matches.length} matches=${JSON.stringify(this.matches)}`;
    }
}
|
|
24
|
+
/**
 * Fixture rows, in order. Each tuple is the argument list handed to
 * `new Test(...)`: pattern, text, number of expected matches, then the flat
 * list of numbers that is split into that many runs.
 */
export const FIND_TESTS = [
    ["", "", 1, 0, 0],
    ["^abcdefg", "abcdefg", 1, 0, 7],
    ["a+", "baaab", 1, 1, 4],
    ["abcd..", "abcdef", 1, 0, 6],
    ["a", "a", 1, 0, 1],
    ["x", "y", 0],
    ["b", "abc", 1, 1, 2],
    [".", "a", 1, 0, 1],
    [".*", "abcdef", 1, 0, 6],
    ["^", "abcde", 1, 0, 0],
    ["$", "abcde", 1, 5, 5],
    ["^abcd$", "abcd", 1, 0, 4],
    ["^bcd'", "abcdef", 0],
    ["^abcd$", "abcde", 0],
    ["h.*od?", "hello\ngoodbye\n", 1, 0, 5],
    ["a{1,5}", "baaac", 1, 1, 4],
    ["ac{1,25}", "bbaaaccccdd", 1, 4, 9],
    ["a+", "baaab", 1, 1, 4],
    ["a*", "baaab", 3, 0, 0, 1, 4, 5, 5],
    ["[a-z]+", "abcd", 1, 0, 4],
    ["[^a-z]+", "ab1234cd", 1, 2, 6],
    ["[a\\-\\]z]+", "az]-bcz", 2, 0, 4, 6, 7],
    ["[^\\n]+", "abcd\n", 1, 0, 4],
    ["[日本語]+", "日本語日本語", 1, 0, 18],
    ["日本語+", "日本語", 1, 0, 9],
    ["日本語+", "日本語語語語", 1, 0, 18],
    ["()", "", 1, 0, 0, 0, 0],
    ["(a)", "a", 1, 0, 1, 0, 1],
    ["(.)(.)", "日a", 1, 0, 4, 0, 3, 3, 4],
    ["(.*)", "", 1, 0, 0, 0, 0],
    ["(.*)", "abcd", 1, 0, 4, 0, 4],
    ["(..)(..)", "abcd", 1, 0, 4, 0, 2, 2, 4],
    ["(([^xyz]*)(d))", "abcd", 1, 0, 4, 0, 4, 0, 3, 3, 4],
    ["((a|b|c)*(d))", "abcd", 1, 0, 4, 0, 4, 2, 3, 3, 4],
    ["(((a|b|c)*)(d))", "abcd", 1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4],
    ["\\a\\f\\n\\r\\t\\v", "\x07\f\n\r\t\v", 1, 0, 6],
    ["[\\a\\f\\n\\r\\t\\v]+", "\x07\f\n\r\t\v", 1, 0, 6],
    ["a*(|(b))c*", "aacc", 1, 0, 4, 2, 2, -1, -1],
    ["(.*).*", "ab", 1, 0, 2, 0, 2],
    ["[.]", ".", 1, 0, 1],
    ["/$", "/abc/", 1, 4, 5],
    ["/$", "/abc", 0],
    // multiple matches
    [".", "abc", 3, 0, 1, 1, 2, 2, 3],
    ["(.)", "abc", 3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3],
    [".(.)", "abcd", 2, 0, 2, 1, 2, 2, 4, 3, 4],
    ["ab*", "abbaab", 3, 0, 3, 3, 4, 4, 6],
    ["a(b*)", "abbaab", 3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6],
    // fixed bugs
    ["ab$", "cab", 1, 1, 3],
    ["axxb$", "axxcb", 0],
    ["data", "daXY data", 1, 5, 9],
    ["da(.)a$", "daXY data", 1, 5, 9, 7, 8],
    ["zx+", "zzx", 1, 1, 3],
    ["ab$", "abcab", 1, 3, 5],
    ["(aa)*$", "a", 1, 1, 1, -1, -1],
    ["(?:.|(?:.a))", "", 0],
    ["(?:A(?:A|a))", "Aa", 1, 0, 2],
    ["(?:A|(?:A|a))", "a", 1, 0, 1],
    ["(a){0}", "", 1, 0, 0, -1, -1],
    ["(?-s)(?:(?:^).)", "\n", 0],
    ["(?s)(?:(?:^).)", "\n", 1, 0, 1],
    ["(?:(?:^).)", "\n", 0],
    ["\\b", "x", 2, 0, 0, 1, 1],
    ["\\b", "xx", 2, 0, 0, 2, 2],
    ["\\b", "x y", 4, 0, 0, 1, 1, 2, 2, 3, 3],
    ["\\b", "xx yy", 4, 0, 0, 2, 2, 3, 3, 5, 5],
    ["\\B", "x", 0],
    ["\\B", "xx", 1, 1, 1],
    ["\\B", "x y", 0],
    ["\\B", "xx yy", 2, 1, 1, 4, 4],
    // RE2 tests
    ["[^\\S\\s]", "abcd", 0],
    ["[^\\S[:space:]]", "abcd", 0],
    ["[^\\D\\d]", "abcd", 0],
    ["[^\\D[:digit:]]", "abcd", 0],
    ["(?i)\\W", "x", 0],
    ["(?i)\\W", "k", 0],
    ["(?i)\\W", "s", 0],
    // can backslash-escape any punctuation
    ["\\!\\\"\\#\\$\\%\\&\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\{\\|\\}\\~", "!\"#$%&'()*+,-./:;<=>?@[\\]^_{|}~", 1, 0, 31],
    ["[\\!\\\"\\#\\$\\%\\&\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\{\\|\\}\\~]+", "!\"#$%&'()*+,-./:;<=>?@[\\]^_{|}~", 1, 0, 31],
    ["\\`", "`", 1, 0, 1],
    ["[\\`]+", "`", 1, 0, 1],
    // long set of matches
    [".", "qwertyuiopasdfghjklzxcvbnm1234567890", 36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36],
    ["(|a)*", "aa", 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2],
].map((row) => new Test(...row));
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Returns the code point at the start of `v`.
 *
 * @param {string} v - string to read from
 * @returns {number} result of `v.codePointAt(0)`
 * @throws {Error} "codePoint: empty string" when there is no code point at index 0
 */
export const codePoint = (v) => {
    const first = v.codePointAt(0);
    if (first !== undefined) {
        return first;
    }
    throw new Error("codePoint: empty string");
};
|
|
8
|
+
/**
 * Returns the code point of `s` at index `i`, throwing instead of yielding
 * `undefined`.
 *
 * @param {string} s - string to read from
 * @param {number} i - index passed to `codePointAt`
 * @returns {number} result of `s.codePointAt(i)`
 * @throws {Error} when `codePointAt` returns `undefined` for that index
 */
export const codePointAtOrThrow = (s, i) => {
    const found = s.codePointAt(i);
    if (found !== undefined) {
        return found;
    }
    throw new Error(`codePointAt(${i}) returned undefined for ${JSON.stringify(s)}`);
};
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
 * Base class that the other exception types declared in this file extend.
 */
declare class RE2JSException extends Error {
    /**
     * @param message text used as the error message
     */
    constructor(message: string);
}
|
|
5
|
+
/**
 * Thrown by the parser when a pattern is invalid.
 */
declare class RE2JSSyntaxException extends RE2JSException {
    /** Description of what was wrong, as given to the constructor. */
    error: string;
    /** Offending pattern text, or `null` when none was supplied. */
    input: string | null;
    /**
     * @param error description of the problem
     * @param input offending pattern text; defaults to `null`
     */
    constructor(error: string, input?: string | null);
    /**
     * @returns the error description
     */
    getDescription(): string;
    /**
     * @returns the erroneous regular-expression pattern, or `null`
     */
    getPattern(): string | null;
}
|
|
27
|
+
/**
 * Thrown by the compiler.
 */
declare class RE2JSCompileException extends RE2JSException {
    /**
     * @param message text used as the error message
     */
    constructor(message: string);
}
|
|
34
|
+
/**
 * Thrown when using groups.
 */
declare class RE2JSGroupException extends RE2JSException {
    /**
     * @param message text used as the error message
     */
    constructor(message: string);
}
|
|
41
|
+
/**
 * Thrown for problems with flags.
 */
declare class RE2JSFlagsException extends RE2JSException {
    /**
     * @param message text used as the error message
     */
    constructor(message: string);
}
|
|
48
|
+
/**
 * Thrown for internal engine errors, such as corrupted bytecodes.
 */
declare class RE2JSInternalException extends RE2JSException {
    /**
     * @param message text used as the error message
     */
    constructor(message: string);
}
|
|
55
|
+
export { RE2JSException, RE2JSSyntaxException, RE2JSCompileException, RE2JSGroupException, RE2JSFlagsException, RE2JSInternalException, };
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
 * Base class that the other exception types in this file extend.
 * Instances report `name` as "RE2JSException".
 */
class RE2JSException extends Error {
    /**
     * @param {string} message - text used as the error message
     */
    constructor(message) {
        super(message);
        this.name = "RE2JSException";
    }
}
|
|
8
|
+
/**
 * Thrown by the parser when a pattern is invalid.
 *
 * The message is "error parsing regexp: <error>", followed by the pattern in
 * backticks when a non-empty `input` is given.
 */
class RE2JSSyntaxException extends RE2JSException {
    error;
    input;
    /**
     * @param {string} error - description of the problem
     * @param {string|null} [input=null] - offending pattern text
     */
    constructor(error, input = null) {
        const summary = `error parsing regexp: ${error}`;
        // A falsy input (null or empty string) adds nothing to the message.
        const fullMessage = input ? `${summary}: \`${input}\`` : summary;
        super(fullMessage);
        this.name = "RE2JSSyntaxException";
        this.message = fullMessage;
        /** @type {string} */
        this.error = error;
        /** @type {string|null} */
        this.input = input;
    }
    /**
     * @returns {string} the error description passed to the constructor
     */
    getDescription() {
        return this.error;
    }
    /**
     * @returns {string|null} the pattern passed to the constructor, if any
     */
    getPattern() {
        return this.input;
    }
}
|
|
46
|
+
/**
 * Thrown by the compiler. Instances report `name` as "RE2JSCompileException".
 */
class RE2JSCompileException extends RE2JSException {
    /**
     * @param {string} message - text used as the error message
     */
    constructor(message) {
        super(message);
        this.name = "RE2JSCompileException";
    }
}
|
|
56
|
+
/**
 * Thrown when using groups. Instances report `name` as "RE2JSGroupException".
 */
class RE2JSGroupException extends RE2JSException {
    /**
     * @param {string} message - text used as the error message
     */
    constructor(message) {
        super(message);
        this.name = "RE2JSGroupException";
    }
}
|
|
66
|
+
/**
 * Thrown for problems with flags. Instances report `name` as
 * "RE2JSFlagsException".
 */
class RE2JSFlagsException extends RE2JSException {
    /**
     * @param {string} message - text used as the error message
     */
    constructor(message) {
        super(message);
        this.name = "RE2JSFlagsException";
    }
}
|
|
76
|
+
/**
 * Thrown for internal engine errors, such as corrupted bytecodes. Instances
 * report `name` as "RE2JSInternalException".
 */
class RE2JSInternalException extends RE2JSException {
    /**
     * @param {string} message - text used as the error message
     */
    constructor(message) {
        super(message);
        this.name = "RE2JSInternalException";
    }
}
|
|
86
|
+
export { RE2JSException, RE2JSSyntaxException, RE2JSCompileException, RE2JSGroupException, RE2JSFlagsException, RE2JSInternalException, };
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { RE2 } from "./RE2.js";
|
|
2
|
+
export { RE2JSCompileException, RE2JSException, RE2JSFlagsException, RE2JSGroupException, RE2JSInternalException, RE2JSSyntaxException, } from "./exceptions.js";
|
|
3
|
+
/**
 * A compiled representation of an RE2 regular expression.
 */
export declare class RE2JS {
    /** Pattern string as passed to the constructor. */
    patternInput: string;
    /** Flag bits as passed to the constructor. */
    flagsInput: number;
    /** The RE2 object compiled for this pattern. */
    re2Input: RE2;
    /**
     * Flag: case-insensitive matching.
     */
    static CASE_INSENSITIVE: number;
    /**
     * Flag: dot (`.`) matches every character, newline included.
     */
    static DOTALL: number;
    /**
     * Flag: multiline matching. `^` and `$` match at the beginning and end
     * of each line, not only at the beginning and end of the input.
     */
    static MULTILINE: number;
    /**
     * Flag: Unicode groups (e.g. `\p{Greek}`) are treated as syntax errors.
     */
    static DISABLE_UNICODE_GROUPS: number;
    /**
     * Produces a literal pattern string for the given string.
     *
     * @param str the string to be literalized
     * @returns a literal string replacement
     */
    static quote(str: string): string;
    /**
     * Convenience factory: builds a new RE2JS from a regex and flags.
     *
     * @param regex the regular expression source
     * @param flags combination of the flag constants; defaults to 0
     * @returns the new RE2JS instance
     */
    static compile(regex: string, flags?: number): RE2JS;
    /**
     * Checks that `flags` is built only from the flag constants of this class.
     */
    static validateFlags(flags: number): void;
    /**
     * Returns `regex` with the inline flag prefixes implied by `flags` added.
     */
    static buildRegexWithFlags(regex: string, flags?: number): string;
    /**
     * Compiles `regex` and matches it against the whole of `input`.
     *
     * @param regex the regular expression
     * @param input the input
     * @returns true if the regular expression matches the entire input
     * @throws RE2JSSyntaxException if the regular expression is malformed
     */
    static matches(regex: string, input: string): boolean;
    /**
     * @param pattern the regular expression source
     * @param flags combination of the flag constants; defaults to 0
     */
    constructor(pattern: string, flags?: number);
    /**
     * Releases memory used by internal caches associated with this pattern.
     */
    reset(): void;
    /**
     * @returns the flags given to the constructor
     */
    flags(): number;
    /**
     * @returns the pattern given to the constructor
     */
    pattern(): string;
    /**
     * @returns the RE2 object compiled for this pattern
     */
    re2(): RE2;
    /**
     * Matches this pattern against the whole of `input`.
     *
     * @param input the input
     * @returns true if the regular expression matches the entire input
     */
    matches(input: string): boolean;
    /**
     * Tests whether the pattern matches any part of the input string.
     *
     * @param input the input string to test against
     * @returns `true` if the pattern is found anywhere in the input, `false` otherwise
     */
    test(input: string): boolean;
    /**
     * Tests whether the pattern matches the ENTIRE input string.
     *
     * @param input the input string to test against
     * @returns `true` if the whole input matches the pattern, `false` otherwise
     */
    testExact(input: string): boolean;
    /**
     * @returns the pattern given to the constructor
     */
    toString(): string;
    /**
     * @returns the number of capturing groups in this pattern
     */
    groupCount(): number;
    /**
     * @returns a map of the named capturing groups in this pattern
     */
    namedGroups(): Map<string, number>;
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { ANCHOR_BOTH, PERL, UNICODE_GROUPS } from "./RE2Flags.js";
|
|
2
|
+
import { fromUTF16 } from "./MachineInput.js";
|
|
3
|
+
import { RE2 } from "./RE2.js";
|
|
4
|
+
import { quoteMeta } from "./Utils.js";
|
|
5
|
+
import { RE2JSFlagsException } from "./exceptions.js";
|
|
6
|
+
export { RE2JSCompileException, RE2JSException, RE2JSFlagsException, RE2JSGroupException, RE2JSInternalException, RE2JSSyntaxException, } from "./exceptions.js";
|
|
7
|
+
/**
 * A compiled representation of an RE2 regular expression.
 */
export class RE2JS {
    patternInput;
    flagsInput;
    re2Input;
    /**
     * Flag: case-insensitive matching.
     */
    static CASE_INSENSITIVE = 1;
    /**
     * Flag: dot (`.`) matches every character, newline included.
     */
    static DOTALL = 2;
    /**
     * Flag: multiline matching. `^` and `$` match at the beginning and end
     * of each line, not only at the beginning and end of the input.
     */
    static MULTILINE = 4;
    /**
     * Flag: Unicode groups (e.g. `\p{Greek}`) are treated as syntax errors.
     */
    static DISABLE_UNICODE_GROUPS = 8;
    /**
     * Produces a literal pattern string for the given string by delegating
     * to `quoteMeta`.
     *
     * @param {string} str The string to be literalized
     * @returns {string} A literal string replacement
     */
    static quote(str) {
        return quoteMeta(str);
    }
    /**
     * Convenience factory: builds a new RE2JS from a regex and flags.
     *
     * @param {string} regex
     * @param {number} [flags=0]
     * @returns {RE2JS}
     */
    static compile(regex, flags = 0) {
        return new RE2JS(regex, flags);
    }
    /**
     * Rejects any flag value that has bits outside the four flag constants.
     *
     * @param {number} flags
     * @throws {RE2JSFlagsException} if an unknown bit is set
     */
    static validateFlags(flags) {
        const knownBits = RE2JS.MULTILINE |
            RE2JS.DOTALL |
            RE2JS.CASE_INSENSITIVE |
            RE2JS.DISABLE_UNICODE_GROUPS;
        if ((flags & ~knownBits) === 0) {
            return;
        }
        throw new RE2JSFlagsException("Flags should only be a combination of MULTILINE, DOTALL, CASE_INSENSITIVE, DISABLE_UNICODE_GROUPS");
    }
    /**
     * Prepends the inline flag group for each of CASE_INSENSITIVE, DOTALL and
     * MULTILINE that is set. Each prefix is added in front of the previous
     * result, so with all three set the output starts with `(?m)(?s)(?i)`.
     *
     * @param {string} regex
     * @param {number} [flags=0]
     * @returns {string}
     */
    static buildRegexWithFlags(regex, flags = 0) {
        const inlineGroups = [
            [RE2JS.CASE_INSENSITIVE, "(?i)"],
            [RE2JS.DOTALL, "(?s)"],
            [RE2JS.MULTILINE, "(?m)"],
        ];
        return inlineGroups.reduce((soFar, [bit, group]) => ((flags & bit) !== 0 ? `${group}${soFar}` : soFar), regex);
    }
    /**
     * Compiles `regex` with no flags and matches it against the whole input.
     *
     * @param {string} regex the regular expression
     * @param {string} input the input
     * @returns {boolean} true if the regular expression matches the entire input
     * @throws RE2JSSyntaxException if the regular expression is malformed
     */
    static matches(regex, input) {
        const compiled = RE2JS.compile(regex);
        return compiled.testExact(input);
    }
    /**
     * @param {string} pattern
     * @param {number} flags
     */
    constructor(pattern, flags = 0) {
        // DISABLE_UNICODE_GROUPS has no inline prefix; it clears the
        // UNICODE_GROUPS bit from the PERL flag set instead.
        const unicodeGroupsOff = (flags & RE2JS.DISABLE_UNICODE_GROUPS) !== 0;
        const re2Flags = unicodeGroupsOff ? PERL & ~UNICODE_GROUPS : PERL;
        RE2JS.validateFlags(flags);
        const prefixed = RE2JS.buildRegexWithFlags(pattern, flags);
        this.patternInput = pattern;
        this.flagsInput = flags;
        this.re2Input = RE2.compileImpl(prefixed, re2Flags);
    }
    /**
     * Releases memory used by internal caches associated with this pattern.
     */
    reset() {
        this.re2Input.reset();
    }
    /**
     * @returns {number} the flags given to the constructor
     */
    flags() {
        return this.flagsInput;
    }
    /**
     * @returns {string} the pattern given to the constructor
     */
    pattern() {
        return this.patternInput;
    }
    /**
     * @returns the RE2 object compiled for this pattern
     */
    re2() {
        return this.re2Input;
    }
    /**
     * Matches this pattern against the whole input; same as `testExact`.
     *
     * @param {string} input the input
     * @returns {boolean} true if the regular expression matches the entire input
     */
    matches(input) {
        return this.testExact(input);
    }
    /**
     * Tests whether the pattern matches any part of the input string.
     *
     * @param {string} input - The input string to test against.
     * @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
     */
    test(input) {
        return this.re2Input.match(input);
    }
    /**
     * Tests whether the pattern matches the ENTIRE input string.
     *
     * @param {string} input - The input string to test against.
     * @returns {boolean} `true` if the whole input matches the pattern, `false` otherwise.
     */
    testExact(input) {
        const outcome = this.re2Input.executeEngine(fromUTF16(input), 0, ANCHOR_BOTH, 0);
        return outcome !== null;
    }
    /**
     * @returns {string} the pattern given to the constructor
     */
    toString() {
        return this.patternInput;
    }
    /**
     * Returns the number of capturing groups in this pattern.
     */
    groupCount() {
        return this.re2Input.numberOfCapturingGroups();
    }
    /**
     * Returns the `namedGroups` value held by the compiled RE2 object.
     */
    namedGroups() {
        return this.re2Input.namedGroups;
    }
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { FOLD_CASE, NON_GREEDY, WAS_DOLLAR } from "./RE2Flags.js";
|
|
2
|
+
import { Regexp } from "./Regexp.js";
|
|
3
|
+
import { MAX_RUNE, simpleFold } from "./Unicode.js";
|
|
4
|
+
// Short label for each Regexp.Op value. Written as a table keyed by op name
// and resolved through Regexp.Op, keeping the original entry order.
const OP_NAMES = new Map(Object.entries({
    NO_MATCH: "no",
    EMPTY_MATCH: "emp",
    LITERAL: "lit",
    CHAR_CLASS: "cc",
    ANY_CHAR_NOT_NL: "dnl",
    ANY_CHAR: "dot",
    BEGIN_LINE: "bol",
    END_LINE: "eol",
    BEGIN_TEXT: "bot",
    END_TEXT: "eot",
    WORD_BOUNDARY: "wb",
    NO_WORD_BOUNDARY: "nwb",
    CAPTURE: "cap",
    STAR: "star",
    PLUS: "plus",
    QUEST: "que",
    REPEAT: "rep",
    CONCAT: "cat",
    ALTERNATE: "alt",
}).map(([opKey, label]) => [Regexp.Op[opKey], label]));
|
|
25
|
+
/**
 * Renders a Regexp tree as a compact string of the form `label{detail}`.
 *
 * The label comes from OP_NAMES (or `op<value>` for an op not in the map),
 * with these adjustments: repetition ops get an "n" prefix when NON_GREEDY is
 * set; literals are "str" for more than one rune and "lit" otherwise, plus
 * "fold" when FOLD_CASE is set and some rune changes under `simpleFold`.
 * The detail depends on the op and recurses into sub-expressions.
 *
 * @param re - the Regexp node to render
 * @returns {string} the rendered form
 */
export const dumpRegexp = (re) => {
    const Op = Regexp.Op;
    const kind = re.op;

    let label;
    if (!OP_NAMES.has(kind)) {
        label = `op${kind}`;
    }
    else if (kind === Op.STAR || kind === Op.PLUS || kind === Op.QUEST || kind === Op.REPEAT) {
        const lazyMark = (re.flags & NON_GREEDY) !== 0 ? "n" : "";
        label = `${lazyMark}${OP_NAMES.get(kind)}`;
    }
    else if (kind === Op.LITERAL) {
        label = re.runes.length > 1 ? "str" : "lit";
        if ((re.flags & FOLD_CASE) !== 0) {
            for (const rune of re.runes) {
                if (simpleFold(rune) !== rune) {
                    label += "fold";
                    break;
                }
            }
        }
    }
    else {
        label = `${OP_NAMES.get(kind)}`;
    }

    let detail = "";
    if (kind === Op.END_TEXT) {
        // Only an end-of-text that was not written as `$` is shown as \z.
        if ((re.flags & WAS_DOLLAR) === 0) {
            detail = "\\z";
        }
    }
    else if (kind === Op.LITERAL) {
        detail = Array.from(re.runes, (rune) => String.fromCodePoint(rune)).join("");
    }
    else if (kind === Op.CONCAT || kind === Op.ALTERNATE) {
        detail = Array.from(re.subs, (child) => dumpRegexp(child)).join("");
    }
    else if (kind === Op.STAR || kind === Op.PLUS || kind === Op.QUEST) {
        detail = dumpRegexp(re.subs[0]);
    }
    else if (kind === Op.REPEAT) {
        detail = `${re.min},${re.max} ${dumpRegexp(re.subs[0])}`;
    }
    else if (kind === Op.CAPTURE) {
        const groupName = re.name !== null && re.name.length > 0 ? `${re.name}:` : "";
        detail = `${groupName}${dumpRegexp(re.subs[0])}`;
    }
    else if (kind === Op.CHAR_CLASS) {
        // runes is read as consecutive (lo, hi) pairs.
        const ranges = [];
        for (let at = 0; at < re.runes.length; at += 2) {
            const lo = re.runes[at];
            const hi = re.runes[at + 1];
            ranges.push(lo === hi
                ? `0x${lo.toString(16)}`
                : `0x${lo.toString(16)}-0x${hi.toString(16)}`);
        }
        detail = ranges.join(" ");
    }

    return `${label}{${detail}}`;
};
|
|
118
|
+
/**
 * Builds a CHAR_CLASS Regexp whose runes are the (lo, hi) ranges of code
 * points in [0, MAX_RUNE] for which `f` returns a truthy value, and returns
 * its `dumpRegexp` rendering.
 *
 * @param {(rune: number) => boolean} f - membership predicate, called once
 *   for every code point from 0 to MAX_RUNE in ascending order
 * @returns {string} the `dumpRegexp` output for the resulting class
 */
export const mkCharClass = (f) => {
    const re = new Regexp(Regexp.Op.CHAR_CLASS);
    // Append in place: re-spreading the array for every range copied all
    // earlier ranges each time, which is quadratic in the number of ranges.
    const runes = [];
    let lo = -1; // start of the range currently open, or -1 when none is
    for (let i = 0; i <= MAX_RUNE; i++) {
        if (f(i)) {
            if (lo < 0) {
                lo = i;
            }
        }
        else if (lo >= 0) {
            runes.push(lo, i - 1);
            lo = -1;
        }
    }
    // A range still open after the loop extends to MAX_RUNE.
    if (lo >= 0) {
        runes.push(lo, MAX_RUNE);
    }
    re.runes = runes;
    return dumpRegexp(re);
};
|