@bufbuild/re2 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/LICENSE +21 -0
  2. package/README.md +30 -0
  3. package/dist/cjs/CharClass.d.ts +30 -0
  4. package/dist/cjs/CharClass.js +284 -0
  5. package/dist/cjs/CharGroup.d.ts +8 -0
  6. package/dist/cjs/CharGroup.js +83 -0
  7. package/dist/cjs/Codepoint.d.ts +3 -0
  8. package/dist/cjs/Codepoint.js +62 -0
  9. package/dist/cjs/Compiler.d.ts +40 -0
  10. package/dist/cjs/Compiler.js +262 -0
  11. package/dist/cjs/DFA.d.ts +36 -0
  12. package/dist/cjs/DFA.js +350 -0
  13. package/dist/cjs/Inst.d.ts +26 -0
  14. package/dist/cjs/Inst.js +86 -0
  15. package/dist/cjs/MachineInput.d.ts +17 -0
  16. package/dist/cjs/MachineInput.js +72 -0
  17. package/dist/cjs/Parser.d.ts +111 -0
  18. package/dist/cjs/Parser.js +1538 -0
  19. package/dist/cjs/Prefilter.d.ts +19 -0
  20. package/dist/cjs/Prefilter.js +163 -0
  21. package/dist/cjs/Prog.d.ts +39 -0
  22. package/dist/cjs/Prog.js +154 -0
  23. package/dist/cjs/RE2.d.ts +27 -0
  24. package/dist/cjs/RE2.js +221 -0
  25. package/dist/cjs/RE2Flags.d.ts +16 -0
  26. package/dist/cjs/RE2Flags.js +58 -0
  27. package/dist/cjs/Regexp.d.ts +43 -0
  28. package/dist/cjs/Regexp.js +98 -0
  29. package/dist/cjs/Simplify.d.ts +3 -0
  30. package/dist/cjs/Simplify.js +230 -0
  31. package/dist/cjs/Unicode.d.ts +17 -0
  32. package/dist/cjs/Unicode.js +165 -0
  33. package/dist/cjs/UnicodeRangeTable.d.ts +12 -0
  34. package/dist/cjs/UnicodeRangeTable.js +31 -0
  35. package/dist/cjs/UnicodeTables.d.ts +29 -0
  36. package/dist/cjs/UnicodeTables.js +571 -0
  37. package/dist/cjs/Utils.d.ts +22 -0
  38. package/dist/cjs/Utils.js +119 -0
  39. package/dist/cjs/__fixtures__/find.d.ts +9 -0
  40. package/dist/cjs/__fixtures__/find.js +115 -0
  41. package/dist/cjs/chars.d.ts +2 -0
  42. package/dist/cjs/chars.js +19 -0
  43. package/dist/cjs/exceptions.d.ts +55 -0
  44. package/dist/cjs/exceptions.js +94 -0
  45. package/dist/cjs/index.d.ts +102 -0
  46. package/dist/cjs/index.js +173 -0
  47. package/dist/cjs/package.json +1 -0
  48. package/dist/cjs/testParser.d.ts +3 -0
  49. package/dist/cjs/testParser.js +143 -0
  50. package/dist/esm/CharClass.d.ts +30 -0
  51. package/dist/esm/CharClass.js +281 -0
  52. package/dist/esm/CharGroup.d.ts +8 -0
  53. package/dist/esm/CharGroup.js +78 -0
  54. package/dist/esm/Codepoint.d.ts +3 -0
  55. package/dist/esm/Codepoint.js +59 -0
  56. package/dist/esm/Compiler.d.ts +40 -0
  57. package/dist/esm/Compiler.js +259 -0
  58. package/dist/esm/DFA.d.ts +36 -0
  59. package/dist/esm/DFA.js +347 -0
  60. package/dist/esm/Inst.d.ts +26 -0
  61. package/dist/esm/Inst.js +83 -0
  62. package/dist/esm/MachineInput.d.ts +17 -0
  63. package/dist/esm/MachineInput.js +68 -0
  64. package/dist/esm/Parser.d.ts +111 -0
  65. package/dist/esm/Parser.js +1535 -0
  66. package/dist/esm/Prefilter.d.ts +19 -0
  67. package/dist/esm/Prefilter.js +159 -0
  68. package/dist/esm/Prog.d.ts +39 -0
  69. package/dist/esm/Prog.js +150 -0
  70. package/dist/esm/RE2.d.ts +27 -0
  71. package/dist/esm/RE2.js +218 -0
  72. package/dist/esm/RE2Flags.d.ts +16 -0
  73. package/dist/esm/RE2Flags.js +41 -0
  74. package/dist/esm/Regexp.d.ts +43 -0
  75. package/dist/esm/Regexp.js +94 -0
  76. package/dist/esm/Simplify.d.ts +3 -0
  77. package/dist/esm/Simplify.js +228 -0
  78. package/dist/esm/Unicode.d.ts +17 -0
  79. package/dist/esm/Unicode.js +150 -0
  80. package/dist/esm/UnicodeRangeTable.d.ts +12 -0
  81. package/dist/esm/UnicodeRangeTable.js +28 -0
  82. package/dist/esm/UnicodeTables.d.ts +29 -0
  83. package/dist/esm/UnicodeTables.js +568 -0
  84. package/dist/esm/Utils.d.ts +22 -0
  85. package/dist/esm/Utils.js +103 -0
  86. package/dist/esm/__fixtures__/find.d.ts +9 -0
  87. package/dist/esm/__fixtures__/find.js +112 -0
  88. package/dist/esm/chars.d.ts +2 -0
  89. package/dist/esm/chars.js +14 -0
  90. package/dist/esm/exceptions.d.ts +55 -0
  91. package/dist/esm/exceptions.js +86 -0
  92. package/dist/esm/index.d.ts +102 -0
  93. package/dist/esm/index.js +163 -0
  94. package/dist/esm/testParser.d.ts +3 -0
  95. package/dist/esm/testParser.js +138 -0
  96. package/package.json +49 -0
@@ -0,0 +1,112 @@
1
/**
 * One find-test fixture: a pattern, the text to search, and the expected
 * match positions.
 *
 * `matches` holds one array per expected match; each array is an equal-sized
 * run of the numbers passed after `n`.
 */
class Test {
  pat;
  text;
  matches;

  /**
   * @param {string} pat the pattern
   * @param {string} text the text the pattern is applied to
   * @param {number} n number of expected matches (0 means "no match")
   * @param {...number} x expected positions, split evenly into `n` runs
   * @throws {Error} "invalid build entry" when `x` cannot be split evenly
   *   into `n` runs
   */
  constructor(pat, text, n, ...x) {
    this.pat = pat;
    this.text = text;
    this.matches = [];
    if (n > 0) {
      // The previous guard compared the running offset against x.length after
      // advancing by floor(x.length / n), which can never overshoot, so a
      // malformed entry was silently truncated. Reject it up front instead.
      if (x.length % n !== 0) {
        throw new Error("invalid build entry");
      }
      const runLength = x.length / n;
      for (let i = 0; i < n; i++) {
        const start = i * runLength;
        this.matches.push(x.slice(start, start + runLength));
      }
    }
  }

  /**
   * @returns {string} a one-line description of the fixture
   */
  toString() {
    return `pat=${this.pat} text=${this.text} len=${this.matches.length} matches=${JSON.stringify(this.matches)}`;
  }
}
24
/**
 * Table of find-test fixtures.
 *
 * Each entry is `new Test(pattern, text, n, ...positions)`: `n` is the number
 * of expected matches and the remaining numbers are split evenly between
 * those matches by the Test constructor.
 *
 * NOTE(review): within one match the numbers look like (start, end) pairs,
 * whole match first and then one pair per capture group, with -1/-1 for a
 * group that did not take part in the match. Confirm against the consumer.
 * NOTE(review): positions for non-ASCII text (e.g. 9 for the three-character
 * "日本語") look like UTF-8 byte offsets rather than UTF-16 indices. Confirm.
 */
export const FIND_TESTS = [
  new Test("", "", 1, 0, 0),
  new Test("^abcdefg", "abcdefg", 1, 0, 7),
  new Test("a+", "baaab", 1, 1, 4),
  new Test("abcd..", "abcdef", 1, 0, 6),
  new Test("a", "a", 1, 0, 1),
  new Test("x", "y", 0),
  new Test("b", "abc", 1, 1, 2),
  new Test(".", "a", 1, 0, 1),
  new Test(".*", "abcdef", 1, 0, 6),
  new Test("^", "abcde", 1, 0, 0),
  new Test("$", "abcde", 1, 5, 5),
  new Test("^abcd$", "abcd", 1, 0, 4),
  new Test("^bcd'", "abcdef", 0),
  new Test("^abcd$", "abcde", 0),
  new Test("h.*od?", "hello\ngoodbye\n", 1, 0, 5),
  new Test("a{1,5}", "baaac", 1, 1, 4),
  new Test("ac{1,25}", "bbaaaccccdd", 1, 4, 9),
  // Same entry as the third one in this table.
  new Test("a+", "baaab", 1, 1, 4),
  new Test("a*", "baaab", 3, 0, 0, 1, 4, 5, 5),
  new Test("[a-z]+", "abcd", 1, 0, 4),
  new Test("[^a-z]+", "ab1234cd", 1, 2, 6),
  new Test("[a\\-\\]z]+", "az]-bcz", 2, 0, 4, 6, 7),
  new Test("[^\\n]+", "abcd\n", 1, 0, 4),
  new Test("[日本語]+", "日本語日本語", 1, 0, 18),
  new Test("日本語+", "日本語", 1, 0, 9),
  new Test("日本語+", "日本語語語語", 1, 0, 18),
  new Test("()", "", 1, 0, 0, 0, 0),
  new Test("(a)", "a", 1, 0, 1, 0, 1),
  new Test("(.)(.)", "日a", 1, 0, 4, 0, 3, 3, 4),
  new Test("(.*)", "", 1, 0, 0, 0, 0),
  new Test("(.*)", "abcd", 1, 0, 4, 0, 4),
  new Test("(..)(..)", "abcd", 1, 0, 4, 0, 2, 2, 4),
  new Test("(([^xyz]*)(d))", "abcd", 1, 0, 4, 0, 4, 0, 3, 3, 4),
  new Test("((a|b|c)*(d))", "abcd", 1, 0, 4, 0, 4, 2, 3, 3, 4),
  new Test("(((a|b|c)*)(d))", "abcd", 1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4),
  new Test("\\a\\f\\n\\r\\t\\v", "\x07\f\n\r\t\v", 1, 0, 6),
  new Test("[\\a\\f\\n\\r\\t\\v]+", "\x07\f\n\r\t\v", 1, 0, 6),
  new Test("a*(|(b))c*", "aacc", 1, 0, 4, 2, 2, -1, -1),
  new Test("(.*).*", "ab", 1, 0, 2, 0, 2),
  new Test("[.]", ".", 1, 0, 1),
  new Test("/$", "/abc/", 1, 4, 5),
  new Test("/$", "/abc", 0),
  // multiple matches
  new Test(".", "abc", 3, 0, 1, 1, 2, 2, 3),
  new Test("(.)", "abc", 3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3),
  new Test(".(.)", "abcd", 2, 0, 2, 1, 2, 2, 4, 3, 4),
  new Test("ab*", "abbaab", 3, 0, 3, 3, 4, 4, 6),
  new Test("a(b*)", "abbaab", 3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6),
  // fixed bugs
  new Test("ab$", "cab", 1, 1, 3),
  new Test("axxb$", "axxcb", 0),
  new Test("data", "daXY data", 1, 5, 9),
  new Test("da(.)a$", "daXY data", 1, 5, 9, 7, 8),
  new Test("zx+", "zzx", 1, 1, 3),
  new Test("ab$", "abcab", 1, 3, 5),
  new Test("(aa)*$", "a", 1, 1, 1, -1, -1),
  new Test("(?:.|(?:.a))", "", 0),
  new Test("(?:A(?:A|a))", "Aa", 1, 0, 2),
  new Test("(?:A|(?:A|a))", "a", 1, 0, 1),
  new Test("(a){0}", "", 1, 0, 0, -1, -1),
  new Test("(?-s)(?:(?:^).)", "\n", 0),
  new Test("(?s)(?:(?:^).)", "\n", 1, 0, 1),
  new Test("(?:(?:^).)", "\n", 0),
  new Test("\\b", "x", 2, 0, 0, 1, 1),
  new Test("\\b", "xx", 2, 0, 0, 2, 2),
  new Test("\\b", "x y", 4, 0, 0, 1, 1, 2, 2, 3, 3),
  new Test("\\b", "xx yy", 4, 0, 0, 2, 2, 3, 3, 5, 5),
  new Test("\\B", "x", 0),
  new Test("\\B", "xx", 1, 1, 1),
  new Test("\\B", "x y", 0),
  new Test("\\B", "xx yy", 2, 1, 1, 4, 4),
  // RE2 tests
  new Test("[^\\S\\s]", "abcd", 0),
  new Test("[^\\S[:space:]]", "abcd", 0),
  new Test("[^\\D\\d]", "abcd", 0),
  new Test("[^\\D[:digit:]]", "abcd", 0),
  new Test("(?i)\\W", "x", 0),
  new Test("(?i)\\W", "k", 0),
  new Test("(?i)\\W", "s", 0),
  // can backslash-escape any punctuation
  new Test("\\!\\\"\\#\\$\\%\\&\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\{\\|\\}\\~", "!\"#$%&'()*+,-./:;<=>?@[\\]^_{|}~", 1, 0, 31),
  new Test("[\\!\\\"\\#\\$\\%\\&\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\{\\|\\}\\~]+", "!\"#$%&'()*+,-./:;<=>?@[\\]^_{|}~", 1, 0, 31),
  new Test("\\`", "`", 1, 0, 1),
  new Test("[\\`]+", "`", 1, 0, 1),
  // long set of matches
  new Test(".", "qwertyuiopasdfghjklzxcvbnm1234567890", 36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
  new Test("(|a)*", "aa", 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2),
];
@@ -0,0 +1,2 @@
1
/**
 * Returns the code point at index 0 of `v`.
 * Throws an Error ("codePoint: empty string") when `v` has no code point at
 * index 0, i.e. when it is empty.
 */
export declare const codePoint: (v: string) => number;
2
/**
 * Returns `s.codePointAt(i)`.
 * Throws an Error naming `i` and the JSON-quoted `s` when that lookup yields
 * undefined (no code point at index `i`).
 */
export declare const codePointAtOrThrow: (s: string, i: number) => number;
@@ -0,0 +1,14 @@
1
/**
 * Returns the code point found at the very start of `v`.
 *
 * @param {string} v a non-empty string
 * @returns {number} the code point at index 0
 * @throws {Error} when `v` has no code point at index 0 (empty string)
 */
export const codePoint = (v) => {
  const first = v.codePointAt(0);
  if (first !== undefined) {
    return first;
  }
  throw new Error("codePoint: empty string");
};
8
/**
 * Looks up the code point of `s` at index `i`, refusing to return undefined.
 *
 * @param {string} s the string to read from
 * @param {number} i the index passed to `codePointAt`
 * @returns {number} the code point at that index
 * @throws {Error} when `s.codePointAt(i)` is undefined
 */
export const codePointAtOrThrow = (s, i) => {
  const found = s.codePointAt(i);
  if (found !== undefined) {
    return found;
  }
  throw new Error(`codePointAt(${i}) returned undefined for ${JSON.stringify(s)}`);
};
@@ -0,0 +1,55 @@
1
/**
 * Base class of the exception types declared in this module; each of the
 * other exception classes declared here extends it.
 */
declare class RE2JSException extends Error {
    /** @param {string} message */
    constructor(message: string);
}
5
/**
 * An exception thrown by the parser if the pattern was invalid.
 */
declare class RE2JSSyntaxException extends RE2JSException {
    /** Description of the error; the value returned by getDescription(). */
    error: string;
    /** The erroneous pattern, or null when none was given; the value returned by getPattern(). */
    input: string | null;
    /**
     * @param {string} error description of the error
     * @param {string|null} [input=null] the erroneous pattern, if known
     */
    constructor(error: string, input?: string | null);
    /**
     * Retrieves the description of the error.
     * @returns {string}
     */
    getDescription(): string;
    /**
     * Retrieves the erroneous regular-expression pattern.
     * @returns {string|null}
     */
    getPattern(): string | null;
}
27
/**
 * An exception thrown by the compiler.
 * Declares no members beyond those of RE2JSException.
 */
declare class RE2JSCompileException extends RE2JSException {
    /** @param {string} message */
    constructor(message: string);
}
34
/**
 * An exception thrown when using groups.
 * Declares no members beyond those of RE2JSException.
 */
declare class RE2JSGroupException extends RE2JSException {
    /** @param {string} message */
    constructor(message: string);
}
41
/**
 * An exception thrown for invalid flags.
 * Declares no members beyond those of RE2JSException.
 */
declare class RE2JSFlagsException extends RE2JSException {
    /** @param {string} message */
    constructor(message: string);
}
48
/**
 * An exception thrown for internal engine errors, such as corrupted bytecodes.
 * Declares no members beyond those of RE2JSException.
 */
declare class RE2JSInternalException extends RE2JSException {
    /** @param {string} message */
    constructor(message: string);
}
55
+ export { RE2JSException, RE2JSSyntaxException, RE2JSCompileException, RE2JSGroupException, RE2JSFlagsException, RE2JSInternalException, };
@@ -0,0 +1,86 @@
1
/**
 * Common base class for the exceptions defined in this module.
 */
class RE2JSException extends Error {
  // Own `name` property, set as soon as the Error constructor has returned.
  name = "RE2JSException";

  /** @param {string} message */
  constructor(message) {
    super(message);
  }
}
8
/**
 * Raised by the parser when a pattern is invalid.
 *
 * The message is "error parsing regexp: <error>", followed by the pattern in
 * backticks when a non-empty pattern was supplied.
 */
class RE2JSSyntaxException extends RE2JSException {
  error;
  input;

  /**
   * @param {string} error description of the problem
   * @param {string|null} [input=null] the offending pattern, if known
   */
  constructor(error, input = null) {
    const head = `error parsing regexp: ${error}`;
    const tail = input ? `: \`${input}\`` : "";
    super(head + tail);
    this.name = "RE2JSSyntaxException";
    /** @type {string} */
    this.error = error;
    /** @type {string|null} */
    this.input = input;
  }

  /**
   * @returns {string} the description of the error
   */
  getDescription() {
    return this.error;
  }

  /**
   * @returns {string|null} the erroneous regular-expression pattern
   */
  getPattern() {
    return this.input;
  }
}
46
/**
 * Raised by the compiler.
 */
class RE2JSCompileException extends RE2JSException {
  // Replaces the name assigned by the base class constructor.
  name = "RE2JSCompileException";

  /** @param {string} message */
  constructor(message) {
    super(message);
  }
}
56
/**
 * Raised when using groups.
 */
class RE2JSGroupException extends RE2JSException {
  // Replaces the name assigned by the base class constructor.
  name = "RE2JSGroupException";

  /** @param {string} message */
  constructor(message) {
    super(message);
  }
}
66
/**
 * Raised for invalid flags.
 */
class RE2JSFlagsException extends RE2JSException {
  // Replaces the name assigned by the base class constructor.
  name = "RE2JSFlagsException";

  /** @param {string} message */
  constructor(message) {
    super(message);
  }
}
76
/**
 * Raised for internal engine errors, such as corrupted bytecodes.
 */
class RE2JSInternalException extends RE2JSException {
  // Replaces the name assigned by the base class constructor.
  name = "RE2JSInternalException";

  /** @param {string} message */
  constructor(message) {
    super(message);
  }
}
86
+ export { RE2JSException, RE2JSSyntaxException, RE2JSCompileException, RE2JSGroupException, RE2JSFlagsException, RE2JSInternalException, };
@@ -0,0 +1,102 @@
1
+ import { RE2 } from "./RE2.js";
2
+ export { RE2JSCompileException, RE2JSException, RE2JSFlagsException, RE2JSGroupException, RE2JSInternalException, RE2JSSyntaxException, } from "./exceptions.js";
3
/**
 * A compiled representation of an RE2 regular expression
 */
export declare class RE2JS {
    /** The pattern string given to the constructor; returned by pattern() and toString(). */
    patternInput: string;
    /** The flags given to the constructor; returned by flags(). */
    flagsInput: number;
    /** The compiled RE2 object backing this pattern; returned by re2(). */
    re2Input: RE2;
    /**
     * Flag: case insensitive matching.
     */
    static CASE_INSENSITIVE: number;
    /**
     * Flag: dot ({@code .}) matches all characters, including newline.
     */
    static DOTALL: number;
    /**
     * Flag: multiline matching: {@code ^} and {@code $} match at beginning and end of line, not just
     * beginning and end of input.
     */
    static MULTILINE: number;
    /**
     * Flag: Unicode groups (e.g. {@code \p{Greek}} ) will be syntax errors.
     */
    static DISABLE_UNICODE_GROUPS: number;
    /**
     * Returns a literal pattern string for the specified string.
     *
     * @param {string} str The string to be literalized
     * @returns {string} A literal string replacement
     */
    static quote(str: string): string;
    /**
     * Helper: create new RE2JS with given regex and flags.
     * @param {string} regex
     * @param {number} [flags=0]
     * @returns {RE2JS}
     */
    static compile(regex: string, flags?: number): RE2JS;
    /**
     * Checks that `flags` contains only bits from MULTILINE, DOTALL,
     * CASE_INSENSITIVE and DISABLE_UNICODE_GROUPS.
     *
     * @param {number} flags
     * @throws RE2JSFlagsException if any other bit is set
     */
    static validateFlags(flags: number): void;
    /**
     * Returns `regex` prefixed with an inline flag group for each of
     * CASE_INSENSITIVE ({@code (?i)}), DOTALL ({@code (?s)}) and
     * MULTILINE ({@code (?m)}) that is set in `flags`.
     *
     * @param {string} regex
     * @param {number} [flags=0]
     * @returns {string}
     */
    static buildRegexWithFlags(regex: string, flags?: number): string;
    /**
     * Matches a string against a regular expression.
     *
     * @param {string} regex the regular expression
     * @param {string} input the input
     * @returns {boolean} true if the regular expression matches the entire input
     * @throws RE2JSSyntaxException if the regular expression is malformed
     */
    static matches(regex: string, input: string): boolean;
    /**
     * @param {string} pattern
     * @param {number} [flags=0] a combination of the static flag constants
     */
    constructor(pattern: string, flags?: number);
    /**
     * Releases memory used by internal caches associated with this pattern.
     */
    reset(): void;
    /**
     * Returns the flags used in the constructor.
     * @returns {number}
     */
    flags(): number;
    /**
     * Returns the pattern used in the constructor.
     * @returns {string}
     */
    pattern(): string;
    /**
     * Returns the compiled RE2 object held by this instance.
     * @returns {RE2}
     */
    re2(): RE2;
    /**
     * Matches a string against a regular expression.
     *
     * @param {string} input the input
     * @returns {boolean} true if the regular expression matches the entire input
     */
    matches(input: string): boolean;
    /**
     * Tests whether the regular expression matches any part of the input string.
     *
     * @param {string} input - The input string to test against.
     * @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
     */
    test(input: string): boolean;
    /**
     * Tests whether the regular expression matches the ENTIRE input string.
     *
     * @param {string} input - The input string to test against.
     * @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
     */
    testExact(input: string): boolean;
    /**
     * Returns the pattern used in the constructor.
     * @returns {string}
     */
    toString(): string;
    /**
     * Returns the number of capturing groups in this matcher's pattern.
     */
    groupCount(): number;
    /**
     * Return a map of the capturing groups in this matcher's pattern.
     */
    namedGroups(): Map<string, number>;
}
@@ -0,0 +1,163 @@
1
+ import { ANCHOR_BOTH, PERL, UNICODE_GROUPS } from "./RE2Flags.js";
2
+ import { fromUTF16 } from "./MachineInput.js";
3
+ import { RE2 } from "./RE2.js";
4
+ import { quoteMeta } from "./Utils.js";
5
+ import { RE2JSFlagsException } from "./exceptions.js";
6
+ export { RE2JSCompileException, RE2JSException, RE2JSFlagsException, RE2JSGroupException, RE2JSInternalException, RE2JSSyntaxException, } from "./exceptions.js";
7
/**
 * A compiled RE2 regular expression, kept together with the pattern text and
 * flags it was built from.
 */
export class RE2JS {
  patternInput;
  flagsInput;
  re2Input;

  /** Flag: case insensitive matching. */
  static CASE_INSENSITIVE = 1;
  /** Flag: dot ({@code .}) matches all characters, including newline. */
  static DOTALL = 2;
  /**
   * Flag: multiline matching: {@code ^} and {@code $} match at beginning and
   * end of line, not just beginning and end of input.
   */
  static MULTILINE = 4;
  /** Flag: Unicode groups (e.g. {@code \p{Greek}} ) will be syntax errors. */
  static DISABLE_UNICODE_GROUPS = 8;

  /**
   * Produces a pattern that matches `str` literally.
   *
   * @param {string} str The string to be literalized
   * @returns {string} A literal string replacement
   */
  static quote(str) {
    return quoteMeta(str);
  }

  /**
   * Convenience factory equivalent to `new RE2JS(regex, flags)`.
   *
   * @param {string} regex
   * @param {number} [flags=0]
   * @returns {RE2JS}
   */
  static compile(regex, flags = 0) {
    return new RE2JS(regex, flags);
  }

  /**
   * Rejects any flag bit that is not one of the four public constants.
   *
   * @param {number} flags
   * @throws RE2JSFlagsException if an unknown bit is set
   */
  static validateFlags(flags) {
    const allowed =
      RE2JS.MULTILINE |
      RE2JS.DOTALL |
      RE2JS.CASE_INSENSITIVE |
      RE2JS.DISABLE_UNICODE_GROUPS;
    const unknown = flags & ~allowed;
    if (unknown === 0) {
      return;
    }
    throw new RE2JSFlagsException("Flags should only be a combination of MULTILINE, DOTALL, CASE_INSENSITIVE, DISABLE_UNICODE_GROUPS");
  }

  /**
   * Prepends an inline flag group to `regex` for every matching bit in
   * `flags`. Groups are prepended in the order (?i), (?s), (?m), so the last
   * one applied ends up first in the result.
   *
   * @param {string} regex
   * @param {number} [flags=0]
   * @returns {string}
   */
  static buildRegexWithFlags(regex, flags = 0) {
    const inlineGroups = [
      [RE2JS.CASE_INSENSITIVE, "(?i)"],
      [RE2JS.DOTALL, "(?s)"],
      [RE2JS.MULTILINE, "(?m)"],
    ];
    let result = regex;
    for (const [bit, group] of inlineGroups) {
      if ((flags & bit) !== 0) {
        result = `${group}${result}`;
      }
    }
    return result;
  }

  /**
   * One-shot full match of `input` against `regex` (compiled with no flags).
   *
   * @param {string} regex the regular expression
   * @param {string} input the input
   * @returns {boolean} true if the regular expression matches the entire input
   * @throws RE2JSSyntaxException if the regular expression is malformed
   */
  static matches(regex, input) {
    const compiled = RE2JS.compile(regex);
    return compiled.testExact(input);
  }

  /**
   * @param {string} pattern
   * @param {number} flags
   */
  constructor(pattern, flags = 0) {
    const unicodeGroupsDisabled = (flags & RE2JS.DISABLE_UNICODE_GROUPS) !== 0;
    const re2Flags = unicodeGroupsDisabled ? PERL & ~UNICODE_GROUPS : PERL;
    RE2JS.validateFlags(flags);
    const source = RE2JS.buildRegexWithFlags(pattern, flags);
    this.patternInput = pattern;
    this.flagsInput = flags;
    this.re2Input = RE2.compileImpl(source, re2Flags);
  }

  /**
   * Releases memory used by internal caches associated with this pattern.
   */
  reset() {
    this.re2Input.reset();
  }

  /**
   * @returns {number} the flags used in the constructor
   */
  flags() {
    return this.flagsInput;
  }

  /**
   * @returns {string} the pattern used in the constructor
   */
  pattern() {
    return this.patternInput;
  }

  /**
   * @returns the compiled RE2 object held by this instance
   */
  re2() {
    return this.re2Input;
  }

  /**
   * Full-match check; delegates to {@link RE2JS#testExact}.
   *
   * @param {string} input the input
   * @returns {boolean} true if the regular expression matches the entire input
   */
  matches(input) {
    return this.testExact(input);
  }

  /**
   * Tests whether the regular expression matches any part of the input string.
   *
   * @param {string} input - The input string to test against.
   * @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
   */
  test(input) {
    return this.re2Input.match(input);
  }

  /**
   * Tests whether the regular expression matches the ENTIRE input string.
   *
   * @param {string} input - The input string to test against.
   * @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
   */
  testExact(input) {
    const machineInput = fromUTF16(input);
    const outcome = this.re2Input.executeEngine(machineInput, 0, ANCHOR_BOTH, 0);
    return outcome !== null;
  }

  /**
   * @returns {string} the pattern used in the constructor
   */
  toString() {
    return this.patternInput;
  }

  /**
   * Returns the number of capturing groups in this matcher's pattern.
   */
  groupCount() {
    return this.re2Input.numberOfCapturingGroups();
  }

  /**
   * Return a map of the capturing groups in this matcher's pattern.
   */
  namedGroups() {
    return this.re2Input.namedGroups;
  }
}
@@ -0,0 +1,3 @@
1
+ import { Regexp } from "./Regexp.js";
2
/**
 * Renders `re` as a debug string of the form `<label>{<contents>}`, where the
 * contents of operators with sub-expressions are the dumps of those
 * sub-expressions.
 */
export declare const dumpRegexp: (re: Regexp) => string;
3
/**
 * Builds a character class containing every rune from 0 to MAX_RUNE for
 * which `f` returns true and returns its dumpRegexp() string.
 */
export declare const mkCharClass: (f: (r: number) => boolean) => string;
@@ -0,0 +1,138 @@
1
+ import { FOLD_CASE, NON_GREEDY, WAS_DOLLAR } from "./RE2Flags.js";
2
+ import { Regexp } from "./Regexp.js";
3
+ import { MAX_RUNE, simpleFold } from "./Unicode.js";
4
+ const OP_NAMES = new Map([
5
+ [Regexp.Op.NO_MATCH, "no"],
6
+ [Regexp.Op.EMPTY_MATCH, "emp"],
7
+ [Regexp.Op.LITERAL, "lit"],
8
+ [Regexp.Op.CHAR_CLASS, "cc"],
9
+ [Regexp.Op.ANY_CHAR_NOT_NL, "dnl"],
10
+ [Regexp.Op.ANY_CHAR, "dot"],
11
+ [Regexp.Op.BEGIN_LINE, "bol"],
12
+ [Regexp.Op.END_LINE, "eol"],
13
+ [Regexp.Op.BEGIN_TEXT, "bot"],
14
+ [Regexp.Op.END_TEXT, "eot"],
15
+ [Regexp.Op.WORD_BOUNDARY, "wb"],
16
+ [Regexp.Op.NO_WORD_BOUNDARY, "nwb"],
17
+ [Regexp.Op.CAPTURE, "cap"],
18
+ [Regexp.Op.STAR, "star"],
19
+ [Regexp.Op.PLUS, "plus"],
20
+ [Regexp.Op.QUEST, "que"],
21
+ [Regexp.Op.REPEAT, "rep"],
22
+ [Regexp.Op.CONCAT, "cat"],
23
+ [Regexp.Op.ALTERNATE, "alt"],
24
+ ]);
25
+ export const dumpRegexp = (re) => {
26
+ let b = "";
27
+ if (!OP_NAMES.has(re.op)) {
28
+ b += `op${re.op}`;
29
+ }
30
+ else {
31
+ const name = OP_NAMES.get(re.op);
32
+ switch (re.op) {
33
+ case Regexp.Op.STAR:
34
+ case Regexp.Op.PLUS:
35
+ case Regexp.Op.QUEST:
36
+ case Regexp.Op.REPEAT:
37
+ if ((re.flags & NON_GREEDY) !== 0) {
38
+ b += "n";
39
+ }
40
+ b += name;
41
+ break;
42
+ case Regexp.Op.LITERAL:
43
+ if (re.runes.length > 1) {
44
+ b += "str";
45
+ }
46
+ else {
47
+ b += "lit";
48
+ }
49
+ if ((re.flags & FOLD_CASE) !== 0) {
50
+ for (let r of re.runes) {
51
+ if (simpleFold(r) !== r) {
52
+ b += "fold";
53
+ break;
54
+ }
55
+ }
56
+ }
57
+ break;
58
+ default:
59
+ b += name;
60
+ break;
61
+ }
62
+ }
63
+ b += "{";
64
+ switch (re.op) {
65
+ case Regexp.Op.END_TEXT:
66
+ if ((re.flags & WAS_DOLLAR) === 0) {
67
+ b += "\\z";
68
+ }
69
+ break;
70
+ case Regexp.Op.LITERAL:
71
+ for (let r of re.runes) {
72
+ b += String.fromCodePoint(r);
73
+ }
74
+ break;
75
+ case Regexp.Op.CONCAT:
76
+ case Regexp.Op.ALTERNATE:
77
+ for (let sub of re.subs) {
78
+ b += dumpRegexp(sub);
79
+ }
80
+ break;
81
+ case Regexp.Op.STAR:
82
+ case Regexp.Op.PLUS:
83
+ case Regexp.Op.QUEST:
84
+ b += dumpRegexp(re.subs[0]);
85
+ break;
86
+ case Regexp.Op.REPEAT:
87
+ b += `${re.min},${re.max}`;
88
+ b += " ";
89
+ b += dumpRegexp(re.subs[0]);
90
+ break;
91
+ case Regexp.Op.CAPTURE:
92
+ if (re.name !== null && re.name.length > 0) {
93
+ b += re.name;
94
+ b += ":";
95
+ }
96
+ b += dumpRegexp(re.subs[0]);
97
+ break;
98
+ case Regexp.Op.CHAR_CLASS: {
99
+ let sep = "";
100
+ for (let i = 0; i < re.runes.length; i += 2) {
101
+ b += sep;
102
+ sep = " ";
103
+ let lo = re.runes[i];
104
+ let hi = re.runes[i + 1];
105
+ if (lo === hi) {
106
+ b += `0x${lo.toString(16)}`;
107
+ }
108
+ else {
109
+ b += `0x${lo.toString(16)}-0x${hi.toString(16)}`;
110
+ }
111
+ }
112
+ break;
113
+ }
114
+ }
115
+ b += "}";
116
+ return b;
117
+ };
118
+ export const mkCharClass = (f) => {
119
+ const re = new Regexp(Regexp.Op.CHAR_CLASS);
120
+ let runes = [];
121
+ let lo = -1;
122
+ for (let i = 0; i <= MAX_RUNE; i++) {
123
+ if (f(i)) {
124
+ if (lo < 0) {
125
+ lo = i;
126
+ }
127
+ }
128
+ else if (lo >= 0) {
129
+ runes = [...runes, lo, i - 1];
130
+ lo = -1;
131
+ }
132
+ }
133
+ if (lo >= 0) {
134
+ runes = [...runes, lo, MAX_RUNE];
135
+ }
136
+ re.runes = runes;
137
+ return dumpRegexp(re);
138
+ };