@bufbuild/re2 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +30 -0
  3. package/dist/cjs/CharClass.d.ts +30 -0
  4. package/dist/cjs/CharClass.js +284 -0
  5. package/dist/cjs/CharGroup.d.ts +8 -0
  6. package/dist/cjs/CharGroup.js +83 -0
  7. package/dist/cjs/Codepoint.d.ts +3 -0
  8. package/dist/cjs/Codepoint.js +62 -0
  9. package/dist/cjs/Compiler.d.ts +40 -0
  10. package/dist/cjs/Compiler.js +262 -0
  11. package/dist/cjs/DFA.d.ts +36 -0
  12. package/dist/cjs/DFA.js +350 -0
  13. package/dist/cjs/Inst.d.ts +26 -0
  14. package/dist/cjs/Inst.js +86 -0
  15. package/dist/cjs/MachineInput.d.ts +17 -0
  16. package/dist/cjs/MachineInput.js +72 -0
  17. package/dist/cjs/Parser.d.ts +111 -0
  18. package/dist/cjs/Parser.js +1538 -0
  19. package/dist/cjs/Prefilter.d.ts +19 -0
  20. package/dist/cjs/Prefilter.js +163 -0
  21. package/dist/cjs/Prog.d.ts +39 -0
  22. package/dist/cjs/Prog.js +154 -0
  23. package/dist/cjs/RE2.d.ts +27 -0
  24. package/dist/cjs/RE2.js +221 -0
  25. package/dist/cjs/RE2Flags.d.ts +16 -0
  26. package/dist/cjs/RE2Flags.js +58 -0
  27. package/dist/cjs/Regexp.d.ts +43 -0
  28. package/dist/cjs/Regexp.js +98 -0
  29. package/dist/cjs/Simplify.d.ts +3 -0
  30. package/dist/cjs/Simplify.js +230 -0
  31. package/dist/cjs/Unicode.d.ts +17 -0
  32. package/dist/cjs/Unicode.js +165 -0
  33. package/dist/cjs/UnicodeRangeTable.d.ts +12 -0
  34. package/dist/cjs/UnicodeRangeTable.js +31 -0
  35. package/dist/cjs/UnicodeTables.d.ts +29 -0
  36. package/dist/cjs/UnicodeTables.js +571 -0
  37. package/dist/cjs/Utils.d.ts +22 -0
  38. package/dist/cjs/Utils.js +119 -0
  39. package/dist/cjs/__fixtures__/find.d.ts +9 -0
  40. package/dist/cjs/__fixtures__/find.js +115 -0
  41. package/dist/cjs/chars.d.ts +2 -0
  42. package/dist/cjs/chars.js +19 -0
  43. package/dist/cjs/exceptions.d.ts +55 -0
  44. package/dist/cjs/exceptions.js +94 -0
  45. package/dist/cjs/index.d.ts +102 -0
  46. package/dist/cjs/index.js +173 -0
  47. package/dist/cjs/package.json +1 -0
  48. package/dist/cjs/testParser.d.ts +3 -0
  49. package/dist/cjs/testParser.js +143 -0
  50. package/dist/esm/CharClass.d.ts +30 -0
  51. package/dist/esm/CharClass.js +281 -0
  52. package/dist/esm/CharGroup.d.ts +8 -0
  53. package/dist/esm/CharGroup.js +78 -0
  54. package/dist/esm/Codepoint.d.ts +3 -0
  55. package/dist/esm/Codepoint.js +59 -0
  56. package/dist/esm/Compiler.d.ts +40 -0
  57. package/dist/esm/Compiler.js +259 -0
  58. package/dist/esm/DFA.d.ts +36 -0
  59. package/dist/esm/DFA.js +347 -0
  60. package/dist/esm/Inst.d.ts +26 -0
  61. package/dist/esm/Inst.js +83 -0
  62. package/dist/esm/MachineInput.d.ts +17 -0
  63. package/dist/esm/MachineInput.js +68 -0
  64. package/dist/esm/Parser.d.ts +111 -0
  65. package/dist/esm/Parser.js +1535 -0
  66. package/dist/esm/Prefilter.d.ts +19 -0
  67. package/dist/esm/Prefilter.js +159 -0
  68. package/dist/esm/Prog.d.ts +39 -0
  69. package/dist/esm/Prog.js +150 -0
  70. package/dist/esm/RE2.d.ts +27 -0
  71. package/dist/esm/RE2.js +218 -0
  72. package/dist/esm/RE2Flags.d.ts +16 -0
  73. package/dist/esm/RE2Flags.js +41 -0
  74. package/dist/esm/Regexp.d.ts +43 -0
  75. package/dist/esm/Regexp.js +94 -0
  76. package/dist/esm/Simplify.d.ts +3 -0
  77. package/dist/esm/Simplify.js +228 -0
  78. package/dist/esm/Unicode.d.ts +17 -0
  79. package/dist/esm/Unicode.js +150 -0
  80. package/dist/esm/UnicodeRangeTable.d.ts +12 -0
  81. package/dist/esm/UnicodeRangeTable.js +28 -0
  82. package/dist/esm/UnicodeTables.d.ts +29 -0
  83. package/dist/esm/UnicodeTables.js +568 -0
  84. package/dist/esm/Utils.d.ts +22 -0
  85. package/dist/esm/Utils.js +103 -0
  86. package/dist/esm/__fixtures__/find.d.ts +9 -0
  87. package/dist/esm/__fixtures__/find.js +112 -0
  88. package/dist/esm/chars.d.ts +2 -0
  89. package/dist/esm/chars.js +14 -0
  90. package/dist/esm/exceptions.d.ts +55 -0
  91. package/dist/esm/exceptions.js +86 -0
  92. package/dist/esm/index.d.ts +102 -0
  93. package/dist/esm/index.js +163 -0
  94. package/dist/esm/testParser.d.ts +3 -0
  95. package/dist/esm/testParser.js +138 -0
  96. package/package.json +49 -0
@@ -0,0 +1,83 @@
1
+ import { FOLD_CASE } from "./RE2Flags.js";
2
+ import { equalsIgnoreCase } from "./Unicode.js";
3
+ /**
4
+ * A single instruction in the regular expression virtual machine.
5
+ *
6
+ * @see http://swtch.com/~rsc/regexp/regexp2.html
7
+ */
8
+ class Inst {
9
+ static ALT = 1;
10
+ static ALT_MATCH = 2;
11
+ static CAPTURE = 3;
12
+ static EMPTY_WIDTH = 4;
13
+ static FAIL = 5;
14
+ static MATCH = 6;
15
+ static NOP = 7;
16
+ static RUNE = 8;
17
+ static RUNE1 = 9;
18
+ static RUNE_ANY = 10;
19
+ static RUNE_ANY_NOT_NL = 11;
20
+ op;
21
+ out;
22
+ arg;
23
+ runes;
24
+ static isRuneOp(op) {
25
+ return Inst.RUNE <= op && op <= Inst.RUNE_ANY_NOT_NL;
26
+ }
27
+ constructor(op) {
28
+ this.op = op;
29
+ this.out = 0; // all but MATCH, FAIL
30
+ this.arg = 0; // ALT, ALT_MATCH, CAPTURE, EMPTY_WIDTH
31
+ // length==1 => exact match
32
+ // otherwise a list of [lo,hi] pairs. hi is *inclusive*.
33
+ this.runes = [];
34
+ }
35
+ // MatchRune returns true if the instruction matches (and consumes) r.
36
+ // It should only be called when op is a rune op.
37
+ matchRune(r) {
38
+ // Special case: single-rune slice is from literal string, not char
39
+ // class.
40
+ if (this.runes.length === 1) {
41
+ const r0 = this.runes[0];
42
+ // If this pattern is case-insensitive, apply Unicode case folding to compare the two runes.
43
+ // Note that this may result in a case-folding loop when executed,
44
+ // so attempt to reduce the chance of that occurring
45
+ // by performing case folding on |r0| from the pattern rather than |r| from the input.
46
+ if ((this.arg & FOLD_CASE) !== 0) {
47
+ return equalsIgnoreCase(r0, r);
48
+ }
49
+ return r === r0;
50
+ }
51
+ const len = this.runes.length;
52
+ // If the array is exactly 2, 4, 6, or 8 items, DO NOT fall through to binary search
53
+ if (len === 2 || len === 4 || len === 6 || len === 8) {
54
+ for (let j = 0; j < len; j += 2) {
55
+ if (r < this.runes[j]) {
56
+ return false;
57
+ }
58
+ if (r <= this.runes[j + 1]) {
59
+ return true;
60
+ }
61
+ }
62
+ return false; // Stop here
63
+ }
64
+ // Otherwise binary search.
65
+ let lo = 0;
66
+ let hi = (this.runes.length / 2) | 0;
67
+ while (lo < hi) {
68
+ const m = (lo + hi) >> 1; // native cpu instruction for "lo + (((hi - lo) / 2) | 0)"
69
+ const c = this.runes[2 * m];
70
+ if (c <= r) {
71
+ if (r <= this.runes[2 * m + 1]) {
72
+ return true;
73
+ }
74
+ lo = m + 1;
75
+ }
76
+ else {
77
+ hi = m;
78
+ }
79
+ }
80
+ return false;
81
+ }
82
+ }
83
+ export { Inst };
@@ -0,0 +1,17 @@
1
+ import type { Prefilter } from "./Prefilter.js";
2
+ import type { RE2 } from "./RE2.js";
3
+ declare class MachineUTF16Input { // Regex-machine input backed by the window [start, end) of a UTF-16 string; method positions are relative to `start`.
4
+ charSequence: string; // The full backing string (stored as given, not sliced).
5
+ start: number; // Absolute offset in charSequence that is added to every relative position.
6
+ end: number; // Absolute, exclusive upper bound of the window in charSequence.
7
+ constructor(charSequence: string, start?: number, end?: number); // Implementation defaults: start = 0, end = charSequence.length.
8
+ static EOF(): number; // Sentinel that step() returns at end of input; the implementation returns -1 << 3.
9
+ endPos(): number; // Returns `end` unchanged.
10
+ hasString(prefilter: Prefilter, pos: number): boolean; // True when prefilter.str is found at or after start + pos and the hit ends at or before `end`.
11
+ step(pos: number): number; // Returns (codePoint << 3) | width, width being 1 or 2 UTF-16 units, or EOF() once start + pos >= end.
12
+ index(re2: RE2, pos: number): number; // Offset from start + pos to the next occurrence of re2.prefix, or a negative value when there is none.
13
+ context(pos: number): number; // Feeds codePointAt(start + pos - 1) and codePointAt(start + pos) (-1 where out of range) to emptyOpContext and returns its result.
14
+ prefixLength(re2: RE2): number; // Returns re2.prefix.length.
15
+ }
16
+ declare function fromUTF16(charSequence: string, start?: number, end?: number): MachineUTF16Input; // Factory: returns new MachineUTF16Input(charSequence, start, end); start defaults to 0 and end to charSequence.length.
17
+ export { fromUTF16, MachineUTF16Input };
@@ -0,0 +1,68 @@
1
+ import { emptyOpContext } from "./Utils.js";
2
+ import { MAX_HIGH_SURROGATE, MAX_LOW_SURROGATE, MIN_HIGH_SURROGATE, MIN_LOW_SURROGATE, MIN_SUPPLEMENTARY_CODE_POINT, } from "./Unicode.js";
3
+ class MachineUTF16Input {
4
+ charSequence;
5
+ start;
6
+ end;
7
+ constructor(charSequence, start = 0, end = charSequence.length) {
8
+ this.charSequence = charSequence;
9
+ this.start = start;
10
+ this.end = end;
11
+ }
12
+ static EOF() {
13
+ return -1 << 3;
14
+ }
15
+ endPos() {
16
+ return this.end;
17
+ }
18
+ hasString(prefilter, pos) {
19
+ const idx = this.charSequence.indexOf(prefilter.str, this.start + pos);
20
+ return idx !== -1 && idx <= this.end - prefilter.str.length;
21
+ }
22
+ step(pos) {
23
+ pos += this.start;
24
+ if (pos >= this.end) {
25
+ return MachineUTF16Input.EOF();
26
+ }
27
+ const c1 = this.charSequence.charCodeAt(pos);
28
+ // Fast path: standard BMP character (not a high surrogate)
29
+ if (c1 < MIN_HIGH_SURROGATE ||
30
+ c1 > MAX_HIGH_SURROGATE ||
31
+ pos + 1 >= this.end) {
32
+ return (c1 << 3) | 1;
33
+ }
34
+ // Slow path: Calculate surrogate pair manually
35
+ const c2 = this.charSequence.charCodeAt(pos + 1);
36
+ if (c2 >= MIN_LOW_SURROGATE && c2 <= MAX_LOW_SURROGATE) {
37
+ const rune = (c1 - MIN_HIGH_SURROGATE) * 0x400 +
38
+ (c2 - MIN_LOW_SURROGATE) +
39
+ MIN_SUPPLEMENTARY_CODE_POINT;
40
+ return (rune << 3) | 2;
41
+ }
42
+ // Invalid surrogate pair fallback
43
+ return (c1 << 3) | 1;
44
+ }
45
+ index(re2, pos) {
46
+ pos += this.start;
47
+ const i = this.charSequence.indexOf(re2.prefix, pos);
48
+ return i < 0 ? i : i - pos;
49
+ }
50
+ context(pos) {
51
+ pos += this.start;
52
+ const r1 = pos > 0 && pos <= this.charSequence.length
53
+ ? this.charSequence.codePointAt(pos - 1)
54
+ : -1;
55
+ const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
56
+ if (r1 === undefined || r2 === undefined) {
57
+ throw new Error("invalid state");
58
+ }
59
+ return emptyOpContext(r1, r2);
60
+ }
61
+ prefixLength(re2) {
62
+ return re2.prefix.length;
63
+ }
64
+ }
65
+ function fromUTF16(charSequence, start = 0, end = charSequence.length) {
66
+ return new MachineUTF16Input(charSequence, start, end);
67
+ }
68
+ export { fromUTF16, MachineUTF16Input };
@@ -0,0 +1,111 @@
1
+ import { UnicodeRangeTable } from "./UnicodeRangeTable.js";
2
+ import { CharClass } from "./CharClass.js";
3
+ import { Regexp } from "./Regexp.js";
4
+ declare class StringIterator { // Cursor-style helper over a string; declared here but not exported, it appears only as a parameter type of Parser methods. NOTE(review): the implementation is not part of this declaration file — the member notes below are inferred from names and signatures, confirm against Parser.js.
5
+ str: string; // The string passed to the constructor — presumably; confirm.
6
+ position: number; // Presumably the current cursor offset into `str` — confirm.
7
+ constructor(str: string);
8
+ pos(): number; // Presumably returns `position` — confirm.
9
+ rewindTo(pos: number): void; // Presumably moves the cursor back to a value previously obtained from pos() — confirm.
10
+ more(): boolean; // Presumably true while unread input remains — confirm.
11
+ peek(): number; // Presumably returns the next code point without advancing — confirm.
12
+ skip(n: number): void; // NOTE(review): unit of `n` (UTF-16 units vs. code points) is not visible here — confirm.
13
+ skipString(s: string): void; // Presumably advances past `s` — confirm.
14
+ pop(): number; // Presumably returns the next code point and advances — confirm.
15
+ lookingAt(s: string): boolean; // Presumably tests whether the unread input starts with `s` — confirm.
16
+ rest(): string; // Presumably the unread remainder of `str` — confirm.
17
+ from(beforePos: number): string; // Presumably the text between `beforePos` and the cursor — confirm.
18
+ toString(): string;
19
+ }
20
+ /**
21
+ * A parser of regular expression patterns.
22
+ *
23
+ * The intended entry point is the static {@link Parser.parse} method.
24
+ */
25
+ declare class Parser {
26
+ static ERR_INVALID_CHAR_RANGE: string; // ERR_* members are string constants; presumably the message texts used when reporting parse failures — confirm in Parser.js.
27
+ static ERR_INVALID_ESCAPE: string;
28
+ static ERR_INVALID_NAMED_CAPTURE: string;
29
+ static ERR_INVALID_PERL_OP: string;
30
+ static ERR_INVALID_REPEAT_OP: string;
31
+ static ERR_INVALID_REPEAT_SIZE: string;
32
+ static ERR_MISSING_BRACKET: string;
33
+ static ERR_MISSING_PAREN: string;
34
+ static ERR_MISSING_REPEAT_ARGUMENT: string;
35
+ static ERR_TRAILING_BACKSLASH: string;
36
+ static ERR_DUPLICATE_NAMED_CAPTURE: string;
37
+ static ERR_UNEXPECTED_PAREN: string;
38
+ static ERR_NESTING_DEPTH: string;
39
+ static ERR_LARGE: string;
40
+ static ERR_BAD_EXPRESSION: string;
41
+ static MAX_HEIGHT: number; // NOTE(review): MAX_* look like parser limits (cf. checkHeight / checkSize / numRunes below); values and exact meaning are not visible here — confirm.
42
+ static MAX_SIZE: number;
43
+ static MAX_RUNES: number;
44
+ static ANY_TABLE: UnicodeRangeTable;
45
+ static ASCII_TABLE: UnicodeRangeTable;
46
+ static ASCII_FOLD_TABLE: UnicodeRangeTable;
47
+ static unicodeTable(name: string): { // Returns null or a { tab, fold, sign } record; NOTE(review): presumably null means `name` is not a known table — confirm.
48
+ tab: UnicodeRangeTable | null;
49
+ fold: UnicodeRangeTable | null;
50
+ sign: number;
51
+ } | null;
52
+ static minFoldRune(r: number): number;
53
+ static literalRegexp(s: string, flags: number): Regexp;
54
+ /**
55
+ * Parse the regular expression pattern `pattern` with mode flags `flags`.
56
+ * @param pattern - the regular expression pattern text
57
+ * @param flags - the mode flags to parse with
58
+ */
59
+ static parse(pattern: string, flags: number): Regexp;
60
+ static parseRepeat(t: StringIterator): number;
61
+ static isValidCaptureName(name: string): boolean;
62
+ static parseInt(t: StringIterator): number;
63
+ static isCharClass(re: Regexp): boolean;
64
+ static matchRune(re: Regexp, r: number): boolean;
65
+ static mergeCharClass(dst: Regexp, src: Regexp): void;
66
+ static parseEscape(t: StringIterator): number;
67
+ static parseClassChar(t: StringIterator, wholeClassPos: number): number;
68
+ static concatRunes(x: number[], y: number[]): number[];
69
+ wholeRegexp: string; // Instance state from here on. NOTE(review): presumably the full pattern text given to the constructor — confirm.
70
+ flags: number;
71
+ numCap: number;
72
+ namedGroups: Map<string, number>;
73
+ stack: Regexp[];
74
+ free: Regexp | null;
75
+ numRegexp: number;
76
+ numRunes: number;
77
+ repeats: number;
78
+ height: Map<Regexp, number> | null; // NOTE(review): `height` and `size` are nullable per-Regexp maps; looks like caches for calcHeight / calcSize — confirm.
79
+ size: Map<Regexp, number> | null;
80
+ constructor(wholeRegexp: string, flags?: number);
81
+ newRegexp(op: number): Regexp;
82
+ reuse(re: Regexp): void;
83
+ checkLimits(re: Regexp): void;
84
+ checkSize(re: Regexp): void;
85
+ calcSize(re: Regexp, force?: boolean): number;
86
+ checkHeight(re: Regexp): void;
87
+ calcHeight(re: Regexp, force?: boolean): number;
88
+ pop(): Regexp | undefined; // May return undefined (per the declared type); callers must handle that case.
89
+ popToPseudo(): Regexp[];
90
+ push(re: Regexp): Regexp | null;
91
+ maybeConcat(r: number, flags: number): boolean;
92
+ newLiteral(r: number, flags: number): Regexp;
93
+ literal(r: number): void;
94
+ op(op: number): Regexp | null;
95
+ repeat(op: number, min: number, max: number, beforePos: number, t: StringIterator, lastRepeatPos: number): void;
96
+ repeatIsValid(re: Regexp, n: number): boolean;
97
+ concat(): Regexp | null;
98
+ alternate(): Regexp | null;
99
+ cleanAlt(re: Regexp): void;
100
+ collapse(subs: Regexp[], op: number): Regexp;
101
+ parseInternal(): Regexp;
102
+ parsePerlFlags(t: StringIterator): void;
103
+ parseVerticalBar(): void;
104
+ swapVerticalBar(): boolean;
105
+ parseRightParen(): void;
106
+ parsePerlClassEscape(t: StringIterator, cc: CharClass): boolean;
107
+ parseNamedClass(t: StringIterator, cc: CharClass): boolean;
108
+ parseUnicodeClass(t: StringIterator, cc: CharClass): boolean;
109
+ parseClass(t: StringIterator): void;
110
+ }
111
+ export { Parser };