@bufbuild/re2 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +30 -0
- package/dist/cjs/CharClass.d.ts +30 -0
- package/dist/cjs/CharClass.js +284 -0
- package/dist/cjs/CharGroup.d.ts +8 -0
- package/dist/cjs/CharGroup.js +83 -0
- package/dist/cjs/Codepoint.d.ts +3 -0
- package/dist/cjs/Codepoint.js +62 -0
- package/dist/cjs/Compiler.d.ts +40 -0
- package/dist/cjs/Compiler.js +262 -0
- package/dist/cjs/DFA.d.ts +36 -0
- package/dist/cjs/DFA.js +350 -0
- package/dist/cjs/Inst.d.ts +26 -0
- package/dist/cjs/Inst.js +86 -0
- package/dist/cjs/MachineInput.d.ts +17 -0
- package/dist/cjs/MachineInput.js +72 -0
- package/dist/cjs/Parser.d.ts +111 -0
- package/dist/cjs/Parser.js +1538 -0
- package/dist/cjs/Prefilter.d.ts +19 -0
- package/dist/cjs/Prefilter.js +163 -0
- package/dist/cjs/Prog.d.ts +39 -0
- package/dist/cjs/Prog.js +154 -0
- package/dist/cjs/RE2.d.ts +27 -0
- package/dist/cjs/RE2.js +221 -0
- package/dist/cjs/RE2Flags.d.ts +16 -0
- package/dist/cjs/RE2Flags.js +58 -0
- package/dist/cjs/Regexp.d.ts +43 -0
- package/dist/cjs/Regexp.js +98 -0
- package/dist/cjs/Simplify.d.ts +3 -0
- package/dist/cjs/Simplify.js +230 -0
- package/dist/cjs/Unicode.d.ts +17 -0
- package/dist/cjs/Unicode.js +165 -0
- package/dist/cjs/UnicodeRangeTable.d.ts +12 -0
- package/dist/cjs/UnicodeRangeTable.js +31 -0
- package/dist/cjs/UnicodeTables.d.ts +29 -0
- package/dist/cjs/UnicodeTables.js +571 -0
- package/dist/cjs/Utils.d.ts +22 -0
- package/dist/cjs/Utils.js +119 -0
- package/dist/cjs/__fixtures__/find.d.ts +9 -0
- package/dist/cjs/__fixtures__/find.js +115 -0
- package/dist/cjs/chars.d.ts +2 -0
- package/dist/cjs/chars.js +19 -0
- package/dist/cjs/exceptions.d.ts +55 -0
- package/dist/cjs/exceptions.js +94 -0
- package/dist/cjs/index.d.ts +102 -0
- package/dist/cjs/index.js +173 -0
- package/dist/cjs/package.json +1 -0
- package/dist/cjs/testParser.d.ts +3 -0
- package/dist/cjs/testParser.js +143 -0
- package/dist/esm/CharClass.d.ts +30 -0
- package/dist/esm/CharClass.js +281 -0
- package/dist/esm/CharGroup.d.ts +8 -0
- package/dist/esm/CharGroup.js +78 -0
- package/dist/esm/Codepoint.d.ts +3 -0
- package/dist/esm/Codepoint.js +59 -0
- package/dist/esm/Compiler.d.ts +40 -0
- package/dist/esm/Compiler.js +259 -0
- package/dist/esm/DFA.d.ts +36 -0
- package/dist/esm/DFA.js +347 -0
- package/dist/esm/Inst.d.ts +26 -0
- package/dist/esm/Inst.js +83 -0
- package/dist/esm/MachineInput.d.ts +17 -0
- package/dist/esm/MachineInput.js +68 -0
- package/dist/esm/Parser.d.ts +111 -0
- package/dist/esm/Parser.js +1535 -0
- package/dist/esm/Prefilter.d.ts +19 -0
- package/dist/esm/Prefilter.js +159 -0
- package/dist/esm/Prog.d.ts +39 -0
- package/dist/esm/Prog.js +150 -0
- package/dist/esm/RE2.d.ts +27 -0
- package/dist/esm/RE2.js +218 -0
- package/dist/esm/RE2Flags.d.ts +16 -0
- package/dist/esm/RE2Flags.js +41 -0
- package/dist/esm/Regexp.d.ts +43 -0
- package/dist/esm/Regexp.js +94 -0
- package/dist/esm/Simplify.d.ts +3 -0
- package/dist/esm/Simplify.js +228 -0
- package/dist/esm/Unicode.d.ts +17 -0
- package/dist/esm/Unicode.js +150 -0
- package/dist/esm/UnicodeRangeTable.d.ts +12 -0
- package/dist/esm/UnicodeRangeTable.js +28 -0
- package/dist/esm/UnicodeTables.d.ts +29 -0
- package/dist/esm/UnicodeTables.js +568 -0
- package/dist/esm/Utils.d.ts +22 -0
- package/dist/esm/Utils.js +103 -0
- package/dist/esm/__fixtures__/find.d.ts +9 -0
- package/dist/esm/__fixtures__/find.js +112 -0
- package/dist/esm/chars.d.ts +2 -0
- package/dist/esm/chars.js +14 -0
- package/dist/esm/exceptions.d.ts +55 -0
- package/dist/esm/exceptions.js +86 -0
- package/dist/esm/index.d.ts +102 -0
- package/dist/esm/index.js +163 -0
- package/dist/esm/testParser.d.ts +3 -0
- package/dist/esm/testParser.js +138 -0
- package/package.json +49 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 Alexey Vasiliev
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# @bufbuild/re2
|
|
2
|
+
|
|
3
|
+
This package provides an [RE2-compatible](https://cel.dev) regular expression engine, designed for use with CEL-es and Protovalidate-es.
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
```ts
|
|
8
|
+
import { RE2JS } from '@bufbuild/re2';
|
|
9
|
+
|
|
10
|
+
const re = new RE2JS('^foo');
|
|
11
|
+
console.log(re.test('foo')); // true
|
|
12
|
+
console.log(re.testExact('fooxyz')); // false
|
|
13
|
+
|
|
14
|
+
console.log(RE2JS.matches('^foo','foo')); // true
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Limitations
|
|
18
|
+
Only boolean matchers are supported: `test` and `testExact`.
|
|
19
|
+
|
|
20
|
+
The instance method `matches` is an alias for `testExact`. The static method `RE2JS.matches` compiles
|
|
21
|
+
a regular expression and calls `testExact`.
|
|
22
|
+
|
|
23
|
+
As a size optimization, Unicode category and script information is generated on first use. This causes a
|
|
24
|
+
slight slowdown the first time a pattern is compiled with a category or script (only the referenced category
|
|
25
|
+
or script is generated). The categories and scripts in Unicode version 16.0 are supported. This package includes
|
|
26
|
+
data to support Unicode version 16.0 on JavaScript runtimes that implement Unicode 15.0 or later.
|
|
27
|
+
|
|
28
|
+
## Credits
|
|
29
|
+
This code is a fork of the [RE2JS](https://re2js.leopard.in.ua) project. It has been converted to TypeScript and has a feature set tailored for
|
|
30
|
+
CEL and Protovalidate-es.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { UnicodeRangeTable } from "./UnicodeRangeTable.js";
|
|
2
|
+
import type { CharGroup } from "./CharGroup.js";
|
|
3
|
+
/**
 * Chainable builder for character classes.
 *
 * A class is kept as a flat array of runes read in pairs [lo, hi], each pair being one
 * inclusive interval. Every mutating method changes the builder in place and returns
 * {@code this}, so calls can be chained.
 */
declare class CharClass {
    /** Backing array of [lo, hi] pairs. */
    r: number[];
    /** Length of the prefix of `r` that is currently in use. */
    len: number;
    /** Wraps `r` (an empty array when omitted); `len` starts at `r.length`. */
    constructor(r?: number[]);
    /**
     * Orders the pair at `array[i]`, `array[i + 1]` against the pivot pair
     * (`pivotFrom`, `pivotTo`): ascending by lo, descending by hi when lo is equal.
     */
    static cmp(array: number[], i: number, pivotFrom: number, pivotTo: number): number;
    /** Quicksorts, in place and in `cmp` order, the pairs starting at indices `left`..`right`. */
    static qsortIntPair(array: number[], left: number, right: number): void;
    /** Returns the used prefix of the ranges; this may be the backing array itself. */
    toArray(): number[];
    /** Sorts the ranges and merges the ones that overlap or abut. */
    cleanClass(): this;
    /** Appends the single rune `x`, case-folded when `flags` contains FOLD_CASE. */
    appendLiteral(x: number, flags: number): this;
    /** Appends the inclusive range [lo, hi]. */
    appendRange(lo: number, hi: number): this;
    /** Appends [lo, hi] together with the case-folding equivalents of its runes. */
    appendFoldedRange(lo: number, hi: number): this;
    /** Appends every range of `x`. */
    appendClass(x: number[]): this;
    /** Appends every range of `x` together with its case folding. */
    appendFoldedClass(x: number[]): this;
    /** Appends the complement of `x`. */
    appendNegatedClass(x: number[]): this;
    /** Appends every rune described by `table`. */
    appendTable(table: UnicodeRangeTable): this;
    /** Appends the complement of `table`. */
    appendNegatedTable(table: UnicodeRangeTable): this;
    /** Appends the complement of `table` when `sign` is negative, `table` itself otherwise. */
    appendTableWithSign(table: UnicodeRangeTable, sign: number): this;
    /** Replaces this class by its complement. */
    negateClass(): this;
    /** Appends the complement of `x` when `sign` is negative, `x` itself otherwise. */
    appendClassWithSign(x: number[], sign: number): this;
    /** Appends the group `g` according to its sign, case-folding its ranges first when `foldCase` is set. */
    appendGroup(g: CharGroup, foldCase: boolean): this;
}
|
|
30
|
+
export { CharClass };
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CharClass = void 0;
|
|
4
|
+
const RE2Flags_js_1 = require("./RE2Flags.js");
|
|
5
|
+
const Unicode_js_1 = require("./Unicode.js");
|
|
6
|
+
const Utils_js_1 = require("./Utils.js");
|
|
7
|
+
/**
 * Mutable, chainable builder for character classes.
 *
 * A class is stored as a flat array of runes read in pairs [lo, hi], where every pair is
 * one inclusive interval. Every mutator updates the builder in place and returns
 * {@code this}, so calls can be chained.
 */
class CharClass {
    // cmp() orders the pair (array[i], array[i + 1]) against the pivot pair
    // (pivotFrom, pivotTo): ascending by lo and, for equal lo, descending by hi.
    // The result is negative, zero or positive accordingly.
    static cmp(array, i, pivotFrom, pivotTo) {
        const loDelta = array[i] - pivotFrom;
        if (loDelta !== 0) {
            return loDelta;
        }
        return pivotTo - array[i + 1];
    }
    // qsortIntPair() quicksorts, in place and in cmp() order, the pairs of |array|
    // from the pair starting at index |left| through the pair starting at index
    // |right|. Both indices must be even.
    static qsortIntPair(array, left, right) {
        // Middle pair, rounded down to an even index so it points at a lo.
        const mid = (((left + right) / 2) | 0) & ~1;
        const pivotLo = array[mid];
        const pivotHi = array[mid + 1];
        let lower = left;
        let upper = right;
        while (lower <= upper) {
            while (lower < right && CharClass.cmp(array, lower, pivotLo, pivotHi) < 0) {
                lower += 2;
            }
            while (upper > left && CharClass.cmp(array, upper, pivotLo, pivotHi) > 0) {
                upper -= 2;
            }
            if (lower > upper) {
                break;
            }
            if (lower !== upper) {
                // Swap the two pairs, lo first and then hi.
                [array[lower], array[upper]] = [array[upper], array[lower]];
                [array[lower + 1], array[upper + 1]] = [array[upper + 1], array[lower + 1]];
            }
            lower += 2;
            upper -= 2;
        }
        if (left < upper) {
            CharClass.qsortIntPair(array, left, upper);
        }
        if (lower < right) {
            CharClass.qsortIntPair(array, lower, right);
        }
    }
    r;
    len;
    constructor(r = (0, Utils_js_1.emptyInts)()) {
        // Inclusive ranges as [lo, hi] pairs, so the array length is even.
        this.r = r;
        // Length of the prefix of |r| that is in use; also even.
        this.len = r.length;
    }
    // toArray() hands out the ranges as an int array. When the whole backing
    // array is in use it is returned as is, so later operations on this
    // CharClass may mutate the result; callers normally call this last.
    toArray() {
        return this.len === this.r.length ? this.r : this.r.slice(0, this.len);
    }
    // cleanClass() sorts the ranges of this CharClass and merges the ones that
    // overlap or abut, which also removes duplicates.
    cleanClass() {
        if (this.len >= 4) {
            // Order by lo ascending, ties broken by hi descending.
            CharClass.qsortIntPair(this.r, 0, this.len - 2);
            let out = 2; // write index; the first pair stays where it is
            for (let read = 2; read < this.len; read += 2) {
                const lo = this.r[read];
                const hi = this.r[read + 1];
                const prevHi = this.r[out - 1];
                if (lo > prevHi + 1) {
                    // Disjoint from the previous range: keep it as a new one.
                    this.r[out] = lo;
                    this.r[out + 1] = hi;
                    out += 2;
                }
                else if (hi > prevHi) {
                    // Overlaps or abuts the previous range: stretch that one.
                    this.r[out - 1] = hi;
                }
            }
            this.len = out;
        }
        return this;
    }
    // appendLiteral() adds the single rune |x|, case-folded when |flags| has
    // FOLD_CASE set.
    appendLiteral(x, flags) {
        if ((flags & RE2Flags_js_1.FOLD_CASE) !== 0) {
            return this.appendFoldedRange(x, x);
        }
        return this.appendRange(x, x);
    }
    // appendRange() adds the inclusive range [lo-hi].
    appendRange(lo, hi) {
        // Try to grow the last range, then the one before it, when the new
        // range overlaps or abuts it. Looking at two ranges pays off while
        // appending case-folded alphabets: one range can keep growing A-Z
        // while the other keeps growing a-z.
        for (let back = 2; back <= 4 && this.len >= back; back += 2) {
            const at = this.len - back;
            const curLo = this.r[at];
            const curHi = this.r[at + 1];
            if (!(lo <= curHi + 1 && curLo <= hi + 1)) {
                continue;
            }
            if (lo < curLo) {
                this.r[at] = lo;
            }
            if (hi > curHi) {
                this.r[at + 1] = hi;
            }
            return this;
        }
        // Nothing to grow: store a new pair after the used prefix.
        this.r[this.len] = lo;
        this.r[this.len + 1] = hi;
        this.len += 2;
        return this;
    }
    // appendFoldedRange() adds the range [lo-hi] together with every rune that
    // is case-folding-equivalent to one of its members.
    appendFoldedRange(lo, hi) {
        const minFold = Unicode_js_1.MIN_FOLD;
        const maxFold = Unicode_js_1.MAX_FOLD;
        const fold = Unicode_js_1.simpleFold;
        // Shortcuts: the range spans the whole foldable window (folding cannot
        // add anything), or lies completely outside of it.
        const spansWindow = lo <= minFold && hi >= maxFold;
        const outsideWindow = hi < minFold || lo > maxFold;
        if (spansWindow || outsideWindow) {
            return this.appendRange(lo, hi);
        }
        let from = lo;
        let to = hi;
        if (from < minFold) {
            // The part below the window needs no folding.
            this.appendRange(from, minFold - 1);
            from = minFold;
        }
        if (to > maxFold) {
            // The part above the window needs no folding.
            this.appendRange(maxFold + 1, to);
            to = maxFold;
        }
        // Brute force over the rest; appendRange coalesces ranges as we go.
        for (let c = from; c <= to; c++) {
            this.appendRange(c, c);
            let f = fold(c);
            while (f !== c) {
                this.appendRange(f, f);
                f = fold(f);
            }
        }
        return this;
    }
    // appendClass() adds every range of the class |x|, which is expected to be
    // clean. |x| is left untouched.
    appendClass(x) {
        let i = 0;
        while (i < x.length) {
            this.appendRange(x[i], x[i + 1]);
            i += 2;
        }
        return this;
    }
    // appendFoldedClass() adds the case folding of every range of the class
    // |x|. |x| is left untouched.
    appendFoldedClass(x) {
        let i = 0;
        while (i < x.length) {
            this.appendFoldedRange(x[i], x[i + 1]);
            i += 2;
        }
        return this;
    }
    // appendNegatedClass() adds the complement of the class |x|, which is
    // expected to be clean. |x| is left untouched.
    appendNegatedClass(x) {
        let gapStart = 0; // first rune not yet covered by a range of |x|
        for (let i = 0; i < x.length; i += 2) {
            const lo = x[i];
            const hi = x[i + 1];
            const gapEnd = lo - 1;
            if (gapStart <= gapEnd) {
                this.appendRange(gapStart, gapEnd);
            }
            gapStart = hi + 1;
        }
        if (gapStart <= Unicode_js_1.MAX_RUNE) {
            this.appendRange(gapStart, Unicode_js_1.MAX_RUNE);
        }
        return this;
    }
    // appendTable() adds every rune of the Unicode range table |table|.
    // |table| is left untouched.
    appendTable(table) {
        for (let i = 0; i < table.length; ++i) {
            const lo = table.getLo(i);
            const hi = table.getHi(i);
            const stride = table.getStride(i);
            if (stride !== 1) {
                // Strided entry: only every |stride|-th rune is a member.
                for (let c = lo; c <= hi; c += stride) {
                    this.appendRange(c, c);
                }
            }
            else {
                this.appendRange(lo, hi);
            }
        }
        return this;
    }
    // appendNegatedTable() adds the complement of the Unicode range table
    // |table|. |table| is left untouched.
    appendNegatedTable(table) {
        let gapStart = 0; // first rune not yet covered by |table|
        for (let i = 0; i < table.length; ++i) {
            const lo = table.getLo(i);
            const hi = table.getHi(i);
            const stride = table.getStride(i);
            if (stride !== 1) {
                // Strided entry: emit the gap in front of every member rune.
                for (let c = lo; c <= hi; c += stride) {
                    if (gapStart <= c - 1) {
                        this.appendRange(gapStart, c - 1);
                    }
                    gapStart = c + 1;
                }
            }
            else {
                if (gapStart <= lo - 1) {
                    this.appendRange(gapStart, lo - 1);
                }
                gapStart = hi + 1;
            }
        }
        if (gapStart <= Unicode_js_1.MAX_RUNE) {
            this.appendRange(gapStart, Unicode_js_1.MAX_RUNE);
        }
        return this;
    }
    // appendTableWithSign() adds the complement of |table| for a negative
    // |sign| and |table| itself otherwise. |table| is left untouched.
    appendTableWithSign(table, sign) {
        if (sign < 0) {
            return this.appendNegatedTable(table);
        }
        return this.appendTable(table);
    }
    // negateClass() replaces this CharClass, which must already be clean, by
    // its complement.
    negateClass() {
        let gapStart = 0; // lo end of the next range to emit
        let out = 0; // write index
        for (let read = 0; read < this.len; read += 2) {
            const lo = this.r[read];
            const hi = this.r[read + 1];
            const gapEnd = lo - 1;
            if (gapStart <= gapEnd) {
                this.r[out] = gapStart;
                this.r[out + 1] = gapEnd;
                out += 2;
            }
            gapStart = hi + 1;
        }
        this.len = out;
        if (gapStart <= Unicode_js_1.MAX_RUNE) {
            // Everything above the last range up to the largest rune.
            this.r[out] = gapStart;
            this.r[out + 1] = Unicode_js_1.MAX_RUNE;
            this.len = out + 2;
        }
        return this;
    }
    // appendClassWithSign() adds the complement of |x| for a negative |sign|
    // and |x| itself otherwise. |x| is left untouched.
    appendClassWithSign(x, sign) {
        if (sign < 0) {
            return this.appendNegatedClass(x);
        }
        return this.appendClass(x);
    }
    // appendGroup() adds the CharGroup |g| according to its sign; when
    // |foldCase| is set the group's ranges are case-folded and cleaned first.
    // |g| is left untouched.
    appendGroup(g, foldCase) {
        const ranges = foldCase
            ? new CharClass().appendFoldedClass(g.cls).cleanClass().toArray()
            : g.cls;
        return this.appendClassWithSign(ranges, g.sign);
    }
}
|
|
284
|
+
exports.CharClass = CharClass;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/** A named character group: a list of rune ranges together with a sign. */
declare class CharGroup {
    /** The predefined tables use +1 for a group and -1 for its negated counterpart. */
    sign: number;
    /** Flat array of [lo, hi] rune pairs. */
    cls: number[];
    constructor(sign: number, cls: number[]);
}
/** Returns the map of Perl escapes (`\d`, `\D`, `\s`, `\S`, `\w`, `\W`) to their groups. */
declare const getPerlGroups: () => Map<string, CharGroup>;
/** Returns the map of POSIX class names (`[:alnum:]`, `[:^alnum:]`, ...) to their groups. */
declare const getPosixGroups: () => Map<string, CharGroup>;
|
|
8
|
+
export { CharGroup, getPerlGroups, getPosixGroups };
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// GENERATED BY tools/scripts/make_perl_groups.pl; DO NOT EDIT.
|
|
3
|
+
// ./tools/scripts/make_perl_groups.pl > src/CharGroup.js
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.getPosixGroups = exports.getPerlGroups = exports.CharGroup = void 0;
|
|
6
|
+
// A character group: |cls| holds the rune ranges as flat [lo, hi] pairs and
// |sign| tells whether the group stands for the ranges (+1) or their
// complement (-1).
class CharGroup {
    sign;
    cls;
    constructor(sign, cls) {
        Object.assign(this, { sign, cls });
    }
}
|
|
14
|
+
exports.CharGroup = CharGroup;
|
|
15
|
+
// Rune ranges, as flat [lo, hi] pairs, behind the Perl escapes below.
const code1 = [0x30, 0x39]; // \d
const code2 = [0x9, 0xa, 0xc, 0xd, 0x20, 0x20]; // \s
const code3 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a]; // \w
// Built on the first getPerlGroups() call and reused afterwards.
let _PERL_GROUPS = null;
// Returns the map from Perl escape to CharGroup; the upper-case escape is the
// negated (-1) form of the lower-case one.
const getPerlGroups = () => {
    if (_PERL_GROUPS) {
        return _PERL_GROUPS;
    }
    const groups = new Map();
    groups.set("\\d", new CharGroup(+1, code1));
    groups.set("\\D", new CharGroup(-1, code1));
    groups.set("\\s", new CharGroup(+1, code2));
    groups.set("\\S", new CharGroup(-1, code2));
    groups.set("\\w", new CharGroup(+1, code3));
    groups.set("\\W", new CharGroup(-1, code3));
    _PERL_GROUPS = groups;
    return groups;
};
|
|
32
|
+
exports.getPerlGroups = getPerlGroups;
|
|
33
|
+
// Rune ranges, as flat [lo, hi] pairs, behind the POSIX classes below.
const code4 = [0x30, 0x39, 0x41, 0x5a, 0x61, 0x7a]; // alnum
const code5 = [0x41, 0x5a, 0x61, 0x7a]; // alpha
const code6 = [0x0, 0x7f]; // ascii
const code7 = [0x9, 0x9, 0x20, 0x20]; // blank
const code8 = [0x0, 0x1f, 0x7f, 0x7f]; // cntrl
const code9 = [0x30, 0x39]; // digit
const code10 = [0x21, 0x7e]; // graph
const code11 = [0x61, 0x7a]; // lower
const code12 = [0x20, 0x7e]; // print
const code13 = [0x21, 0x2f, 0x3a, 0x40, 0x5b, 0x60, 0x7b, 0x7e]; // punct
const code14 = [0x9, 0xd, 0x20, 0x20]; // space
const code15 = [0x41, 0x5a]; // upper
const code16 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a]; // word
const code17 = [0x30, 0x39, 0x41, 0x46, 0x61, 0x66]; // xdigit
// Built on the first getPosixGroups() call and reused afterwards.
let _POSIX_GROUPS = null;
// Returns the map from POSIX class name to CharGroup; every class also has a
// negated (-1) entry spelled with a leading '^'.
const getPosixGroups = () => {
    if (_POSIX_GROUPS) {
        return _POSIX_GROUPS;
    }
    const groups = new Map();
    groups.set("[:alnum:]", new CharGroup(+1, code4));
    groups.set("[:^alnum:]", new CharGroup(-1, code4));
    groups.set("[:alpha:]", new CharGroup(+1, code5));
    groups.set("[:^alpha:]", new CharGroup(-1, code5));
    groups.set("[:ascii:]", new CharGroup(+1, code6));
    groups.set("[:^ascii:]", new CharGroup(-1, code6));
    groups.set("[:blank:]", new CharGroup(+1, code7));
    groups.set("[:^blank:]", new CharGroup(-1, code7));
    groups.set("[:cntrl:]", new CharGroup(+1, code8));
    groups.set("[:^cntrl:]", new CharGroup(-1, code8));
    groups.set("[:digit:]", new CharGroup(+1, code9));
    groups.set("[:^digit:]", new CharGroup(-1, code9));
    groups.set("[:graph:]", new CharGroup(+1, code10));
    groups.set("[:^graph:]", new CharGroup(-1, code10));
    groups.set("[:lower:]", new CharGroup(+1, code11));
    groups.set("[:^lower:]", new CharGroup(-1, code11));
    groups.set("[:print:]", new CharGroup(+1, code12));
    groups.set("[:^print:]", new CharGroup(-1, code12));
    groups.set("[:punct:]", new CharGroup(+1, code13));
    groups.set("[:^punct:]", new CharGroup(-1, code13));
    groups.set("[:space:]", new CharGroup(+1, code14));
    groups.set("[:^space:]", new CharGroup(-1, code14));
    groups.set("[:upper:]", new CharGroup(+1, code15));
    groups.set("[:^upper:]", new CharGroup(-1, code15));
    groups.set("[:word:]", new CharGroup(+1, code16));
    groups.set("[:^word:]", new CharGroup(-1, code16));
    groups.set("[:xdigit:]", new CharGroup(+1, code17));
    groups.set("[:^xdigit:]", new CharGroup(-1, code17));
    _POSIX_GROUPS = groups;
    return groups;
};
|
|
83
|
+
exports.getPosixGroups = getPosixGroups;
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.toUpperCase = toUpperCase;
|
|
4
|
+
exports.toLowerCase = toLowerCase;
|
|
5
|
+
/**
 * Simple (one code point to one code point) case mapping helpers.
 */
const ASCII_SIZE = 128;
// Lookup tables for the ASCII range, built on first use.
let _ASCII_TO_UPPER = null;
let _ASCII_TO_LOWER = null;
// Returns the table mapping a-z (97..122) to A-Z and every other ASCII code to itself.
const getAsciiToUpper = () => {
    if (!_ASCII_TO_UPPER) {
        _ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
        for (let i = 0; i < ASCII_SIZE; i++) {
            _ASCII_TO_UPPER[i] = i >= 97 && i <= 122 ? i - 32 : i;
        }
    }
    return _ASCII_TO_UPPER;
};
// Returns the table mapping A-Z (65..90) to a-z and every other ASCII code to itself.
const getAsciiToLower = () => {
    if (!_ASCII_TO_LOWER) {
        _ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
        for (let i = 0; i < ASCII_SIZE; i++) {
            _ASCII_TO_LOWER[i] = i >= 65 && i <= 90 ? i + 32 : i;
        }
    }
    return _ASCII_TO_LOWER;
};
/**
 * Returns the code point of `s` when `s` consists of exactly one code point,
 * and undefined otherwise (empty string or more than one code point).
 *
 * `s.length` counts UTF-16 code units, so a single code point above U+FFFF has
 * length 2; comparing against the expected unit count keeps such strings from
 * being mistaken for multi-character expansions.
 */
function soleCodePoint(s) {
    const cp = s.codePointAt(0);
    if (cp === undefined) {
        return undefined;
    }
    const units = cp > 0xffff ? 2 : 1;
    return s.length === units ? cp : undefined;
}
/**
 * Maps `codepoint` to its upper-case code point.
 *
 * The mapping is only applied when it is a simple, reversible one: the upper-case
 * form must be a single code point whose lower-case form is `codepoint` again.
 * Otherwise (for example U+00DF, which upper-cases to "SS") `codepoint` is
 * returned unchanged. ASCII input is answered from a lookup table.
 *
 * @param codepoint the code point to convert
 * @returns the upper-case code point, or `codepoint` when no simple mapping exists
 */
function toUpperCase(codepoint) {
    if (codepoint < ASCII_SIZE)
        return getAsciiToUpper()[codepoint];
    const upper = soleCodePoint(String.fromCodePoint(codepoint).toUpperCase());
    if (upper === undefined) {
        return codepoint;
    }
    // Accept the mapping only when lower-casing leads straight back.
    const back = soleCodePoint(String.fromCodePoint(upper).toLowerCase());
    return back === codepoint ? upper : codepoint;
}
/**
 * Maps `codepoint` to its lower-case code point.
 *
 * The mapping is only applied when it is a simple, reversible one: the lower-case
 * form must be a single code point whose upper-case form is `codepoint` again.
 * Otherwise `codepoint` is returned unchanged. ASCII input is answered from a
 * lookup table.
 *
 * @param codepoint the code point to convert
 * @returns the lower-case code point, or `codepoint` when no simple mapping exists
 */
function toLowerCase(codepoint) {
    if (codepoint < ASCII_SIZE)
        return getAsciiToLower()[codepoint];
    const lower = soleCodePoint(String.fromCodePoint(codepoint).toLowerCase());
    if (lower === undefined) {
        return codepoint;
    }
    // Accept the mapping only when upper-casing leads straight back.
    const back = soleCodePoint(String.fromCodePoint(lower).toUpperCase());
    return back === codepoint ? lower : codepoint;
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { Regexp } from "./Regexp.js";
|
|
2
|
+
import { Prog, PatchList } from "./Prog.js";
|
|
3
|
+
/**
 * One fragment of a compiled regular expression program.
 *
 * @see http://swtch.com/~rsc/regexp/regexp1.html
 * @class
 */
declare class Frag {
    // NOTE(review): presumably the index of the fragment's first instruction — confirm in Compiler.js.
    i: number;
    // NOTE(review): presumably the list of instruction outputs still to be patched — confirm in Prog.js.
    out: PatchList;
    // NOTE(review): presumably whether the fragment can match the empty string — confirm in Compiler.js.
    nullable: boolean;
    /** Every argument is optional; the defaults live in the implementation. */
    constructor(i?: number, out?: PatchList, nullable?: boolean);
}
|
|
15
|
+
/**
 * Compiles a {@code Regexp} (RE2 abstract syntax) into a {@code RE2} (compiled regular
 * expression).
 *
 * {@link #compileRegexp} is the only entry point.
 */
declare class Compiler {
    /** The only entry point: compiles `re` into a program. */
    static compileRegexp(re: Regexp): Prog;
    // NOTE(review): by name, the rune ranges for "any rune" and "any rune except newline" — confirm in Compiler.js.
    static ANY_RUNE(): number[];
    static ANY_RUNE_NOT_NL(): number[];
    /** The program being built. */
    prog: Prog;
    constructor();
    // Fragment builders: each returns a Frag and takes the fragments or arguments it is
    // built from. NOTE(review): exact semantics are defined in Compiler.js, which is not
    // visible from this declaration.
    newInst(op: number): Frag;
    nop(): Frag;
    fail(): Frag;
    cap(arg: number): Frag;
    cat(f1: Frag, f2: Frag): Frag;
    alt(f1: Frag, f2: Frag): Frag;
    loop(f1: Frag, nongreedy: boolean): Frag;
    quest(f1: Frag, nongreedy: boolean): Frag;
    star(f1: Frag, nongreedy: boolean): Frag;
    plus(f1: Frag, nongreedy: boolean): Frag;
    empty(op: number): Frag;
    rune(runes: number[], flags: number): Frag;
    /** Compiles `re` into a fragment. */
    compile(re: Regexp): Frag;
}
|
|
40
|
+
export { Compiler };
|