@bufbuild/re2 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +30 -0
- package/dist/cjs/CharClass.d.ts +30 -0
- package/dist/cjs/CharClass.js +284 -0
- package/dist/cjs/CharGroup.d.ts +8 -0
- package/dist/cjs/CharGroup.js +83 -0
- package/dist/cjs/Codepoint.d.ts +3 -0
- package/dist/cjs/Codepoint.js +62 -0
- package/dist/cjs/Compiler.d.ts +40 -0
- package/dist/cjs/Compiler.js +262 -0
- package/dist/cjs/DFA.d.ts +36 -0
- package/dist/cjs/DFA.js +350 -0
- package/dist/cjs/Inst.d.ts +26 -0
- package/dist/cjs/Inst.js +86 -0
- package/dist/cjs/MachineInput.d.ts +17 -0
- package/dist/cjs/MachineInput.js +72 -0
- package/dist/cjs/Parser.d.ts +111 -0
- package/dist/cjs/Parser.js +1538 -0
- package/dist/cjs/Prefilter.d.ts +19 -0
- package/dist/cjs/Prefilter.js +163 -0
- package/dist/cjs/Prog.d.ts +39 -0
- package/dist/cjs/Prog.js +154 -0
- package/dist/cjs/RE2.d.ts +27 -0
- package/dist/cjs/RE2.js +221 -0
- package/dist/cjs/RE2Flags.d.ts +16 -0
- package/dist/cjs/RE2Flags.js +58 -0
- package/dist/cjs/Regexp.d.ts +43 -0
- package/dist/cjs/Regexp.js +98 -0
- package/dist/cjs/Simplify.d.ts +3 -0
- package/dist/cjs/Simplify.js +230 -0
- package/dist/cjs/Unicode.d.ts +17 -0
- package/dist/cjs/Unicode.js +165 -0
- package/dist/cjs/UnicodeRangeTable.d.ts +12 -0
- package/dist/cjs/UnicodeRangeTable.js +31 -0
- package/dist/cjs/UnicodeTables.d.ts +29 -0
- package/dist/cjs/UnicodeTables.js +571 -0
- package/dist/cjs/Utils.d.ts +22 -0
- package/dist/cjs/Utils.js +119 -0
- package/dist/cjs/__fixtures__/find.d.ts +9 -0
- package/dist/cjs/__fixtures__/find.js +115 -0
- package/dist/cjs/chars.d.ts +2 -0
- package/dist/cjs/chars.js +19 -0
- package/dist/cjs/exceptions.d.ts +55 -0
- package/dist/cjs/exceptions.js +94 -0
- package/dist/cjs/index.d.ts +102 -0
- package/dist/cjs/index.js +173 -0
- package/dist/cjs/package.json +1 -0
- package/dist/cjs/testParser.d.ts +3 -0
- package/dist/cjs/testParser.js +143 -0
- package/dist/esm/CharClass.d.ts +30 -0
- package/dist/esm/CharClass.js +281 -0
- package/dist/esm/CharGroup.d.ts +8 -0
- package/dist/esm/CharGroup.js +78 -0
- package/dist/esm/Codepoint.d.ts +3 -0
- package/dist/esm/Codepoint.js +59 -0
- package/dist/esm/Compiler.d.ts +40 -0
- package/dist/esm/Compiler.js +259 -0
- package/dist/esm/DFA.d.ts +36 -0
- package/dist/esm/DFA.js +347 -0
- package/dist/esm/Inst.d.ts +26 -0
- package/dist/esm/Inst.js +83 -0
- package/dist/esm/MachineInput.d.ts +17 -0
- package/dist/esm/MachineInput.js +68 -0
- package/dist/esm/Parser.d.ts +111 -0
- package/dist/esm/Parser.js +1535 -0
- package/dist/esm/Prefilter.d.ts +19 -0
- package/dist/esm/Prefilter.js +159 -0
- package/dist/esm/Prog.d.ts +39 -0
- package/dist/esm/Prog.js +150 -0
- package/dist/esm/RE2.d.ts +27 -0
- package/dist/esm/RE2.js +218 -0
- package/dist/esm/RE2Flags.d.ts +16 -0
- package/dist/esm/RE2Flags.js +41 -0
- package/dist/esm/Regexp.d.ts +43 -0
- package/dist/esm/Regexp.js +94 -0
- package/dist/esm/Simplify.d.ts +3 -0
- package/dist/esm/Simplify.js +228 -0
- package/dist/esm/Unicode.d.ts +17 -0
- package/dist/esm/Unicode.js +150 -0
- package/dist/esm/UnicodeRangeTable.d.ts +12 -0
- package/dist/esm/UnicodeRangeTable.js +28 -0
- package/dist/esm/UnicodeTables.d.ts +29 -0
- package/dist/esm/UnicodeTables.js +568 -0
- package/dist/esm/Utils.d.ts +22 -0
- package/dist/esm/Utils.js +103 -0
- package/dist/esm/__fixtures__/find.d.ts +9 -0
- package/dist/esm/__fixtures__/find.js +112 -0
- package/dist/esm/chars.d.ts +2 -0
- package/dist/esm/chars.js +14 -0
- package/dist/esm/exceptions.d.ts +55 -0
- package/dist/esm/exceptions.js +86 -0
- package/dist/esm/index.d.ts +102 -0
- package/dist/esm/index.js +163 -0
- package/dist/esm/testParser.d.ts +3 -0
- package/dist/esm/testParser.js +138 -0
- package/package.json +49 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"commonjs"}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.mkCharClass = exports.dumpRegexp = void 0;
|
|
4
|
+
const RE2Flags_js_1 = require("./RE2Flags.js");
|
|
5
|
+
const Regexp_js_1 = require("./Regexp.js");
|
|
6
|
+
const Unicode_js_1 = require("./Unicode.js");
|
|
7
|
+
// Short mnemonic for each Regexp.Op code; dumpRegexp() uses these as node labels.
// Ops missing from this table are rendered by dumpRegexp() as "op<number>".
const OP_NAMES = (() => {
    const op = Regexp_js_1.Regexp.Op;
    return new Map([
        // Leaves
        [op.NO_MATCH, "no"],
        [op.EMPTY_MATCH, "emp"],
        [op.LITERAL, "lit"],
        [op.CHAR_CLASS, "cc"],
        [op.ANY_CHAR_NOT_NL, "dnl"],
        [op.ANY_CHAR, "dot"],
        // Zero-width assertions
        [op.BEGIN_LINE, "bol"],
        [op.END_LINE, "eol"],
        [op.BEGIN_TEXT, "bot"],
        [op.END_TEXT, "eot"],
        [op.WORD_BOUNDARY, "wb"],
        [op.NO_WORD_BOUNDARY, "nwb"],
        // Operators with sub-expressions
        [op.CAPTURE, "cap"],
        [op.STAR, "star"],
        [op.PLUS, "plus"],
        [op.QUEST, "que"],
        [op.REPEAT, "rep"],
        [op.CONCAT, "cat"],
        [op.ALTERNATE, "alt"],
    ]);
})();
|
|
28
|
+
/**
 * Renders a parsed regexp node (and, recursively, its children) as a compact
 * debug string of the form `label{contents}`.
 *
 * The label is the OP_NAMES mnemonic for `re.op` (or `op<N>` for unknown ops),
 * prefixed with "n" for non-greedy repetitions; literals are labelled "lit" or
 * "str" (more than one rune), with a "fold" suffix when FOLD_CASE is set and at
 * least one rune has a different simple case fold.
 *
 * @param re the Regexp node to dump
 * @returns the textual dump of `re`
 */
const dumpRegexp = (re) => {
    const op = Regexp_js_1.Regexp.Op;
    const hasFlag = (bit) => (re.flags & bit) !== 0;
    const dumpSub = (sub) => (0, exports.dumpRegexp)(sub);
    // ---- Part 1: the label in front of the braces.
    let label;
    const opName = OP_NAMES.get(re.op);
    if (opName === undefined) {
        label = `op${re.op}`;
    }
    else if (re.op === op.STAR ||
        re.op === op.PLUS ||
        re.op === op.QUEST ||
        re.op === op.REPEAT) {
        label = (hasFlag(RE2Flags_js_1.NON_GREEDY) ? "n" : "") + opName;
    }
    else if (re.op === op.LITERAL) {
        label = re.runes.length > 1 ? "str" : "lit";
        if (hasFlag(RE2Flags_js_1.FOLD_CASE)) {
            // One foldable rune is enough to mark the whole literal.
            for (let k = 0; k < re.runes.length; k++) {
                const rune = re.runes[k];
                if ((0, Unicode_js_1.simpleFold)(rune) !== rune) {
                    label += "fold";
                    break;
                }
            }
        }
    }
    else {
        label = opName;
    }
    // ---- Part 2: what goes between the braces.
    let inner = "";
    if (re.op === op.END_TEXT) {
        // Distinguish an explicit \z from a `$` that was parsed as end-of-text.
        if (!hasFlag(RE2Flags_js_1.WAS_DOLLAR)) {
            inner = "\\z";
        }
    }
    else if (re.op === op.LITERAL) {
        for (let k = 0; k < re.runes.length; k++) {
            inner += String.fromCodePoint(re.runes[k]);
        }
    }
    else if (re.op === op.CONCAT || re.op === op.ALTERNATE) {
        for (let k = 0; k < re.subs.length; k++) {
            inner += dumpSub(re.subs[k]);
        }
    }
    else if (re.op === op.STAR || re.op === op.PLUS || re.op === op.QUEST) {
        inner = dumpSub(re.subs[0]);
    }
    else if (re.op === op.REPEAT) {
        inner = `${re.min},${re.max} ` + dumpSub(re.subs[0]);
    }
    else if (re.op === op.CAPTURE) {
        if (re.name !== null && re.name.length > 0) {
            inner = re.name + ":";
        }
        inner += dumpSub(re.subs[0]);
    }
    else if (re.op === op.CHAR_CLASS) {
        // runes holds [lo, hi] pairs; a single-rune range is printed once.
        const ranges = [];
        for (let k = 0; k < re.runes.length; k += 2) {
            const lo = re.runes[k];
            const hi = re.runes[k + 1];
            const loHex = `0x${lo.toString(16)}`;
            ranges.push(lo === hi ? loHex : `${loHex}-0x${hi.toString(16)}`);
        }
        inner = ranges.join(" ");
    }
    return `${label}{${inner}}`;
};
|
|
121
|
+
exports.dumpRegexp = dumpRegexp;
|
|
122
|
+
/**
 * Builds a CHAR_CLASS regexp containing every rune in [0, MAX_RUNE] accepted
 * by `f` and returns its dumpRegexp() rendering.
 *
 * `f` is called once for each code point in ascending order; consecutive
 * accepted code points are collapsed into inclusive [lo, hi] pairs.
 *
 * @param f predicate deciding whether a code point belongs to the class
 * @returns the dump string of the resulting character class
 */
const mkCharClass = (f) => {
    const re = new Regexp_js_1.Regexp(Regexp_js_1.Regexp.Op.CHAR_CLASS);
    // Appended in place: re-spreading the array for every run made this scan
    // quadratic in the number of ranges.
    const runes = [];
    let lo = -1; // start of the run currently being accumulated, or -1 if none
    for (let i = 0; i <= Unicode_js_1.MAX_RUNE; i++) {
        if (f(i)) {
            if (lo < 0) {
                lo = i;
            }
        }
        else if (lo >= 0) {
            runes.push(lo, i - 1);
            lo = -1;
        }
    }
    // A run that is still open extends to the last valid rune.
    if (lo >= 0) {
        runes.push(lo, Unicode_js_1.MAX_RUNE);
    }
    re.runes = runes;
    return (0, exports.dumpRegexp)(re);
};
|
|
143
|
+
exports.mkCharClass = mkCharClass;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { UnicodeRangeTable } from "./UnicodeRangeTable.js";
|
|
2
|
+
import type { CharGroup } from "./CharGroup.js";
|
|
3
|
+
/**
 * A "builder"-style helper class for manipulating character classes represented as an array of
 * pairs of runes [lo, hi], each denoting an inclusive interval.
 *
 * The append/clean/negate methods mutate the internal state and return `this`, allowing
 * operations to be chained.
 */
declare class CharClass {
    /**
     * Compares the pair (array[i], array[i+1]) with the pivot pair: `lo` in natural order,
     * ties broken by `hi` in reverse order. Returns a negative, zero or positive number.
     */
    static cmp(array: number[], i: number, pivotFrom: number, pivotTo: number): number;
    /**
     * Quicksorts, in place, the pairs of `array` between the even indices `left` and `right`
     * (both inclusive, each pointing at the first element of a pair) using {@link CharClass.cmp}.
     */
    static qsortIntPair(array: number[], left: number, right: number): void;
    /** Backing storage: inclusive ranges as consecutive [lo, hi] pairs. */
    r: number[];
    /** Length of the prefix of `r` that is in use. */
    len: number;
    /** Wraps `r` without copying it; later operations write into the same array. */
    constructor(r?: number[]);
    /**
     * Returns the ranges as an array. When the whole backing array is in use the backing array
     * itself is returned (not a copy); otherwise a slice of the used prefix is returned.
     */
    toArray(): number[];
    /** Sorts the ranges, then merges overlapping and abutting ones. */
    cleanClass(): this;
    /** Appends the single rune `x`, with its case-fold equivalents when `flags` has FOLD_CASE set. */
    appendLiteral(x: number, flags: number): this;
    /**
     * Appends the inclusive range [lo, hi], widening one of the last two stored ranges instead
     * when the new range overlaps or abuts it.
     */
    appendRange(lo: number, hi: number): this;
    /** Appends [lo, hi] together with the case-folding equivalents of its runes. */
    appendFoldedRange(lo: number, hi: number): this;
    /** Appends every [lo, hi] pair of `x`. Does not mutate `x`. */
    appendClass(x: number[]): this;
    /** Appends the case folding of every [lo, hi] pair of `x`. Does not mutate `x`. */
    appendFoldedClass(x: number[]): this;
    /** Appends the complement of `x` within [0, MAX_RUNE]; `x` is expected to be clean. */
    appendNegatedClass(x: number[]): this;
    /** Appends every rune described by `table`, honouring each entry's stride. */
    appendTable(table: UnicodeRangeTable): this;
    /** Appends the complement of `table` within [0, MAX_RUNE]. */
    appendNegatedTable(table: UnicodeRangeTable): this;
    /** Calls appendNegatedTable() when `sign` is negative, appendTable() otherwise. */
    appendTableWithSign(table: UnicodeRangeTable, sign: number): this;
    /** Replaces this class, in place, by its complement within [0, MAX_RUNE]; the class must already be clean. */
    negateClass(): this;
    /** Calls appendNegatedClass() when `sign` is negative, appendClass() otherwise. */
    appendClassWithSign(x: number[], sign: number): this;
    /** Appends group `g` (negated when `g.sign` is negative), case-folding its class first when `foldCase` is true. */
    appendGroup(g: CharGroup, foldCase: boolean): this;
}
|
|
30
|
+
export { CharClass };
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
import { FOLD_CASE } from "./RE2Flags.js";
|
|
2
|
+
import { MAX_FOLD, MAX_RUNE, MIN_FOLD, simpleFold } from "./Unicode.js";
|
|
3
|
+
import { emptyInts } from "./Utils.js";
|
|
4
|
+
/**
|
|
5
|
+
* A "builder"-style helper class for manipulating character classes represented as an array of
|
|
6
|
+
* pairs of runes [lo, hi], each denoting an inclusive interval.
|
|
7
|
+
*
|
|
8
|
+
* All methods mutate the internal state and return {@code this}, allowing operations to be chained.
|
|
9
|
+
*/
|
|
10
|
+
class CharClass {
|
|
11
|
+
// cmp() returns the ordering of the pair (a[i], a[i+1]) relative to
|
|
12
|
+
// (pivotFrom, pivotTo), where the first component of the pair (lo) is
|
|
13
|
+
// ordered naturally and the second component (hi) is in reverse order.
|
|
14
|
+
static cmp(array, i, pivotFrom, pivotTo) {
|
|
15
|
+
const cmp = array[i] - pivotFrom;
|
|
16
|
+
return cmp !== 0 ? cmp : pivotTo - array[i + 1];
|
|
17
|
+
}
|
|
18
|
+
// qsortIntPair() quicksorts pairs of ints in |array| according to lt().
|
|
19
|
+
// Precondition: |left|, |right|, |this.len| must all be even; |this.len > 1|.
|
|
20
|
+
static qsortIntPair(array, left, right) {
|
|
21
|
+
const pivotIndex = (((left + right) / 2) | 0) & ~1;
|
|
22
|
+
const pivotFrom = array[pivotIndex];
|
|
23
|
+
const pivotTo = array[pivotIndex + 1];
|
|
24
|
+
let i = left;
|
|
25
|
+
let j = right;
|
|
26
|
+
while (i <= j) {
|
|
27
|
+
while (i < right && CharClass.cmp(array, i, pivotFrom, pivotTo) < 0) {
|
|
28
|
+
i += 2;
|
|
29
|
+
}
|
|
30
|
+
while (j > left && CharClass.cmp(array, j, pivotFrom, pivotTo) > 0) {
|
|
31
|
+
j -= 2;
|
|
32
|
+
}
|
|
33
|
+
if (i <= j) {
|
|
34
|
+
if (i !== j) {
|
|
35
|
+
let temp = array[i];
|
|
36
|
+
array[i] = array[j];
|
|
37
|
+
array[j] = temp;
|
|
38
|
+
temp = array[i + 1];
|
|
39
|
+
array[i + 1] = array[j + 1];
|
|
40
|
+
array[j + 1] = temp;
|
|
41
|
+
}
|
|
42
|
+
i += 2;
|
|
43
|
+
j -= 2;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
if (left < j) {
|
|
47
|
+
CharClass.qsortIntPair(array, left, j);
|
|
48
|
+
}
|
|
49
|
+
if (i < right) {
|
|
50
|
+
CharClass.qsortIntPair(array, i, right);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
r;
|
|
54
|
+
len;
|
|
55
|
+
constructor(r = emptyInts()) {
|
|
56
|
+
this.r = r; // inclusive ranges, pairs of [lo,hi]. r.length is even.
|
|
57
|
+
this.len = r.length; // prefix of |r| that is defined. Even.
|
|
58
|
+
}
|
|
59
|
+
// Returns the character class as an int array. Subsequent CharClass
|
|
60
|
+
// operations may mutate this array, so typically this is the last operation
|
|
61
|
+
// performed on a given CharClass instance.
|
|
62
|
+
toArray() {
|
|
63
|
+
if (this.len === this.r.length) {
|
|
64
|
+
return this.r;
|
|
65
|
+
}
|
|
66
|
+
return this.r.slice(0, this.len);
|
|
67
|
+
}
|
|
68
|
+
// cleanClass() sorts the ranges (pairs of elements) of this CharClass,
|
|
69
|
+
// merges them, and eliminates duplicates.
|
|
70
|
+
cleanClass() {
|
|
71
|
+
if (this.len < 4) {
|
|
72
|
+
return this;
|
|
73
|
+
}
|
|
74
|
+
// Sort by lo increasing, hi decreasing to break ties.
|
|
75
|
+
CharClass.qsortIntPair(this.r, 0, this.len - 2);
|
|
76
|
+
// Merge abutting, overlapping.
|
|
77
|
+
let w = 2; // write index
|
|
78
|
+
for (let i = 2; i < this.len; i += 2) {
|
|
79
|
+
{
|
|
80
|
+
const lo = this.r[i];
|
|
81
|
+
const hi = this.r[i + 1];
|
|
82
|
+
if (lo <= this.r[w - 1] + 1) {
|
|
83
|
+
// merge with previous range
|
|
84
|
+
if (hi > this.r[w - 1]) {
|
|
85
|
+
this.r[w - 1] = hi;
|
|
86
|
+
}
|
|
87
|
+
continue;
|
|
88
|
+
}
|
|
89
|
+
// new disjoint range
|
|
90
|
+
this.r[w] = lo;
|
|
91
|
+
this.r[w + 1] = hi;
|
|
92
|
+
w += 2;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
this.len = w;
|
|
96
|
+
return this;
|
|
97
|
+
}
|
|
98
|
+
// appendLiteral() appends the literal |x| to this CharClass.
|
|
99
|
+
appendLiteral(x, flags) {
|
|
100
|
+
return (flags & FOLD_CASE) !== 0
|
|
101
|
+
? this.appendFoldedRange(x, x)
|
|
102
|
+
: this.appendRange(x, x);
|
|
103
|
+
}
|
|
104
|
+
// appendRange() appends the range [lo-hi] (inclusive) to this CharClass.
|
|
105
|
+
appendRange(lo, hi) {
|
|
106
|
+
// Expand last range or next to last range if it overlaps or abuts.
|
|
107
|
+
// Checking two ranges helps when appending case-folded
|
|
108
|
+
// alphabets, so that one range can be expanding A-Z and the
|
|
109
|
+
// other expanding a-z.
|
|
110
|
+
if (this.len > 0) {
|
|
111
|
+
for (let i = 2; i <= 4; i += 2) {
|
|
112
|
+
// twice, using i=2, i=4
|
|
113
|
+
if (this.len >= i) {
|
|
114
|
+
const rlo = this.r[this.len - i];
|
|
115
|
+
const rhi = this.r[this.len - i + 1];
|
|
116
|
+
if (lo <= rhi + 1 && rlo <= hi + 1) {
|
|
117
|
+
if (lo < rlo) {
|
|
118
|
+
this.r[this.len - i] = lo;
|
|
119
|
+
}
|
|
120
|
+
if (hi > rhi) {
|
|
121
|
+
this.r[this.len - i + 1] = hi;
|
|
122
|
+
}
|
|
123
|
+
return this;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
this.r[this.len++] = lo;
|
|
129
|
+
this.r[this.len++] = hi;
|
|
130
|
+
return this;
|
|
131
|
+
}
|
|
132
|
+
// appendFoldedRange() appends the range [lo-hi] and its case
|
|
133
|
+
// folding-equivalent runes to this CharClass.
|
|
134
|
+
appendFoldedRange(lo, hi) {
|
|
135
|
+
// Optimizations.
|
|
136
|
+
if (lo <= MIN_FOLD && hi >= MAX_FOLD) {
|
|
137
|
+
// Range is full: folding can't add more.
|
|
138
|
+
return this.appendRange(lo, hi);
|
|
139
|
+
}
|
|
140
|
+
if (hi < MIN_FOLD || lo > MAX_FOLD) {
|
|
141
|
+
// Range is outside folding possibilities.
|
|
142
|
+
return this.appendRange(lo, hi);
|
|
143
|
+
}
|
|
144
|
+
if (lo < MIN_FOLD) {
|
|
145
|
+
// [lo, minFold-1] needs no folding.
|
|
146
|
+
this.appendRange(lo, MIN_FOLD - 1);
|
|
147
|
+
lo = MIN_FOLD;
|
|
148
|
+
}
|
|
149
|
+
if (hi > MAX_FOLD) {
|
|
150
|
+
// [maxFold+1, hi] needs no folding.
|
|
151
|
+
this.appendRange(MAX_FOLD + 1, hi);
|
|
152
|
+
hi = MAX_FOLD;
|
|
153
|
+
}
|
|
154
|
+
// Brute force. Depend on appendRange to coalesce ranges on the fly.
|
|
155
|
+
for (let c = lo; c <= hi; c++) {
|
|
156
|
+
this.appendRange(c, c);
|
|
157
|
+
for (let f = simpleFold(c); f !== c; f = simpleFold(f)) {
|
|
158
|
+
this.appendRange(f, f);
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
return this;
|
|
162
|
+
}
|
|
163
|
+
// appendClass() appends the class |x| to this CharClass.
|
|
164
|
+
// It assumes |x| is clean. Does not mutate |x|.
|
|
165
|
+
appendClass(x) {
|
|
166
|
+
for (let i = 0; i < x.length; i += 2) {
|
|
167
|
+
this.appendRange(x[i], x[i + 1]);
|
|
168
|
+
}
|
|
169
|
+
return this;
|
|
170
|
+
}
|
|
171
|
+
// appendFoldedClass() appends the case folding of the class |x| to this
|
|
172
|
+
// CharClass. Does not mutate |x|.
|
|
173
|
+
appendFoldedClass(x) {
|
|
174
|
+
for (let i = 0; i < x.length; i += 2) {
|
|
175
|
+
this.appendFoldedRange(x[i], x[i + 1]);
|
|
176
|
+
}
|
|
177
|
+
return this;
|
|
178
|
+
}
|
|
179
|
+
// appendNegatedClass() append the negation of the class |x| to this
|
|
180
|
+
// CharClass. It assumes |x| is clean. Does not mutate |x|.
|
|
181
|
+
appendNegatedClass(x) {
|
|
182
|
+
let nextLo = 0;
|
|
183
|
+
for (let i = 0; i < x.length; i += 2) {
|
|
184
|
+
const lo = x[i];
|
|
185
|
+
const hi = x[i + 1];
|
|
186
|
+
if (nextLo <= lo - 1) {
|
|
187
|
+
this.appendRange(nextLo, lo - 1);
|
|
188
|
+
}
|
|
189
|
+
nextLo = hi + 1;
|
|
190
|
+
}
|
|
191
|
+
if (nextLo <= MAX_RUNE) {
|
|
192
|
+
this.appendRange(nextLo, MAX_RUNE);
|
|
193
|
+
}
|
|
194
|
+
return this;
|
|
195
|
+
}
|
|
196
|
+
// appendTable() appends the Unicode range table |table| to this CharClass.
|
|
197
|
+
// Does not mutate |table|.
|
|
198
|
+
appendTable(table) {
|
|
199
|
+
for (let i = 0; i < table.length; ++i) {
|
|
200
|
+
const lo = table.getLo(i);
|
|
201
|
+
const hi = table.getHi(i);
|
|
202
|
+
const stride = table.getStride(i);
|
|
203
|
+
if (stride === 1) {
|
|
204
|
+
this.appendRange(lo, hi);
|
|
205
|
+
continue;
|
|
206
|
+
}
|
|
207
|
+
for (let c = lo; c <= hi; c += stride) {
|
|
208
|
+
this.appendRange(c, c);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
return this;
|
|
212
|
+
}
|
|
213
|
+
// appendNegatedTable() returns the result of appending the negation of range
|
|
214
|
+
// table |table| to this CharClass. Does not mutate |table|.
|
|
215
|
+
appendNegatedTable(table) {
|
|
216
|
+
let nextLo = 0;
|
|
217
|
+
for (let i = 0; i < table.length; ++i) {
|
|
218
|
+
const lo = table.getLo(i);
|
|
219
|
+
const hi = table.getHi(i);
|
|
220
|
+
const stride = table.getStride(i);
|
|
221
|
+
if (stride === 1) {
|
|
222
|
+
if (nextLo <= lo - 1) {
|
|
223
|
+
this.appendRange(nextLo, lo - 1);
|
|
224
|
+
}
|
|
225
|
+
nextLo = hi + 1;
|
|
226
|
+
continue;
|
|
227
|
+
}
|
|
228
|
+
for (let c = lo; c <= hi; c += stride) {
|
|
229
|
+
if (nextLo <= c - 1) {
|
|
230
|
+
this.appendRange(nextLo, c - 1);
|
|
231
|
+
}
|
|
232
|
+
nextLo = c + 1;
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
if (nextLo <= MAX_RUNE) {
|
|
236
|
+
this.appendRange(nextLo, MAX_RUNE);
|
|
237
|
+
}
|
|
238
|
+
return this;
|
|
239
|
+
}
|
|
240
|
+
// appendTableWithSign() calls append{,Negated}Table depending on sign.
|
|
241
|
+
// Does not mutate |table|.
|
|
242
|
+
appendTableWithSign(table, sign) {
|
|
243
|
+
return sign < 0 ? this.appendNegatedTable(table) : this.appendTable(table);
|
|
244
|
+
}
|
|
245
|
+
// negateClass() negates this CharClass, which must already be clean.
|
|
246
|
+
negateClass() {
|
|
247
|
+
let nextLo = 0; // lo end of next class to add
|
|
248
|
+
let w = 0; // write index
|
|
249
|
+
for (let i = 0; i < this.len; i += 2) {
|
|
250
|
+
const lo = this.r[i];
|
|
251
|
+
const hi = this.r[i + 1];
|
|
252
|
+
if (nextLo <= lo - 1) {
|
|
253
|
+
this.r[w] = nextLo;
|
|
254
|
+
this.r[w + 1] = lo - 1;
|
|
255
|
+
w += 2;
|
|
256
|
+
}
|
|
257
|
+
nextLo = hi + 1;
|
|
258
|
+
}
|
|
259
|
+
this.len = w;
|
|
260
|
+
if (nextLo <= MAX_RUNE) {
|
|
261
|
+
this.r[this.len++] = nextLo;
|
|
262
|
+
this.r[this.len++] = MAX_RUNE;
|
|
263
|
+
}
|
|
264
|
+
return this;
|
|
265
|
+
}
|
|
266
|
+
// appendClassWithSign() calls appendClass() if sign is +1 or
|
|
267
|
+
// appendNegatedClass if sign is -1. Does not mutate |x|.
|
|
268
|
+
appendClassWithSign(x, sign) {
|
|
269
|
+
return sign < 0 ? this.appendNegatedClass(x) : this.appendClass(x);
|
|
270
|
+
}
|
|
271
|
+
// appendGroup() appends CharGroup |g| to this CharClass, folding iff
|
|
272
|
+
// |foldCase|. Does not mutate |g|.
|
|
273
|
+
appendGroup(g, foldCase) {
|
|
274
|
+
let cls = g.cls;
|
|
275
|
+
if (foldCase) {
|
|
276
|
+
cls = new CharClass().appendFoldedClass(cls).cleanClass().toArray();
|
|
277
|
+
}
|
|
278
|
+
return this.appendClassWithSign(cls, g.sign);
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
export { CharClass };
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/** A named, possibly negated, set of rune ranges (Perl `\d`-style or POSIX `[:alpha:]`-style). */
declare class CharGroup {
    /** +1 for the class itself, -1 for its negation (the group tables only use these two values). */
    sign: number;
    /** Inclusive rune ranges as consecutive [lo, hi] pairs. */
    cls: number[];
    /** Stores `sign` and `cls` as given; `cls` is not copied. */
    constructor(sign: number, cls: number[]);
}
|
|
6
|
+
/** Returns the map of Perl character classes, keyed by escape (`\d`, `\D`, `\s`, `\S`, `\w`, `\W`). */
declare const getPerlGroups: () => Map<string, CharGroup>;
/** Returns the map of POSIX character classes, keyed by bracket name (`[:alnum:]`, `[:^alnum:]`, ...). */
declare const getPosixGroups: () => Map<string, CharGroup>;
|
|
8
|
+
export { CharGroup, getPerlGroups, getPosixGroups };
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
// GENERATED BY tools/scripts/make_perl_groups.pl; DO NOT EDIT.
|
|
2
|
+
// ./tools/scripts/make_perl_groups.pl > src/CharGroup.js
|
|
3
|
+
// A character group: a list of inclusive [lo, hi] rune pairs plus a sign
// telling whether the group stands for those runes (+1) or their negation (-1).
class CharGroup {
    sign;
    cls;
    constructor(sign, cls) {
        // Both values are stored as given; |cls| is shared, not copied.
        Object.assign(this, { sign, cls });
    }
}
|
|
11
|
+
// Rune ranges ([lo, hi] pairs) behind the Perl escapes.
const code1 = [0x30, 0x39]; // digits 0-9
const code2 = [0x9, 0xa, 0xc, 0xd, 0x20, 0x20]; // \t \n, \f \r, space
const code3 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a]; // 0-9, A-Z, _, a-z
// Cache for getPerlGroups(); stays null until the first call.
let _PERL_GROUPS = null;
// Returns the Perl escape -> CharGroup map, building it on first use.
// Each upper-case escape is the negation (-1) of its lower-case twin and
// shares the same range array.
const getPerlGroups = () => {
    _PERL_GROUPS ??= new Map([
        ["\\d", new CharGroup(+1, code1)],
        ["\\D", new CharGroup(-1, code1)],
        ["\\s", new CharGroup(+1, code2)],
        ["\\S", new CharGroup(-1, code2)],
        ["\\w", new CharGroup(+1, code3)],
        ["\\W", new CharGroup(-1, code3)],
    ]);
    return _PERL_GROUPS;
};
|
|
28
|
+
// Rune ranges ([lo, hi] pairs) behind the POSIX bracket classes.
const code4 = [0x30, 0x39, 0x41, 0x5a, 0x61, 0x7a]; // alnum
const code5 = [0x41, 0x5a, 0x61, 0x7a]; // alpha
const code6 = [0x0, 0x7f]; // ascii
const code7 = [0x9, 0x9, 0x20, 0x20]; // blank
const code8 = [0x0, 0x1f, 0x7f, 0x7f]; // cntrl
const code9 = [0x30, 0x39]; // digit
const code10 = [0x21, 0x7e]; // graph
const code11 = [0x61, 0x7a]; // lower
const code12 = [0x20, 0x7e]; // print
const code13 = [0x21, 0x2f, 0x3a, 0x40, 0x5b, 0x60, 0x7b, 0x7e]; // punct
const code14 = [0x9, 0xd, 0x20, 0x20]; // space
const code15 = [0x41, 0x5a]; // upper
const code16 = [0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a]; // word
const code17 = [0x30, 0x39, 0x41, 0x46, 0x61, 0x66]; // xdigit
// Cache for getPosixGroups(); stays null until the first call.
let _POSIX_GROUPS = null;
// Returns the POSIX class name -> CharGroup map, building it on first use.
// Every "[:^name:]" entry is the negation (-1) of "[:name:]" and shares the
// same range array.
const getPosixGroups = () => {
    _POSIX_GROUPS ??= new Map([
        ["[:alnum:]", new CharGroup(+1, code4)],
        ["[:^alnum:]", new CharGroup(-1, code4)],
        ["[:alpha:]", new CharGroup(+1, code5)],
        ["[:^alpha:]", new CharGroup(-1, code5)],
        ["[:ascii:]", new CharGroup(+1, code6)],
        ["[:^ascii:]", new CharGroup(-1, code6)],
        ["[:blank:]", new CharGroup(+1, code7)],
        ["[:^blank:]", new CharGroup(-1, code7)],
        ["[:cntrl:]", new CharGroup(+1, code8)],
        ["[:^cntrl:]", new CharGroup(-1, code8)],
        ["[:digit:]", new CharGroup(+1, code9)],
        ["[:^digit:]", new CharGroup(-1, code9)],
        ["[:graph:]", new CharGroup(+1, code10)],
        ["[:^graph:]", new CharGroup(-1, code10)],
        ["[:lower:]", new CharGroup(+1, code11)],
        ["[:^lower:]", new CharGroup(-1, code11)],
        ["[:print:]", new CharGroup(+1, code12)],
        ["[:^print:]", new CharGroup(-1, code12)],
        ["[:punct:]", new CharGroup(+1, code13)],
        ["[:^punct:]", new CharGroup(-1, code13)],
        ["[:space:]", new CharGroup(+1, code14)],
        ["[:^space:]", new CharGroup(-1, code14)],
        ["[:upper:]", new CharGroup(+1, code15)],
        ["[:^upper:]", new CharGroup(-1, code15)],
        ["[:word:]", new CharGroup(+1, code16)],
        ["[:^word:]", new CharGroup(-1, code16)],
        ["[:xdigit:]", new CharGroup(+1, code17)],
        ["[:^xdigit:]", new CharGroup(-1, code17)],
    ]);
    return _POSIX_GROUPS;
};
|
|
78
|
+
export { CharGroup, getPerlGroups, getPosixGroups };
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
 * Various constants and helpers for Unicode code points.
 */
const ASCII_SIZE = 128;
// Lookup tables for the ASCII fast path; built on first use.
let _ASCII_TO_UPPER = null;
let _ASCII_TO_LOWER = null;
// Returns the ASCII upper-casing table: 'a'..'z' (97..122) map to 'A'..'Z',
// every other entry maps to itself.
const getAsciiToUpper = () => {
    if (!_ASCII_TO_UPPER) {
        _ASCII_TO_UPPER = new Int32Array(ASCII_SIZE);
        for (let i = 0; i < ASCII_SIZE; i++) {
            _ASCII_TO_UPPER[i] = i >= 97 && i <= 122 ? i - 32 : i;
        }
    }
    return _ASCII_TO_UPPER;
};
// Returns the ASCII lower-casing table: 'A'..'Z' (65..90) map to 'a'..'z',
// every other entry maps to itself.
const getAsciiToLower = () => {
    if (!_ASCII_TO_LOWER) {
        _ASCII_TO_LOWER = new Int32Array(ASCII_SIZE);
        for (let i = 0; i < ASCII_SIZE; i++) {
            _ASCII_TO_LOWER[i] = i >= 65 && i <= 90 ? i + 32 : i;
        }
    }
    return _ASCII_TO_LOWER;
};
// Returns the code point of |s| when |s| is exactly one code point long,
// otherwise undefined. Measuring in code points rather than UTF-16 units
// matters: a supplementary-plane character has String length 2 but is still
// a single code point, whereas an expansion such as "SS" is two.
const soleCodePoint = (s) => {
    const cp = s.codePointAt(0);
    if (cp === undefined) {
        return undefined;
    }
    return String.fromCodePoint(cp) === s ? cp : undefined;
};
/**
 * Maps a code point to its upper-case counterpart.
 *
 * The mapping is applied only when it is one-to-one and reversible: the
 * upper-cased string must be a single code point, and lower-casing that code
 * point must give back the input. Otherwise the input is returned unchanged.
 *
 * @param codepoint the code point to convert
 * @returns the upper-case code point, or `codepoint` itself
 */
function toUpperCase(codepoint) {
    if (codepoint < ASCII_SIZE)
        return getAsciiToUpper()[codepoint];
    const upper = soleCodePoint(String.fromCodePoint(codepoint).toUpperCase());
    if (upper === undefined) {
        // Multi-code-point expansion (e.g. U+00DF -> "SS"): no simple mapping.
        return codepoint;
    }
    const roundTrip = soleCodePoint(String.fromCodePoint(upper).toLowerCase());
    return roundTrip === codepoint ? upper : codepoint;
}
/**
 * Maps a code point to its lower-case counterpart.
 *
 * The mapping is applied only when it is one-to-one and reversible: the
 * lower-cased string must be a single code point, and upper-casing that code
 * point must give back the input. Otherwise the input is returned unchanged.
 *
 * @param codepoint the code point to convert
 * @returns the lower-case code point, or `codepoint` itself
 */
function toLowerCase(codepoint) {
    if (codepoint < ASCII_SIZE)
        return getAsciiToLower()[codepoint];
    const lower = soleCodePoint(String.fromCodePoint(codepoint).toLowerCase());
    if (lower === undefined) {
        // Multi-code-point expansion (e.g. U+0130 -> "i" + U+0307): no simple mapping.
        return codepoint;
    }
    const roundTrip = soleCodePoint(String.fromCodePoint(lower).toUpperCase());
    return roundTrip === codepoint ? lower : codepoint;
}
|
|
59
|
+
export { toUpperCase, toLowerCase };
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { Regexp } from "./Regexp.js";
|
|
2
|
+
import { Prog, PatchList } from "./Prog.js";
|
|
3
|
+
/**
 * A fragment of a compiled regular expression program.
 *
 * @see http://swtch.com/~rsc/regexp/regexp1.html
 */
declare class Frag {
    // NOTE(review): the implementation is not part of this declaration file; the
    // field notes below are inferred from names and types only — confirm against Compiler.js.
    /** Presumably the index of the fragment's first instruction in the program — TODO confirm. */
    i: number;
    /** Patch list attached to the fragment; presumably its still-unresolved exits — TODO confirm. */
    out: PatchList;
    /** Presumably whether the fragment can match the empty string — TODO confirm. */
    nullable: boolean;
    /** All three arguments are optional; their defaults are defined in the implementation. */
    constructor(i?: number, out?: PatchList, nullable?: boolean);
}
|
|
15
|
+
/**
 * Compiler from `Regexp` (RE2 abstract syntax) to a compiled `Prog`.
 *
 * The intended entry point is {@link Compiler.compileRegexp}.
 *
 * NOTE(review): the remaining members are declared public here, but their semantics are not
 * visible from this declaration file. By name they look like the fragment-building steps used
 * while compiling — confirm against Compiler.js before relying on them.
 */
declare class Compiler {
    /** The program associated with this compiler instance. */
    prog: Prog;
    /** Returns a rune array; by name, the ranges matching any rune except newline — TODO confirm. */
    static ANY_RUNE_NOT_NL(): number[];
    /** Returns a rune array; by name, the ranges matching any rune — TODO confirm. */
    static ANY_RUNE(): number[];
    /** Compiles the parsed expression `re` and returns the resulting `Prog`. */
    static compileRegexp(re: Regexp): Prog;
    constructor();
    // Each method below returns a Frag.
    newInst(op: number): Frag;
    nop(): Frag;
    fail(): Frag;
    cap(arg: number): Frag;
    // Combinators taking two fragments.
    cat(f1: Frag, f2: Frag): Frag;
    alt(f1: Frag, f2: Frag): Frag;
    // Repetition helpers; `nongreedy` selects the non-greedy variant.
    loop(f1: Frag, nongreedy: boolean): Frag;
    quest(f1: Frag, nongreedy: boolean): Frag;
    star(f1: Frag, nongreedy: boolean): Frag;
    plus(f1: Frag, nongreedy: boolean): Frag;
    empty(op: number): Frag;
    rune(runes: number[], flags: number): Frag;
    /** Compiles `re` into a fragment. */
    compile(re: Regexp): Frag;
}
|
|
40
|
+
export { Compiler };
|