re2js 2.2.1 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +2 -0
- package/build/index.cjs.cjs +83 -54
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +0 -1
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +83 -54
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +83 -54
- package/build/index.umd.js.map +1 -1
- package/package.json +5 -3
package/build/index.umd.js
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.2.1
|
|
6
|
-
* @author
|
|
5
|
+
* @version v2.2.3
|
|
6
|
+
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
9
9
|
* @license MIT
|
|
@@ -100,7 +100,7 @@
|
|
|
100
100
|
}
|
|
101
101
|
class Codepoint {
|
|
102
102
|
// codePointAt(0)
|
|
103
|
-
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
103
|
+
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ["'", 39], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['`', 96], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
104
104
|
|
|
105
105
|
// convert unicode codepoint to upper case codepoint
|
|
106
106
|
// return same codepoint, if cannot do it (or codepoint not have upper variation)
|
|
@@ -154,10 +154,6 @@
|
|
|
154
154
|
getStride(index) {
|
|
155
155
|
return this.isStride1 ? 1 : this.data[index * this.SIZE + 2];
|
|
156
156
|
}
|
|
157
|
-
get(index) {
|
|
158
|
-
const i = index * this.SIZE;
|
|
159
|
-
return [this.data[i], this.data[i + 1], this.getStride(index)];
|
|
160
|
-
}
|
|
161
157
|
get length() {
|
|
162
158
|
return this.data.length / this.SIZE;
|
|
163
159
|
}
|
|
@@ -654,6 +650,9 @@
|
|
|
654
650
|
static emptyInts() {
|
|
655
651
|
return [];
|
|
656
652
|
}
|
|
653
|
+
static isByteArray(input) {
|
|
654
|
+
return Array.isArray(input) || input instanceof Uint8Array;
|
|
655
|
+
}
|
|
657
656
|
|
|
658
657
|
// Returns true iff |c| is an ASCII letter or decimal digit.
|
|
659
658
|
static isalnum(c) {
|
|
@@ -955,7 +954,7 @@
|
|
|
955
954
|
* @returns {number[]}
|
|
956
955
|
*/
|
|
957
956
|
asBytes() {
|
|
958
|
-
return this.charSequence.toString()
|
|
957
|
+
return Utils.stringToUtf8ByteArray(this.charSequence.toString());
|
|
959
958
|
}
|
|
960
959
|
|
|
961
960
|
/**
|
|
@@ -980,7 +979,7 @@
|
|
|
980
979
|
* @returns {Utf8MatcherInput}
|
|
981
980
|
*/
|
|
982
981
|
static utf8(input) {
|
|
983
|
-
if (
|
|
982
|
+
if (Utils.isByteArray(input)) {
|
|
984
983
|
return new Utf8MatcherInput(input);
|
|
985
984
|
}
|
|
986
985
|
return new Utf8MatcherInput(Utils.stringToUtf8ByteArray(input));
|
|
@@ -1112,10 +1111,10 @@
|
|
|
1112
1111
|
if (start < this.start) {
|
|
1113
1112
|
start = this.start;
|
|
1114
1113
|
}
|
|
1115
|
-
r1 = this.step(start) >> 3;
|
|
1114
|
+
r1 = this.step(start - this.start) >> 3;
|
|
1116
1115
|
}
|
|
1117
1116
|
}
|
|
1118
|
-
const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
|
|
1117
|
+
const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
|
|
1119
1118
|
return Utils.emptyOpContext(r1, r2);
|
|
1120
1119
|
}
|
|
1121
1120
|
|
|
@@ -1197,14 +1196,17 @@
|
|
|
1197
1196
|
index(re2, pos) {
|
|
1198
1197
|
pos += this.start;
|
|
1199
1198
|
const i = this.charSequence.indexOf(re2.prefix, pos);
|
|
1200
|
-
|
|
1199
|
+
if (i < 0 || i > this.end - re2.prefix.length) {
|
|
1200
|
+
return -1;
|
|
1201
|
+
}
|
|
1202
|
+
return i - pos;
|
|
1201
1203
|
}
|
|
1202
1204
|
|
|
1203
1205
|
// Returns a bitmask of EMPTY_* flags.
|
|
1204
1206
|
context(pos) {
|
|
1205
1207
|
pos += this.start;
|
|
1206
|
-
const r1 = pos >
|
|
1207
|
-
const r2 = pos < this.
|
|
1208
|
+
const r1 = pos > this.start && pos <= this.end ? this.charSequence.codePointAt(pos - 1) : -1;
|
|
1209
|
+
const r2 = pos < this.end ? this.charSequence.codePointAt(pos) : -1;
|
|
1208
1210
|
return Utils.emptyOpContext(r1, r2);
|
|
1209
1211
|
}
|
|
1210
1212
|
prefixLength(re2) {
|
|
@@ -1392,7 +1394,7 @@
|
|
|
1392
1394
|
this.numberOfInstructions = re2.numberOfInstructions();
|
|
1393
1395
|
if (input instanceof MatcherInputBase) {
|
|
1394
1396
|
this.resetMatcherInput(input);
|
|
1395
|
-
} else if (
|
|
1397
|
+
} else if (Utils.isByteArray(input)) {
|
|
1396
1398
|
this.resetMatcherInput(MatcherInput.utf8(input));
|
|
1397
1399
|
} else {
|
|
1398
1400
|
this.resetMatcherInput(MatcherInput.utf16(input));
|
|
@@ -1554,10 +1556,7 @@
|
|
|
1554
1556
|
if (group === 0 || this.hasGroups) {
|
|
1555
1557
|
return;
|
|
1556
1558
|
}
|
|
1557
|
-
|
|
1558
|
-
if (end > this.matcherInputLength) {
|
|
1559
|
-
end = this.matcherInputLength;
|
|
1560
|
-
}
|
|
1559
|
+
const end = this.matcherInputLength;
|
|
1561
1560
|
const res = this.patternInput.re2().matchMachineInput(this.matcherInput, this.groups[0], end, this.anchorFlag, 1 + this.patternGroupCount);
|
|
1562
1561
|
const ok = res[0];
|
|
1563
1562
|
if (!ok) {
|
|
@@ -1753,7 +1752,10 @@
|
|
|
1753
1752
|
throw new RE2JSGroupException("named capture group is missing trailing '}'");
|
|
1754
1753
|
}
|
|
1755
1754
|
const groupName = replacement.substring(i + 1, j);
|
|
1756
|
-
|
|
1755
|
+
const groupVal = this.group(groupName);
|
|
1756
|
+
if (groupVal !== null) {
|
|
1757
|
+
res += groupVal;
|
|
1758
|
+
}
|
|
1757
1759
|
last = j + 1;
|
|
1758
1760
|
i = j;
|
|
1759
1761
|
continue;
|
|
@@ -1799,6 +1801,22 @@
|
|
|
1799
1801
|
i++;
|
|
1800
1802
|
last = i + 1;
|
|
1801
1803
|
continue;
|
|
1804
|
+
} else if (Codepoint.CODES.get('`') === c) {
|
|
1805
|
+
if (last < i) {
|
|
1806
|
+
res += replacement.substring(last, i);
|
|
1807
|
+
}
|
|
1808
|
+
res += this.substring(0, this.start(0));
|
|
1809
|
+
i++;
|
|
1810
|
+
last = i + 1;
|
|
1811
|
+
continue;
|
|
1812
|
+
} else if (Codepoint.CODES.get("'") === c) {
|
|
1813
|
+
if (last < i) {
|
|
1814
|
+
res += replacement.substring(last, i);
|
|
1815
|
+
}
|
|
1816
|
+
res += this.substring(this.end(0), this.matcherInputLength);
|
|
1817
|
+
i++;
|
|
1818
|
+
last = i + 1;
|
|
1819
|
+
continue;
|
|
1802
1820
|
} else if (Codepoint.CODES.get('1') <= c && c <= Codepoint.CODES.get('9')) {
|
|
1803
1821
|
let n = c - Codepoint.CODES.get('0');
|
|
1804
1822
|
if (last < i) {
|
|
@@ -1841,7 +1859,10 @@
|
|
|
1841
1859
|
}
|
|
1842
1860
|
const groupName = replacement.substring(i + 1, j);
|
|
1843
1861
|
if (Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)) {
|
|
1844
|
-
|
|
1862
|
+
const groupVal = this.group(groupName);
|
|
1863
|
+
if (groupVal !== null) {
|
|
1864
|
+
res += groupVal;
|
|
1865
|
+
}
|
|
1845
1866
|
} else {
|
|
1846
1867
|
res += `$<${groupName}>`;
|
|
1847
1868
|
}
|
|
@@ -3347,7 +3368,9 @@
|
|
|
3347
3368
|
}
|
|
3348
3369
|
runes.sort((a, b) => a - b);
|
|
3349
3370
|
} else {
|
|
3350
|
-
|
|
3371
|
+
for (let j = 0; j < inst.runes.length; j++) {
|
|
3372
|
+
runes.push(inst.runes[j]);
|
|
3373
|
+
}
|
|
3351
3374
|
}
|
|
3352
3375
|
onePassRunes[pc] = runes;
|
|
3353
3376
|
inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
|
|
@@ -4174,7 +4197,9 @@
|
|
|
4174
4197
|
return new Prefilter(Prefilter.Type.NONE);
|
|
4175
4198
|
}
|
|
4176
4199
|
if (s.type === Prefilter.Type.OR) {
|
|
4177
|
-
|
|
4200
|
+
for (let j = 0; j < s.subs.length; j++) {
|
|
4201
|
+
newSubs.push(s.subs[j]);
|
|
4202
|
+
}
|
|
4178
4203
|
} else {
|
|
4179
4204
|
newSubs.push(s);
|
|
4180
4205
|
}
|
|
@@ -4339,13 +4364,6 @@
|
|
|
4339
4364
|
// start every program with a fail instruction, so we'll never want to point
|
|
4340
4365
|
// at its output link.
|
|
4341
4366
|
|
|
4342
|
-
next(l) {
|
|
4343
|
-
const i = this.inst[l >> 1];
|
|
4344
|
-
if ((l & 1) === 0) {
|
|
4345
|
-
return i.out;
|
|
4346
|
-
}
|
|
4347
|
-
return i.arg;
|
|
4348
|
-
}
|
|
4349
4367
|
patch(l, val) {
|
|
4350
4368
|
let head = l.head;
|
|
4351
4369
|
while (head !== 0) {
|
|
@@ -5675,6 +5693,7 @@
|
|
|
5675
5693
|
case Codepoint.CODES.get('6'):
|
|
5676
5694
|
case Codepoint.CODES.get('7'):
|
|
5677
5695
|
{
|
|
5696
|
+
// Single non-zero digit is a backreference; not supported
|
|
5678
5697
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
5679
5698
|
break;
|
|
5680
5699
|
}
|
|
@@ -5682,6 +5701,7 @@
|
|
|
5682
5701
|
// eslint-disable-next-line no-fallthrough
|
|
5683
5702
|
case Codepoint.CODES.get('0'):
|
|
5684
5703
|
{
|
|
5704
|
+
// Consume up to three octal digits; already have one.
|
|
5685
5705
|
let r = c - Codepoint.CODES.get('0');
|
|
5686
5706
|
for (let i = 1; i < 3; i++) {
|
|
5687
5707
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
@@ -7462,7 +7482,11 @@
|
|
|
7462
7482
|
*/
|
|
7463
7483
|
matchWithGroup(input, start, end, anchor, ngroup) {
|
|
7464
7484
|
if (!(input instanceof MatcherInputBase)) {
|
|
7465
|
-
|
|
7485
|
+
if (Utils.isByteArray(input)) {
|
|
7486
|
+
input = MatcherInput.utf8(input);
|
|
7487
|
+
} else {
|
|
7488
|
+
input = MatcherInput.utf16(input);
|
|
7489
|
+
}
|
|
7466
7490
|
}
|
|
7467
7491
|
return this.matchMachineInput(input, start, end, anchor, ngroup);
|
|
7468
7492
|
}
|
|
@@ -7968,7 +7992,7 @@
|
|
|
7968
7992
|
}
|
|
7969
7993
|
match(input) {
|
|
7970
7994
|
if (!this.prog) this.compile();
|
|
7971
|
-
const machineInput =
|
|
7995
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
7972
7996
|
let internalAnchor = RE2Flags.UNANCHORED;
|
|
7973
7997
|
if (this.anchor === RE2Set.ANCHOR_START) {
|
|
7974
7998
|
internalAnchor = RE2Flags.ANCHOR_START;
|
|
@@ -8025,45 +8049,50 @@
|
|
|
8025
8049
|
{
|
|
8026
8050
|
if (i + 2 < size) {
|
|
8027
8051
|
let nextCh = data[i + 2];
|
|
8028
|
-
|
|
8052
|
+
let code = nextCh.charCodeAt(0);
|
|
8053
|
+
if (code >= 65 && code <= 90 || code >= 97 && code <= 122) {
|
|
8054
|
+
let val = code % 32;
|
|
8029
8055
|
result += '\\x';
|
|
8030
|
-
result += (
|
|
8031
|
-
result += (
|
|
8056
|
+
result += (val >> 4).toString(16).toUpperCase();
|
|
8057
|
+
result += (val & 15).toString(16).toUpperCase();
|
|
8032
8058
|
i += 3;
|
|
8033
8059
|
changed = true;
|
|
8034
8060
|
continue;
|
|
8035
8061
|
}
|
|
8036
8062
|
}
|
|
8037
|
-
result += '
|
|
8063
|
+
result += 'c';
|
|
8038
8064
|
i += 2;
|
|
8065
|
+
changed = true;
|
|
8039
8066
|
continue;
|
|
8040
8067
|
}
|
|
8041
8068
|
case 'u':
|
|
8042
8069
|
{
|
|
8043
8070
|
if (i + 2 < size) {
|
|
8044
8071
|
let nextCh = data[i + 2];
|
|
8045
|
-
if (
|
|
8046
|
-
result += '\\x{' + nextCh;
|
|
8047
|
-
i += 3;
|
|
8048
|
-
for (let j = 0; j < 3 && i < size; ++i, ++j) {
|
|
8049
|
-
nextCh = data[i];
|
|
8050
|
-
if (!TranslateRegExpString.isHexadecimal(nextCh)) {
|
|
8051
|
-
break;
|
|
8052
|
-
}
|
|
8053
|
-
result += nextCh;
|
|
8054
|
-
}
|
|
8055
|
-
result += '}';
|
|
8056
|
-
changed = true;
|
|
8057
|
-
continue;
|
|
8058
|
-
} else if (nextCh === '{') {
|
|
8072
|
+
if (nextCh === '{') {
|
|
8059
8073
|
result += '\\x';
|
|
8060
8074
|
i += 2;
|
|
8061
8075
|
changed = true;
|
|
8062
8076
|
continue;
|
|
8077
|
+
} else if (i + 5 < size) {
|
|
8078
|
+
let isHex4 = true;
|
|
8079
|
+
for (let j = 0; j < 4; j++) {
|
|
8080
|
+
if (!TranslateRegExpString.isHexadecimal(data[i + 2 + j])) {
|
|
8081
|
+
isHex4 = false;
|
|
8082
|
+
break;
|
|
8083
|
+
}
|
|
8084
|
+
}
|
|
8085
|
+
if (isHex4) {
|
|
8086
|
+
result += '\\x{' + data.substring(i + 2, i + 6) + '}';
|
|
8087
|
+
i += 6;
|
|
8088
|
+
changed = true;
|
|
8089
|
+
continue;
|
|
8090
|
+
}
|
|
8063
8091
|
}
|
|
8064
8092
|
}
|
|
8065
|
-
result += '
|
|
8093
|
+
result += 'u';
|
|
8066
8094
|
i += 2;
|
|
8095
|
+
changed = true;
|
|
8067
8096
|
continue;
|
|
8068
8097
|
}
|
|
8069
8098
|
default:
|
|
@@ -8298,7 +8327,7 @@
|
|
|
8298
8327
|
* @returns {Matcher}
|
|
8299
8328
|
*/
|
|
8300
8329
|
matcher(input) {
|
|
8301
|
-
if (
|
|
8330
|
+
if (Utils.isByteArray(input)) {
|
|
8302
8331
|
input = MatcherInput.utf8(input);
|
|
8303
8332
|
}
|
|
8304
8333
|
return new Matcher(this, input);
|
|
@@ -8314,7 +8343,7 @@
|
|
|
8314
8343
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8315
8344
|
*/
|
|
8316
8345
|
test(input) {
|
|
8317
|
-
if (
|
|
8346
|
+
if (Utils.isByteArray(input)) {
|
|
8318
8347
|
// Reuse the existing UTF-8 fast-path method
|
|
8319
8348
|
return this.re2Input.matchUTF8(input);
|
|
8320
8349
|
}
|
|
@@ -8333,7 +8362,7 @@
|
|
|
8333
8362
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8334
8363
|
*/
|
|
8335
8364
|
testExact(input) {
|
|
8336
|
-
const machineInput =
|
|
8365
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
8337
8366
|
return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
|
|
8338
8367
|
}
|
|
8339
8368
|
|