re2js 2.2.2 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/build/index.cjs.cjs +76 -51
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +0 -1
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +76 -51
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +76 -51
- package/build/index.umd.js.map +1 -1
- package/package.json +4 -2
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.2.2
|
|
5
|
+
* @version v2.2.3
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -100,7 +100,7 @@
|
|
|
100
100
|
}
|
|
101
101
|
class Codepoint {
|
|
102
102
|
// codePointAt(0)
|
|
103
|
-
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
103
|
+
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ["'", 39], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['`', 96], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
104
104
|
|
|
105
105
|
// convert unicode codepoint to upper case codepoint
|
|
106
106
|
// return same codepoint, if cannot do it (or codepoint not have upper variation)
|
|
@@ -154,10 +154,6 @@
|
|
|
154
154
|
getStride(index) {
|
|
155
155
|
return this.isStride1 ? 1 : this.data[index * this.SIZE + 2];
|
|
156
156
|
}
|
|
157
|
-
get(index) {
|
|
158
|
-
const i = index * this.SIZE;
|
|
159
|
-
return [this.data[i], this.data[i + 1], this.getStride(index)];
|
|
160
|
-
}
|
|
161
157
|
get length() {
|
|
162
158
|
return this.data.length / this.SIZE;
|
|
163
159
|
}
|
|
@@ -654,6 +650,9 @@
|
|
|
654
650
|
static emptyInts() {
|
|
655
651
|
return [];
|
|
656
652
|
}
|
|
653
|
+
static isByteArray(input) {
|
|
654
|
+
return Array.isArray(input) || input instanceof Uint8Array;
|
|
655
|
+
}
|
|
657
656
|
|
|
658
657
|
// Returns true iff |c| is an ASCII letter or decimal digit.
|
|
659
658
|
static isalnum(c) {
|
|
@@ -955,7 +954,7 @@
|
|
|
955
954
|
* @returns {number[]}
|
|
956
955
|
*/
|
|
957
956
|
asBytes() {
|
|
958
|
-
return this.charSequence.toString()
|
|
957
|
+
return Utils.stringToUtf8ByteArray(this.charSequence.toString());
|
|
959
958
|
}
|
|
960
959
|
|
|
961
960
|
/**
|
|
@@ -980,7 +979,7 @@
|
|
|
980
979
|
* @returns {Utf8MatcherInput}
|
|
981
980
|
*/
|
|
982
981
|
static utf8(input) {
|
|
983
|
-
if (
|
|
982
|
+
if (Utils.isByteArray(input)) {
|
|
984
983
|
return new Utf8MatcherInput(input);
|
|
985
984
|
}
|
|
986
985
|
return new Utf8MatcherInput(Utils.stringToUtf8ByteArray(input));
|
|
@@ -1112,10 +1111,10 @@
|
|
|
1112
1111
|
if (start < this.start) {
|
|
1113
1112
|
start = this.start;
|
|
1114
1113
|
}
|
|
1115
|
-
r1 = this.step(start) >> 3;
|
|
1114
|
+
r1 = this.step(start - this.start) >> 3;
|
|
1116
1115
|
}
|
|
1117
1116
|
}
|
|
1118
|
-
const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
|
|
1117
|
+
const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
|
|
1119
1118
|
return Utils.emptyOpContext(r1, r2);
|
|
1120
1119
|
}
|
|
1121
1120
|
|
|
@@ -1197,14 +1196,17 @@
|
|
|
1197
1196
|
index(re2, pos) {
|
|
1198
1197
|
pos += this.start;
|
|
1199
1198
|
const i = this.charSequence.indexOf(re2.prefix, pos);
|
|
1200
|
-
|
|
1199
|
+
if (i < 0 || i > this.end - re2.prefix.length) {
|
|
1200
|
+
return -1;
|
|
1201
|
+
}
|
|
1202
|
+
return i - pos;
|
|
1201
1203
|
}
|
|
1202
1204
|
|
|
1203
1205
|
// Returns a bitmask of EMPTY_* flags.
|
|
1204
1206
|
context(pos) {
|
|
1205
1207
|
pos += this.start;
|
|
1206
|
-
const r1 = pos >
|
|
1207
|
-
const r2 = pos < this.
|
|
1208
|
+
const r1 = pos > this.start && pos <= this.end ? this.charSequence.codePointAt(pos - 1) : -1;
|
|
1209
|
+
const r2 = pos < this.end ? this.charSequence.codePointAt(pos) : -1;
|
|
1208
1210
|
return Utils.emptyOpContext(r1, r2);
|
|
1209
1211
|
}
|
|
1210
1212
|
prefixLength(re2) {
|
|
@@ -1392,7 +1394,7 @@
|
|
|
1392
1394
|
this.numberOfInstructions = re2.numberOfInstructions();
|
|
1393
1395
|
if (input instanceof MatcherInputBase) {
|
|
1394
1396
|
this.resetMatcherInput(input);
|
|
1395
|
-
} else if (
|
|
1397
|
+
} else if (Utils.isByteArray(input)) {
|
|
1396
1398
|
this.resetMatcherInput(MatcherInput.utf8(input));
|
|
1397
1399
|
} else {
|
|
1398
1400
|
this.resetMatcherInput(MatcherInput.utf16(input));
|
|
@@ -1554,10 +1556,7 @@
|
|
|
1554
1556
|
if (group === 0 || this.hasGroups) {
|
|
1555
1557
|
return;
|
|
1556
1558
|
}
|
|
1557
|
-
|
|
1558
|
-
if (end > this.matcherInputLength) {
|
|
1559
|
-
end = this.matcherInputLength;
|
|
1560
|
-
}
|
|
1559
|
+
const end = this.matcherInputLength;
|
|
1561
1560
|
const res = this.patternInput.re2().matchMachineInput(this.matcherInput, this.groups[0], end, this.anchorFlag, 1 + this.patternGroupCount);
|
|
1562
1561
|
const ok = res[0];
|
|
1563
1562
|
if (!ok) {
|
|
@@ -1753,7 +1752,10 @@
|
|
|
1753
1752
|
throw new RE2JSGroupException("named capture group is missing trailing '}'");
|
|
1754
1753
|
}
|
|
1755
1754
|
const groupName = replacement.substring(i + 1, j);
|
|
1756
|
-
|
|
1755
|
+
const groupVal = this.group(groupName);
|
|
1756
|
+
if (groupVal !== null) {
|
|
1757
|
+
res += groupVal;
|
|
1758
|
+
}
|
|
1757
1759
|
last = j + 1;
|
|
1758
1760
|
i = j;
|
|
1759
1761
|
continue;
|
|
@@ -1799,6 +1801,22 @@
|
|
|
1799
1801
|
i++;
|
|
1800
1802
|
last = i + 1;
|
|
1801
1803
|
continue;
|
|
1804
|
+
} else if (Codepoint.CODES.get('`') === c) {
|
|
1805
|
+
if (last < i) {
|
|
1806
|
+
res += replacement.substring(last, i);
|
|
1807
|
+
}
|
|
1808
|
+
res += this.substring(0, this.start(0));
|
|
1809
|
+
i++;
|
|
1810
|
+
last = i + 1;
|
|
1811
|
+
continue;
|
|
1812
|
+
} else if (Codepoint.CODES.get("'") === c) {
|
|
1813
|
+
if (last < i) {
|
|
1814
|
+
res += replacement.substring(last, i);
|
|
1815
|
+
}
|
|
1816
|
+
res += this.substring(this.end(0), this.matcherInputLength);
|
|
1817
|
+
i++;
|
|
1818
|
+
last = i + 1;
|
|
1819
|
+
continue;
|
|
1802
1820
|
} else if (Codepoint.CODES.get('1') <= c && c <= Codepoint.CODES.get('9')) {
|
|
1803
1821
|
let n = c - Codepoint.CODES.get('0');
|
|
1804
1822
|
if (last < i) {
|
|
@@ -1841,7 +1859,10 @@
|
|
|
1841
1859
|
}
|
|
1842
1860
|
const groupName = replacement.substring(i + 1, j);
|
|
1843
1861
|
if (Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)) {
|
|
1844
|
-
|
|
1862
|
+
const groupVal = this.group(groupName);
|
|
1863
|
+
if (groupVal !== null) {
|
|
1864
|
+
res += groupVal;
|
|
1865
|
+
}
|
|
1845
1866
|
} else {
|
|
1846
1867
|
res += `$<${groupName}>`;
|
|
1847
1868
|
}
|
|
@@ -4343,13 +4364,6 @@
|
|
|
4343
4364
|
// start every program with a fail instruction, so we'll never want to point
|
|
4344
4365
|
// at its output link.
|
|
4345
4366
|
|
|
4346
|
-
next(l) {
|
|
4347
|
-
const i = this.inst[l >> 1];
|
|
4348
|
-
if ((l & 1) === 0) {
|
|
4349
|
-
return i.out;
|
|
4350
|
-
}
|
|
4351
|
-
return i.arg;
|
|
4352
|
-
}
|
|
4353
4367
|
patch(l, val) {
|
|
4354
4368
|
let head = l.head;
|
|
4355
4369
|
while (head !== 0) {
|
|
@@ -5679,6 +5693,7 @@
|
|
|
5679
5693
|
case Codepoint.CODES.get('6'):
|
|
5680
5694
|
case Codepoint.CODES.get('7'):
|
|
5681
5695
|
{
|
|
5696
|
+
// Single non-zero digit is a backreference; not supported
|
|
5682
5697
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
5683
5698
|
break;
|
|
5684
5699
|
}
|
|
@@ -5686,6 +5701,7 @@
|
|
|
5686
5701
|
// eslint-disable-next-line no-fallthrough
|
|
5687
5702
|
case Codepoint.CODES.get('0'):
|
|
5688
5703
|
{
|
|
5704
|
+
// Consume up to three octal digits; already have one.
|
|
5689
5705
|
let r = c - Codepoint.CODES.get('0');
|
|
5690
5706
|
for (let i = 1; i < 3; i++) {
|
|
5691
5707
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
@@ -7466,7 +7482,11 @@
|
|
|
7466
7482
|
*/
|
|
7467
7483
|
matchWithGroup(input, start, end, anchor, ngroup) {
|
|
7468
7484
|
if (!(input instanceof MatcherInputBase)) {
|
|
7469
|
-
|
|
7485
|
+
if (Utils.isByteArray(input)) {
|
|
7486
|
+
input = MatcherInput.utf8(input);
|
|
7487
|
+
} else {
|
|
7488
|
+
input = MatcherInput.utf16(input);
|
|
7489
|
+
}
|
|
7470
7490
|
}
|
|
7471
7491
|
return this.matchMachineInput(input, start, end, anchor, ngroup);
|
|
7472
7492
|
}
|
|
@@ -7972,7 +7992,7 @@
|
|
|
7972
7992
|
}
|
|
7973
7993
|
match(input) {
|
|
7974
7994
|
if (!this.prog) this.compile();
|
|
7975
|
-
const machineInput =
|
|
7995
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
7976
7996
|
let internalAnchor = RE2Flags.UNANCHORED;
|
|
7977
7997
|
if (this.anchor === RE2Set.ANCHOR_START) {
|
|
7978
7998
|
internalAnchor = RE2Flags.ANCHOR_START;
|
|
@@ -8029,45 +8049,50 @@
|
|
|
8029
8049
|
{
|
|
8030
8050
|
if (i + 2 < size) {
|
|
8031
8051
|
let nextCh = data[i + 2];
|
|
8032
|
-
|
|
8052
|
+
let code = nextCh.charCodeAt(0);
|
|
8053
|
+
if (code >= 65 && code <= 90 || code >= 97 && code <= 122) {
|
|
8054
|
+
let val = code % 32;
|
|
8033
8055
|
result += '\\x';
|
|
8034
|
-
result += (
|
|
8035
|
-
result += (
|
|
8056
|
+
result += (val >> 4).toString(16).toUpperCase();
|
|
8057
|
+
result += (val & 15).toString(16).toUpperCase();
|
|
8036
8058
|
i += 3;
|
|
8037
8059
|
changed = true;
|
|
8038
8060
|
continue;
|
|
8039
8061
|
}
|
|
8040
8062
|
}
|
|
8041
|
-
result += '
|
|
8063
|
+
result += 'c';
|
|
8042
8064
|
i += 2;
|
|
8065
|
+
changed = true;
|
|
8043
8066
|
continue;
|
|
8044
8067
|
}
|
|
8045
8068
|
case 'u':
|
|
8046
8069
|
{
|
|
8047
8070
|
if (i + 2 < size) {
|
|
8048
8071
|
let nextCh = data[i + 2];
|
|
8049
|
-
if (
|
|
8050
|
-
result += '\\x{' + nextCh;
|
|
8051
|
-
i += 3;
|
|
8052
|
-
for (let j = 0; j < 3 && i < size; ++i, ++j) {
|
|
8053
|
-
nextCh = data[i];
|
|
8054
|
-
if (!TranslateRegExpString.isHexadecimal(nextCh)) {
|
|
8055
|
-
break;
|
|
8056
|
-
}
|
|
8057
|
-
result += nextCh;
|
|
8058
|
-
}
|
|
8059
|
-
result += '}';
|
|
8060
|
-
changed = true;
|
|
8061
|
-
continue;
|
|
8062
|
-
} else if (nextCh === '{') {
|
|
8072
|
+
if (nextCh === '{') {
|
|
8063
8073
|
result += '\\x';
|
|
8064
8074
|
i += 2;
|
|
8065
8075
|
changed = true;
|
|
8066
8076
|
continue;
|
|
8077
|
+
} else if (i + 5 < size) {
|
|
8078
|
+
let isHex4 = true;
|
|
8079
|
+
for (let j = 0; j < 4; j++) {
|
|
8080
|
+
if (!TranslateRegExpString.isHexadecimal(data[i + 2 + j])) {
|
|
8081
|
+
isHex4 = false;
|
|
8082
|
+
break;
|
|
8083
|
+
}
|
|
8084
|
+
}
|
|
8085
|
+
if (isHex4) {
|
|
8086
|
+
result += '\\x{' + data.substring(i + 2, i + 6) + '}';
|
|
8087
|
+
i += 6;
|
|
8088
|
+
changed = true;
|
|
8089
|
+
continue;
|
|
8090
|
+
}
|
|
8067
8091
|
}
|
|
8068
8092
|
}
|
|
8069
|
-
result += '
|
|
8093
|
+
result += 'u';
|
|
8070
8094
|
i += 2;
|
|
8095
|
+
changed = true;
|
|
8071
8096
|
continue;
|
|
8072
8097
|
}
|
|
8073
8098
|
default:
|
|
@@ -8302,7 +8327,7 @@
|
|
|
8302
8327
|
* @returns {Matcher}
|
|
8303
8328
|
*/
|
|
8304
8329
|
matcher(input) {
|
|
8305
|
-
if (
|
|
8330
|
+
if (Utils.isByteArray(input)) {
|
|
8306
8331
|
input = MatcherInput.utf8(input);
|
|
8307
8332
|
}
|
|
8308
8333
|
return new Matcher(this, input);
|
|
@@ -8318,7 +8343,7 @@
|
|
|
8318
8343
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8319
8344
|
*/
|
|
8320
8345
|
test(input) {
|
|
8321
|
-
if (
|
|
8346
|
+
if (Utils.isByteArray(input)) {
|
|
8322
8347
|
// Reuse the existing UTF-8 fast-path method
|
|
8323
8348
|
return this.re2Input.matchUTF8(input);
|
|
8324
8349
|
}
|
|
@@ -8337,7 +8362,7 @@
|
|
|
8337
8362
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8338
8363
|
*/
|
|
8339
8364
|
testExact(input) {
|
|
8340
|
-
const machineInput =
|
|
8365
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
8341
8366
|
return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
|
|
8342
8367
|
}
|
|
8343
8368
|
|