re2js 2.2.1 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +2 -0
- package/build/index.cjs.cjs +83 -54
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +0 -1
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +83 -54
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +83 -54
- package/build/index.umd.js.map +1 -1
- package/package.json +5 -3
package/LICENSE
CHANGED
package/README.md
CHANGED
|
@@ -473,6 +473,8 @@ Parameters:
|
|
|
473
473
|
- `$1, $2, ...` refer to the corresponding capture groups in the pattern
|
|
474
474
|
- `$$` inserts a literal `$`
|
|
475
475
|
- `$<name>` can be used to reference named capture groups
|
|
476
|
+
- `` $` `` inserts the portion of the string that precedes the matched substring
|
|
477
|
+
- `$'` inserts the portion of the string that follows the matched substring
|
|
476
478
|
- on invalid group - ignore it
|
|
477
479
|
- `javaMode (Boolean)`: If set to `true`, the replacement follows Java's rules for replacement. Defaults to `false`. If `javaMode = true`, changed rules for capture groups and special characters:
|
|
478
480
|
- `$0` refers to the entire matched substring
|
package/build/index.cjs.cjs
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.2.1
|
|
6
|
-
* @author
|
|
5
|
+
* @version v2.2.3
|
|
6
|
+
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
9
9
|
* @license MIT
|
|
@@ -96,7 +96,7 @@ for (let i = 0; i < ASCII_SIZE; i++) {
|
|
|
96
96
|
}
|
|
97
97
|
class Codepoint {
|
|
98
98
|
// codePointAt(0)
|
|
99
|
-
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
99
|
+
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ["'", 39], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['`', 96], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
100
100
|
|
|
101
101
|
// convert unicode codepoint to upper case codepoint
|
|
102
102
|
// return same codepoint, if cannot do it (or codepoint not have upper variation)
|
|
@@ -150,10 +150,6 @@ class UnicodeRangeTable {
|
|
|
150
150
|
getStride(index) {
|
|
151
151
|
return this.isStride1 ? 1 : this.data[index * this.SIZE + 2];
|
|
152
152
|
}
|
|
153
|
-
get(index) {
|
|
154
|
-
const i = index * this.SIZE;
|
|
155
|
-
return [this.data[i], this.data[i + 1], this.getStride(index)];
|
|
156
|
-
}
|
|
157
153
|
get length() {
|
|
158
154
|
return this.data.length / this.SIZE;
|
|
159
155
|
}
|
|
@@ -650,6 +646,9 @@ class Utils {
|
|
|
650
646
|
static emptyInts() {
|
|
651
647
|
return [];
|
|
652
648
|
}
|
|
649
|
+
static isByteArray(input) {
|
|
650
|
+
return Array.isArray(input) || input instanceof Uint8Array;
|
|
651
|
+
}
|
|
653
652
|
|
|
654
653
|
// Returns true iff |c| is an ASCII letter or decimal digit.
|
|
655
654
|
static isalnum(c) {
|
|
@@ -951,7 +950,7 @@ class Utf16MatcherInput extends MatcherInputBase {
|
|
|
951
950
|
* @returns {number[]}
|
|
952
951
|
*/
|
|
953
952
|
asBytes() {
|
|
954
|
-
return this.charSequence.toString()
|
|
953
|
+
return Utils.stringToUtf8ByteArray(this.charSequence.toString());
|
|
955
954
|
}
|
|
956
955
|
|
|
957
956
|
/**
|
|
@@ -976,7 +975,7 @@ class MatcherInput {
|
|
|
976
975
|
* @returns {Utf8MatcherInput}
|
|
977
976
|
*/
|
|
978
977
|
static utf8(input) {
|
|
979
|
-
if (
|
|
978
|
+
if (Utils.isByteArray(input)) {
|
|
980
979
|
return new Utf8MatcherInput(input);
|
|
981
980
|
}
|
|
982
981
|
return new Utf8MatcherInput(Utils.stringToUtf8ByteArray(input));
|
|
@@ -1108,10 +1107,10 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1108
1107
|
if (start < this.start) {
|
|
1109
1108
|
start = this.start;
|
|
1110
1109
|
}
|
|
1111
|
-
r1 = this.step(start) >> 3;
|
|
1110
|
+
r1 = this.step(start - this.start) >> 3;
|
|
1112
1111
|
}
|
|
1113
1112
|
}
|
|
1114
|
-
const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
|
|
1113
|
+
const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
|
|
1115
1114
|
return Utils.emptyOpContext(r1, r2);
|
|
1116
1115
|
}
|
|
1117
1116
|
|
|
@@ -1193,14 +1192,17 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1193
1192
|
index(re2, pos) {
|
|
1194
1193
|
pos += this.start;
|
|
1195
1194
|
const i = this.charSequence.indexOf(re2.prefix, pos);
|
|
1196
|
-
|
|
1195
|
+
if (i < 0 || i > this.end - re2.prefix.length) {
|
|
1196
|
+
return -1;
|
|
1197
|
+
}
|
|
1198
|
+
return i - pos;
|
|
1197
1199
|
}
|
|
1198
1200
|
|
|
1199
1201
|
// Returns a bitmask of EMPTY_* flags.
|
|
1200
1202
|
context(pos) {
|
|
1201
1203
|
pos += this.start;
|
|
1202
|
-
const r1 = pos >
|
|
1203
|
-
const r2 = pos < this.
|
|
1204
|
+
const r1 = pos > this.start && pos <= this.end ? this.charSequence.codePointAt(pos - 1) : -1;
|
|
1205
|
+
const r2 = pos < this.end ? this.charSequence.codePointAt(pos) : -1;
|
|
1204
1206
|
return Utils.emptyOpContext(r1, r2);
|
|
1205
1207
|
}
|
|
1206
1208
|
prefixLength(re2) {
|
|
@@ -1388,7 +1390,7 @@ class Matcher {
|
|
|
1388
1390
|
this.numberOfInstructions = re2.numberOfInstructions();
|
|
1389
1391
|
if (input instanceof MatcherInputBase) {
|
|
1390
1392
|
this.resetMatcherInput(input);
|
|
1391
|
-
} else if (
|
|
1393
|
+
} else if (Utils.isByteArray(input)) {
|
|
1392
1394
|
this.resetMatcherInput(MatcherInput.utf8(input));
|
|
1393
1395
|
} else {
|
|
1394
1396
|
this.resetMatcherInput(MatcherInput.utf16(input));
|
|
@@ -1550,10 +1552,7 @@ class Matcher {
|
|
|
1550
1552
|
if (group === 0 || this.hasGroups) {
|
|
1551
1553
|
return;
|
|
1552
1554
|
}
|
|
1553
|
-
|
|
1554
|
-
if (end > this.matcherInputLength) {
|
|
1555
|
-
end = this.matcherInputLength;
|
|
1556
|
-
}
|
|
1555
|
+
const end = this.matcherInputLength;
|
|
1557
1556
|
const res = this.patternInput.re2().matchMachineInput(this.matcherInput, this.groups[0], end, this.anchorFlag, 1 + this.patternGroupCount);
|
|
1558
1557
|
const ok = res[0];
|
|
1559
1558
|
if (!ok) {
|
|
@@ -1749,7 +1748,10 @@ class Matcher {
|
|
|
1749
1748
|
throw new RE2JSGroupException("named capture group is missing trailing '}'");
|
|
1750
1749
|
}
|
|
1751
1750
|
const groupName = replacement.substring(i + 1, j);
|
|
1752
|
-
|
|
1751
|
+
const groupVal = this.group(groupName);
|
|
1752
|
+
if (groupVal !== null) {
|
|
1753
|
+
res += groupVal;
|
|
1754
|
+
}
|
|
1753
1755
|
last = j + 1;
|
|
1754
1756
|
i = j;
|
|
1755
1757
|
continue;
|
|
@@ -1795,6 +1797,22 @@ class Matcher {
|
|
|
1795
1797
|
i++;
|
|
1796
1798
|
last = i + 1;
|
|
1797
1799
|
continue;
|
|
1800
|
+
} else if (Codepoint.CODES.get('`') === c) {
|
|
1801
|
+
if (last < i) {
|
|
1802
|
+
res += replacement.substring(last, i);
|
|
1803
|
+
}
|
|
1804
|
+
res += this.substring(0, this.start(0));
|
|
1805
|
+
i++;
|
|
1806
|
+
last = i + 1;
|
|
1807
|
+
continue;
|
|
1808
|
+
} else if (Codepoint.CODES.get("'") === c) {
|
|
1809
|
+
if (last < i) {
|
|
1810
|
+
res += replacement.substring(last, i);
|
|
1811
|
+
}
|
|
1812
|
+
res += this.substring(this.end(0), this.matcherInputLength);
|
|
1813
|
+
i++;
|
|
1814
|
+
last = i + 1;
|
|
1815
|
+
continue;
|
|
1798
1816
|
} else if (Codepoint.CODES.get('1') <= c && c <= Codepoint.CODES.get('9')) {
|
|
1799
1817
|
let n = c - Codepoint.CODES.get('0');
|
|
1800
1818
|
if (last < i) {
|
|
@@ -1837,7 +1855,10 @@ class Matcher {
|
|
|
1837
1855
|
}
|
|
1838
1856
|
const groupName = replacement.substring(i + 1, j);
|
|
1839
1857
|
if (Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)) {
|
|
1840
|
-
|
|
1858
|
+
const groupVal = this.group(groupName);
|
|
1859
|
+
if (groupVal !== null) {
|
|
1860
|
+
res += groupVal;
|
|
1861
|
+
}
|
|
1841
1862
|
} else {
|
|
1842
1863
|
res += `$<${groupName}>`;
|
|
1843
1864
|
}
|
|
@@ -3343,7 +3364,9 @@ const makeOnePass = p => {
|
|
|
3343
3364
|
}
|
|
3344
3365
|
runes.sort((a, b) => a - b);
|
|
3345
3366
|
} else {
|
|
3346
|
-
|
|
3367
|
+
for (let j = 0; j < inst.runes.length; j++) {
|
|
3368
|
+
runes.push(inst.runes[j]);
|
|
3369
|
+
}
|
|
3347
3370
|
}
|
|
3348
3371
|
onePassRunes[pc] = runes;
|
|
3349
3372
|
inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
|
|
@@ -4170,7 +4193,9 @@ class PrefilterTree {
|
|
|
4170
4193
|
return new Prefilter(Prefilter.Type.NONE);
|
|
4171
4194
|
}
|
|
4172
4195
|
if (s.type === Prefilter.Type.OR) {
|
|
4173
|
-
|
|
4196
|
+
for (let j = 0; j < s.subs.length; j++) {
|
|
4197
|
+
newSubs.push(s.subs[j]);
|
|
4198
|
+
}
|
|
4174
4199
|
} else {
|
|
4175
4200
|
newSubs.push(s);
|
|
4176
4201
|
}
|
|
@@ -4335,13 +4360,6 @@ class Prog {
|
|
|
4335
4360
|
// start every program with a fail instruction, so we'll never want to point
|
|
4336
4361
|
// at its output link.
|
|
4337
4362
|
|
|
4338
|
-
next(l) {
|
|
4339
|
-
const i = this.inst[l >> 1];
|
|
4340
|
-
if ((l & 1) === 0) {
|
|
4341
|
-
return i.out;
|
|
4342
|
-
}
|
|
4343
|
-
return i.arg;
|
|
4344
|
-
}
|
|
4345
4363
|
patch(l, val) {
|
|
4346
4364
|
let head = l.head;
|
|
4347
4365
|
while (head !== 0) {
|
|
@@ -5671,6 +5689,7 @@ class Parser {
|
|
|
5671
5689
|
case Codepoint.CODES.get('6'):
|
|
5672
5690
|
case Codepoint.CODES.get('7'):
|
|
5673
5691
|
{
|
|
5692
|
+
// Single non-zero digit is a backreference; not supported
|
|
5674
5693
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
5675
5694
|
break;
|
|
5676
5695
|
}
|
|
@@ -5678,6 +5697,7 @@ class Parser {
|
|
|
5678
5697
|
// eslint-disable-next-line no-fallthrough
|
|
5679
5698
|
case Codepoint.CODES.get('0'):
|
|
5680
5699
|
{
|
|
5700
|
+
// Consume up to three octal digits; already have one.
|
|
5681
5701
|
let r = c - Codepoint.CODES.get('0');
|
|
5682
5702
|
for (let i = 1; i < 3; i++) {
|
|
5683
5703
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
@@ -7458,7 +7478,11 @@ class RE2 {
|
|
|
7458
7478
|
*/
|
|
7459
7479
|
matchWithGroup(input, start, end, anchor, ngroup) {
|
|
7460
7480
|
if (!(input instanceof MatcherInputBase)) {
|
|
7461
|
-
|
|
7481
|
+
if (Utils.isByteArray(input)) {
|
|
7482
|
+
input = MatcherInput.utf8(input);
|
|
7483
|
+
} else {
|
|
7484
|
+
input = MatcherInput.utf16(input);
|
|
7485
|
+
}
|
|
7462
7486
|
}
|
|
7463
7487
|
return this.matchMachineInput(input, start, end, anchor, ngroup);
|
|
7464
7488
|
}
|
|
@@ -7964,7 +7988,7 @@ class RE2Set {
|
|
|
7964
7988
|
}
|
|
7965
7989
|
match(input) {
|
|
7966
7990
|
if (!this.prog) this.compile();
|
|
7967
|
-
const machineInput =
|
|
7991
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
7968
7992
|
let internalAnchor = RE2Flags.UNANCHORED;
|
|
7969
7993
|
if (this.anchor === RE2Set.ANCHOR_START) {
|
|
7970
7994
|
internalAnchor = RE2Flags.ANCHOR_START;
|
|
@@ -8021,45 +8045,50 @@ class TranslateRegExpString {
|
|
|
8021
8045
|
{
|
|
8022
8046
|
if (i + 2 < size) {
|
|
8023
8047
|
let nextCh = data[i + 2];
|
|
8024
|
-
|
|
8048
|
+
let code = nextCh.charCodeAt(0);
|
|
8049
|
+
if (code >= 65 && code <= 90 || code >= 97 && code <= 122) {
|
|
8050
|
+
let val = code % 32;
|
|
8025
8051
|
result += '\\x';
|
|
8026
|
-
result += (
|
|
8027
|
-
result += (
|
|
8052
|
+
result += (val >> 4).toString(16).toUpperCase();
|
|
8053
|
+
result += (val & 15).toString(16).toUpperCase();
|
|
8028
8054
|
i += 3;
|
|
8029
8055
|
changed = true;
|
|
8030
8056
|
continue;
|
|
8031
8057
|
}
|
|
8032
8058
|
}
|
|
8033
|
-
result += '
|
|
8059
|
+
result += 'c';
|
|
8034
8060
|
i += 2;
|
|
8061
|
+
changed = true;
|
|
8035
8062
|
continue;
|
|
8036
8063
|
}
|
|
8037
8064
|
case 'u':
|
|
8038
8065
|
{
|
|
8039
8066
|
if (i + 2 < size) {
|
|
8040
8067
|
let nextCh = data[i + 2];
|
|
8041
|
-
if (
|
|
8042
|
-
result += '\\x{' + nextCh;
|
|
8043
|
-
i += 3;
|
|
8044
|
-
for (let j = 0; j < 3 && i < size; ++i, ++j) {
|
|
8045
|
-
nextCh = data[i];
|
|
8046
|
-
if (!TranslateRegExpString.isHexadecimal(nextCh)) {
|
|
8047
|
-
break;
|
|
8048
|
-
}
|
|
8049
|
-
result += nextCh;
|
|
8050
|
-
}
|
|
8051
|
-
result += '}';
|
|
8052
|
-
changed = true;
|
|
8053
|
-
continue;
|
|
8054
|
-
} else if (nextCh === '{') {
|
|
8068
|
+
if (nextCh === '{') {
|
|
8055
8069
|
result += '\\x';
|
|
8056
8070
|
i += 2;
|
|
8057
8071
|
changed = true;
|
|
8058
8072
|
continue;
|
|
8073
|
+
} else if (i + 5 < size) {
|
|
8074
|
+
let isHex4 = true;
|
|
8075
|
+
for (let j = 0; j < 4; j++) {
|
|
8076
|
+
if (!TranslateRegExpString.isHexadecimal(data[i + 2 + j])) {
|
|
8077
|
+
isHex4 = false;
|
|
8078
|
+
break;
|
|
8079
|
+
}
|
|
8080
|
+
}
|
|
8081
|
+
if (isHex4) {
|
|
8082
|
+
result += '\\x{' + data.substring(i + 2, i + 6) + '}';
|
|
8083
|
+
i += 6;
|
|
8084
|
+
changed = true;
|
|
8085
|
+
continue;
|
|
8086
|
+
}
|
|
8059
8087
|
}
|
|
8060
8088
|
}
|
|
8061
|
-
result += '
|
|
8089
|
+
result += 'u';
|
|
8062
8090
|
i += 2;
|
|
8091
|
+
changed = true;
|
|
8063
8092
|
continue;
|
|
8064
8093
|
}
|
|
8065
8094
|
default:
|
|
@@ -8294,7 +8323,7 @@ class RE2JS {
|
|
|
8294
8323
|
* @returns {Matcher}
|
|
8295
8324
|
*/
|
|
8296
8325
|
matcher(input) {
|
|
8297
|
-
if (
|
|
8326
|
+
if (Utils.isByteArray(input)) {
|
|
8298
8327
|
input = MatcherInput.utf8(input);
|
|
8299
8328
|
}
|
|
8300
8329
|
return new Matcher(this, input);
|
|
@@ -8310,7 +8339,7 @@ class RE2JS {
|
|
|
8310
8339
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8311
8340
|
*/
|
|
8312
8341
|
test(input) {
|
|
8313
|
-
if (
|
|
8342
|
+
if (Utils.isByteArray(input)) {
|
|
8314
8343
|
// Reuse the existing UTF-8 fast-path method
|
|
8315
8344
|
return this.re2Input.matchUTF8(input);
|
|
8316
8345
|
}
|
|
@@ -8329,7 +8358,7 @@ class RE2JS {
|
|
|
8329
8358
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8330
8359
|
*/
|
|
8331
8360
|
testExact(input) {
|
|
8332
|
-
const machineInput =
|
|
8361
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
8333
8362
|
return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
|
|
8334
8363
|
}
|
|
8335
8364
|
|