re2js 2.2.2 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/build/index.cjs.cjs +76 -51
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +0 -1
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +76 -51
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +76 -51
- package/build/index.umd.js.map +1 -1
- package/package.json +4 -2
package/README.md
CHANGED
|
@@ -473,6 +473,8 @@ Parameters:
|
|
|
473
473
|
- `$1, $2, ...` refer to the corresponding capture groups in the pattern
|
|
474
474
|
- `$$` inserts a literal `$`
|
|
475
475
|
- `$<name>` can be used to reference named capture groups
|
|
476
|
+
- `` $` `` inserts the portion of the string that precedes the matched substring
|
|
477
|
+
- `$'` inserts the portion of the string that follows the matched substring
|
|
476
478
|
- on invalid group - ignore it
|
|
477
479
|
- `javaMode (Boolean)`: If set to `true`, the replacement follows Java's rules for replacement. Defaults to `false`. If `javaMode = true`, changed rules for capture groups and special characters:
|
|
478
480
|
- `$0` refers to the entire matched substring
|
package/build/index.cjs.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.2.2
|
|
5
|
+
* @version v2.2.3
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -96,7 +96,7 @@ for (let i = 0; i < ASCII_SIZE; i++) {
|
|
|
96
96
|
}
|
|
97
97
|
class Codepoint {
|
|
98
98
|
// codePointAt(0)
|
|
99
|
-
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
99
|
+
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ["'", 39], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['`', 96], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
100
100
|
|
|
101
101
|
// converts a Unicode codepoint to its upper-case codepoint
|
|
102
102
|
// returns the same codepoint if it cannot be converted (or the codepoint has no upper-case variant)
|
|
@@ -150,10 +150,6 @@ class UnicodeRangeTable {
|
|
|
150
150
|
getStride(index) {
|
|
151
151
|
return this.isStride1 ? 1 : this.data[index * this.SIZE + 2];
|
|
152
152
|
}
|
|
153
|
-
get(index) {
|
|
154
|
-
const i = index * this.SIZE;
|
|
155
|
-
return [this.data[i], this.data[i + 1], this.getStride(index)];
|
|
156
|
-
}
|
|
157
153
|
get length() {
|
|
158
154
|
return this.data.length / this.SIZE;
|
|
159
155
|
}
|
|
@@ -650,6 +646,9 @@ class Utils {
|
|
|
650
646
|
static emptyInts() {
|
|
651
647
|
return [];
|
|
652
648
|
}
|
|
649
|
+
static isByteArray(input) {
|
|
650
|
+
return Array.isArray(input) || input instanceof Uint8Array;
|
|
651
|
+
}
|
|
653
652
|
|
|
654
653
|
// Returns true iff |c| is an ASCII letter or decimal digit.
|
|
655
654
|
static isalnum(c) {
|
|
@@ -951,7 +950,7 @@ class Utf16MatcherInput extends MatcherInputBase {
|
|
|
951
950
|
* @returns {number[]}
|
|
952
951
|
*/
|
|
953
952
|
asBytes() {
|
|
954
|
-
return this.charSequence.toString()
|
|
953
|
+
return Utils.stringToUtf8ByteArray(this.charSequence.toString());
|
|
955
954
|
}
|
|
956
955
|
|
|
957
956
|
/**
|
|
@@ -976,7 +975,7 @@ class MatcherInput {
|
|
|
976
975
|
* @returns {Utf8MatcherInput}
|
|
977
976
|
*/
|
|
978
977
|
static utf8(input) {
|
|
979
|
-
if (
|
|
978
|
+
if (Utils.isByteArray(input)) {
|
|
980
979
|
return new Utf8MatcherInput(input);
|
|
981
980
|
}
|
|
982
981
|
return new Utf8MatcherInput(Utils.stringToUtf8ByteArray(input));
|
|
@@ -1108,10 +1107,10 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1108
1107
|
if (start < this.start) {
|
|
1109
1108
|
start = this.start;
|
|
1110
1109
|
}
|
|
1111
|
-
r1 = this.step(start) >> 3;
|
|
1110
|
+
r1 = this.step(start - this.start) >> 3;
|
|
1112
1111
|
}
|
|
1113
1112
|
}
|
|
1114
|
-
const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
|
|
1113
|
+
const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
|
|
1115
1114
|
return Utils.emptyOpContext(r1, r2);
|
|
1116
1115
|
}
|
|
1117
1116
|
|
|
@@ -1193,14 +1192,17 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1193
1192
|
index(re2, pos) {
|
|
1194
1193
|
pos += this.start;
|
|
1195
1194
|
const i = this.charSequence.indexOf(re2.prefix, pos);
|
|
1196
|
-
|
|
1195
|
+
if (i < 0 || i > this.end - re2.prefix.length) {
|
|
1196
|
+
return -1;
|
|
1197
|
+
}
|
|
1198
|
+
return i - pos;
|
|
1197
1199
|
}
|
|
1198
1200
|
|
|
1199
1201
|
// Returns a bitmask of EMPTY_* flags.
|
|
1200
1202
|
context(pos) {
|
|
1201
1203
|
pos += this.start;
|
|
1202
|
-
const r1 = pos >
|
|
1203
|
-
const r2 = pos < this.
|
|
1204
|
+
const r1 = pos > this.start && pos <= this.end ? this.charSequence.codePointAt(pos - 1) : -1;
|
|
1205
|
+
const r2 = pos < this.end ? this.charSequence.codePointAt(pos) : -1;
|
|
1204
1206
|
return Utils.emptyOpContext(r1, r2);
|
|
1205
1207
|
}
|
|
1206
1208
|
prefixLength(re2) {
|
|
@@ -1388,7 +1390,7 @@ class Matcher {
|
|
|
1388
1390
|
this.numberOfInstructions = re2.numberOfInstructions();
|
|
1389
1391
|
if (input instanceof MatcherInputBase) {
|
|
1390
1392
|
this.resetMatcherInput(input);
|
|
1391
|
-
} else if (
|
|
1393
|
+
} else if (Utils.isByteArray(input)) {
|
|
1392
1394
|
this.resetMatcherInput(MatcherInput.utf8(input));
|
|
1393
1395
|
} else {
|
|
1394
1396
|
this.resetMatcherInput(MatcherInput.utf16(input));
|
|
@@ -1550,10 +1552,7 @@ class Matcher {
|
|
|
1550
1552
|
if (group === 0 || this.hasGroups) {
|
|
1551
1553
|
return;
|
|
1552
1554
|
}
|
|
1553
|
-
|
|
1554
|
-
if (end > this.matcherInputLength) {
|
|
1555
|
-
end = this.matcherInputLength;
|
|
1556
|
-
}
|
|
1555
|
+
const end = this.matcherInputLength;
|
|
1557
1556
|
const res = this.patternInput.re2().matchMachineInput(this.matcherInput, this.groups[0], end, this.anchorFlag, 1 + this.patternGroupCount);
|
|
1558
1557
|
const ok = res[0];
|
|
1559
1558
|
if (!ok) {
|
|
@@ -1749,7 +1748,10 @@ class Matcher {
|
|
|
1749
1748
|
throw new RE2JSGroupException("named capture group is missing trailing '}'");
|
|
1750
1749
|
}
|
|
1751
1750
|
const groupName = replacement.substring(i + 1, j);
|
|
1752
|
-
|
|
1751
|
+
const groupVal = this.group(groupName);
|
|
1752
|
+
if (groupVal !== null) {
|
|
1753
|
+
res += groupVal;
|
|
1754
|
+
}
|
|
1753
1755
|
last = j + 1;
|
|
1754
1756
|
i = j;
|
|
1755
1757
|
continue;
|
|
@@ -1795,6 +1797,22 @@ class Matcher {
|
|
|
1795
1797
|
i++;
|
|
1796
1798
|
last = i + 1;
|
|
1797
1799
|
continue;
|
|
1800
|
+
} else if (Codepoint.CODES.get('`') === c) {
|
|
1801
|
+
if (last < i) {
|
|
1802
|
+
res += replacement.substring(last, i);
|
|
1803
|
+
}
|
|
1804
|
+
res += this.substring(0, this.start(0));
|
|
1805
|
+
i++;
|
|
1806
|
+
last = i + 1;
|
|
1807
|
+
continue;
|
|
1808
|
+
} else if (Codepoint.CODES.get("'") === c) {
|
|
1809
|
+
if (last < i) {
|
|
1810
|
+
res += replacement.substring(last, i);
|
|
1811
|
+
}
|
|
1812
|
+
res += this.substring(this.end(0), this.matcherInputLength);
|
|
1813
|
+
i++;
|
|
1814
|
+
last = i + 1;
|
|
1815
|
+
continue;
|
|
1798
1816
|
} else if (Codepoint.CODES.get('1') <= c && c <= Codepoint.CODES.get('9')) {
|
|
1799
1817
|
let n = c - Codepoint.CODES.get('0');
|
|
1800
1818
|
if (last < i) {
|
|
@@ -1837,7 +1855,10 @@ class Matcher {
|
|
|
1837
1855
|
}
|
|
1838
1856
|
const groupName = replacement.substring(i + 1, j);
|
|
1839
1857
|
if (Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)) {
|
|
1840
|
-
|
|
1858
|
+
const groupVal = this.group(groupName);
|
|
1859
|
+
if (groupVal !== null) {
|
|
1860
|
+
res += groupVal;
|
|
1861
|
+
}
|
|
1841
1862
|
} else {
|
|
1842
1863
|
res += `$<${groupName}>`;
|
|
1843
1864
|
}
|
|
@@ -4339,13 +4360,6 @@ class Prog {
|
|
|
4339
4360
|
// start every program with a fail instruction, so we'll never want to point
|
|
4340
4361
|
// at its output link.
|
|
4341
4362
|
|
|
4342
|
-
next(l) {
|
|
4343
|
-
const i = this.inst[l >> 1];
|
|
4344
|
-
if ((l & 1) === 0) {
|
|
4345
|
-
return i.out;
|
|
4346
|
-
}
|
|
4347
|
-
return i.arg;
|
|
4348
|
-
}
|
|
4349
4363
|
patch(l, val) {
|
|
4350
4364
|
let head = l.head;
|
|
4351
4365
|
while (head !== 0) {
|
|
@@ -5675,6 +5689,7 @@ class Parser {
|
|
|
5675
5689
|
case Codepoint.CODES.get('6'):
|
|
5676
5690
|
case Codepoint.CODES.get('7'):
|
|
5677
5691
|
{
|
|
5692
|
+
// Single non-zero digit is a backreference; not supported
|
|
5678
5693
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
5679
5694
|
break;
|
|
5680
5695
|
}
|
|
@@ -5682,6 +5697,7 @@ class Parser {
|
|
|
5682
5697
|
// eslint-disable-next-line no-fallthrough
|
|
5683
5698
|
case Codepoint.CODES.get('0'):
|
|
5684
5699
|
{
|
|
5700
|
+
// Consume up to three octal digits; already have one.
|
|
5685
5701
|
let r = c - Codepoint.CODES.get('0');
|
|
5686
5702
|
for (let i = 1; i < 3; i++) {
|
|
5687
5703
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
@@ -7462,7 +7478,11 @@ class RE2 {
|
|
|
7462
7478
|
*/
|
|
7463
7479
|
matchWithGroup(input, start, end, anchor, ngroup) {
|
|
7464
7480
|
if (!(input instanceof MatcherInputBase)) {
|
|
7465
|
-
|
|
7481
|
+
if (Utils.isByteArray(input)) {
|
|
7482
|
+
input = MatcherInput.utf8(input);
|
|
7483
|
+
} else {
|
|
7484
|
+
input = MatcherInput.utf16(input);
|
|
7485
|
+
}
|
|
7466
7486
|
}
|
|
7467
7487
|
return this.matchMachineInput(input, start, end, anchor, ngroup);
|
|
7468
7488
|
}
|
|
@@ -7968,7 +7988,7 @@ class RE2Set {
|
|
|
7968
7988
|
}
|
|
7969
7989
|
match(input) {
|
|
7970
7990
|
if (!this.prog) this.compile();
|
|
7971
|
-
const machineInput =
|
|
7991
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
7972
7992
|
let internalAnchor = RE2Flags.UNANCHORED;
|
|
7973
7993
|
if (this.anchor === RE2Set.ANCHOR_START) {
|
|
7974
7994
|
internalAnchor = RE2Flags.ANCHOR_START;
|
|
@@ -8025,45 +8045,50 @@ class TranslateRegExpString {
|
|
|
8025
8045
|
{
|
|
8026
8046
|
if (i + 2 < size) {
|
|
8027
8047
|
let nextCh = data[i + 2];
|
|
8028
|
-
|
|
8048
|
+
let code = nextCh.charCodeAt(0);
|
|
8049
|
+
if (code >= 65 && code <= 90 || code >= 97 && code <= 122) {
|
|
8050
|
+
let val = code % 32;
|
|
8029
8051
|
result += '\\x';
|
|
8030
|
-
result += (
|
|
8031
|
-
result += (
|
|
8052
|
+
result += (val >> 4).toString(16).toUpperCase();
|
|
8053
|
+
result += (val & 15).toString(16).toUpperCase();
|
|
8032
8054
|
i += 3;
|
|
8033
8055
|
changed = true;
|
|
8034
8056
|
continue;
|
|
8035
8057
|
}
|
|
8036
8058
|
}
|
|
8037
|
-
result += '
|
|
8059
|
+
result += 'c';
|
|
8038
8060
|
i += 2;
|
|
8061
|
+
changed = true;
|
|
8039
8062
|
continue;
|
|
8040
8063
|
}
|
|
8041
8064
|
case 'u':
|
|
8042
8065
|
{
|
|
8043
8066
|
if (i + 2 < size) {
|
|
8044
8067
|
let nextCh = data[i + 2];
|
|
8045
|
-
if (
|
|
8046
|
-
result += '\\x{' + nextCh;
|
|
8047
|
-
i += 3;
|
|
8048
|
-
for (let j = 0; j < 3 && i < size; ++i, ++j) {
|
|
8049
|
-
nextCh = data[i];
|
|
8050
|
-
if (!TranslateRegExpString.isHexadecimal(nextCh)) {
|
|
8051
|
-
break;
|
|
8052
|
-
}
|
|
8053
|
-
result += nextCh;
|
|
8054
|
-
}
|
|
8055
|
-
result += '}';
|
|
8056
|
-
changed = true;
|
|
8057
|
-
continue;
|
|
8058
|
-
} else if (nextCh === '{') {
|
|
8068
|
+
if (nextCh === '{') {
|
|
8059
8069
|
result += '\\x';
|
|
8060
8070
|
i += 2;
|
|
8061
8071
|
changed = true;
|
|
8062
8072
|
continue;
|
|
8073
|
+
} else if (i + 5 < size) {
|
|
8074
|
+
let isHex4 = true;
|
|
8075
|
+
for (let j = 0; j < 4; j++) {
|
|
8076
|
+
if (!TranslateRegExpString.isHexadecimal(data[i + 2 + j])) {
|
|
8077
|
+
isHex4 = false;
|
|
8078
|
+
break;
|
|
8079
|
+
}
|
|
8080
|
+
}
|
|
8081
|
+
if (isHex4) {
|
|
8082
|
+
result += '\\x{' + data.substring(i + 2, i + 6) + '}';
|
|
8083
|
+
i += 6;
|
|
8084
|
+
changed = true;
|
|
8085
|
+
continue;
|
|
8086
|
+
}
|
|
8063
8087
|
}
|
|
8064
8088
|
}
|
|
8065
|
-
result += '
|
|
8089
|
+
result += 'u';
|
|
8066
8090
|
i += 2;
|
|
8091
|
+
changed = true;
|
|
8067
8092
|
continue;
|
|
8068
8093
|
}
|
|
8069
8094
|
default:
|
|
@@ -8298,7 +8323,7 @@ class RE2JS {
|
|
|
8298
8323
|
* @returns {Matcher}
|
|
8299
8324
|
*/
|
|
8300
8325
|
matcher(input) {
|
|
8301
|
-
if (
|
|
8326
|
+
if (Utils.isByteArray(input)) {
|
|
8302
8327
|
input = MatcherInput.utf8(input);
|
|
8303
8328
|
}
|
|
8304
8329
|
return new Matcher(this, input);
|
|
@@ -8314,7 +8339,7 @@ class RE2JS {
|
|
|
8314
8339
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8315
8340
|
*/
|
|
8316
8341
|
test(input) {
|
|
8317
|
-
if (
|
|
8342
|
+
if (Utils.isByteArray(input)) {
|
|
8318
8343
|
// Reuse the existing UTF-8 fast-path method
|
|
8319
8344
|
return this.re2Input.matchUTF8(input);
|
|
8320
8345
|
}
|
|
@@ -8333,7 +8358,7 @@ class RE2JS {
|
|
|
8333
8358
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8334
8359
|
*/
|
|
8335
8360
|
testExact(input) {
|
|
8336
|
-
const machineInput =
|
|
8361
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
8337
8362
|
return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
|
|
8338
8363
|
}
|
|
8339
8364
|
|