re2js 2.2.2 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/build/index.cjs.cjs +256 -74
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +54 -79
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +256 -74
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +256 -74
- package/build/index.umd.js.map +1 -1
- package/package.json +4 -2
package/build/index.esm.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.2.2
|
|
5
|
+
* @version v2.3.0
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -94,7 +94,7 @@ for (let i = 0; i < ASCII_SIZE; i++) {
|
|
|
94
94
|
}
|
|
95
95
|
class Codepoint {
|
|
96
96
|
// codePointAt(0)
|
|
97
|
-
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
97
|
+
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ["'", 39], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['`', 96], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
98
98
|
|
|
99
99
|
// convert unicode codepoint to upper case codepoint
|
|
100
100
|
// return same codepoint, if cannot do it (or codepoint not have upper variation)
|
|
@@ -148,10 +148,6 @@ class UnicodeRangeTable {
|
|
|
148
148
|
getStride(index) {
|
|
149
149
|
return this.isStride1 ? 1 : this.data[index * this.SIZE + 2];
|
|
150
150
|
}
|
|
151
|
-
get(index) {
|
|
152
|
-
const i = index * this.SIZE;
|
|
153
|
-
return [this.data[i], this.data[i + 1], this.getStride(index)];
|
|
154
|
-
}
|
|
155
151
|
get length() {
|
|
156
152
|
return this.data.length / this.SIZE;
|
|
157
153
|
}
|
|
@@ -648,6 +644,9 @@ class Utils {
|
|
|
648
644
|
static emptyInts() {
|
|
649
645
|
return [];
|
|
650
646
|
}
|
|
647
|
+
static isByteArray(input) {
|
|
648
|
+
return Array.isArray(input) || input instanceof Uint8Array;
|
|
649
|
+
}
|
|
651
650
|
|
|
652
651
|
// Returns true iff |c| is an ASCII letter or decimal digit.
|
|
653
652
|
static isalnum(c) {
|
|
@@ -949,7 +948,7 @@ class Utf16MatcherInput extends MatcherInputBase {
|
|
|
949
948
|
* @returns {number[]}
|
|
950
949
|
*/
|
|
951
950
|
asBytes() {
|
|
952
|
-
return this.charSequence.toString()
|
|
951
|
+
return Utils.stringToUtf8ByteArray(this.charSequence.toString());
|
|
953
952
|
}
|
|
954
953
|
|
|
955
954
|
/**
|
|
@@ -974,7 +973,7 @@ class MatcherInput {
|
|
|
974
973
|
* @returns {Utf8MatcherInput}
|
|
975
974
|
*/
|
|
976
975
|
static utf8(input) {
|
|
977
|
-
if (
|
|
976
|
+
if (Utils.isByteArray(input)) {
|
|
978
977
|
return new Utf8MatcherInput(input);
|
|
979
978
|
}
|
|
980
979
|
return new Utf8MatcherInput(Utils.stringToUtf8ByteArray(input));
|
|
@@ -1106,10 +1105,10 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1106
1105
|
if (start < this.start) {
|
|
1107
1106
|
start = this.start;
|
|
1108
1107
|
}
|
|
1109
|
-
r1 = this.step(start) >> 3;
|
|
1108
|
+
r1 = this.step(start - this.start) >> 3;
|
|
1110
1109
|
}
|
|
1111
1110
|
}
|
|
1112
|
-
const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
|
|
1111
|
+
const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
|
|
1113
1112
|
return Utils.emptyOpContext(r1, r2);
|
|
1114
1113
|
}
|
|
1115
1114
|
|
|
@@ -1191,14 +1190,17 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1191
1190
|
index(re2, pos) {
|
|
1192
1191
|
pos += this.start;
|
|
1193
1192
|
const i = this.charSequence.indexOf(re2.prefix, pos);
|
|
1194
|
-
|
|
1193
|
+
if (i < 0 || i > this.end - re2.prefix.length) {
|
|
1194
|
+
return -1;
|
|
1195
|
+
}
|
|
1196
|
+
return i - pos;
|
|
1195
1197
|
}
|
|
1196
1198
|
|
|
1197
1199
|
// Returns a bitmask of EMPTY_* flags.
|
|
1198
1200
|
context(pos) {
|
|
1199
1201
|
pos += this.start;
|
|
1200
|
-
const r1 = pos >
|
|
1201
|
-
const r2 = pos < this.
|
|
1202
|
+
const r1 = pos > this.start && pos <= this.end ? this.charSequence.codePointAt(pos - 1) : -1;
|
|
1203
|
+
const r2 = pos < this.end ? this.charSequence.codePointAt(pos) : -1;
|
|
1202
1204
|
return Utils.emptyOpContext(r1, r2);
|
|
1203
1205
|
}
|
|
1204
1206
|
prefixLength(re2) {
|
|
@@ -1327,6 +1329,11 @@ class RE2JSInternalException extends RE2JSException {
|
|
|
1327
1329
|
*
|
|
1328
1330
|
* @author rsc@google.com (Russ Cox)
|
|
1329
1331
|
*/
|
|
1332
|
+
|
|
1333
|
+
/**
|
|
1334
|
+
* @typedef {import('./index').RE2JS} RE2JS_Pattern
|
|
1335
|
+
*/
|
|
1336
|
+
|
|
1330
1337
|
class Matcher {
|
|
1331
1338
|
/**
|
|
1332
1339
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
@@ -1364,14 +1371,17 @@ class Matcher {
|
|
|
1364
1371
|
}
|
|
1365
1372
|
/**
|
|
1366
1373
|
*
|
|
1367
|
-
* @param {
|
|
1368
|
-
* @param {
|
|
1374
|
+
* @param {RE2JS_Pattern} pattern
|
|
1375
|
+
* @param {Uint8Array|number[]|string} input
|
|
1369
1376
|
*/
|
|
1370
1377
|
constructor(pattern, input) {
|
|
1371
1378
|
if (pattern === null) {
|
|
1372
1379
|
throw new Error('pattern is null');
|
|
1373
1380
|
}
|
|
1374
|
-
|
|
1381
|
+
/**
|
|
1382
|
+
* The pattern being matched.
|
|
1383
|
+
* @type {RE2JS_Pattern}
|
|
1384
|
+
*/
|
|
1375
1385
|
this.patternInput = pattern;
|
|
1376
1386
|
const re2 = this.patternInput.re2();
|
|
1377
1387
|
// The number of submatches (groups) in the pattern.
|
|
@@ -1386,7 +1396,7 @@ class Matcher {
|
|
|
1386
1396
|
this.numberOfInstructions = re2.numberOfInstructions();
|
|
1387
1397
|
if (input instanceof MatcherInputBase) {
|
|
1388
1398
|
this.resetMatcherInput(input);
|
|
1389
|
-
} else if (
|
|
1399
|
+
} else if (Utils.isByteArray(input)) {
|
|
1390
1400
|
this.resetMatcherInput(MatcherInput.utf8(input));
|
|
1391
1401
|
} else {
|
|
1392
1402
|
this.resetMatcherInput(MatcherInput.utf16(input));
|
|
@@ -1395,7 +1405,7 @@ class Matcher {
|
|
|
1395
1405
|
|
|
1396
1406
|
/**
|
|
1397
1407
|
* Returns the {@code RE2JS} associated with this {@code Matcher}.
|
|
1398
|
-
* @returns {
|
|
1408
|
+
* @returns {RE2JS_Pattern}
|
|
1399
1409
|
*/
|
|
1400
1410
|
pattern() {
|
|
1401
1411
|
return this.patternInput;
|
|
@@ -1425,7 +1435,7 @@ class Matcher {
|
|
|
1425
1435
|
|
|
1426
1436
|
/**
|
|
1427
1437
|
* Resets the {@code Matcher} and changes the input.
|
|
1428
|
-
* @param {
|
|
1438
|
+
* @param {import('./MatcherInput').MatcherInputBase} input
|
|
1429
1439
|
* @returns {Matcher} the {@code Matcher} itself, for chained method calls
|
|
1430
1440
|
*/
|
|
1431
1441
|
resetMatcherInput(input) {
|
|
@@ -1490,7 +1500,7 @@ class Matcher {
|
|
|
1490
1500
|
/**
|
|
1491
1501
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
1492
1502
|
* @param {string|number} [group=0]
|
|
1493
|
-
* @returns {
|
|
1503
|
+
* @returns {string|null}
|
|
1494
1504
|
*/
|
|
1495
1505
|
group(group = 0) {
|
|
1496
1506
|
if (typeof group === 'string') {
|
|
@@ -1548,10 +1558,7 @@ class Matcher {
|
|
|
1548
1558
|
if (group === 0 || this.hasGroups) {
|
|
1549
1559
|
return;
|
|
1550
1560
|
}
|
|
1551
|
-
|
|
1552
|
-
if (end > this.matcherInputLength) {
|
|
1553
|
-
end = this.matcherInputLength;
|
|
1554
|
-
}
|
|
1561
|
+
const end = this.matcherInputLength;
|
|
1555
1562
|
const res = this.patternInput.re2().matchMachineInput(this.matcherInput, this.groups[0], end, this.anchorFlag, 1 + this.patternGroupCount);
|
|
1556
1563
|
const ok = res[0];
|
|
1557
1564
|
if (!ok) {
|
|
@@ -1585,7 +1592,7 @@ class Matcher {
|
|
|
1585
1592
|
* Matches the input against the pattern (unanchored), starting at a specified position. If there
|
|
1586
1593
|
* is a match, {@code find} sets the match state to describe it.
|
|
1587
1594
|
*
|
|
1588
|
-
* @param {number} [start=null] the input position where the search begins
|
|
1595
|
+
* @param {number|null} [start=null] the input position where the search begins
|
|
1589
1596
|
* @returns {boolean} if it finds a match
|
|
1590
1597
|
* @throws IndexOutOfBoundsException if start is not a valid input position
|
|
1591
1598
|
*/
|
|
@@ -1747,7 +1754,10 @@ class Matcher {
|
|
|
1747
1754
|
throw new RE2JSGroupException("named capture group is missing trailing '}'");
|
|
1748
1755
|
}
|
|
1749
1756
|
const groupName = replacement.substring(i + 1, j);
|
|
1750
|
-
|
|
1757
|
+
const groupVal = this.group(groupName);
|
|
1758
|
+
if (groupVal !== null) {
|
|
1759
|
+
res += groupVal;
|
|
1760
|
+
}
|
|
1751
1761
|
last = j + 1;
|
|
1752
1762
|
i = j;
|
|
1753
1763
|
continue;
|
|
@@ -1793,6 +1803,22 @@ class Matcher {
|
|
|
1793
1803
|
i++;
|
|
1794
1804
|
last = i + 1;
|
|
1795
1805
|
continue;
|
|
1806
|
+
} else if (Codepoint.CODES.get('`') === c) {
|
|
1807
|
+
if (last < i) {
|
|
1808
|
+
res += replacement.substring(last, i);
|
|
1809
|
+
}
|
|
1810
|
+
res += this.substring(0, this.start(0));
|
|
1811
|
+
i++;
|
|
1812
|
+
last = i + 1;
|
|
1813
|
+
continue;
|
|
1814
|
+
} else if (Codepoint.CODES.get("'") === c) {
|
|
1815
|
+
if (last < i) {
|
|
1816
|
+
res += replacement.substring(last, i);
|
|
1817
|
+
}
|
|
1818
|
+
res += this.substring(this.end(0), this.matcherInputLength);
|
|
1819
|
+
i++;
|
|
1820
|
+
last = i + 1;
|
|
1821
|
+
continue;
|
|
1796
1822
|
} else if (Codepoint.CODES.get('1') <= c && c <= Codepoint.CODES.get('9')) {
|
|
1797
1823
|
let n = c - Codepoint.CODES.get('0');
|
|
1798
1824
|
if (last < i) {
|
|
@@ -1835,7 +1861,10 @@ class Matcher {
|
|
|
1835
1861
|
}
|
|
1836
1862
|
const groupName = replacement.substring(i + 1, j);
|
|
1837
1863
|
if (Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)) {
|
|
1838
|
-
|
|
1864
|
+
const groupVal = this.group(groupName);
|
|
1865
|
+
if (groupVal !== null) {
|
|
1866
|
+
res += groupVal;
|
|
1867
|
+
}
|
|
1839
1868
|
} else {
|
|
1840
1869
|
res += `$<${groupName}>`;
|
|
1841
1870
|
}
|
|
@@ -4337,13 +4366,6 @@ class Prog {
|
|
|
4337
4366
|
// start every program with a fail instruction, so we'll never want to point
|
|
4338
4367
|
// at its output link.
|
|
4339
4368
|
|
|
4340
|
-
next(l) {
|
|
4341
|
-
const i = this.inst[l >> 1];
|
|
4342
|
-
if ((l & 1) === 0) {
|
|
4343
|
-
return i.out;
|
|
4344
|
-
}
|
|
4345
|
-
return i.arg;
|
|
4346
|
-
}
|
|
4347
4369
|
patch(l, val) {
|
|
4348
4370
|
let head = l.head;
|
|
4349
4371
|
while (head !== 0) {
|
|
@@ -5673,6 +5695,7 @@ class Parser {
|
|
|
5673
5695
|
case Codepoint.CODES.get('6'):
|
|
5674
5696
|
case Codepoint.CODES.get('7'):
|
|
5675
5697
|
{
|
|
5698
|
+
// Single non-zero digit is a backreference; not supported
|
|
5676
5699
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
5677
5700
|
break;
|
|
5678
5701
|
}
|
|
@@ -5680,6 +5703,7 @@ class Parser {
|
|
|
5680
5703
|
// eslint-disable-next-line no-fallthrough
|
|
5681
5704
|
case Codepoint.CODES.get('0'):
|
|
5682
5705
|
{
|
|
5706
|
+
// Consume up to three octal digits; already have one.
|
|
5683
5707
|
let r = c - Codepoint.CODES.get('0');
|
|
5684
5708
|
for (let i = 1; i < 3; i++) {
|
|
5685
5709
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
@@ -7460,7 +7484,11 @@ class RE2 {
|
|
|
7460
7484
|
*/
|
|
7461
7485
|
matchWithGroup(input, start, end, anchor, ngroup) {
|
|
7462
7486
|
if (!(input instanceof MatcherInputBase)) {
|
|
7463
|
-
|
|
7487
|
+
if (Utils.isByteArray(input)) {
|
|
7488
|
+
input = MatcherInput.utf8(input);
|
|
7489
|
+
} else {
|
|
7490
|
+
input = MatcherInput.utf16(input);
|
|
7491
|
+
}
|
|
7464
7492
|
}
|
|
7465
7493
|
return this.matchMachineInput(input, start, end, anchor, ngroup);
|
|
7466
7494
|
}
|
|
@@ -7915,9 +7943,18 @@ class RE2 {
|
|
|
7915
7943
|
}
|
|
7916
7944
|
|
|
7917
7945
|
class RE2Set {
|
|
7946
|
+
/** @type {number} */
|
|
7918
7947
|
static UNANCHORED = RE2Flags.UNANCHORED;
|
|
7948
|
+
/** @type {number} */
|
|
7919
7949
|
static ANCHOR_START = RE2Flags.ANCHOR_START;
|
|
7950
|
+
/** @type {number} */
|
|
7920
7951
|
static ANCHOR_BOTH = RE2Flags.ANCHOR_BOTH;
|
|
7952
|
+
|
|
7953
|
+
/**
|
|
7954
|
+
* Constructs a new RE2Set with the specified anchor mode and flags.
|
|
7955
|
+
* @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
|
|
7956
|
+
* @param {number} [flags=0] - The public flags to apply to all patterns in the set.
|
|
7957
|
+
*/
|
|
7921
7958
|
constructor(anchor = RE2Set.UNANCHORED, flags = 0) {
|
|
7922
7959
|
this.anchor = anchor;
|
|
7923
7960
|
this.jsFlags = flags;
|
|
@@ -7934,6 +7971,14 @@ class RE2Set {
|
|
|
7934
7971
|
this.dfa = null;
|
|
7935
7972
|
this.dummyRe2 = null;
|
|
7936
7973
|
}
|
|
7974
|
+
|
|
7975
|
+
/**
|
|
7976
|
+
* Adds a new regular expression pattern to the set.
|
|
7977
|
+
* Patterns cannot be added after the set has been compiled.
|
|
7978
|
+
* @param {string} pattern - The regular expression pattern to add.
|
|
7979
|
+
* @returns {number} The integer index assigned to the added pattern.
|
|
7980
|
+
* @throws {RE2JSCompileException} If patterns are added after compilation.
|
|
7981
|
+
*/
|
|
7937
7982
|
add(pattern) {
|
|
7938
7983
|
if (this.prog) {
|
|
7939
7984
|
throw new RE2JSCompileException('Cannot add patterns after compile');
|
|
@@ -7952,6 +7997,12 @@ class RE2Set {
|
|
|
7952
7997
|
this.regexps.push(Simplify.simplify(re));
|
|
7953
7998
|
return this.regexps.length - 1;
|
|
7954
7999
|
}
|
|
8000
|
+
|
|
8001
|
+
/**
|
|
8002
|
+
* Compiles the added patterns into a single state machine.
|
|
8003
|
+
* This is automatically called on the first match if not called explicitly.
|
|
8004
|
+
* @returns {void}
|
|
8005
|
+
*/
|
|
7955
8006
|
compile() {
|
|
7956
8007
|
if (this.prog) return;
|
|
7957
8008
|
this.prog = Compiler.compileSet(this.regexps);
|
|
@@ -7964,9 +8015,15 @@ class RE2Set {
|
|
|
7964
8015
|
longest: false
|
|
7965
8016
|
};
|
|
7966
8017
|
}
|
|
8018
|
+
|
|
8019
|
+
/**
|
|
8020
|
+
* Matches the input against the compiled set of regular expressions.
|
|
8021
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
|
|
8022
|
+
* @returns {number[]} An array of indices representing the patterns that successfully matched the input.
|
|
8023
|
+
*/
|
|
7967
8024
|
match(input) {
|
|
7968
8025
|
if (!this.prog) this.compile();
|
|
7969
|
-
const machineInput =
|
|
8026
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
7970
8027
|
let internalAnchor = RE2Flags.UNANCHORED;
|
|
7971
8028
|
if (this.anchor === RE2Set.ANCHOR_START) {
|
|
7972
8029
|
internalAnchor = RE2Flags.ANCHOR_START;
|
|
@@ -7989,13 +8046,19 @@ class RE2Set {
|
|
|
7989
8046
|
* Transform JS regex string to RE2 regex string
|
|
7990
8047
|
*/
|
|
7991
8048
|
class TranslateRegExpString {
|
|
7992
|
-
static isUpperCaseAlpha(ch) {
|
|
7993
|
-
return 'A' <= ch && ch <= 'Z';
|
|
7994
|
-
}
|
|
7995
8049
|
static isHexadecimal(ch) {
|
|
7996
8050
|
return '0' <= ch && ch <= '9' || 'A' <= ch && ch <= 'F' || 'a' <= ch && ch <= 'f';
|
|
7997
8051
|
}
|
|
7998
8052
|
static translate(data) {
|
|
8053
|
+
let prefixFlags = '';
|
|
8054
|
+
if (data instanceof RegExp) {
|
|
8055
|
+
if (data.ignoreCase) prefixFlags += 'i';
|
|
8056
|
+
if (data.multiline) prefixFlags += 'm';
|
|
8057
|
+
if (data.dotAll) prefixFlags += 's';
|
|
8058
|
+
|
|
8059
|
+
// execution flags ('g', 'y') are safely ignored here.
|
|
8060
|
+
data = data.source;
|
|
8061
|
+
}
|
|
7999
8062
|
if (typeof data !== 'string') {
|
|
8000
8063
|
return data;
|
|
8001
8064
|
}
|
|
@@ -8006,6 +8069,7 @@ class TranslateRegExpString {
|
|
|
8006
8069
|
result = '(?:)';
|
|
8007
8070
|
changed = true;
|
|
8008
8071
|
}
|
|
8072
|
+
let inCharClass = false;
|
|
8009
8073
|
let i = 0;
|
|
8010
8074
|
while (i < size) {
|
|
8011
8075
|
let ch = data[i];
|
|
@@ -8023,54 +8087,160 @@ class TranslateRegExpString {
|
|
|
8023
8087
|
{
|
|
8024
8088
|
if (i + 2 < size) {
|
|
8025
8089
|
let nextCh = data[i + 2];
|
|
8026
|
-
|
|
8090
|
+
let code = nextCh.charCodeAt(0);
|
|
8091
|
+
if (code >= 65 && code <= 90 || code >= 97 && code <= 122) {
|
|
8092
|
+
let val = code % 32;
|
|
8027
8093
|
result += '\\x';
|
|
8028
|
-
result += (
|
|
8029
|
-
result += (
|
|
8094
|
+
result += (val >> 4).toString(16).toUpperCase();
|
|
8095
|
+
result += (val & 15).toString(16).toUpperCase();
|
|
8030
8096
|
i += 3;
|
|
8031
8097
|
changed = true;
|
|
8032
8098
|
continue;
|
|
8033
8099
|
}
|
|
8034
8100
|
}
|
|
8035
|
-
result += '
|
|
8101
|
+
result += 'c';
|
|
8036
8102
|
i += 2;
|
|
8103
|
+
changed = true;
|
|
8037
8104
|
continue;
|
|
8038
8105
|
}
|
|
8039
8106
|
case 'u':
|
|
8040
8107
|
{
|
|
8041
8108
|
if (i + 2 < size) {
|
|
8042
8109
|
let nextCh = data[i + 2];
|
|
8043
|
-
if (
|
|
8044
|
-
|
|
8045
|
-
i
|
|
8046
|
-
|
|
8047
|
-
|
|
8048
|
-
|
|
8110
|
+
if (nextCh === '{') {
|
|
8111
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8112
|
+
let j = i + 3;
|
|
8113
|
+
let hasHex = false;
|
|
8114
|
+
let closed = false;
|
|
8115
|
+
while (j < size) {
|
|
8116
|
+
const hexChar = data[j];
|
|
8117
|
+
if (hexChar === '}') {
|
|
8118
|
+
closed = true;
|
|
8119
|
+
break;
|
|
8120
|
+
}
|
|
8121
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8049
8122
|
break;
|
|
8050
8123
|
}
|
|
8051
|
-
|
|
8124
|
+
hasHex = true;
|
|
8125
|
+
j++;
|
|
8126
|
+
}
|
|
8127
|
+
if (closed && hasHex) {
|
|
8128
|
+
result += '\\x';
|
|
8129
|
+
i += 2;
|
|
8130
|
+
changed = true;
|
|
8131
|
+
continue;
|
|
8132
|
+
}
|
|
8133
|
+
} else if (i + 5 < size) {
|
|
8134
|
+
let isHex4 = true;
|
|
8135
|
+
for (let j = 0; j < 4; j++) {
|
|
8136
|
+
if (!TranslateRegExpString.isHexadecimal(data[i + 2 + j])) {
|
|
8137
|
+
isHex4 = false;
|
|
8138
|
+
break;
|
|
8139
|
+
}
|
|
8140
|
+
}
|
|
8141
|
+
if (isHex4) {
|
|
8142
|
+
result += '\\x{' + data.substring(i + 2, i + 6) + '}';
|
|
8143
|
+
i += 6;
|
|
8144
|
+
changed = true;
|
|
8145
|
+
continue;
|
|
8052
8146
|
}
|
|
8053
|
-
result += '}';
|
|
8054
|
-
changed = true;
|
|
8055
|
-
continue;
|
|
8056
|
-
} else if (nextCh === '{') {
|
|
8057
|
-
result += '\\x';
|
|
8058
|
-
i += 2;
|
|
8059
|
-
changed = true;
|
|
8060
|
-
continue;
|
|
8061
8147
|
}
|
|
8062
8148
|
}
|
|
8063
|
-
|
|
8149
|
+
|
|
8150
|
+
// Graceful degradation for invalid/unclosed \u sequences
|
|
8151
|
+
result += 'u';
|
|
8152
|
+
i += 2;
|
|
8153
|
+
changed = true;
|
|
8154
|
+
continue;
|
|
8155
|
+
}
|
|
8156
|
+
case 'x':
|
|
8157
|
+
{
|
|
8158
|
+
let isValidHex = false;
|
|
8159
|
+
if (i + 2 < size && data[i + 2] === '{') {
|
|
8160
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8161
|
+
let j = i + 3;
|
|
8162
|
+
let hasHex = false;
|
|
8163
|
+
let closed = false;
|
|
8164
|
+
while (j < size) {
|
|
8165
|
+
const hexChar = data[j];
|
|
8166
|
+
if (hexChar === '}') {
|
|
8167
|
+
closed = true;
|
|
8168
|
+
break;
|
|
8169
|
+
}
|
|
8170
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8171
|
+
break;
|
|
8172
|
+
}
|
|
8173
|
+
hasHex = true;
|
|
8174
|
+
j++;
|
|
8175
|
+
}
|
|
8176
|
+
if (closed && hasHex) {
|
|
8177
|
+
isValidHex = true;
|
|
8178
|
+
}
|
|
8179
|
+
} else if (i + 3 < size && TranslateRegExpString.isHexadecimal(data[i + 2]) && TranslateRegExpString.isHexadecimal(data[i + 3])) {
|
|
8180
|
+
isValidHex = true;
|
|
8181
|
+
}
|
|
8182
|
+
if (isValidHex) {
|
|
8183
|
+
result += '\\x';
|
|
8184
|
+
i += 2;
|
|
8185
|
+
} else {
|
|
8186
|
+
result += 'x';
|
|
8187
|
+
i += 2;
|
|
8188
|
+
changed = true;
|
|
8189
|
+
}
|
|
8190
|
+
continue;
|
|
8191
|
+
}
|
|
8192
|
+
// Whitelist of valid RE2/JS alphanumeric escapes
|
|
8193
|
+
case 'n':
|
|
8194
|
+
case 'r':
|
|
8195
|
+
case 't':
|
|
8196
|
+
case 'a':
|
|
8197
|
+
case 'f':
|
|
8198
|
+
case 'v':
|
|
8199
|
+
case 'd':
|
|
8200
|
+
case 'D':
|
|
8201
|
+
case 's':
|
|
8202
|
+
case 'S':
|
|
8203
|
+
case 'w':
|
|
8204
|
+
case 'W':
|
|
8205
|
+
case 'b':
|
|
8206
|
+
case 'B':
|
|
8207
|
+
case 'p':
|
|
8208
|
+
case 'P':
|
|
8209
|
+
case 'A':
|
|
8210
|
+
case 'z':
|
|
8211
|
+
case 'Q':
|
|
8212
|
+
case 'E':
|
|
8213
|
+
case '0':
|
|
8214
|
+
case '1':
|
|
8215
|
+
case '2':
|
|
8216
|
+
case '3':
|
|
8217
|
+
case '4':
|
|
8218
|
+
case '5':
|
|
8219
|
+
case '6':
|
|
8220
|
+
case '7':
|
|
8221
|
+
{
|
|
8222
|
+
result += '\\' + ch;
|
|
8064
8223
|
i += 2;
|
|
8065
8224
|
continue;
|
|
8066
8225
|
}
|
|
8067
8226
|
default:
|
|
8068
8227
|
{
|
|
8069
|
-
result += '\\';
|
|
8070
8228
|
let cp = data.codePointAt(i + 1);
|
|
8071
|
-
let
|
|
8072
|
-
|
|
8073
|
-
|
|
8229
|
+
let isAlphaNum = cp >= 48 && cp <= 57 || cp >= 65 && cp <= 90 || cp >= 97 && cp <= 122;
|
|
8230
|
+
if (isAlphaNum) {
|
|
8231
|
+
// Invalid JS alphanumeric escape sequence (e.g. \8, \9, \e, \K)
|
|
8232
|
+
// Gracefully degrade to the literal character to prevent RE2 syntax crashes
|
|
8233
|
+
let symSize = Utils.charCount(cp);
|
|
8234
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8235
|
+
i += symSize + 1;
|
|
8236
|
+
changed = true;
|
|
8237
|
+
} else {
|
|
8238
|
+
// Escaped symbol (e.g. \., \*, \])
|
|
8239
|
+
result += '\\';
|
|
8240
|
+
let symSize = Utils.charCount(cp);
|
|
8241
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8242
|
+
i += symSize + 1;
|
|
8243
|
+
}
|
|
8074
8244
|
continue;
|
|
8075
8245
|
}
|
|
8076
8246
|
}
|
|
@@ -8080,7 +8250,13 @@ class TranslateRegExpString {
|
|
|
8080
8250
|
i += 1;
|
|
8081
8251
|
changed = true;
|
|
8082
8252
|
continue;
|
|
8083
|
-
} else if (ch === '
|
|
8253
|
+
} else if (ch === '[') {
|
|
8254
|
+
// Track entry into a character class (protects syntax inside)
|
|
8255
|
+
inCharClass = true;
|
|
8256
|
+
} else if (ch === ']') {
|
|
8257
|
+
// Track exit of a character class
|
|
8258
|
+
inCharClass = false;
|
|
8259
|
+
} else if (!inCharClass && ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
|
|
8084
8260
|
if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
|
|
8085
8261
|
result += '(?P<';
|
|
8086
8262
|
i += 3;
|
|
@@ -8093,7 +8269,13 @@ class TranslateRegExpString {
|
|
|
8093
8269
|
result += data.substring(i, i + symSize);
|
|
8094
8270
|
i += symSize;
|
|
8095
8271
|
}
|
|
8096
|
-
|
|
8272
|
+
const finalResult = changed ? result : data;
|
|
8273
|
+
|
|
8274
|
+
// Append any extracted inline flags
|
|
8275
|
+
if (prefixFlags.length > 0) {
|
|
8276
|
+
return `(?${prefixFlags})${finalResult}`;
|
|
8277
|
+
}
|
|
8278
|
+
return finalResult;
|
|
8097
8279
|
}
|
|
8098
8280
|
}
|
|
8099
8281
|
|
|
@@ -8171,7 +8353,7 @@ class RE2JS {
|
|
|
8171
8353
|
* RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
|
|
8172
8354
|
* resulting regex is safe and properly formatted before compilation.
|
|
8173
8355
|
*
|
|
8174
|
-
* @param {string} expr - The regular expression string to be translated.
|
|
8356
|
+
* @param {string|RegExp} expr - The regular expression string to be translated.
|
|
8175
8357
|
* @returns {string} - The transformed regular expression string, ready for compilation.
|
|
8176
8358
|
*/
|
|
8177
8359
|
static translateRegExp(expr) {
|
|
@@ -8215,7 +8397,7 @@ class RE2JS {
|
|
|
8215
8397
|
* Matches a string against a regular expression.
|
|
8216
8398
|
*
|
|
8217
8399
|
* @param {string} regex the regular expression
|
|
8218
|
-
* @param {string|number[]} input the input
|
|
8400
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8219
8401
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8220
8402
|
* @throws RE2JSSyntaxException if the regular expression is malformed
|
|
8221
8403
|
*/
|
|
@@ -8282,7 +8464,7 @@ class RE2JS {
|
|
|
8282
8464
|
/**
|
|
8283
8465
|
* Matches a string against a regular expression.
|
|
8284
8466
|
*
|
|
8285
|
-
* @param {string|number[]} input the input
|
|
8467
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8286
8468
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8287
8469
|
*/
|
|
8288
8470
|
matches(input) {
|
|
@@ -8292,11 +8474,11 @@ class RE2JS {
|
|
|
8292
8474
|
/**
|
|
8293
8475
|
* Creates a new {@code Matcher} matching the pattern against the input.
|
|
8294
8476
|
*
|
|
8295
|
-
* @param {string|number[]} input the input string
|
|
8477
|
+
* @param {string|number[]|Uint8Array} input the input string
|
|
8296
8478
|
* @returns {Matcher}
|
|
8297
8479
|
*/
|
|
8298
8480
|
matcher(input) {
|
|
8299
|
-
if (
|
|
8481
|
+
if (Utils.isByteArray(input)) {
|
|
8300
8482
|
input = MatcherInput.utf8(input);
|
|
8301
8483
|
}
|
|
8302
8484
|
return new Matcher(this, input);
|
|
@@ -8308,11 +8490,11 @@ class RE2JS {
|
|
|
8308
8490
|
* a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
|
|
8309
8491
|
* and guarantees execution on the high-speed DFA engine whenever possible.
|
|
8310
8492
|
*
|
|
8311
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8493
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8312
8494
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8313
8495
|
*/
|
|
8314
8496
|
test(input) {
|
|
8315
|
-
if (
|
|
8497
|
+
if (Utils.isByteArray(input)) {
|
|
8316
8498
|
// Reuse the existing UTF-8 fast-path method
|
|
8317
8499
|
return this.re2Input.matchUTF8(input);
|
|
8318
8500
|
}
|
|
@@ -8327,11 +8509,11 @@ class RE2JS {
|
|
|
8327
8509
|
* faster because it does not request capture group data. By requesting 0 capture groups,
|
|
8328
8510
|
* it securely routes execution through the DFA fast-path.
|
|
8329
8511
|
*
|
|
8330
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8512
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8331
8513
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8332
8514
|
*/
|
|
8333
8515
|
testExact(input) {
|
|
8334
|
-
const machineInput =
|
|
8516
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
8335
8517
|
return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
|
|
8336
8518
|
}
|
|
8337
8519
|
|