re2js 2.2.2 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/build/index.cjs.cjs +256 -74
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +54 -79
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +256 -74
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +256 -74
- package/build/index.umd.js.map +1 -1
- package/package.json +4 -2
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.2.2
|
|
5
|
+
* @version v2.3.0
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -100,7 +100,7 @@
|
|
|
100
100
|
}
|
|
101
101
|
class Codepoint {
|
|
102
102
|
// codePointAt(0)
|
|
103
|
-
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
103
|
+
static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ["'", 39], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['`', 96], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
|
|
104
104
|
|
|
105
105
|
// convert unicode codepoint to upper case codepoint
|
|
106
106
|
// return the same codepoint if it cannot be converted (or the codepoint has no upper-case variant)
|
|
@@ -154,10 +154,6 @@
|
|
|
154
154
|
getStride(index) {
|
|
155
155
|
return this.isStride1 ? 1 : this.data[index * this.SIZE + 2];
|
|
156
156
|
}
|
|
157
|
-
get(index) {
|
|
158
|
-
const i = index * this.SIZE;
|
|
159
|
-
return [this.data[i], this.data[i + 1], this.getStride(index)];
|
|
160
|
-
}
|
|
161
157
|
get length() {
|
|
162
158
|
return this.data.length / this.SIZE;
|
|
163
159
|
}
|
|
@@ -654,6 +650,9 @@
|
|
|
654
650
|
static emptyInts() {
|
|
655
651
|
return [];
|
|
656
652
|
}
|
|
653
|
+
static isByteArray(input) {
|
|
654
|
+
return Array.isArray(input) || input instanceof Uint8Array;
|
|
655
|
+
}
|
|
657
656
|
|
|
658
657
|
// Returns true iff |c| is an ASCII letter or decimal digit.
|
|
659
658
|
static isalnum(c) {
|
|
@@ -955,7 +954,7 @@
|
|
|
955
954
|
* @returns {number[]}
|
|
956
955
|
*/
|
|
957
956
|
asBytes() {
|
|
958
|
-
return this.charSequence.toString()
|
|
957
|
+
return Utils.stringToUtf8ByteArray(this.charSequence.toString());
|
|
959
958
|
}
|
|
960
959
|
|
|
961
960
|
/**
|
|
@@ -980,7 +979,7 @@
|
|
|
980
979
|
* @returns {Utf8MatcherInput}
|
|
981
980
|
*/
|
|
982
981
|
static utf8(input) {
|
|
983
|
-
if (
|
|
982
|
+
if (Utils.isByteArray(input)) {
|
|
984
983
|
return new Utf8MatcherInput(input);
|
|
985
984
|
}
|
|
986
985
|
return new Utf8MatcherInput(Utils.stringToUtf8ByteArray(input));
|
|
@@ -1112,10 +1111,10 @@
|
|
|
1112
1111
|
if (start < this.start) {
|
|
1113
1112
|
start = this.start;
|
|
1114
1113
|
}
|
|
1115
|
-
r1 = this.step(start) >> 3;
|
|
1114
|
+
r1 = this.step(start - this.start) >> 3;
|
|
1116
1115
|
}
|
|
1117
1116
|
}
|
|
1118
|
-
const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
|
|
1117
|
+
const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
|
|
1119
1118
|
return Utils.emptyOpContext(r1, r2);
|
|
1120
1119
|
}
|
|
1121
1120
|
|
|
@@ -1197,14 +1196,17 @@
|
|
|
1197
1196
|
index(re2, pos) {
|
|
1198
1197
|
pos += this.start;
|
|
1199
1198
|
const i = this.charSequence.indexOf(re2.prefix, pos);
|
|
1200
|
-
|
|
1199
|
+
if (i < 0 || i > this.end - re2.prefix.length) {
|
|
1200
|
+
return -1;
|
|
1201
|
+
}
|
|
1202
|
+
return i - pos;
|
|
1201
1203
|
}
|
|
1202
1204
|
|
|
1203
1205
|
// Returns a bitmask of EMPTY_* flags.
|
|
1204
1206
|
context(pos) {
|
|
1205
1207
|
pos += this.start;
|
|
1206
|
-
const r1 = pos >
|
|
1207
|
-
const r2 = pos < this.
|
|
1208
|
+
const r1 = pos > this.start && pos <= this.end ? this.charSequence.codePointAt(pos - 1) : -1;
|
|
1209
|
+
const r2 = pos < this.end ? this.charSequence.codePointAt(pos) : -1;
|
|
1208
1210
|
return Utils.emptyOpContext(r1, r2);
|
|
1209
1211
|
}
|
|
1210
1212
|
prefixLength(re2) {
|
|
@@ -1333,6 +1335,11 @@
|
|
|
1333
1335
|
*
|
|
1334
1336
|
* @author rsc@google.com (Russ Cox)
|
|
1335
1337
|
*/
|
|
1338
|
+
|
|
1339
|
+
/**
|
|
1340
|
+
* @typedef {import('./index').RE2JS} RE2JS_Pattern
|
|
1341
|
+
*/
|
|
1342
|
+
|
|
1336
1343
|
class Matcher {
|
|
1337
1344
|
/**
|
|
1338
1345
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
@@ -1370,14 +1377,17 @@
|
|
|
1370
1377
|
}
|
|
1371
1378
|
/**
|
|
1372
1379
|
*
|
|
1373
|
-
* @param {
|
|
1374
|
-
* @param {
|
|
1380
|
+
* @param {RE2JS_Pattern} pattern
|
|
1381
|
+
* @param {Uint8Array|number[]|string} input
|
|
1375
1382
|
*/
|
|
1376
1383
|
constructor(pattern, input) {
|
|
1377
1384
|
if (pattern === null) {
|
|
1378
1385
|
throw new Error('pattern is null');
|
|
1379
1386
|
}
|
|
1380
|
-
|
|
1387
|
+
/**
|
|
1388
|
+
* The pattern being matched.
|
|
1389
|
+
* @type {RE2JS_Pattern}
|
|
1390
|
+
*/
|
|
1381
1391
|
this.patternInput = pattern;
|
|
1382
1392
|
const re2 = this.patternInput.re2();
|
|
1383
1393
|
// The number of submatches (groups) in the pattern.
|
|
@@ -1392,7 +1402,7 @@
|
|
|
1392
1402
|
this.numberOfInstructions = re2.numberOfInstructions();
|
|
1393
1403
|
if (input instanceof MatcherInputBase) {
|
|
1394
1404
|
this.resetMatcherInput(input);
|
|
1395
|
-
} else if (
|
|
1405
|
+
} else if (Utils.isByteArray(input)) {
|
|
1396
1406
|
this.resetMatcherInput(MatcherInput.utf8(input));
|
|
1397
1407
|
} else {
|
|
1398
1408
|
this.resetMatcherInput(MatcherInput.utf16(input));
|
|
@@ -1401,7 +1411,7 @@
|
|
|
1401
1411
|
|
|
1402
1412
|
/**
|
|
1403
1413
|
* Returns the {@code RE2JS} associated with this {@code Matcher}.
|
|
1404
|
-
* @returns {
|
|
1414
|
+
* @returns {RE2JS_Pattern}
|
|
1405
1415
|
*/
|
|
1406
1416
|
pattern() {
|
|
1407
1417
|
return this.patternInput;
|
|
@@ -1431,7 +1441,7 @@
|
|
|
1431
1441
|
|
|
1432
1442
|
/**
|
|
1433
1443
|
* Resets the {@code Matcher} and changes the input.
|
|
1434
|
-
* @param {
|
|
1444
|
+
* @param {import('./MatcherInput').MatcherInputBase} input
|
|
1435
1445
|
* @returns {Matcher} the {@code Matcher} itself, for chained method calls
|
|
1436
1446
|
*/
|
|
1437
1447
|
resetMatcherInput(input) {
|
|
@@ -1496,7 +1506,7 @@
|
|
|
1496
1506
|
/**
|
|
1497
1507
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
1498
1508
|
* @param {string|number} [group=0]
|
|
1499
|
-
* @returns {
|
|
1509
|
+
* @returns {string|null}
|
|
1500
1510
|
*/
|
|
1501
1511
|
group(group = 0) {
|
|
1502
1512
|
if (typeof group === 'string') {
|
|
@@ -1554,10 +1564,7 @@
|
|
|
1554
1564
|
if (group === 0 || this.hasGroups) {
|
|
1555
1565
|
return;
|
|
1556
1566
|
}
|
|
1557
|
-
|
|
1558
|
-
if (end > this.matcherInputLength) {
|
|
1559
|
-
end = this.matcherInputLength;
|
|
1560
|
-
}
|
|
1567
|
+
const end = this.matcherInputLength;
|
|
1561
1568
|
const res = this.patternInput.re2().matchMachineInput(this.matcherInput, this.groups[0], end, this.anchorFlag, 1 + this.patternGroupCount);
|
|
1562
1569
|
const ok = res[0];
|
|
1563
1570
|
if (!ok) {
|
|
@@ -1591,7 +1598,7 @@
|
|
|
1591
1598
|
* Matches the input against the pattern (unanchored), starting at a specified position. If there
|
|
1592
1599
|
* is a match, {@code find} sets the match state to describe it.
|
|
1593
1600
|
*
|
|
1594
|
-
* @param {number} [start=null] the input position where the search begins
|
|
1601
|
+
* @param {number|null} [start=null] the input position where the search begins
|
|
1595
1602
|
* @returns {boolean} if it finds a match
|
|
1596
1603
|
* @throws IndexOutOfBoundsException if start is not a valid input position
|
|
1597
1604
|
*/
|
|
@@ -1753,7 +1760,10 @@
|
|
|
1753
1760
|
throw new RE2JSGroupException("named capture group is missing trailing '}'");
|
|
1754
1761
|
}
|
|
1755
1762
|
const groupName = replacement.substring(i + 1, j);
|
|
1756
|
-
|
|
1763
|
+
const groupVal = this.group(groupName);
|
|
1764
|
+
if (groupVal !== null) {
|
|
1765
|
+
res += groupVal;
|
|
1766
|
+
}
|
|
1757
1767
|
last = j + 1;
|
|
1758
1768
|
i = j;
|
|
1759
1769
|
continue;
|
|
@@ -1799,6 +1809,22 @@
|
|
|
1799
1809
|
i++;
|
|
1800
1810
|
last = i + 1;
|
|
1801
1811
|
continue;
|
|
1812
|
+
} else if (Codepoint.CODES.get('`') === c) {
|
|
1813
|
+
if (last < i) {
|
|
1814
|
+
res += replacement.substring(last, i);
|
|
1815
|
+
}
|
|
1816
|
+
res += this.substring(0, this.start(0));
|
|
1817
|
+
i++;
|
|
1818
|
+
last = i + 1;
|
|
1819
|
+
continue;
|
|
1820
|
+
} else if (Codepoint.CODES.get("'") === c) {
|
|
1821
|
+
if (last < i) {
|
|
1822
|
+
res += replacement.substring(last, i);
|
|
1823
|
+
}
|
|
1824
|
+
res += this.substring(this.end(0), this.matcherInputLength);
|
|
1825
|
+
i++;
|
|
1826
|
+
last = i + 1;
|
|
1827
|
+
continue;
|
|
1802
1828
|
} else if (Codepoint.CODES.get('1') <= c && c <= Codepoint.CODES.get('9')) {
|
|
1803
1829
|
let n = c - Codepoint.CODES.get('0');
|
|
1804
1830
|
if (last < i) {
|
|
@@ -1841,7 +1867,10 @@
|
|
|
1841
1867
|
}
|
|
1842
1868
|
const groupName = replacement.substring(i + 1, j);
|
|
1843
1869
|
if (Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)) {
|
|
1844
|
-
|
|
1870
|
+
const groupVal = this.group(groupName);
|
|
1871
|
+
if (groupVal !== null) {
|
|
1872
|
+
res += groupVal;
|
|
1873
|
+
}
|
|
1845
1874
|
} else {
|
|
1846
1875
|
res += `$<${groupName}>`;
|
|
1847
1876
|
}
|
|
@@ -4343,13 +4372,6 @@
|
|
|
4343
4372
|
// start every program with a fail instruction, so we'll never want to point
|
|
4344
4373
|
// at its output link.
|
|
4345
4374
|
|
|
4346
|
-
next(l) {
|
|
4347
|
-
const i = this.inst[l >> 1];
|
|
4348
|
-
if ((l & 1) === 0) {
|
|
4349
|
-
return i.out;
|
|
4350
|
-
}
|
|
4351
|
-
return i.arg;
|
|
4352
|
-
}
|
|
4353
4375
|
patch(l, val) {
|
|
4354
4376
|
let head = l.head;
|
|
4355
4377
|
while (head !== 0) {
|
|
@@ -5679,6 +5701,7 @@
|
|
|
5679
5701
|
case Codepoint.CODES.get('6'):
|
|
5680
5702
|
case Codepoint.CODES.get('7'):
|
|
5681
5703
|
{
|
|
5704
|
+
// Single non-zero digit is a backreference; not supported
|
|
5682
5705
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
5683
5706
|
break;
|
|
5684
5707
|
}
|
|
@@ -5686,6 +5709,7 @@
|
|
|
5686
5709
|
// eslint-disable-next-line no-fallthrough
|
|
5687
5710
|
case Codepoint.CODES.get('0'):
|
|
5688
5711
|
{
|
|
5712
|
+
// Consume up to three octal digits; already have one.
|
|
5689
5713
|
let r = c - Codepoint.CODES.get('0');
|
|
5690
5714
|
for (let i = 1; i < 3; i++) {
|
|
5691
5715
|
if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
|
|
@@ -7466,7 +7490,11 @@
|
|
|
7466
7490
|
*/
|
|
7467
7491
|
matchWithGroup(input, start, end, anchor, ngroup) {
|
|
7468
7492
|
if (!(input instanceof MatcherInputBase)) {
|
|
7469
|
-
|
|
7493
|
+
if (Utils.isByteArray(input)) {
|
|
7494
|
+
input = MatcherInput.utf8(input);
|
|
7495
|
+
} else {
|
|
7496
|
+
input = MatcherInput.utf16(input);
|
|
7497
|
+
}
|
|
7470
7498
|
}
|
|
7471
7499
|
return this.matchMachineInput(input, start, end, anchor, ngroup);
|
|
7472
7500
|
}
|
|
@@ -7921,9 +7949,18 @@
|
|
|
7921
7949
|
}
|
|
7922
7950
|
|
|
7923
7951
|
class RE2Set {
|
|
7952
|
+
/** @type {number} */
|
|
7924
7953
|
static UNANCHORED = RE2Flags.UNANCHORED;
|
|
7954
|
+
/** @type {number} */
|
|
7925
7955
|
static ANCHOR_START = RE2Flags.ANCHOR_START;
|
|
7956
|
+
/** @type {number} */
|
|
7926
7957
|
static ANCHOR_BOTH = RE2Flags.ANCHOR_BOTH;
|
|
7958
|
+
|
|
7959
|
+
/**
|
|
7960
|
+
* Constructs a new RE2Set with the specified anchor mode and flags.
|
|
7961
|
+
* @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
|
|
7962
|
+
* @param {number} [flags=0] - The public flags to apply to all patterns in the set.
|
|
7963
|
+
*/
|
|
7927
7964
|
constructor(anchor = RE2Set.UNANCHORED, flags = 0) {
|
|
7928
7965
|
this.anchor = anchor;
|
|
7929
7966
|
this.jsFlags = flags;
|
|
@@ -7940,6 +7977,14 @@
|
|
|
7940
7977
|
this.dfa = null;
|
|
7941
7978
|
this.dummyRe2 = null;
|
|
7942
7979
|
}
|
|
7980
|
+
|
|
7981
|
+
/**
|
|
7982
|
+
* Adds a new regular expression pattern to the set.
|
|
7983
|
+
* Patterns cannot be added after the set has been compiled.
|
|
7984
|
+
* @param {string} pattern - The regular expression pattern to add.
|
|
7985
|
+
* @returns {number} The integer index assigned to the added pattern.
|
|
7986
|
+
* @throws {RE2JSCompileException} If patterns are added after compilation.
|
|
7987
|
+
*/
|
|
7943
7988
|
add(pattern) {
|
|
7944
7989
|
if (this.prog) {
|
|
7945
7990
|
throw new RE2JSCompileException('Cannot add patterns after compile');
|
|
@@ -7958,6 +8003,12 @@
|
|
|
7958
8003
|
this.regexps.push(Simplify.simplify(re));
|
|
7959
8004
|
return this.regexps.length - 1;
|
|
7960
8005
|
}
|
|
8006
|
+
|
|
8007
|
+
/**
|
|
8008
|
+
* Compiles the added patterns into a single state machine.
|
|
8009
|
+
* This is automatically called on the first match if not called explicitly.
|
|
8010
|
+
* @returns {void}
|
|
8011
|
+
*/
|
|
7961
8012
|
compile() {
|
|
7962
8013
|
if (this.prog) return;
|
|
7963
8014
|
this.prog = Compiler.compileSet(this.regexps);
|
|
@@ -7970,9 +8021,15 @@
|
|
|
7970
8021
|
longest: false
|
|
7971
8022
|
};
|
|
7972
8023
|
}
|
|
8024
|
+
|
|
8025
|
+
/**
|
|
8026
|
+
* Matches the input against the compiled set of regular expressions.
|
|
8027
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
|
|
8028
|
+
* @returns {number[]} An array of indices representing the patterns that successfully matched the input.
|
|
8029
|
+
*/
|
|
7973
8030
|
match(input) {
|
|
7974
8031
|
if (!this.prog) this.compile();
|
|
7975
|
-
const machineInput =
|
|
8032
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
7976
8033
|
let internalAnchor = RE2Flags.UNANCHORED;
|
|
7977
8034
|
if (this.anchor === RE2Set.ANCHOR_START) {
|
|
7978
8035
|
internalAnchor = RE2Flags.ANCHOR_START;
|
|
@@ -7995,13 +8052,19 @@
|
|
|
7995
8052
|
* Transform JS regex string to RE2 regex string
|
|
7996
8053
|
*/
|
|
7997
8054
|
class TranslateRegExpString {
|
|
7998
|
-
static isUpperCaseAlpha(ch) {
|
|
7999
|
-
return 'A' <= ch && ch <= 'Z';
|
|
8000
|
-
}
|
|
8001
8055
|
static isHexadecimal(ch) {
|
|
8002
8056
|
return '0' <= ch && ch <= '9' || 'A' <= ch && ch <= 'F' || 'a' <= ch && ch <= 'f';
|
|
8003
8057
|
}
|
|
8004
8058
|
static translate(data) {
|
|
8059
|
+
let prefixFlags = '';
|
|
8060
|
+
if (data instanceof RegExp) {
|
|
8061
|
+
if (data.ignoreCase) prefixFlags += 'i';
|
|
8062
|
+
if (data.multiline) prefixFlags += 'm';
|
|
8063
|
+
if (data.dotAll) prefixFlags += 's';
|
|
8064
|
+
|
|
8065
|
+
// execution flags ('g', 'y') are safely ignored here.
|
|
8066
|
+
data = data.source;
|
|
8067
|
+
}
|
|
8005
8068
|
if (typeof data !== 'string') {
|
|
8006
8069
|
return data;
|
|
8007
8070
|
}
|
|
@@ -8012,6 +8075,7 @@
|
|
|
8012
8075
|
result = '(?:)';
|
|
8013
8076
|
changed = true;
|
|
8014
8077
|
}
|
|
8078
|
+
let inCharClass = false;
|
|
8015
8079
|
let i = 0;
|
|
8016
8080
|
while (i < size) {
|
|
8017
8081
|
let ch = data[i];
|
|
@@ -8029,54 +8093,160 @@
|
|
|
8029
8093
|
{
|
|
8030
8094
|
if (i + 2 < size) {
|
|
8031
8095
|
let nextCh = data[i + 2];
|
|
8032
|
-
|
|
8096
|
+
let code = nextCh.charCodeAt(0);
|
|
8097
|
+
if (code >= 65 && code <= 90 || code >= 97 && code <= 122) {
|
|
8098
|
+
let val = code % 32;
|
|
8033
8099
|
result += '\\x';
|
|
8034
|
-
result += (
|
|
8035
|
-
result += (
|
|
8100
|
+
result += (val >> 4).toString(16).toUpperCase();
|
|
8101
|
+
result += (val & 15).toString(16).toUpperCase();
|
|
8036
8102
|
i += 3;
|
|
8037
8103
|
changed = true;
|
|
8038
8104
|
continue;
|
|
8039
8105
|
}
|
|
8040
8106
|
}
|
|
8041
|
-
result += '
|
|
8107
|
+
result += 'c';
|
|
8042
8108
|
i += 2;
|
|
8109
|
+
changed = true;
|
|
8043
8110
|
continue;
|
|
8044
8111
|
}
|
|
8045
8112
|
case 'u':
|
|
8046
8113
|
{
|
|
8047
8114
|
if (i + 2 < size) {
|
|
8048
8115
|
let nextCh = data[i + 2];
|
|
8049
|
-
if (
|
|
8050
|
-
|
|
8051
|
-
i
|
|
8052
|
-
|
|
8053
|
-
|
|
8054
|
-
|
|
8116
|
+
if (nextCh === '{') {
|
|
8117
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8118
|
+
let j = i + 3;
|
|
8119
|
+
let hasHex = false;
|
|
8120
|
+
let closed = false;
|
|
8121
|
+
while (j < size) {
|
|
8122
|
+
const hexChar = data[j];
|
|
8123
|
+
if (hexChar === '}') {
|
|
8124
|
+
closed = true;
|
|
8125
|
+
break;
|
|
8126
|
+
}
|
|
8127
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8055
8128
|
break;
|
|
8056
8129
|
}
|
|
8057
|
-
|
|
8130
|
+
hasHex = true;
|
|
8131
|
+
j++;
|
|
8132
|
+
}
|
|
8133
|
+
if (closed && hasHex) {
|
|
8134
|
+
result += '\\x';
|
|
8135
|
+
i += 2;
|
|
8136
|
+
changed = true;
|
|
8137
|
+
continue;
|
|
8138
|
+
}
|
|
8139
|
+
} else if (i + 5 < size) {
|
|
8140
|
+
let isHex4 = true;
|
|
8141
|
+
for (let j = 0; j < 4; j++) {
|
|
8142
|
+
if (!TranslateRegExpString.isHexadecimal(data[i + 2 + j])) {
|
|
8143
|
+
isHex4 = false;
|
|
8144
|
+
break;
|
|
8145
|
+
}
|
|
8146
|
+
}
|
|
8147
|
+
if (isHex4) {
|
|
8148
|
+
result += '\\x{' + data.substring(i + 2, i + 6) + '}';
|
|
8149
|
+
i += 6;
|
|
8150
|
+
changed = true;
|
|
8151
|
+
continue;
|
|
8058
8152
|
}
|
|
8059
|
-
result += '}';
|
|
8060
|
-
changed = true;
|
|
8061
|
-
continue;
|
|
8062
|
-
} else if (nextCh === '{') {
|
|
8063
|
-
result += '\\x';
|
|
8064
|
-
i += 2;
|
|
8065
|
-
changed = true;
|
|
8066
|
-
continue;
|
|
8067
8153
|
}
|
|
8068
8154
|
}
|
|
8069
|
-
|
|
8155
|
+
|
|
8156
|
+
// Graceful degradation for invalid/unclosed \u sequences
|
|
8157
|
+
result += 'u';
|
|
8158
|
+
i += 2;
|
|
8159
|
+
changed = true;
|
|
8160
|
+
continue;
|
|
8161
|
+
}
|
|
8162
|
+
case 'x':
|
|
8163
|
+
{
|
|
8164
|
+
let isValidHex = false;
|
|
8165
|
+
if (i + 2 < size && data[i + 2] === '{') {
|
|
8166
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8167
|
+
let j = i + 3;
|
|
8168
|
+
let hasHex = false;
|
|
8169
|
+
let closed = false;
|
|
8170
|
+
while (j < size) {
|
|
8171
|
+
const hexChar = data[j];
|
|
8172
|
+
if (hexChar === '}') {
|
|
8173
|
+
closed = true;
|
|
8174
|
+
break;
|
|
8175
|
+
}
|
|
8176
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8177
|
+
break;
|
|
8178
|
+
}
|
|
8179
|
+
hasHex = true;
|
|
8180
|
+
j++;
|
|
8181
|
+
}
|
|
8182
|
+
if (closed && hasHex) {
|
|
8183
|
+
isValidHex = true;
|
|
8184
|
+
}
|
|
8185
|
+
} else if (i + 3 < size && TranslateRegExpString.isHexadecimal(data[i + 2]) && TranslateRegExpString.isHexadecimal(data[i + 3])) {
|
|
8186
|
+
isValidHex = true;
|
|
8187
|
+
}
|
|
8188
|
+
if (isValidHex) {
|
|
8189
|
+
result += '\\x';
|
|
8190
|
+
i += 2;
|
|
8191
|
+
} else {
|
|
8192
|
+
result += 'x';
|
|
8193
|
+
i += 2;
|
|
8194
|
+
changed = true;
|
|
8195
|
+
}
|
|
8196
|
+
continue;
|
|
8197
|
+
}
|
|
8198
|
+
// Whitelist of valid RE2/JS alphanumeric escapes
|
|
8199
|
+
case 'n':
|
|
8200
|
+
case 'r':
|
|
8201
|
+
case 't':
|
|
8202
|
+
case 'a':
|
|
8203
|
+
case 'f':
|
|
8204
|
+
case 'v':
|
|
8205
|
+
case 'd':
|
|
8206
|
+
case 'D':
|
|
8207
|
+
case 's':
|
|
8208
|
+
case 'S':
|
|
8209
|
+
case 'w':
|
|
8210
|
+
case 'W':
|
|
8211
|
+
case 'b':
|
|
8212
|
+
case 'B':
|
|
8213
|
+
case 'p':
|
|
8214
|
+
case 'P':
|
|
8215
|
+
case 'A':
|
|
8216
|
+
case 'z':
|
|
8217
|
+
case 'Q':
|
|
8218
|
+
case 'E':
|
|
8219
|
+
case '0':
|
|
8220
|
+
case '1':
|
|
8221
|
+
case '2':
|
|
8222
|
+
case '3':
|
|
8223
|
+
case '4':
|
|
8224
|
+
case '5':
|
|
8225
|
+
case '6':
|
|
8226
|
+
case '7':
|
|
8227
|
+
{
|
|
8228
|
+
result += '\\' + ch;
|
|
8070
8229
|
i += 2;
|
|
8071
8230
|
continue;
|
|
8072
8231
|
}
|
|
8073
8232
|
default:
|
|
8074
8233
|
{
|
|
8075
|
-
result += '\\';
|
|
8076
8234
|
let cp = data.codePointAt(i + 1);
|
|
8077
|
-
let
|
|
8078
|
-
|
|
8079
|
-
|
|
8235
|
+
let isAlphaNum = cp >= 48 && cp <= 57 || cp >= 65 && cp <= 90 || cp >= 97 && cp <= 122;
|
|
8236
|
+
if (isAlphaNum) {
|
|
8237
|
+
// Invalid JS alphanumeric escape sequence (e.g. \8, \9, \e, \K)
|
|
8238
|
+
// Gracefully degrade to the literal character to prevent RE2 syntax crashes
|
|
8239
|
+
let symSize = Utils.charCount(cp);
|
|
8240
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8241
|
+
i += symSize + 1;
|
|
8242
|
+
changed = true;
|
|
8243
|
+
} else {
|
|
8244
|
+
// Escaped symbol (e.g. \., \*, \])
|
|
8245
|
+
result += '\\';
|
|
8246
|
+
let symSize = Utils.charCount(cp);
|
|
8247
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8248
|
+
i += symSize + 1;
|
|
8249
|
+
}
|
|
8080
8250
|
continue;
|
|
8081
8251
|
}
|
|
8082
8252
|
}
|
|
@@ -8086,7 +8256,13 @@
|
|
|
8086
8256
|
i += 1;
|
|
8087
8257
|
changed = true;
|
|
8088
8258
|
continue;
|
|
8089
|
-
} else if (ch === '
|
|
8259
|
+
} else if (ch === '[') {
|
|
8260
|
+
// Track entry into a character class (protects syntax inside)
|
|
8261
|
+
inCharClass = true;
|
|
8262
|
+
} else if (ch === ']') {
|
|
8263
|
+
// Track exit of a character class
|
|
8264
|
+
inCharClass = false;
|
|
8265
|
+
} else if (!inCharClass && ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
|
|
8090
8266
|
if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
|
|
8091
8267
|
result += '(?P<';
|
|
8092
8268
|
i += 3;
|
|
@@ -8099,7 +8275,13 @@
|
|
|
8099
8275
|
result += data.substring(i, i + symSize);
|
|
8100
8276
|
i += symSize;
|
|
8101
8277
|
}
|
|
8102
|
-
|
|
8278
|
+
const finalResult = changed ? result : data;
|
|
8279
|
+
|
|
8280
|
+
// Prepend any extracted inline flags as a leading (?flags) group
|
|
8281
|
+
if (prefixFlags.length > 0) {
|
|
8282
|
+
return `(?${prefixFlags})${finalResult}`;
|
|
8283
|
+
}
|
|
8284
|
+
return finalResult;
|
|
8103
8285
|
}
|
|
8104
8286
|
}
|
|
8105
8287
|
|
|
@@ -8177,7 +8359,7 @@
|
|
|
8177
8359
|
* RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
|
|
8178
8360
|
* resulting regex is safe and properly formatted before compilation.
|
|
8179
8361
|
*
|
|
8180
|
-
* @param {string} expr - The regular expression string to be translated.
|
|
8362
|
+
* @param {string|RegExp} expr - The regular expression string to be translated.
|
|
8181
8363
|
* @returns {string} - The transformed regular expression string, ready for compilation.
|
|
8182
8364
|
*/
|
|
8183
8365
|
static translateRegExp(expr) {
|
|
@@ -8221,7 +8403,7 @@
|
|
|
8221
8403
|
* Matches a string against a regular expression.
|
|
8222
8404
|
*
|
|
8223
8405
|
* @param {string} regex the regular expression
|
|
8224
|
-
* @param {string|number[]} input the input
|
|
8406
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8225
8407
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8226
8408
|
* @throws RE2JSSyntaxException if the regular expression is malformed
|
|
8227
8409
|
*/
|
|
@@ -8288,7 +8470,7 @@
|
|
|
8288
8470
|
/**
|
|
8289
8471
|
* Matches a string against a regular expression.
|
|
8290
8472
|
*
|
|
8291
|
-
* @param {string|number[]} input the input
|
|
8473
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8292
8474
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8293
8475
|
*/
|
|
8294
8476
|
matches(input) {
|
|
@@ -8298,11 +8480,11 @@
|
|
|
8298
8480
|
/**
|
|
8299
8481
|
* Creates a new {@code Matcher} matching the pattern against the input.
|
|
8300
8482
|
*
|
|
8301
|
-
* @param {string|number[]} input the input string
|
|
8483
|
+
* @param {string|number[]|Uint8Array} input the input string
|
|
8302
8484
|
* @returns {Matcher}
|
|
8303
8485
|
*/
|
|
8304
8486
|
matcher(input) {
|
|
8305
|
-
if (
|
|
8487
|
+
if (Utils.isByteArray(input)) {
|
|
8306
8488
|
input = MatcherInput.utf8(input);
|
|
8307
8489
|
}
|
|
8308
8490
|
return new Matcher(this, input);
|
|
@@ -8314,11 +8496,11 @@
|
|
|
8314
8496
|
* a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
|
|
8315
8497
|
* and guarantees execution on the high-speed DFA engine whenever possible.
|
|
8316
8498
|
*
|
|
8317
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8499
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8318
8500
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8319
8501
|
*/
|
|
8320
8502
|
test(input) {
|
|
8321
|
-
if (
|
|
8503
|
+
if (Utils.isByteArray(input)) {
|
|
8322
8504
|
// Reuse the existing UTF-8 fast-path method
|
|
8323
8505
|
return this.re2Input.matchUTF8(input);
|
|
8324
8506
|
}
|
|
@@ -8333,11 +8515,11 @@
|
|
|
8333
8515
|
* faster because it does not request capture group data. By requesting 0 capture groups,
|
|
8334
8516
|
* it securely routes execution through the DFA fast-path.
|
|
8335
8517
|
*
|
|
8336
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8518
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8337
8519
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8338
8520
|
*/
|
|
8339
8521
|
testExact(input) {
|
|
8340
|
-
const machineInput =
|
|
8522
|
+
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
8341
8523
|
return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
|
|
8342
8524
|
}
|
|
8343
8525
|
|