re2js 2.2.3 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/build/index.cjs.cjs +184 -27
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +54 -78
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +184 -27
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +184 -27
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.2.3
|
|
5
|
+
* @version v2.3.0
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -1335,6 +1335,11 @@
|
|
|
1335
1335
|
*
|
|
1336
1336
|
* @author rsc@google.com (Russ Cox)
|
|
1337
1337
|
*/
|
|
1338
|
+
|
|
1339
|
+
/**
|
|
1340
|
+
* @typedef {import('./index').RE2JS} RE2JS_Pattern
|
|
1341
|
+
*/
|
|
1342
|
+
|
|
1338
1343
|
class Matcher {
|
|
1339
1344
|
/**
|
|
1340
1345
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
@@ -1372,14 +1377,17 @@
|
|
|
1372
1377
|
}
|
|
1373
1378
|
/**
|
|
1374
1379
|
*
|
|
1375
|
-
* @param {
|
|
1376
|
-
* @param {
|
|
1380
|
+
* @param {RE2JS_Pattern} pattern
|
|
1381
|
+
* @param {Uint8Array|number[]|string} input
|
|
1377
1382
|
*/
|
|
1378
1383
|
constructor(pattern, input) {
|
|
1379
1384
|
if (pattern === null) {
|
|
1380
1385
|
throw new Error('pattern is null');
|
|
1381
1386
|
}
|
|
1382
|
-
|
|
1387
|
+
/**
|
|
1388
|
+
* The pattern being matched.
|
|
1389
|
+
* @type {RE2JS_Pattern}
|
|
1390
|
+
*/
|
|
1383
1391
|
this.patternInput = pattern;
|
|
1384
1392
|
const re2 = this.patternInput.re2();
|
|
1385
1393
|
// The number of submatches (groups) in the pattern.
|
|
@@ -1403,7 +1411,7 @@
|
|
|
1403
1411
|
|
|
1404
1412
|
/**
|
|
1405
1413
|
* Returns the {@code RE2JS} associated with this {@code Matcher}.
|
|
1406
|
-
* @returns {
|
|
1414
|
+
* @returns {RE2JS_Pattern}
|
|
1407
1415
|
*/
|
|
1408
1416
|
pattern() {
|
|
1409
1417
|
return this.patternInput;
|
|
@@ -1433,7 +1441,7 @@
|
|
|
1433
1441
|
|
|
1434
1442
|
/**
|
|
1435
1443
|
* Resets the {@code Matcher} and changes the input.
|
|
1436
|
-
* @param {
|
|
1444
|
+
* @param {import('./MatcherInput').MatcherInputBase} input
|
|
1437
1445
|
* @returns {Matcher} the {@code Matcher} itself, for chained method calls
|
|
1438
1446
|
*/
|
|
1439
1447
|
resetMatcherInput(input) {
|
|
@@ -1498,7 +1506,7 @@
|
|
|
1498
1506
|
/**
|
|
1499
1507
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
1500
1508
|
* @param {string|number} [group=0]
|
|
1501
|
-
* @returns {
|
|
1509
|
+
* @returns {string|null}
|
|
1502
1510
|
*/
|
|
1503
1511
|
group(group = 0) {
|
|
1504
1512
|
if (typeof group === 'string') {
|
|
@@ -1590,7 +1598,7 @@
|
|
|
1590
1598
|
* Matches the input against the pattern (unanchored), starting at a specified position. If there
|
|
1591
1599
|
* is a match, {@code find} sets the match state to describe it.
|
|
1592
1600
|
*
|
|
1593
|
-
* @param {number} [start=null] the input position where the search begins
|
|
1601
|
+
* @param {number|null} [start=null] the input position where the search begins
|
|
1594
1602
|
* @returns {boolean} if it finds a match
|
|
1595
1603
|
* @throws IndexOutOfBoundsException if start is not a valid input position
|
|
1596
1604
|
*/
|
|
@@ -7941,9 +7949,18 @@
|
|
|
7941
7949
|
}
|
|
7942
7950
|
|
|
7943
7951
|
class RE2Set {
|
|
7952
|
+
/** @type {number} */
|
|
7944
7953
|
static UNANCHORED = RE2Flags.UNANCHORED;
|
|
7954
|
+
/** @type {number} */
|
|
7945
7955
|
static ANCHOR_START = RE2Flags.ANCHOR_START;
|
|
7956
|
+
/** @type {number} */
|
|
7946
7957
|
static ANCHOR_BOTH = RE2Flags.ANCHOR_BOTH;
|
|
7958
|
+
|
|
7959
|
+
/**
|
|
7960
|
+
* Constructs a new RE2Set with the specified anchor mode and flags.
|
|
7961
|
+
* @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
|
|
7962
|
+
* @param {number} [flags=0] - The public flags to apply to all patterns in the set.
|
|
7963
|
+
*/
|
|
7947
7964
|
constructor(anchor = RE2Set.UNANCHORED, flags = 0) {
|
|
7948
7965
|
this.anchor = anchor;
|
|
7949
7966
|
this.jsFlags = flags;
|
|
@@ -7960,6 +7977,14 @@
|
|
|
7960
7977
|
this.dfa = null;
|
|
7961
7978
|
this.dummyRe2 = null;
|
|
7962
7979
|
}
|
|
7980
|
+
|
|
7981
|
+
/**
|
|
7982
|
+
* Adds a new regular expression pattern to the set.
|
|
7983
|
+
* Patterns cannot be added after the set has been compiled.
|
|
7984
|
+
* @param {string} pattern - The regular expression pattern to add.
|
|
7985
|
+
* @returns {number} The integer index assigned to the added pattern.
|
|
7986
|
+
* @throws {RE2JSCompileException} If patterns are added after compilation.
|
|
7987
|
+
*/
|
|
7963
7988
|
add(pattern) {
|
|
7964
7989
|
if (this.prog) {
|
|
7965
7990
|
throw new RE2JSCompileException('Cannot add patterns after compile');
|
|
@@ -7978,6 +8003,12 @@
|
|
|
7978
8003
|
this.regexps.push(Simplify.simplify(re));
|
|
7979
8004
|
return this.regexps.length - 1;
|
|
7980
8005
|
}
|
|
8006
|
+
|
|
8007
|
+
/**
|
|
8008
|
+
* Compiles the added patterns into a single state machine.
|
|
8009
|
+
* This is automatically called on the first match if not called explicitly.
|
|
8010
|
+
* @returns {void}
|
|
8011
|
+
*/
|
|
7981
8012
|
compile() {
|
|
7982
8013
|
if (this.prog) return;
|
|
7983
8014
|
this.prog = Compiler.compileSet(this.regexps);
|
|
@@ -7990,6 +8021,12 @@
|
|
|
7990
8021
|
longest: false
|
|
7991
8022
|
};
|
|
7992
8023
|
}
|
|
8024
|
+
|
|
8025
|
+
/**
|
|
8026
|
+
* Matches the input against the compiled set of regular expressions.
|
|
8027
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
|
|
8028
|
+
* @returns {number[]} An array of indices representing the patterns that successfully matched the input.
|
|
8029
|
+
*/
|
|
7993
8030
|
match(input) {
|
|
7994
8031
|
if (!this.prog) this.compile();
|
|
7995
8032
|
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
@@ -8015,13 +8052,19 @@
|
|
|
8015
8052
|
* Transform JS regex string to RE2 regex string
|
|
8016
8053
|
*/
|
|
8017
8054
|
class TranslateRegExpString {
|
|
8018
|
-
static isUpperCaseAlpha(ch) {
|
|
8019
|
-
return 'A' <= ch && ch <= 'Z';
|
|
8020
|
-
}
|
|
8021
8055
|
static isHexadecimal(ch) {
|
|
8022
8056
|
return '0' <= ch && ch <= '9' || 'A' <= ch && ch <= 'F' || 'a' <= ch && ch <= 'f';
|
|
8023
8057
|
}
|
|
8024
8058
|
static translate(data) {
|
|
8059
|
+
let prefixFlags = '';
|
|
8060
|
+
if (data instanceof RegExp) {
|
|
8061
|
+
if (data.ignoreCase) prefixFlags += 'i';
|
|
8062
|
+
if (data.multiline) prefixFlags += 'm';
|
|
8063
|
+
if (data.dotAll) prefixFlags += 's';
|
|
8064
|
+
|
|
8065
|
+
// execution flags ('g', 'y') are safely ignored here.
|
|
8066
|
+
data = data.source;
|
|
8067
|
+
}
|
|
8025
8068
|
if (typeof data !== 'string') {
|
|
8026
8069
|
return data;
|
|
8027
8070
|
}
|
|
@@ -8032,6 +8075,7 @@
|
|
|
8032
8075
|
result = '(?:)';
|
|
8033
8076
|
changed = true;
|
|
8034
8077
|
}
|
|
8078
|
+
let inCharClass = false;
|
|
8035
8079
|
let i = 0;
|
|
8036
8080
|
while (i < size) {
|
|
8037
8081
|
let ch = data[i];
|
|
@@ -8070,10 +8114,28 @@
|
|
|
8070
8114
|
if (i + 2 < size) {
|
|
8071
8115
|
let nextCh = data[i + 2];
|
|
8072
8116
|
if (nextCh === '{') {
|
|
8073
|
-
|
|
8074
|
-
i
|
|
8075
|
-
|
|
8076
|
-
|
|
8117
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8118
|
+
let j = i + 3;
|
|
8119
|
+
let hasHex = false;
|
|
8120
|
+
let closed = false;
|
|
8121
|
+
while (j < size) {
|
|
8122
|
+
const hexChar = data[j];
|
|
8123
|
+
if (hexChar === '}') {
|
|
8124
|
+
closed = true;
|
|
8125
|
+
break;
|
|
8126
|
+
}
|
|
8127
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8128
|
+
break;
|
|
8129
|
+
}
|
|
8130
|
+
hasHex = true;
|
|
8131
|
+
j++;
|
|
8132
|
+
}
|
|
8133
|
+
if (closed && hasHex) {
|
|
8134
|
+
result += '\\x';
|
|
8135
|
+
i += 2;
|
|
8136
|
+
changed = true;
|
|
8137
|
+
continue;
|
|
8138
|
+
}
|
|
8077
8139
|
} else if (i + 5 < size) {
|
|
8078
8140
|
let isHex4 = true;
|
|
8079
8141
|
for (let j = 0; j < 4; j++) {
|
|
@@ -8090,18 +8152,101 @@
|
|
|
8090
8152
|
}
|
|
8091
8153
|
}
|
|
8092
8154
|
}
|
|
8155
|
+
|
|
8156
|
+
// Graceful degradation for invalid/unclosed \u sequences
|
|
8093
8157
|
result += 'u';
|
|
8094
8158
|
i += 2;
|
|
8095
8159
|
changed = true;
|
|
8096
8160
|
continue;
|
|
8097
8161
|
}
|
|
8162
|
+
case 'x':
|
|
8163
|
+
{
|
|
8164
|
+
let isValidHex = false;
|
|
8165
|
+
if (i + 2 < size && data[i + 2] === '{') {
|
|
8166
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8167
|
+
let j = i + 3;
|
|
8168
|
+
let hasHex = false;
|
|
8169
|
+
let closed = false;
|
|
8170
|
+
while (j < size) {
|
|
8171
|
+
const hexChar = data[j];
|
|
8172
|
+
if (hexChar === '}') {
|
|
8173
|
+
closed = true;
|
|
8174
|
+
break;
|
|
8175
|
+
}
|
|
8176
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8177
|
+
break;
|
|
8178
|
+
}
|
|
8179
|
+
hasHex = true;
|
|
8180
|
+
j++;
|
|
8181
|
+
}
|
|
8182
|
+
if (closed && hasHex) {
|
|
8183
|
+
isValidHex = true;
|
|
8184
|
+
}
|
|
8185
|
+
} else if (i + 3 < size && TranslateRegExpString.isHexadecimal(data[i + 2]) && TranslateRegExpString.isHexadecimal(data[i + 3])) {
|
|
8186
|
+
isValidHex = true;
|
|
8187
|
+
}
|
|
8188
|
+
if (isValidHex) {
|
|
8189
|
+
result += '\\x';
|
|
8190
|
+
i += 2;
|
|
8191
|
+
} else {
|
|
8192
|
+
result += 'x';
|
|
8193
|
+
i += 2;
|
|
8194
|
+
changed = true;
|
|
8195
|
+
}
|
|
8196
|
+
continue;
|
|
8197
|
+
}
|
|
8198
|
+
// Whitelist of valid RE2/JS alphanumeric escapes
|
|
8199
|
+
case 'n':
|
|
8200
|
+
case 'r':
|
|
8201
|
+
case 't':
|
|
8202
|
+
case 'a':
|
|
8203
|
+
case 'f':
|
|
8204
|
+
case 'v':
|
|
8205
|
+
case 'd':
|
|
8206
|
+
case 'D':
|
|
8207
|
+
case 's':
|
|
8208
|
+
case 'S':
|
|
8209
|
+
case 'w':
|
|
8210
|
+
case 'W':
|
|
8211
|
+
case 'b':
|
|
8212
|
+
case 'B':
|
|
8213
|
+
case 'p':
|
|
8214
|
+
case 'P':
|
|
8215
|
+
case 'A':
|
|
8216
|
+
case 'z':
|
|
8217
|
+
case 'Q':
|
|
8218
|
+
case 'E':
|
|
8219
|
+
case '0':
|
|
8220
|
+
case '1':
|
|
8221
|
+
case '2':
|
|
8222
|
+
case '3':
|
|
8223
|
+
case '4':
|
|
8224
|
+
case '5':
|
|
8225
|
+
case '6':
|
|
8226
|
+
case '7':
|
|
8227
|
+
{
|
|
8228
|
+
result += '\\' + ch;
|
|
8229
|
+
i += 2;
|
|
8230
|
+
continue;
|
|
8231
|
+
}
|
|
8098
8232
|
default:
|
|
8099
8233
|
{
|
|
8100
|
-
result += '\\';
|
|
8101
8234
|
let cp = data.codePointAt(i + 1);
|
|
8102
|
-
let
|
|
8103
|
-
|
|
8104
|
-
|
|
8235
|
+
let isAlphaNum = cp >= 48 && cp <= 57 || cp >= 65 && cp <= 90 || cp >= 97 && cp <= 122;
|
|
8236
|
+
if (isAlphaNum) {
|
|
8237
|
+
// Invalid JS alphanumeric escape sequence (e.g. \8, \9, \e, \K)
|
|
8238
|
+
// Gracefully degrade to the literal character to prevent RE2 syntax crashes
|
|
8239
|
+
let symSize = Utils.charCount(cp);
|
|
8240
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8241
|
+
i += symSize + 1;
|
|
8242
|
+
changed = true;
|
|
8243
|
+
} else {
|
|
8244
|
+
// Escaped symbol (e.g. \., \*, \])
|
|
8245
|
+
result += '\\';
|
|
8246
|
+
let symSize = Utils.charCount(cp);
|
|
8247
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8248
|
+
i += symSize + 1;
|
|
8249
|
+
}
|
|
8105
8250
|
continue;
|
|
8106
8251
|
}
|
|
8107
8252
|
}
|
|
@@ -8111,7 +8256,13 @@
|
|
|
8111
8256
|
i += 1;
|
|
8112
8257
|
changed = true;
|
|
8113
8258
|
continue;
|
|
8114
|
-
} else if (ch === '
|
|
8259
|
+
} else if (ch === '[') {
|
|
8260
|
+
// Track entry into a character class (protects syntax inside)
|
|
8261
|
+
inCharClass = true;
|
|
8262
|
+
} else if (ch === ']') {
|
|
8263
|
+
// Track exit of a character class
|
|
8264
|
+
inCharClass = false;
|
|
8265
|
+
} else if (!inCharClass && ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
|
|
8115
8266
|
if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
|
|
8116
8267
|
result += '(?P<';
|
|
8117
8268
|
i += 3;
|
|
@@ -8124,7 +8275,13 @@
|
|
|
8124
8275
|
result += data.substring(i, i + symSize);
|
|
8125
8276
|
i += symSize;
|
|
8126
8277
|
}
|
|
8127
|
-
|
|
8278
|
+
const finalResult = changed ? result : data;
|
|
8279
|
+
|
|
8280
|
+
// Append any extracted inline flags
|
|
8281
|
+
if (prefixFlags.length > 0) {
|
|
8282
|
+
return `(?${prefixFlags})${finalResult}`;
|
|
8283
|
+
}
|
|
8284
|
+
return finalResult;
|
|
8128
8285
|
}
|
|
8129
8286
|
}
|
|
8130
8287
|
|
|
@@ -8202,7 +8359,7 @@
|
|
|
8202
8359
|
* RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
|
|
8203
8360
|
* resulting regex is safe and properly formatted before compilation.
|
|
8204
8361
|
*
|
|
8205
|
-
* @param {string} expr - The regular expression string to be translated.
|
|
8362
|
+
* @param {string|RegExp} expr - The regular expression string to be translated.
|
|
8206
8363
|
* @returns {string} - The transformed regular expression string, ready for compilation.
|
|
8207
8364
|
*/
|
|
8208
8365
|
static translateRegExp(expr) {
|
|
@@ -8246,7 +8403,7 @@
|
|
|
8246
8403
|
* Matches a string against a regular expression.
|
|
8247
8404
|
*
|
|
8248
8405
|
* @param {string} regex the regular expression
|
|
8249
|
-
* @param {string|number[]} input the input
|
|
8406
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8250
8407
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8251
8408
|
* @throws RE2JSSyntaxException if the regular expression is malformed
|
|
8252
8409
|
*/
|
|
@@ -8313,7 +8470,7 @@
|
|
|
8313
8470
|
/**
|
|
8314
8471
|
* Matches a string against a regular expression.
|
|
8315
8472
|
*
|
|
8316
|
-
* @param {string|number[]} input the input
|
|
8473
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8317
8474
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8318
8475
|
*/
|
|
8319
8476
|
matches(input) {
|
|
@@ -8323,7 +8480,7 @@
|
|
|
8323
8480
|
/**
|
|
8324
8481
|
* Creates a new {@code Matcher} matching the pattern against the input.
|
|
8325
8482
|
*
|
|
8326
|
-
* @param {string|number[]} input the input string
|
|
8483
|
+
* @param {string|number[]|Uint8Array} input the input string
|
|
8327
8484
|
* @returns {Matcher}
|
|
8328
8485
|
*/
|
|
8329
8486
|
matcher(input) {
|
|
@@ -8339,7 +8496,7 @@
|
|
|
8339
8496
|
* a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
|
|
8340
8497
|
* and guarantees execution on the high-speed DFA engine whenever possible.
|
|
8341
8498
|
*
|
|
8342
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8499
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8343
8500
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8344
8501
|
*/
|
|
8345
8502
|
test(input) {
|
|
@@ -8358,7 +8515,7 @@
|
|
|
8358
8515
|
* faster because it does not request capture group data. By requesting 0 capture groups,
|
|
8359
8516
|
* it securely routes execution through the DFA fast-path.
|
|
8360
8517
|
*
|
|
8361
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8518
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8362
8519
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8363
8520
|
*/
|
|
8364
8521
|
testExact(input) {
|