re2js 2.2.3 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -2
- package/build/index.cjs.cjs +178 -25
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +56 -67
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +178 -25
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +178 -25
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.2.3
|
|
5
|
+
* @version v2.3.1
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -1335,6 +1335,7 @@
|
|
|
1335
1335
|
*
|
|
1336
1336
|
* @author rsc@google.com (Russ Cox)
|
|
1337
1337
|
*/
|
|
1338
|
+
|
|
1338
1339
|
class Matcher {
|
|
1339
1340
|
/**
|
|
1340
1341
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
@@ -1373,13 +1374,16 @@
|
|
|
1373
1374
|
/**
|
|
1374
1375
|
*
|
|
1375
1376
|
* @param {RE2JS} pattern
|
|
1376
|
-
* @param {
|
|
1377
|
+
* @param {string|number[]|Uint8Array} input
|
|
1377
1378
|
*/
|
|
1378
1379
|
constructor(pattern, input) {
|
|
1379
1380
|
if (pattern === null) {
|
|
1380
1381
|
throw new Error('pattern is null');
|
|
1381
1382
|
}
|
|
1382
|
-
|
|
1383
|
+
/**
|
|
1384
|
+
* The pattern being matched.
|
|
1385
|
+
* @type {RE2JS}
|
|
1386
|
+
*/
|
|
1383
1387
|
this.patternInput = pattern;
|
|
1384
1388
|
const re2 = this.patternInput.re2();
|
|
1385
1389
|
// The number of submatches (groups) in the pattern.
|
|
@@ -1433,7 +1437,7 @@
|
|
|
1433
1437
|
|
|
1434
1438
|
/**
|
|
1435
1439
|
* Resets the {@code Matcher} and changes the input.
|
|
1436
|
-
* @param {
|
|
1440
|
+
* @param {MatcherInputBase} input
|
|
1437
1441
|
* @returns {Matcher} the {@code Matcher} itself, for chained method calls
|
|
1438
1442
|
*/
|
|
1439
1443
|
resetMatcherInput(input) {
|
|
@@ -1498,7 +1502,7 @@
|
|
|
1498
1502
|
/**
|
|
1499
1503
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
1500
1504
|
* @param {string|number} [group=0]
|
|
1501
|
-
* @returns {
|
|
1505
|
+
* @returns {string|null}
|
|
1502
1506
|
*/
|
|
1503
1507
|
group(group = 0) {
|
|
1504
1508
|
if (typeof group === 'string') {
|
|
@@ -1590,7 +1594,7 @@
|
|
|
1590
1594
|
* Matches the input against the pattern (unanchored), starting at a specified position. If there
|
|
1591
1595
|
* is a match, {@code find} sets the match state to describe it.
|
|
1592
1596
|
*
|
|
1593
|
-
* @param {number} [start=null] the input position where the search begins
|
|
1597
|
+
* @param {number|null} [start=null] the input position where the search begins
|
|
1594
1598
|
* @returns {boolean} if it finds a match
|
|
1595
1599
|
* @throws IndexOutOfBoundsException if start is not a valid input position
|
|
1596
1600
|
*/
|
|
@@ -7941,9 +7945,18 @@
|
|
|
7941
7945
|
}
|
|
7942
7946
|
|
|
7943
7947
|
class RE2Set {
|
|
7948
|
+
/** @type {number} */
|
|
7944
7949
|
static UNANCHORED = RE2Flags.UNANCHORED;
|
|
7950
|
+
/** @type {number} */
|
|
7945
7951
|
static ANCHOR_START = RE2Flags.ANCHOR_START;
|
|
7952
|
+
/** @type {number} */
|
|
7946
7953
|
static ANCHOR_BOTH = RE2Flags.ANCHOR_BOTH;
|
|
7954
|
+
|
|
7955
|
+
/**
|
|
7956
|
+
* Constructs a new RE2Set with the specified anchor mode and flags.
|
|
7957
|
+
* @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
|
|
7958
|
+
* @param {number} [flags=0] - The public flags to apply to all patterns in the set.
|
|
7959
|
+
*/
|
|
7947
7960
|
constructor(anchor = RE2Set.UNANCHORED, flags = 0) {
|
|
7948
7961
|
this.anchor = anchor;
|
|
7949
7962
|
this.jsFlags = flags;
|
|
@@ -7960,6 +7973,14 @@
|
|
|
7960
7973
|
this.dfa = null;
|
|
7961
7974
|
this.dummyRe2 = null;
|
|
7962
7975
|
}
|
|
7976
|
+
|
|
7977
|
+
/**
|
|
7978
|
+
* Adds a new regular expression pattern to the set.
|
|
7979
|
+
* Patterns cannot be added after the set has been compiled.
|
|
7980
|
+
* @param {string} pattern - The regular expression pattern to add.
|
|
7981
|
+
* @returns {number} The integer index assigned to the added pattern.
|
|
7982
|
+
* @throws {RE2JSCompileException} If patterns are added after compilation.
|
|
7983
|
+
*/
|
|
7963
7984
|
add(pattern) {
|
|
7964
7985
|
if (this.prog) {
|
|
7965
7986
|
throw new RE2JSCompileException('Cannot add patterns after compile');
|
|
@@ -7978,6 +7999,12 @@
|
|
|
7978
7999
|
this.regexps.push(Simplify.simplify(re));
|
|
7979
8000
|
return this.regexps.length - 1;
|
|
7980
8001
|
}
|
|
8002
|
+
|
|
8003
|
+
/**
|
|
8004
|
+
* Compiles the added patterns into a single state machine.
|
|
8005
|
+
* This is automatically called on the first match if not called explicitly.
|
|
8006
|
+
* @returns {void}
|
|
8007
|
+
*/
|
|
7981
8008
|
compile() {
|
|
7982
8009
|
if (this.prog) return;
|
|
7983
8010
|
this.prog = Compiler.compileSet(this.regexps);
|
|
@@ -7990,6 +8017,12 @@
|
|
|
7990
8017
|
longest: false
|
|
7991
8018
|
};
|
|
7992
8019
|
}
|
|
8020
|
+
|
|
8021
|
+
/**
|
|
8022
|
+
* Matches the input against the compiled set of regular expressions.
|
|
8023
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
|
|
8024
|
+
* @returns {number[]} An array of indices representing the patterns that successfully matched the input.
|
|
8025
|
+
*/
|
|
7993
8026
|
match(input) {
|
|
7994
8027
|
if (!this.prog) this.compile();
|
|
7995
8028
|
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
@@ -8015,13 +8048,19 @@
|
|
|
8015
8048
|
* Transform JS regex string to RE2 regex string
|
|
8016
8049
|
*/
|
|
8017
8050
|
class TranslateRegExpString {
|
|
8018
|
-
static isUpperCaseAlpha(ch) {
|
|
8019
|
-
return 'A' <= ch && ch <= 'Z';
|
|
8020
|
-
}
|
|
8021
8051
|
static isHexadecimal(ch) {
|
|
8022
8052
|
return '0' <= ch && ch <= '9' || 'A' <= ch && ch <= 'F' || 'a' <= ch && ch <= 'f';
|
|
8023
8053
|
}
|
|
8024
8054
|
static translate(data) {
|
|
8055
|
+
let prefixFlags = '';
|
|
8056
|
+
if (data instanceof RegExp) {
|
|
8057
|
+
if (data.ignoreCase) prefixFlags += 'i';
|
|
8058
|
+
if (data.multiline) prefixFlags += 'm';
|
|
8059
|
+
if (data.dotAll) prefixFlags += 's';
|
|
8060
|
+
|
|
8061
|
+
// execution flags ('g', 'y') are safely ignored here.
|
|
8062
|
+
data = data.source;
|
|
8063
|
+
}
|
|
8025
8064
|
if (typeof data !== 'string') {
|
|
8026
8065
|
return data;
|
|
8027
8066
|
}
|
|
@@ -8032,6 +8071,7 @@
|
|
|
8032
8071
|
result = '(?:)';
|
|
8033
8072
|
changed = true;
|
|
8034
8073
|
}
|
|
8074
|
+
let inCharClass = false;
|
|
8035
8075
|
let i = 0;
|
|
8036
8076
|
while (i < size) {
|
|
8037
8077
|
let ch = data[i];
|
|
@@ -8070,10 +8110,28 @@
|
|
|
8070
8110
|
if (i + 2 < size) {
|
|
8071
8111
|
let nextCh = data[i + 2];
|
|
8072
8112
|
if (nextCh === '{') {
|
|
8073
|
-
|
|
8074
|
-
i
|
|
8075
|
-
|
|
8076
|
-
|
|
8113
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8114
|
+
let j = i + 3;
|
|
8115
|
+
let hasHex = false;
|
|
8116
|
+
let closed = false;
|
|
8117
|
+
while (j < size) {
|
|
8118
|
+
const hexChar = data[j];
|
|
8119
|
+
if (hexChar === '}') {
|
|
8120
|
+
closed = true;
|
|
8121
|
+
break;
|
|
8122
|
+
}
|
|
8123
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8124
|
+
break;
|
|
8125
|
+
}
|
|
8126
|
+
hasHex = true;
|
|
8127
|
+
j++;
|
|
8128
|
+
}
|
|
8129
|
+
if (closed && hasHex) {
|
|
8130
|
+
result += '\\x';
|
|
8131
|
+
i += 2;
|
|
8132
|
+
changed = true;
|
|
8133
|
+
continue;
|
|
8134
|
+
}
|
|
8077
8135
|
} else if (i + 5 < size) {
|
|
8078
8136
|
let isHex4 = true;
|
|
8079
8137
|
for (let j = 0; j < 4; j++) {
|
|
@@ -8090,18 +8148,101 @@
|
|
|
8090
8148
|
}
|
|
8091
8149
|
}
|
|
8092
8150
|
}
|
|
8151
|
+
|
|
8152
|
+
// Graceful degradation for invalid/unclosed \u sequences
|
|
8093
8153
|
result += 'u';
|
|
8094
8154
|
i += 2;
|
|
8095
8155
|
changed = true;
|
|
8096
8156
|
continue;
|
|
8097
8157
|
}
|
|
8158
|
+
case 'x':
|
|
8159
|
+
{
|
|
8160
|
+
let isValidHex = false;
|
|
8161
|
+
if (i + 2 < size && data[i + 2] === '{') {
|
|
8162
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8163
|
+
let j = i + 3;
|
|
8164
|
+
let hasHex = false;
|
|
8165
|
+
let closed = false;
|
|
8166
|
+
while (j < size) {
|
|
8167
|
+
const hexChar = data[j];
|
|
8168
|
+
if (hexChar === '}') {
|
|
8169
|
+
closed = true;
|
|
8170
|
+
break;
|
|
8171
|
+
}
|
|
8172
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8173
|
+
break;
|
|
8174
|
+
}
|
|
8175
|
+
hasHex = true;
|
|
8176
|
+
j++;
|
|
8177
|
+
}
|
|
8178
|
+
if (closed && hasHex) {
|
|
8179
|
+
isValidHex = true;
|
|
8180
|
+
}
|
|
8181
|
+
} else if (i + 3 < size && TranslateRegExpString.isHexadecimal(data[i + 2]) && TranslateRegExpString.isHexadecimal(data[i + 3])) {
|
|
8182
|
+
isValidHex = true;
|
|
8183
|
+
}
|
|
8184
|
+
if (isValidHex) {
|
|
8185
|
+
result += '\\x';
|
|
8186
|
+
i += 2;
|
|
8187
|
+
} else {
|
|
8188
|
+
result += 'x';
|
|
8189
|
+
i += 2;
|
|
8190
|
+
changed = true;
|
|
8191
|
+
}
|
|
8192
|
+
continue;
|
|
8193
|
+
}
|
|
8194
|
+
// Whitelist of valid RE2/JS alphanumeric escapes
|
|
8195
|
+
case 'n':
|
|
8196
|
+
case 'r':
|
|
8197
|
+
case 't':
|
|
8198
|
+
case 'a':
|
|
8199
|
+
case 'f':
|
|
8200
|
+
case 'v':
|
|
8201
|
+
case 'd':
|
|
8202
|
+
case 'D':
|
|
8203
|
+
case 's':
|
|
8204
|
+
case 'S':
|
|
8205
|
+
case 'w':
|
|
8206
|
+
case 'W':
|
|
8207
|
+
case 'b':
|
|
8208
|
+
case 'B':
|
|
8209
|
+
case 'p':
|
|
8210
|
+
case 'P':
|
|
8211
|
+
case 'A':
|
|
8212
|
+
case 'z':
|
|
8213
|
+
case 'Q':
|
|
8214
|
+
case 'E':
|
|
8215
|
+
case '0':
|
|
8216
|
+
case '1':
|
|
8217
|
+
case '2':
|
|
8218
|
+
case '3':
|
|
8219
|
+
case '4':
|
|
8220
|
+
case '5':
|
|
8221
|
+
case '6':
|
|
8222
|
+
case '7':
|
|
8223
|
+
{
|
|
8224
|
+
result += '\\' + ch;
|
|
8225
|
+
i += 2;
|
|
8226
|
+
continue;
|
|
8227
|
+
}
|
|
8098
8228
|
default:
|
|
8099
8229
|
{
|
|
8100
|
-
result += '\\';
|
|
8101
8230
|
let cp = data.codePointAt(i + 1);
|
|
8102
|
-
let
|
|
8103
|
-
|
|
8104
|
-
|
|
8231
|
+
let isAlphaNum = cp >= 48 && cp <= 57 || cp >= 65 && cp <= 90 || cp >= 97 && cp <= 122;
|
|
8232
|
+
if (isAlphaNum) {
|
|
8233
|
+
// Invalid JS alphanumeric escape sequence (e.g. \8, \9, \e, \K)
|
|
8234
|
+
// Gracefully degrade to the literal character to prevent RE2 syntax crashes
|
|
8235
|
+
let symSize = Utils.charCount(cp);
|
|
8236
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8237
|
+
i += symSize + 1;
|
|
8238
|
+
changed = true;
|
|
8239
|
+
} else {
|
|
8240
|
+
// Escaped symbol (e.g. \., \*, \])
|
|
8241
|
+
result += '\\';
|
|
8242
|
+
let symSize = Utils.charCount(cp);
|
|
8243
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8244
|
+
i += symSize + 1;
|
|
8245
|
+
}
|
|
8105
8246
|
continue;
|
|
8106
8247
|
}
|
|
8107
8248
|
}
|
|
@@ -8111,7 +8252,13 @@
|
|
|
8111
8252
|
i += 1;
|
|
8112
8253
|
changed = true;
|
|
8113
8254
|
continue;
|
|
8114
|
-
} else if (ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
|
|
8255
|
+
} else if (ch === '[') {
|
|
8256
|
+
// Track entry into a character class (protects syntax inside)
|
|
8257
|
+
inCharClass = true;
|
|
8258
|
+
} else if (ch === ']') {
|
|
8259
|
+
// Track exit of a character class
|
|
8260
|
+
inCharClass = false;
|
|
8261
|
+
} else if (!inCharClass && ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
|
|
8115
8262
|
if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
|
|
8116
8263
|
result += '(?P<';
|
|
8117
8264
|
i += 3;
|
|
@@ -8124,7 +8271,13 @@
|
|
|
8124
8271
|
result += data.substring(i, i + symSize);
|
|
8125
8272
|
i += symSize;
|
|
8126
8273
|
}
|
|
8127
|
-
|
|
8274
|
+
const finalResult = changed ? result : data;
|
|
8275
|
+
|
|
8276
|
+
// Append any extracted inline flags
|
|
8277
|
+
if (prefixFlags.length > 0) {
|
|
8278
|
+
return `(?${prefixFlags})${finalResult}`;
|
|
8279
|
+
}
|
|
8280
|
+
return finalResult;
|
|
8128
8281
|
}
|
|
8129
8282
|
}
|
|
8130
8283
|
|
|
@@ -8202,7 +8355,7 @@
|
|
|
8202
8355
|
* RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
|
|
8203
8356
|
* resulting regex is safe and properly formatted before compilation.
|
|
8204
8357
|
*
|
|
8205
|
-
* @param {string} expr - The regular expression string to be translated.
|
|
8358
|
+
* @param {string|RegExp} expr - The regular expression string to be translated.
|
|
8206
8359
|
* @returns {string} - The transformed regular expression string, ready for compilation.
|
|
8207
8360
|
*/
|
|
8208
8361
|
static translateRegExp(expr) {
|
|
@@ -8246,7 +8399,7 @@
|
|
|
8246
8399
|
* Matches a string against a regular expression.
|
|
8247
8400
|
*
|
|
8248
8401
|
* @param {string} regex the regular expression
|
|
8249
|
-
* @param {string|number[]} input the input
|
|
8402
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8250
8403
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8251
8404
|
* @throws RE2JSSyntaxException if the regular expression is malformed
|
|
8252
8405
|
*/
|
|
@@ -8313,7 +8466,7 @@
|
|
|
8313
8466
|
/**
|
|
8314
8467
|
* Matches a string against a regular expression.
|
|
8315
8468
|
*
|
|
8316
|
-
* @param {string|number[]} input the input
|
|
8469
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8317
8470
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8318
8471
|
*/
|
|
8319
8472
|
matches(input) {
|
|
@@ -8323,7 +8476,7 @@
|
|
|
8323
8476
|
/**
|
|
8324
8477
|
* Creates a new {@code Matcher} matching the pattern against the input.
|
|
8325
8478
|
*
|
|
8326
|
-
* @param {string|number[]} input the input string
|
|
8479
|
+
* @param {string|number[]|Uint8Array} input the input string
|
|
8327
8480
|
* @returns {Matcher}
|
|
8328
8481
|
*/
|
|
8329
8482
|
matcher(input) {
|
|
@@ -8339,7 +8492,7 @@
|
|
|
8339
8492
|
* a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
|
|
8340
8493
|
* and guarantees execution on the high-speed DFA engine whenever possible.
|
|
8341
8494
|
*
|
|
8342
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8495
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8343
8496
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8344
8497
|
*/
|
|
8345
8498
|
test(input) {
|
|
@@ -8358,7 +8511,7 @@
|
|
|
8358
8511
|
* faster because it does not request capture group data. By requesting 0 capture groups,
|
|
8359
8512
|
* it securely routes execution through the DFA fast-path.
|
|
8360
8513
|
*
|
|
8361
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8514
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8362
8515
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8363
8516
|
*/
|
|
8364
8517
|
testExact(input) {
|