re2js 2.2.3 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/build/index.cjs.cjs +184 -27
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +54 -78
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +184 -27
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +184 -27
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -577,6 +577,10 @@ const unicodeRegexp = RE2JS.translateRegExp('\\u{1F600}') // '\\x{1F600}'
|
|
|
577
577
|
|
|
578
578
|
RE2JS.matches(unicodeRegexp, '😀') // true
|
|
579
579
|
RE2JS.matches(unicodeRegexp, '😃') // false
|
|
580
|
+
|
|
581
|
+
// native RegExp objects are also supported
|
|
582
|
+
RE2JS.translateRegExp(/foo/ims) // '(?ims)foo'
|
|
583
|
+
RE2JS.translateRegExp(/bar/giy) // '(?i)bar'
|
|
580
584
|
```
|
|
581
585
|
|
|
582
586
|
## Performance and Architecture
|
package/build/index.cjs.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.2.3
|
|
5
|
+
* @version v2.3.0
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -1331,6 +1331,11 @@ class RE2JSInternalException extends RE2JSException {
|
|
|
1331
1331
|
*
|
|
1332
1332
|
* @author rsc@google.com (Russ Cox)
|
|
1333
1333
|
*/
|
|
1334
|
+
|
|
1335
|
+
/**
|
|
1336
|
+
* @typedef {import('./index').RE2JS} RE2JS_Pattern
|
|
1337
|
+
*/
|
|
1338
|
+
|
|
1334
1339
|
class Matcher {
|
|
1335
1340
|
/**
|
|
1336
1341
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
@@ -1368,14 +1373,17 @@ class Matcher {
|
|
|
1368
1373
|
}
|
|
1369
1374
|
/**
|
|
1370
1375
|
*
|
|
1371
|
-
* @param {
|
|
1372
|
-
* @param {
|
|
1376
|
+
* @param {RE2JS_Pattern} pattern
|
|
1377
|
+
* @param {Uint8Array|number[]|string} input
|
|
1373
1378
|
*/
|
|
1374
1379
|
constructor(pattern, input) {
|
|
1375
1380
|
if (pattern === null) {
|
|
1376
1381
|
throw new Error('pattern is null');
|
|
1377
1382
|
}
|
|
1378
|
-
|
|
1383
|
+
/**
|
|
1384
|
+
* The pattern being matched.
|
|
1385
|
+
* @type {RE2JS_Pattern}
|
|
1386
|
+
*/
|
|
1379
1387
|
this.patternInput = pattern;
|
|
1380
1388
|
const re2 = this.patternInput.re2();
|
|
1381
1389
|
// The number of submatches (groups) in the pattern.
|
|
@@ -1399,7 +1407,7 @@ class Matcher {
|
|
|
1399
1407
|
|
|
1400
1408
|
/**
|
|
1401
1409
|
* Returns the {@code RE2JS} associated with this {@code Matcher}.
|
|
1402
|
-
* @returns {
|
|
1410
|
+
* @returns {RE2JS_Pattern}
|
|
1403
1411
|
*/
|
|
1404
1412
|
pattern() {
|
|
1405
1413
|
return this.patternInput;
|
|
@@ -1429,7 +1437,7 @@ class Matcher {
|
|
|
1429
1437
|
|
|
1430
1438
|
/**
|
|
1431
1439
|
* Resets the {@code Matcher} and changes the input.
|
|
1432
|
-
* @param {
|
|
1440
|
+
* @param {import('./MatcherInput').MatcherInputBase} input
|
|
1433
1441
|
* @returns {Matcher} the {@code Matcher} itself, for chained method calls
|
|
1434
1442
|
*/
|
|
1435
1443
|
resetMatcherInput(input) {
|
|
@@ -1494,7 +1502,7 @@ class Matcher {
|
|
|
1494
1502
|
/**
|
|
1495
1503
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
1496
1504
|
* @param {string|number} [group=0]
|
|
1497
|
-
* @returns {
|
|
1505
|
+
* @returns {string|null}
|
|
1498
1506
|
*/
|
|
1499
1507
|
group(group = 0) {
|
|
1500
1508
|
if (typeof group === 'string') {
|
|
@@ -1586,7 +1594,7 @@ class Matcher {
|
|
|
1586
1594
|
* Matches the input against the pattern (unanchored), starting at a specified position. If there
|
|
1587
1595
|
* is a match, {@code find} sets the match state to describe it.
|
|
1588
1596
|
*
|
|
1589
|
-
* @param {number} [start=null] the input position where the search begins
|
|
1597
|
+
* @param {number|null} [start=null] the input position where the search begins
|
|
1590
1598
|
* @returns {boolean} if it finds a match
|
|
1591
1599
|
* @throws IndexOutOfBoundsException if start is not a valid input position
|
|
1592
1600
|
*/
|
|
@@ -7937,9 +7945,18 @@ class RE2 {
|
|
|
7937
7945
|
}
|
|
7938
7946
|
|
|
7939
7947
|
class RE2Set {
|
|
7948
|
+
/** @type {number} */
|
|
7940
7949
|
static UNANCHORED = RE2Flags.UNANCHORED;
|
|
7950
|
+
/** @type {number} */
|
|
7941
7951
|
static ANCHOR_START = RE2Flags.ANCHOR_START;
|
|
7952
|
+
/** @type {number} */
|
|
7942
7953
|
static ANCHOR_BOTH = RE2Flags.ANCHOR_BOTH;
|
|
7954
|
+
|
|
7955
|
+
/**
|
|
7956
|
+
* Constructs a new RE2Set with the specified anchor mode and flags.
|
|
7957
|
+
* @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
|
|
7958
|
+
* @param {number} [flags=0] - The public flags to apply to all patterns in the set.
|
|
7959
|
+
*/
|
|
7943
7960
|
constructor(anchor = RE2Set.UNANCHORED, flags = 0) {
|
|
7944
7961
|
this.anchor = anchor;
|
|
7945
7962
|
this.jsFlags = flags;
|
|
@@ -7956,6 +7973,14 @@ class RE2Set {
|
|
|
7956
7973
|
this.dfa = null;
|
|
7957
7974
|
this.dummyRe2 = null;
|
|
7958
7975
|
}
|
|
7976
|
+
|
|
7977
|
+
/**
|
|
7978
|
+
* Adds a new regular expression pattern to the set.
|
|
7979
|
+
* Patterns cannot be added after the set has been compiled.
|
|
7980
|
+
* @param {string} pattern - The regular expression pattern to add.
|
|
7981
|
+
* @returns {number} The integer index assigned to the added pattern.
|
|
7982
|
+
* @throws {RE2JSCompileException} If patterns are added after compilation.
|
|
7983
|
+
*/
|
|
7959
7984
|
add(pattern) {
|
|
7960
7985
|
if (this.prog) {
|
|
7961
7986
|
throw new RE2JSCompileException('Cannot add patterns after compile');
|
|
@@ -7974,6 +7999,12 @@ class RE2Set {
|
|
|
7974
7999
|
this.regexps.push(Simplify.simplify(re));
|
|
7975
8000
|
return this.regexps.length - 1;
|
|
7976
8001
|
}
|
|
8002
|
+
|
|
8003
|
+
/**
|
|
8004
|
+
* Compiles the added patterns into a single state machine.
|
|
8005
|
+
* This is automatically called on the first match if not called explicitly.
|
|
8006
|
+
* @returns {void}
|
|
8007
|
+
*/
|
|
7977
8008
|
compile() {
|
|
7978
8009
|
if (this.prog) return;
|
|
7979
8010
|
this.prog = Compiler.compileSet(this.regexps);
|
|
@@ -7986,6 +8017,12 @@ class RE2Set {
|
|
|
7986
8017
|
longest: false
|
|
7987
8018
|
};
|
|
7988
8019
|
}
|
|
8020
|
+
|
|
8021
|
+
/**
|
|
8022
|
+
* Matches the input against the compiled set of regular expressions.
|
|
8023
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
|
|
8024
|
+
* @returns {number[]} An array of indices representing the patterns that successfully matched the input.
|
|
8025
|
+
*/
|
|
7989
8026
|
match(input) {
|
|
7990
8027
|
if (!this.prog) this.compile();
|
|
7991
8028
|
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
@@ -8011,13 +8048,19 @@ class RE2Set {
|
|
|
8011
8048
|
* Transform JS regex string to RE2 regex string
|
|
8012
8049
|
*/
|
|
8013
8050
|
class TranslateRegExpString {
|
|
8014
|
-
static isUpperCaseAlpha(ch) {
|
|
8015
|
-
return 'A' <= ch && ch <= 'Z';
|
|
8016
|
-
}
|
|
8017
8051
|
static isHexadecimal(ch) {
|
|
8018
8052
|
return '0' <= ch && ch <= '9' || 'A' <= ch && ch <= 'F' || 'a' <= ch && ch <= 'f';
|
|
8019
8053
|
}
|
|
8020
8054
|
static translate(data) {
|
|
8055
|
+
let prefixFlags = '';
|
|
8056
|
+
if (data instanceof RegExp) {
|
|
8057
|
+
if (data.ignoreCase) prefixFlags += 'i';
|
|
8058
|
+
if (data.multiline) prefixFlags += 'm';
|
|
8059
|
+
if (data.dotAll) prefixFlags += 's';
|
|
8060
|
+
|
|
8061
|
+
// execution flags ('g', 'y') are safely ignored here.
|
|
8062
|
+
data = data.source;
|
|
8063
|
+
}
|
|
8021
8064
|
if (typeof data !== 'string') {
|
|
8022
8065
|
return data;
|
|
8023
8066
|
}
|
|
@@ -8028,6 +8071,7 @@ class TranslateRegExpString {
|
|
|
8028
8071
|
result = '(?:)';
|
|
8029
8072
|
changed = true;
|
|
8030
8073
|
}
|
|
8074
|
+
let inCharClass = false;
|
|
8031
8075
|
let i = 0;
|
|
8032
8076
|
while (i < size) {
|
|
8033
8077
|
let ch = data[i];
|
|
@@ -8066,10 +8110,28 @@ class TranslateRegExpString {
|
|
|
8066
8110
|
if (i + 2 < size) {
|
|
8067
8111
|
let nextCh = data[i + 2];
|
|
8068
8112
|
if (nextCh === '{') {
|
|
8069
|
-
|
|
8070
|
-
i
|
|
8071
|
-
|
|
8072
|
-
|
|
8113
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8114
|
+
let j = i + 3;
|
|
8115
|
+
let hasHex = false;
|
|
8116
|
+
let closed = false;
|
|
8117
|
+
while (j < size) {
|
|
8118
|
+
const hexChar = data[j];
|
|
8119
|
+
if (hexChar === '}') {
|
|
8120
|
+
closed = true;
|
|
8121
|
+
break;
|
|
8122
|
+
}
|
|
8123
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8124
|
+
break;
|
|
8125
|
+
}
|
|
8126
|
+
hasHex = true;
|
|
8127
|
+
j++;
|
|
8128
|
+
}
|
|
8129
|
+
if (closed && hasHex) {
|
|
8130
|
+
result += '\\x';
|
|
8131
|
+
i += 2;
|
|
8132
|
+
changed = true;
|
|
8133
|
+
continue;
|
|
8134
|
+
}
|
|
8073
8135
|
} else if (i + 5 < size) {
|
|
8074
8136
|
let isHex4 = true;
|
|
8075
8137
|
for (let j = 0; j < 4; j++) {
|
|
@@ -8086,18 +8148,101 @@ class TranslateRegExpString {
|
|
|
8086
8148
|
}
|
|
8087
8149
|
}
|
|
8088
8150
|
}
|
|
8151
|
+
|
|
8152
|
+
// Graceful degradation for invalid/unclosed \u sequences
|
|
8089
8153
|
result += 'u';
|
|
8090
8154
|
i += 2;
|
|
8091
8155
|
changed = true;
|
|
8092
8156
|
continue;
|
|
8093
8157
|
}
|
|
8158
|
+
case 'x':
|
|
8159
|
+
{
|
|
8160
|
+
let isValidHex = false;
|
|
8161
|
+
if (i + 2 < size && data[i + 2] === '{') {
|
|
8162
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8163
|
+
let j = i + 3;
|
|
8164
|
+
let hasHex = false;
|
|
8165
|
+
let closed = false;
|
|
8166
|
+
while (j < size) {
|
|
8167
|
+
const hexChar = data[j];
|
|
8168
|
+
if (hexChar === '}') {
|
|
8169
|
+
closed = true;
|
|
8170
|
+
break;
|
|
8171
|
+
}
|
|
8172
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8173
|
+
break;
|
|
8174
|
+
}
|
|
8175
|
+
hasHex = true;
|
|
8176
|
+
j++;
|
|
8177
|
+
}
|
|
8178
|
+
if (closed && hasHex) {
|
|
8179
|
+
isValidHex = true;
|
|
8180
|
+
}
|
|
8181
|
+
} else if (i + 3 < size && TranslateRegExpString.isHexadecimal(data[i + 2]) && TranslateRegExpString.isHexadecimal(data[i + 3])) {
|
|
8182
|
+
isValidHex = true;
|
|
8183
|
+
}
|
|
8184
|
+
if (isValidHex) {
|
|
8185
|
+
result += '\\x';
|
|
8186
|
+
i += 2;
|
|
8187
|
+
} else {
|
|
8188
|
+
result += 'x';
|
|
8189
|
+
i += 2;
|
|
8190
|
+
changed = true;
|
|
8191
|
+
}
|
|
8192
|
+
continue;
|
|
8193
|
+
}
|
|
8194
|
+
// Whitelist of valid RE2/JS alphanumeric escapes
|
|
8195
|
+
case 'n':
|
|
8196
|
+
case 'r':
|
|
8197
|
+
case 't':
|
|
8198
|
+
case 'a':
|
|
8199
|
+
case 'f':
|
|
8200
|
+
case 'v':
|
|
8201
|
+
case 'd':
|
|
8202
|
+
case 'D':
|
|
8203
|
+
case 's':
|
|
8204
|
+
case 'S':
|
|
8205
|
+
case 'w':
|
|
8206
|
+
case 'W':
|
|
8207
|
+
case 'b':
|
|
8208
|
+
case 'B':
|
|
8209
|
+
case 'p':
|
|
8210
|
+
case 'P':
|
|
8211
|
+
case 'A':
|
|
8212
|
+
case 'z':
|
|
8213
|
+
case 'Q':
|
|
8214
|
+
case 'E':
|
|
8215
|
+
case '0':
|
|
8216
|
+
case '1':
|
|
8217
|
+
case '2':
|
|
8218
|
+
case '3':
|
|
8219
|
+
case '4':
|
|
8220
|
+
case '5':
|
|
8221
|
+
case '6':
|
|
8222
|
+
case '7':
|
|
8223
|
+
{
|
|
8224
|
+
result += '\\' + ch;
|
|
8225
|
+
i += 2;
|
|
8226
|
+
continue;
|
|
8227
|
+
}
|
|
8094
8228
|
default:
|
|
8095
8229
|
{
|
|
8096
|
-
result += '\\';
|
|
8097
8230
|
let cp = data.codePointAt(i + 1);
|
|
8098
|
-
let
|
|
8099
|
-
|
|
8100
|
-
|
|
8231
|
+
let isAlphaNum = cp >= 48 && cp <= 57 || cp >= 65 && cp <= 90 || cp >= 97 && cp <= 122;
|
|
8232
|
+
if (isAlphaNum) {
|
|
8233
|
+
// Invalid JS alphanumeric escape sequence (e.g. \8, \9, \e, \K)
|
|
8234
|
+
// Gracefully degrade to the literal character to prevent RE2 syntax crashes
|
|
8235
|
+
let symSize = Utils.charCount(cp);
|
|
8236
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8237
|
+
i += symSize + 1;
|
|
8238
|
+
changed = true;
|
|
8239
|
+
} else {
|
|
8240
|
+
// Escaped symbol (e.g. \., \*, \])
|
|
8241
|
+
result += '\\';
|
|
8242
|
+
let symSize = Utils.charCount(cp);
|
|
8243
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8244
|
+
i += symSize + 1;
|
|
8245
|
+
}
|
|
8101
8246
|
continue;
|
|
8102
8247
|
}
|
|
8103
8248
|
}
|
|
@@ -8107,7 +8252,13 @@ class TranslateRegExpString {
|
|
|
8107
8252
|
i += 1;
|
|
8108
8253
|
changed = true;
|
|
8109
8254
|
continue;
|
|
8110
|
-
} else if (ch === '
|
|
8255
|
+
} else if (ch === '[') {
|
|
8256
|
+
// Track entry into a character class (protects syntax inside)
|
|
8257
|
+
inCharClass = true;
|
|
8258
|
+
} else if (ch === ']') {
|
|
8259
|
+
// Track exit of a character class
|
|
8260
|
+
inCharClass = false;
|
|
8261
|
+
} else if (!inCharClass && ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
|
|
8111
8262
|
if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
|
|
8112
8263
|
result += '(?P<';
|
|
8113
8264
|
i += 3;
|
|
@@ -8120,7 +8271,13 @@ class TranslateRegExpString {
|
|
|
8120
8271
|
result += data.substring(i, i + symSize);
|
|
8121
8272
|
i += symSize;
|
|
8122
8273
|
}
|
|
8123
|
-
|
|
8274
|
+
const finalResult = changed ? result : data;
|
|
8275
|
+
|
|
8276
|
+
// Prepend any extracted flags as an inline (?flags) group
|
|
8277
|
+
if (prefixFlags.length > 0) {
|
|
8278
|
+
return `(?${prefixFlags})${finalResult}`;
|
|
8279
|
+
}
|
|
8280
|
+
return finalResult;
|
|
8124
8281
|
}
|
|
8125
8282
|
}
|
|
8126
8283
|
|
|
@@ -8198,7 +8355,7 @@ class RE2JS {
|
|
|
8198
8355
|
* RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
|
|
8199
8356
|
* resulting regex is safe and properly formatted before compilation.
|
|
8200
8357
|
*
|
|
8201
|
-
* @param {string} expr - The regular expression string to be translated.
|
|
8358
|
+
* @param {string|RegExp} expr - The regular expression (pattern string or native RegExp) to be translated.
|
|
8202
8359
|
* @returns {string} - The transformed regular expression string, ready for compilation.
|
|
8203
8360
|
*/
|
|
8204
8361
|
static translateRegExp(expr) {
|
|
@@ -8242,7 +8399,7 @@ class RE2JS {
|
|
|
8242
8399
|
* Matches a string against a regular expression.
|
|
8243
8400
|
*
|
|
8244
8401
|
* @param {string} regex the regular expression
|
|
8245
|
-
* @param {string|number[]} input the input
|
|
8402
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8246
8403
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8247
8404
|
* @throws RE2JSSyntaxException if the regular expression is malformed
|
|
8248
8405
|
*/
|
|
@@ -8309,7 +8466,7 @@ class RE2JS {
|
|
|
8309
8466
|
/**
|
|
8310
8467
|
* Matches a string against a regular expression.
|
|
8311
8468
|
*
|
|
8312
|
-
* @param {string|number[]} input the input
|
|
8469
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8313
8470
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8314
8471
|
*/
|
|
8315
8472
|
matches(input) {
|
|
@@ -8319,7 +8476,7 @@ class RE2JS {
|
|
|
8319
8476
|
/**
|
|
8320
8477
|
* Creates a new {@code Matcher} matching the pattern against the input.
|
|
8321
8478
|
*
|
|
8322
|
-
* @param {string|number[]} input the input string
|
|
8479
|
+
* @param {string|number[]|Uint8Array} input the input string
|
|
8323
8480
|
* @returns {Matcher}
|
|
8324
8481
|
*/
|
|
8325
8482
|
matcher(input) {
|
|
@@ -8335,7 +8492,7 @@ class RE2JS {
|
|
|
8335
8492
|
* a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
|
|
8336
8493
|
* and guarantees execution on the high-speed DFA engine whenever possible.
|
|
8337
8494
|
*
|
|
8338
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8495
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8339
8496
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8340
8497
|
*/
|
|
8341
8498
|
test(input) {
|
|
@@ -8354,7 +8511,7 @@ class RE2JS {
|
|
|
8354
8511
|
* faster because it does not request capture group data. By requesting 0 capture groups,
|
|
8355
8512
|
* it securely routes execution through the DFA fast-path.
|
|
8356
8513
|
*
|
|
8357
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8514
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8358
8515
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8359
8516
|
*/
|
|
8360
8517
|
testExact(input) {
|