re2js 1.3.3 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -17
- package/build/index.cjs.cjs +51 -22
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +23 -11
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +51 -22
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +51 -22
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -337,17 +337,17 @@ Note that the replacement string can include references to capturing groups from
|
|
|
337
337
|
|
|
338
338
|
Parameters:
|
|
339
339
|
- `replacement (String)`: The string that replaces the substrings found. Capture groups and special characters in the replacement string have special behavior. For example:
|
|
340
|
-
- `$0` refers to the entire matched substring
|
|
341
|
-
- `$1, $2, ...` refer to the corresponding capture groups in the pattern
|
|
342
|
-
- `\$` inserts a literal `$`
|
|
343
|
-
- `${name}` can be used to reference named capture groups
|
|
344
|
-
- on invalid group - throw exception
|
|
345
|
-
- `perlMode (Boolean)`: If set to `true`, the replacement follows Perl/JS's rules for replacement. Defaults to `false`. If `perlMode = true`, changed rules for capture groups and special characters:
|
|
346
340
|
- `$&` refers to the entire matched substring
|
|
347
341
|
- `$1, $2, ...` refer to the corresponding capture groups in the pattern
|
|
348
342
|
- `$$` inserts a literal `$`
|
|
349
343
|
- `$<name>` can be used to reference named capture groups
|
|
350
344
|
- on invalid group - ignore it
|
|
345
|
+
- `javaMode (Boolean)`: If set to `true`, the replacement follows Java's rules for replacement. Defaults to `false`. When `javaMode = true`, the rules for capture groups and special characters change as follows:
|
|
346
|
+
- `$0` refers to the entire matched substring
|
|
347
|
+
- `$1, $2, ...` refer to the corresponding capture groups in the pattern
|
|
348
|
+
- `\$` inserts a literal `$`
|
|
349
|
+
- `${name}` can be used to reference named capture groups
|
|
350
|
+
- on invalid group - throw exception
|
|
351
351
|
|
|
352
352
|
Examples:
|
|
353
353
|
|
|
@@ -377,25 +377,28 @@ RE2JS.compile('(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)')
|
|
|
377
377
|
.replaceFirst('$10$20') // 'jb0nopqrstuvwxyz123'
|
|
378
378
|
```
|
|
379
379
|
|
|
380
|
-
Function support second argument `
|
|
380
|
+
The function supports a second argument, `javaMode`, which works the same way as it does for the `replaceAll` function.
|
|
381
381
|
|
|
382
|
-
###
|
|
382
|
+
### Safe Replacements
|
|
383
383
|
|
|
384
|
-
|
|
385
|
-
|
|
384
|
+
When using untrusted user input as a replacement string, you must escape special characters so they aren't accidentally evaluated as capture groups (e.g., `$1`).
|
|
385
|
+
|
|
386
|
+
Use the static method `quoteReplacement(string, javaMode)` to safely escape these characters. **Note:** You must pass the same `javaMode` boolean to `quoteReplacement` that you plan to use in `replaceAll()` / `replaceFirst()`, because the two modes use different escaping logic.
|
|
386
387
|
|
|
387
388
|
```js
|
|
388
389
|
import { RE2JS } from 're2js'
|
|
389
390
|
|
|
390
|
-
const
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
RE2JS.matches(regexp, '123') // true
|
|
391
|
+
const text = 'The cost is 100 bucks.'
|
|
392
|
+
const regex = RE2JS.compile('100 bucks')
|
|
393
|
+
const unsafeUserInput = '$500'
|
|
394
394
|
|
|
395
|
-
|
|
395
|
+
// Safe (Default Mode)
|
|
396
|
+
const safeDefault = RE2JS.quoteReplacement(unsafeUserInput) // "$$500"
|
|
397
|
+
regex.matcher(text).replaceAll(safeDefault) // "The cost is $500."
|
|
396
398
|
|
|
397
|
-
|
|
398
|
-
RE2JS.
|
|
399
|
+
// Safe (Java Mode)
|
|
400
|
+
const safeJava = RE2JS.quoteReplacement(unsafeUserInput, true) // "\$500"
|
|
401
|
+
regex.matcher(text).replaceAll(safeJava, true) // "The cost is $500."
|
|
399
402
|
```
|
|
400
403
|
|
|
401
404
|
### Escaping Special Characters
|
|
@@ -423,6 +426,25 @@ console.log(RE2JS.compile('a+b').programSize()); // Outputs: 5
|
|
|
423
426
|
console.log(RE2JS.compile('(a+b?)').programSize()); // Outputs: 8
|
|
424
427
|
```
|
|
425
428
|
|
|
429
|
+
### Translating Regular Expressions
|
|
430
|
+
|
|
431
|
+
The `translateRegExp()` method preprocesses a given regular expression string to ensure compatibility with RE2JS.
|
|
432
|
+
It applies necessary transformations, such as escaping special characters, adjusting Unicode sequences, and converting named capture groups.
|
|
433
|
+
|
|
434
|
+
```js
|
|
435
|
+
import { RE2JS } from 're2js'
|
|
436
|
+
|
|
437
|
+
const regexp = RE2JS.translateRegExp('(?<word>\\w+)') // '(?P<word>\\w+)'
|
|
438
|
+
|
|
439
|
+
RE2JS.matches(regexp, 'hello') // true
|
|
440
|
+
RE2JS.matches(regexp, '123') // true
|
|
441
|
+
|
|
442
|
+
const unicodeRegexp = RE2JS.translateRegExp('\\u{1F600}') // '\\x{1F600}'
|
|
443
|
+
|
|
444
|
+
RE2JS.matches(unicodeRegexp, '😀') // true
|
|
445
|
+
RE2JS.matches(unicodeRegexp, '😃') // false
|
|
446
|
+
```
|
|
447
|
+
|
|
426
448
|
## Performance
|
|
427
449
|
|
|
428
450
|
The RE2JS engine runs more slowly compared to native RegExp objects. This reduced speed is also noticeable when comparing RE2JS to the original RE2 engine. The C++ implementation of the RE2 engine includes both NFA (Nondeterministic Finite Automaton) and DFA (Deterministic Finite Automaton) engines, as well as a variety of optimizations. Russ Cox ported a simplified version of the NFA engine to Go. Later, Alan Donovan ported the NFA-based Go implementation to Java. I then ported the NFA-based Java implementation (plus features from the Go implementation that are not present in the Java implementation, such as checks for regular expression complexity) to a pure JS version. This is another reason why the pure JS version will perform more slowly compared to the original RE2 engine.
|
package/build/index.cjs.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v1.
|
|
5
|
+
* @version v1.4.0
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -1126,16 +1126,31 @@ class Matcher {
|
|
|
1126
1126
|
* {@link #appendReplacement} as a literal replacement of {@code s}.
|
|
1127
1127
|
*
|
|
1128
1128
|
* @param {string} str the string to be quoted
|
|
1129
|
+
* @param {boolean} [javaMode=false] whether the replacement will be used in javaMode
|
|
1129
1130
|
* @returns {string} the quoted string
|
|
1130
1131
|
*/
|
|
1131
|
-
static quoteReplacement(str) {
|
|
1132
|
-
if (
|
|
1132
|
+
static quoteReplacement(str, javaMode = false) {
|
|
1133
|
+
if (javaMode) {
|
|
1134
|
+
// Java mode escapes '\' and '$' with a backslash
|
|
1135
|
+
if (str.indexOf('\\') < 0 && str.indexOf('$') < 0) {
|
|
1136
|
+
return str;
|
|
1137
|
+
}
|
|
1138
|
+
return str.split('').map(s => {
|
|
1139
|
+
const c = s.codePointAt(0);
|
|
1140
|
+
if (c === Codepoint.CODES.get('\\') || c === Codepoint.CODES.get('$')) {
|
|
1141
|
+
return `\\${s}`;
|
|
1142
|
+
}
|
|
1143
|
+
return s;
|
|
1144
|
+
}).join('');
|
|
1145
|
+
}
|
|
1146
|
+
|
|
1147
|
+
// In JS mode, '\' is not a special character, but '$' must be escaped as '$$'
|
|
1148
|
+
if (str.indexOf('$') < 0) {
|
|
1133
1149
|
return str;
|
|
1134
1150
|
}
|
|
1135
1151
|
return str.split('').map(s => {
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
return `\\${s}`;
|
|
1152
|
+
if (s.codePointAt(0) === Codepoint.CODES.get('$')) {
|
|
1153
|
+
return '$$';
|
|
1139
1154
|
}
|
|
1140
1155
|
return s;
|
|
1141
1156
|
}).join('');
|
|
@@ -1425,13 +1440,13 @@ class Matcher {
|
|
|
1425
1440
|
* earlier, escape the first digit that should not be used.
|
|
1426
1441
|
*
|
|
1427
1442
|
* @param {string} replacement the replacement string
|
|
1428
|
-
* @param {boolean} [
|
|
1443
|
+
* @param {boolean} [javaMode=false] activate java mode (different behaviour for capture groups and special characters)
|
|
1429
1444
|
* @returns {string}
|
|
1430
1445
|
* @throws IllegalStateException if there was no most recent match
|
|
1431
1446
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group
|
|
1432
1447
|
* @private
|
|
1433
1448
|
*/
|
|
1434
|
-
appendReplacement(replacement,
|
|
1449
|
+
appendReplacement(replacement, javaMode = false) {
|
|
1435
1450
|
let res = '';
|
|
1436
1451
|
const s = this.start();
|
|
1437
1452
|
const e = this.end();
|
|
@@ -1439,7 +1454,7 @@ class Matcher {
|
|
|
1439
1454
|
res += this.substring(this.appendPos, s);
|
|
1440
1455
|
}
|
|
1441
1456
|
this.appendPos = e;
|
|
1442
|
-
res +=
|
|
1457
|
+
res += javaMode ? this.appendReplacementInternalJava(replacement) : this.appendReplacementInternalJs(replacement);
|
|
1443
1458
|
return res;
|
|
1444
1459
|
}
|
|
1445
1460
|
|
|
@@ -1448,7 +1463,7 @@ class Matcher {
|
|
|
1448
1463
|
* @returns {string}
|
|
1449
1464
|
* @private
|
|
1450
1465
|
*/
|
|
1451
|
-
|
|
1466
|
+
appendReplacementInternalJava(replacement) {
|
|
1452
1467
|
let res = '';
|
|
1453
1468
|
let last = 0;
|
|
1454
1469
|
const m = replacement.length;
|
|
@@ -1514,7 +1529,7 @@ class Matcher {
|
|
|
1514
1529
|
* @returns {string}
|
|
1515
1530
|
* @private
|
|
1516
1531
|
*/
|
|
1517
|
-
|
|
1532
|
+
appendReplacementInternalJs(replacement) {
|
|
1518
1533
|
let res = '';
|
|
1519
1534
|
let last = 0;
|
|
1520
1535
|
const m = replacement.length;
|
|
@@ -1611,12 +1626,12 @@ class Matcher {
|
|
|
1611
1626
|
* {@code appendReplacement}.
|
|
1612
1627
|
*
|
|
1613
1628
|
* @param {string} replacement - the replacement string
|
|
1614
|
-
* @param {boolean} [
|
|
1629
|
+
* @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
|
|
1615
1630
|
* @returns {string} the input string with the matches replaced
|
|
1616
|
-
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and
|
|
1631
|
+
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and javaMode is true
|
|
1617
1632
|
*/
|
|
1618
|
-
replaceAll(replacement,
|
|
1619
|
-
return this.replace(replacement, true,
|
|
1633
|
+
replaceAll(replacement, javaMode = false) {
|
|
1634
|
+
return this.replace(replacement, true, javaMode);
|
|
1620
1635
|
}
|
|
1621
1636
|
|
|
1622
1637
|
/**
|
|
@@ -1624,27 +1639,27 @@ class Matcher {
|
|
|
1624
1639
|
* {@code appendReplacement}.
|
|
1625
1640
|
*
|
|
1626
1641
|
* @param {string} replacement - the replacement string
|
|
1627
|
-
* @param {boolean} [
|
|
1642
|
+
* @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
|
|
1628
1643
|
* @returns {string} the input string with the first match replaced
|
|
1629
|
-
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and
|
|
1644
|
+
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and javaMode is true
|
|
1630
1645
|
*/
|
|
1631
|
-
replaceFirst(replacement,
|
|
1632
|
-
return this.replace(replacement, false,
|
|
1646
|
+
replaceFirst(replacement, javaMode = false) {
|
|
1647
|
+
return this.replace(replacement, false, javaMode);
|
|
1633
1648
|
}
|
|
1634
1649
|
|
|
1635
1650
|
/**
|
|
1636
1651
|
* Helper: replaceAll/replaceFirst hybrid.
|
|
1637
1652
|
* @param {string} replacement - the replacement string
|
|
1638
1653
|
* @param {boolean} [all=true] - replace all matches
|
|
1639
|
-
* @param {boolean} [
|
|
1654
|
+
* @param {boolean} [javaMode=false] - activate java mode (different behaviour for capture groups and special characters)
|
|
1640
1655
|
* @returns {string}
|
|
1641
1656
|
* @private
|
|
1642
1657
|
*/
|
|
1643
|
-
replace(replacement, all = true,
|
|
1658
|
+
replace(replacement, all = true, javaMode = false) {
|
|
1644
1659
|
let res = '';
|
|
1645
1660
|
this.reset();
|
|
1646
1661
|
while (this.find()) {
|
|
1647
|
-
res += this.appendReplacement(replacement,
|
|
1662
|
+
res += this.appendReplacement(replacement, javaMode);
|
|
1648
1663
|
if (!all) {
|
|
1649
1664
|
break;
|
|
1650
1665
|
}
|
|
@@ -6201,6 +6216,20 @@ class RE2JS {
|
|
|
6201
6216
|
return Utils.quoteMeta(str);
|
|
6202
6217
|
}
|
|
6203
6218
|
|
|
6219
|
+
/**
|
|
6220
|
+
* Quotes '$' (and also '\' when javaMode is true) in {@code str}, so that the returned string could be used in
|
|
6221
|
+
* replacement methods as a literal replacement of {@code str}.
|
|
6222
|
+
*
|
|
6223
|
+
* This is a convenience delegation to {@link Matcher.quoteReplacement}.
|
|
6224
|
+
*
|
|
6225
|
+
* @param {string} str the string to be quoted
|
|
6226
|
+
* @param {boolean} [javaMode=false] whether the replacement will be used in javaMode
|
|
6227
|
+
* @returns {string} the quoted string
|
|
6228
|
+
*/
|
|
6229
|
+
static quoteReplacement(str, javaMode = false) {
|
|
6230
|
+
return Matcher.quoteReplacement(str, javaMode);
|
|
6231
|
+
}
|
|
6232
|
+
|
|
6204
6233
|
/**
|
|
6205
6234
|
* Translates a given regular expression string to ensure compatibility with RE2JS.
|
|
6206
6235
|
*
|