re2js 0.3.3 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/build/index.cjs.cjs +40 -98
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +12 -18
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +40 -98
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +40 -98
- package/build/index.umd.js.map +1 -1
- package/package.json +21 -21
package/README.md
CHANGED
|
@@ -24,11 +24,14 @@ There are certain features of PCRE or Perl regular expressions that cannot be im
|
|
|
24
24
|
To install RE2JS:
|
|
25
25
|
|
|
26
26
|
```bash
|
|
27
|
+
# npm
|
|
27
28
|
npm install re2js
|
|
28
|
-
#
|
|
29
|
+
# yarn
|
|
29
30
|
yarn add re2js
|
|
30
|
-
#
|
|
31
|
+
# pnpm
|
|
31
32
|
pnpm add re2js
|
|
33
|
+
# bun
|
|
34
|
+
bun add re2js
|
|
32
35
|
```
|
|
33
36
|
|
|
34
37
|
## Usage
|
|
@@ -205,6 +208,7 @@ You can access the named groups in a pattern using the `namedGroups()` function
|
|
|
205
208
|
import { RE2JS } from 're2js'
|
|
206
209
|
|
|
207
210
|
RE2JS.compile('(?P<foo>\\d{2})').namedGroups() // { foo: 1 }
|
|
211
|
+
RE2JS.compile('(?<bar>\\d{2})').namedGroups() // { bar: 1 }
|
|
208
212
|
RE2JS.compile('\\d{2}').namedGroups() // {}
|
|
209
213
|
RE2JS.compile('(?P<foo>.*)(?P<bar>.*)').namedGroups() // { foo: 1, bar: 2 }
|
|
210
214
|
```
|
|
@@ -234,6 +238,7 @@ The `group()` method retrieves the content matched by a specific name of capturi
|
|
|
234
238
|
```js
|
|
235
239
|
import { RE2JS } from 're2js'
|
|
236
240
|
|
|
241
|
+
// example with `(?P<name>expr)`
|
|
237
242
|
const p = RE2JS.compile(
|
|
238
243
|
'(?P<baz>f(?P<foo>b*a(?P<another>r+)){0,10})(?P<bag>bag)?(?P<nomatch>zzz)?'
|
|
239
244
|
)
|
|
@@ -245,6 +250,16 @@ if (matchString.matches()) {
|
|
|
245
250
|
matchString.group('bag') // 'bag'
|
|
246
251
|
matchString.group('nomatch') // null
|
|
247
252
|
}
|
|
253
|
+
|
|
254
|
+
// example with `(?<name>expr)`
|
|
255
|
+
const m = RE2JS.compile(
|
|
256
|
+
'(?<baz>f(?<foo>b*a))'
|
|
257
|
+
)
|
|
258
|
+
const mString = m.matcher('fbba')
|
|
259
|
+
if (mString.matches()) {
|
|
260
|
+
mString.group('baz') // 'fbba'
|
|
261
|
+
mString.group('foo') // 'bba'
|
|
262
|
+
}
|
|
248
263
|
```
|
|
249
264
|
|
|
250
265
|
### Replacing Matches
|
package/build/index.cjs.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v0.3.3
|
|
5
|
+
* @version v0.4.1
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -419,6 +419,8 @@ class Unicode {
|
|
|
419
419
|
// equalsIgnoreCase performs case-insensitive equality comparison
|
|
420
420
|
// on the given runes |r1| and |r2|, with special consideration
|
|
421
421
|
// for the likely scenario where both runes are ASCII characters.
|
|
422
|
+
// If non-ASCII, Unicode case folding will be performed on |r1|
|
|
423
|
+
// to compare it to |r2|.
|
|
422
424
|
// -1 is interpreted as the end-of-file mark.
|
|
423
425
|
static equalsIgnoreCase(r1, r2) {
|
|
424
426
|
// Runes already match, or one of them is EOF
|
|
@@ -677,9 +679,7 @@ class Utils {
|
|
|
677
679
|
// example
|
|
678
680
|
// Encoding[(Encoding['UTF_16'] = 0)] = 'UTF_16'
|
|
679
681
|
// Encoding[(Encoding['UTF_8'] = 1)] = 'UTF_8'
|
|
680
|
-
const createEnum = function () {
|
|
681
|
-
let values = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : [];
|
|
682
|
-
let initNum = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
682
|
+
const createEnum = (values = [], initNum = 0) => {
|
|
683
683
|
const enumObject = {};
|
|
684
684
|
for (let i = 0; i < values.length; i++) {
|
|
685
685
|
const val = values[i];
|
|
@@ -716,8 +716,7 @@ class MatcherInputBase {
|
|
|
716
716
|
}
|
|
717
717
|
}
|
|
718
718
|
class Utf8MatcherInput extends MatcherInputBase {
|
|
719
|
-
constructor() {
|
|
720
|
-
let bytes = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
719
|
+
constructor(bytes = null) {
|
|
721
720
|
super();
|
|
722
721
|
this.bytes = bytes;
|
|
723
722
|
}
|
|
@@ -749,8 +748,7 @@ class Utf8MatcherInput extends MatcherInputBase {
|
|
|
749
748
|
}
|
|
750
749
|
}
|
|
751
750
|
class Utf16MatcherInput extends MatcherInputBase {
|
|
752
|
-
constructor() {
|
|
753
|
-
let charSequence = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
751
|
+
constructor(charSequence = null) {
|
|
754
752
|
super();
|
|
755
753
|
this.charSequence = charSequence;
|
|
756
754
|
}
|
|
@@ -814,8 +812,7 @@ class RE2JSException extends Error {
|
|
|
814
812
|
* An exception thrown by the parser if the pattern was invalid.
|
|
815
813
|
*/
|
|
816
814
|
class RE2JSSyntaxException extends RE2JSException {
|
|
817
|
-
constructor(error) {
|
|
818
|
-
let input = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null;
|
|
815
|
+
constructor(error, input = null) {
|
|
819
816
|
let message = `error parsing regexp: ${error}`;
|
|
820
817
|
if (input) {
|
|
821
818
|
message += `: \`${input}\``;
|
|
@@ -875,7 +872,6 @@ class RE2JSFlagsException extends RE2JSException {
|
|
|
875
872
|
/**
|
|
876
873
|
* A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
|
|
877
874
|
*
|
|
878
|
-
* <p>
|
|
879
875
|
* Conceptually, a Matcher consists of four parts:
|
|
880
876
|
* <ol>
|
|
881
877
|
* <li>A compiled regular expression {@code RE2JS}, set at construction and fixed for the lifetime
|
|
@@ -988,8 +984,7 @@ class Matcher {
|
|
|
988
984
|
* @param {string|number} [group=0]
|
|
989
985
|
* @returns {string}
|
|
990
986
|
*/
|
|
991
|
-
start() {
|
|
992
|
-
let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
987
|
+
start(group = 0) {
|
|
993
988
|
if (typeof group === 'string') {
|
|
994
989
|
const groupInt = this.namedGroups[group];
|
|
995
990
|
if (!Number.isFinite(groupInt)) {
|
|
@@ -1007,8 +1002,7 @@ class Matcher {
|
|
|
1007
1002
|
* @param {string|number} [group=0]
|
|
1008
1003
|
* @returns {string}
|
|
1009
1004
|
*/
|
|
1010
|
-
end() {
|
|
1011
|
-
let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
1005
|
+
end(group = 0) {
|
|
1012
1006
|
if (typeof group === 'string') {
|
|
1013
1007
|
const groupInt = this.namedGroups[group];
|
|
1014
1008
|
if (!Number.isFinite(groupInt)) {
|
|
@@ -1025,8 +1019,7 @@ class Matcher {
|
|
|
1025
1019
|
* @param {string|number} [group=0]
|
|
1026
1020
|
* @returns {string}
|
|
1027
1021
|
*/
|
|
1028
|
-
group() {
|
|
1029
|
-
let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
1022
|
+
group(group = 0) {
|
|
1030
1023
|
if (typeof group === 'string') {
|
|
1031
1024
|
const groupInt = this.namedGroups[group];
|
|
1032
1025
|
if (!Number.isFinite(groupInt)) {
|
|
@@ -1106,8 +1099,7 @@ class Matcher {
|
|
|
1106
1099
|
* @returns {boolean} if it finds a match
|
|
1107
1100
|
* @throws RE2JSGroupException if start is not a valid input position
|
|
1108
1101
|
*/
|
|
1109
|
-
find() {
|
|
1110
|
-
let start = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
1102
|
+
find(start = null) {
|
|
1111
1103
|
if (start !== null) {
|
|
1112
1104
|
if (start < 0 || start > this.matcherInputLength) {
|
|
1113
1105
|
throw new RE2JSGroupException(`start index out of bounds: ${start}`);
|
|
@@ -1173,12 +1165,10 @@ class Matcher {
|
|
|
1173
1165
|
* the form {@code $n}, where {@code n} is the group number in decimal. It advances the append
|
|
1174
1166
|
* position to where the most recent match ended.
|
|
1175
1167
|
*
|
|
1176
|
-
* <p>
|
|
1177
1168
|
* To embed a literal {@code $}, use \$ (actually {@code "\\$"} with string escapes). The escape
|
|
1178
1169
|
* is only necessary when {@code $} is followed by a digit, but it is always allowed. Only
|
|
1179
1170
|
* {@code $} and {@code \} need escaping, but any character can be escaped.
|
|
1180
1171
|
*
|
|
1181
|
-
* <p>
|
|
1182
1172
|
* The group number {@code n} in {@code $n} is always at least one digit and expands to use more
|
|
1183
1173
|
* digits as long as the resulting number is a valid group number for this pattern. To cut it off
|
|
1184
1174
|
* earlier, escape the first digit that should not be used.
|
|
@@ -1190,8 +1180,7 @@ class Matcher {
|
|
|
1190
1180
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group
|
|
1191
1181
|
* @private
|
|
1192
1182
|
*/
|
|
1193
|
-
appendReplacement(replacement) {
|
|
1194
|
-
let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
|
|
1183
|
+
appendReplacement(replacement, perlMode = false) {
|
|
1195
1184
|
let res = '';
|
|
1196
1185
|
const s = this.start();
|
|
1197
1186
|
const e = this.end();
|
|
@@ -1375,8 +1364,7 @@ class Matcher {
|
|
|
1375
1364
|
* @returns {string} the input string with the matches replaced
|
|
1376
1365
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and perlMode is false
|
|
1377
1366
|
*/
|
|
1378
|
-
replaceAll(replacement) {
|
|
1379
|
-
let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
|
|
1367
|
+
replaceAll(replacement, perlMode = false) {
|
|
1380
1368
|
return this.replace(replacement, true, perlMode);
|
|
1381
1369
|
}
|
|
1382
1370
|
|
|
@@ -1389,8 +1377,7 @@ class Matcher {
|
|
|
1389
1377
|
* @returns {string} the input string with the first match replaced
|
|
1390
1378
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and perlMode is false
|
|
1391
1379
|
*/
|
|
1392
|
-
replaceFirst(replacement) {
|
|
1393
|
-
let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
|
|
1380
|
+
replaceFirst(replacement, perlMode = false) {
|
|
1394
1381
|
return this.replace(replacement, false, perlMode);
|
|
1395
1382
|
}
|
|
1396
1383
|
|
|
@@ -1402,9 +1389,7 @@ class Matcher {
|
|
|
1402
1389
|
* @returns {string}
|
|
1403
1390
|
* @private
|
|
1404
1391
|
*/
|
|
1405
|
-
replace(replacement) {
|
|
1406
|
-
let all = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true;
|
|
1407
|
-
let perlMode = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
|
|
1392
|
+
replace(replacement, all = true, perlMode = false) {
|
|
1408
1393
|
let res = '';
|
|
1409
1394
|
this.reset();
|
|
1410
1395
|
while (this.find()) {
|
|
@@ -1441,9 +1426,7 @@ class MachineInputBase {
|
|
|
1441
1426
|
// An implementation of MachineInput for UTF-8 byte arrays.
|
|
1442
1427
|
// |pos| and |width| are byte indices.
|
|
1443
1428
|
class MachineUTF8Input extends MachineInputBase {
|
|
1444
|
-
constructor(bytes) {
|
|
1445
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1446
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : bytes.length;
|
|
1429
|
+
constructor(bytes, start = 0, end = bytes.length) {
|
|
1447
1430
|
super();
|
|
1448
1431
|
this.bytes = bytes;
|
|
1449
1432
|
this.start = start;
|
|
@@ -1526,8 +1509,7 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1526
1509
|
|
|
1527
1510
|
// Returns the index of the first occurrence of array |target| within
|
|
1528
1511
|
// array |source| after |fromIndex|, or -1 if not found.
|
|
1529
|
-
indexOf(source, target) {
|
|
1530
|
-
let fromIndex = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0;
|
|
1512
|
+
indexOf(source, target, fromIndex = 0) {
|
|
1531
1513
|
let targetLength = target.length;
|
|
1532
1514
|
if (targetLength === 0) {
|
|
1533
1515
|
return -1;
|
|
@@ -1548,9 +1530,7 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1548
1530
|
|
|
1549
1531
|
// |pos| and |width| are in JS "char" units.
|
|
1550
1532
|
class MachineUTF16Input extends MachineInputBase {
|
|
1551
|
-
constructor(charSequence) {
|
|
1552
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1553
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : charSequence.length;
|
|
1533
|
+
constructor(charSequence, start = 0, end = charSequence.length) {
|
|
1554
1534
|
super();
|
|
1555
1535
|
this.charSequence = charSequence;
|
|
1556
1536
|
this.start = start;
|
|
@@ -1590,14 +1570,10 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1590
1570
|
}
|
|
1591
1571
|
}
|
|
1592
1572
|
class MachineInput {
|
|
1593
|
-
static fromUTF8(bytes) {
|
|
1594
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1595
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : bytes.length;
|
|
1573
|
+
static fromUTF8(bytes, start = 0, end = bytes.length) {
|
|
1596
1574
|
return new MachineUTF8Input(bytes, start, end);
|
|
1597
1575
|
}
|
|
1598
|
-
static fromUTF16(charSequence) {
|
|
1599
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1600
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : charSequence.length;
|
|
1576
|
+
static fromUTF16(charSequence, start = 0, end = charSequence.length) {
|
|
1601
1577
|
return new MachineUTF16Input(charSequence, start, end);
|
|
1602
1578
|
}
|
|
1603
1579
|
}
|
|
@@ -1685,7 +1661,6 @@ class Regexp {
|
|
|
1685
1661
|
this.name = null; // capturing name, for CAPTURE
|
|
1686
1662
|
this.namedGroups = {}; // map of group name -> capturing index
|
|
1687
1663
|
}
|
|
1688
|
-
|
|
1689
1664
|
reinit() {
|
|
1690
1665
|
this.flags = 0;
|
|
1691
1666
|
this.subs = Regexp.emptySubs();
|
|
@@ -2000,6 +1975,10 @@ class Inst {
|
|
|
2000
1975
|
// class.
|
|
2001
1976
|
if (this.runes.length === 1) {
|
|
2002
1977
|
const r0 = this.runes[0];
|
|
1978
|
+
// If this pattern is case-insensitive, apply Unicode case folding to compare the two runes.
|
|
1979
|
+
// Note that this may result in a case-folding loop when executed,
|
|
1980
|
+
// so attempt to reduce the chance of that occurring
|
|
1981
|
+
// by performing case folding on |r0| from the pattern rather than |r| from the input.
|
|
2003
1982
|
if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
|
|
2004
1983
|
return Unicode.equalsIgnoreCase(r0, r);
|
|
2005
1984
|
}
|
|
@@ -2231,10 +2210,7 @@ class Prog {
|
|
|
2231
2210
|
* @class
|
|
2232
2211
|
*/
|
|
2233
2212
|
class Frag {
|
|
2234
|
-
constructor() {
|
|
2235
|
-
let i = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
2236
|
-
let out = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
2237
|
-
let nullable = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
|
|
2213
|
+
constructor(i = 0, out = 0, nullable = false) {
|
|
2238
2214
|
this.i = i; // an instruction address (pc).
|
|
2239
2215
|
this.out = out; // a patch list; see explanation in Prog.js
|
|
2240
2216
|
this.nullable = nullable; // whether the fragment can match the empty string
|
|
@@ -2727,8 +2703,7 @@ class CharClass {
|
|
|
2727
2703
|
CharClass.qsortIntPair(array, i, right);
|
|
2728
2704
|
}
|
|
2729
2705
|
}
|
|
2730
|
-
constructor() {
|
|
2731
|
-
let r = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : Utils.emptyInts();
|
|
2706
|
+
constructor(r = Utils.emptyInts()) {
|
|
2732
2707
|
this.r = r; // inclusive ranges, pairs of [lo,hi]. r.length is even.
|
|
2733
2708
|
this.len = r.length; // prefix of |r| that is defined. Even.
|
|
2734
2709
|
}
|
|
@@ -3402,8 +3377,7 @@ class Parser {
|
|
|
3402
3377
|
static concatRunes(x, y) {
|
|
3403
3378
|
return [...x, ...y];
|
|
3404
3379
|
}
|
|
3405
|
-
constructor(wholeRegexp) {
|
|
3406
|
-
let flags = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
3380
|
+
constructor(wholeRegexp, flags = 0) {
|
|
3407
3381
|
this.wholeRegexp = wholeRegexp;
|
|
3408
3382
|
// Flags control the behavior of the parser and record information about
|
|
3409
3383
|
// regexp context.
|
|
@@ -4116,7 +4090,6 @@ class Parser {
|
|
|
4116
4090
|
if (this.swapVerticalBar()) {
|
|
4117
4091
|
this.pop(); // pop vertical bar
|
|
4118
4092
|
}
|
|
4119
|
-
|
|
4120
4093
|
this.alternate();
|
|
4121
4094
|
const n = this.stack.length;
|
|
4122
4095
|
if (n !== 1) {
|
|
@@ -4145,21 +4118,22 @@ class Parser {
|
|
|
4145
4118
|
// support all three as well. EcmaScript 4 uses only the Python form.
|
|
4146
4119
|
//
|
|
4147
4120
|
// In both the open source world (via Code Search) and the
|
|
4148
|
-
// Google source tree, (?P<expr>name) is the dominant form,
|
|
4149
|
-
// so that's the one we implement. One is enough.
|
|
4121
|
+
// Google source tree, (?P<name>expr) and (?<name>expr) are the
|
|
4122
|
+
// dominant forms of named captures and both are supported.
|
|
4150
4123
|
const s = t.rest();
|
|
4151
|
-
if (s.startsWith('(?P<')) {
|
|
4124
|
+
if (s.startsWith('(?P<') || s.startsWith('(?<')) {
|
|
4152
4125
|
// Pull out name.
|
|
4126
|
+
const begin = s.charAt(2) === 'P' ? 4 : 3;
|
|
4153
4127
|
const end = s.indexOf('>');
|
|
4154
4128
|
if (end < 0) {
|
|
4155
4129
|
throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s);
|
|
4156
4130
|
}
|
|
4157
|
-
const name = s.substring(4, end); // "name"
|
|
4131
|
+
const name = s.substring(begin, end); // "name"
|
|
4158
4132
|
t.skipString(name);
|
|
4159
|
-
t.skip(5); // "(?P<>"
|
|
4133
|
+
t.skip(begin + 1); // "(?P<>" or "(?<>"
|
|
4160
4134
|
if (!Parser.isValidCaptureName(name)) {
|
|
4161
4135
|
// "(?P<name>"
|
|
4162
|
-
throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s.substring(0, end));
|
|
4136
|
+
throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s.substring(0, end + 1)); // "(?P<name>" or "(?<name>"
|
|
4163
4137
|
}
|
|
4164
4138
|
// Like ordinary capture, but named.
|
|
4165
4139
|
const re = this.op(Regexp.Op.LEFT_PAREN);
|
|
@@ -4289,7 +4263,6 @@ class Parser {
|
|
|
4289
4263
|
if (this.swapVerticalBar()) {
|
|
4290
4264
|
this.pop(); // pop vertical bar
|
|
4291
4265
|
}
|
|
4292
|
-
|
|
4293
4266
|
this.alternate();
|
|
4294
4267
|
const n = this.stack.length;
|
|
4295
4268
|
if (n < 2) {
|
|
@@ -4631,8 +4604,7 @@ class Machine {
|
|
|
4631
4604
|
}
|
|
4632
4605
|
|
|
4633
4606
|
// Frees all threads on the thread queue, returning them to the free pool.
|
|
4634
|
-
freeQueue(queue) {
|
|
4635
|
-
let from = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
4607
|
+
freeQueue(queue, from = 0) {
|
|
4636
4608
|
const numberOfThread = queue.size - from;
|
|
4637
4609
|
const requiredPoolLength = this.poolSize + numberOfThread;
|
|
4638
4610
|
if (this.pool.length < requiredPoolLength) {
|
|
@@ -4876,10 +4848,8 @@ class AtomicReference {
|
|
|
4876
4848
|
* An RE2 class instance is a compiled representation of an RE2 regular expression, independent of
|
|
4877
4849
|
* the public Java-like Pattern/Matcher API.
|
|
4878
4850
|
*
|
|
4879
|
-
* <p>
|
|
4880
4851
|
* This class also contains various implementation helpers for RE2 regular expressions.
|
|
4881
4852
|
*
|
|
4882
|
-
* <p>
|
|
4883
4853
|
* Use the {@link #quoteMeta(String)} utility function to quote all regular expression
|
|
4884
4854
|
* metacharacters in an arbitrary string.
|
|
4885
4855
|
*
|
|
@@ -4901,7 +4871,6 @@ class RE2 {
|
|
|
4901
4871
|
* Parses a regular expression and returns, if successful, an {@code RE2} instance that can be
|
|
4902
4872
|
* used to match against text.
|
|
4903
4873
|
*
|
|
4904
|
-
* <p>
|
|
4905
4874
|
* When matching against text, the regexp returns a match that begins as early as possible in the
|
|
4906
4875
|
* input (leftmost), and among those it chooses the one that a backtracking search would have
|
|
4907
4876
|
* found first. This so-called leftmost-first matching is the same semantics that Perl, Python,
|
|
@@ -4916,13 +4885,11 @@ class RE2 {
|
|
|
4916
4885
|
* {@code compilePOSIX} is like {@link #compile} but restricts the regular expression to POSIX ERE
|
|
4917
4886
|
* (egrep) syntax and changes the match semantics to leftmost-longest.
|
|
4918
4887
|
*
|
|
4919
|
-
* <p>
|
|
4920
4888
|
* That is, when matching against text, the regexp returns a match that begins as early as
|
|
4921
4889
|
* possible in the input (leftmost), and among those it chooses a match that is as long as
|
|
4922
4890
|
* possible. This so-called leftmost-longest matching is the same semantics that early regular
|
|
4923
4891
|
* expression implementations used and that POSIX specifies.
|
|
4924
4892
|
*
|
|
4925
|
-
* <p>
|
|
4926
4893
|
* However, there can be multiple leftmost-longest matches, with different submatch choices, and
|
|
4927
4894
|
* here this package diverges from POSIX. Among the possible leftmost-longest matches, this
|
|
4928
4895
|
* package chooses the one that a backtracking search would have found first, while POSIX
|
|
@@ -4955,16 +4922,13 @@ class RE2 {
|
|
|
4955
4922
|
/**
|
|
4956
4923
|
* Returns true iff textual regular expression {@code pattern} matches string {@code s}.
|
|
4957
4924
|
*
|
|
4958
|
-
* <p>
|
|
4959
4925
|
* More complicated queries need to use {@link #compile} and the full {@code RE2} interface.
|
|
4960
4926
|
*/
|
|
4961
4927
|
// This is visible for testing.
|
|
4962
4928
|
static match(pattern, s) {
|
|
4963
4929
|
return RE2.compile(pattern).match(s);
|
|
4964
4930
|
}
|
|
4965
|
-
constructor(expr, prog) {
|
|
4966
|
-
let numSubexp = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0;
|
|
4967
|
-
let longest = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : 0;
|
|
4931
|
+
constructor(expr, prog, numSubexp = 0, longest = 0) {
|
|
4968
4932
|
this.expr = expr; // as passed to Compile
|
|
4969
4933
|
this.prog = prog; // compiled program
|
|
4970
4934
|
this.numSubexp = numSubexp;
|
|
@@ -5198,8 +5162,7 @@ class RE2 {
|
|
|
5198
5162
|
}
|
|
5199
5163
|
|
|
5200
5164
|
// Find matches in input.
|
|
5201
|
-
allMatches(input, n) {
|
|
5202
|
-
let deliverFun = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : v => v;
|
|
5165
|
+
allMatches(input, n, deliverFun = v => v) {
|
|
5203
5166
|
let result = [];
|
|
5204
5167
|
const end = input.endPos();
|
|
5205
5168
|
if (n < 0) {
|
|
@@ -5276,7 +5239,6 @@ class RE2 {
|
|
|
5276
5239
|
* Returns an array holding the text of the leftmost match in {@code b} of this regular
|
|
5277
5240
|
* expression.
|
|
5278
5241
|
*
|
|
5279
|
-
* <p>
|
|
5280
5242
|
* A return value of null indicates no match.
|
|
5281
5243
|
*/
|
|
5282
5244
|
// This is visible for testing.
|
|
@@ -5292,7 +5254,6 @@ class RE2 {
|
|
|
5292
5254
|
* Returns a two-element array of integers defining the location of the leftmost match in
|
|
5293
5255
|
* {@code b} of this regular expression. The match itself is at {@code b[loc[0]...loc[1]]}.
|
|
5294
5256
|
*
|
|
5295
|
-
* <p>
|
|
5296
5257
|
* A return value of null indicates no match.
|
|
5297
5258
|
*/
|
|
5298
5259
|
// This is visible for testing.
|
|
@@ -5308,7 +5269,6 @@ class RE2 {
|
|
|
5308
5269
|
* Returns a string holding the text of the leftmost match in {@code s} of this regular
|
|
5309
5270
|
* expression.
|
|
5310
5271
|
*
|
|
5311
|
-
* <p>
|
|
5312
5272
|
* If there is no match, the return value is an empty string, but it will also be empty if the
|
|
5313
5273
|
* regular expression successfully matches an empty string. Use {@link #findIndex} or
|
|
5314
5274
|
* {@link #findSubmatch} if it is necessary to distinguish these cases.
|
|
@@ -5327,7 +5287,6 @@ class RE2 {
|
|
|
5327
5287
|
* {@code s} of this regular expression. The match itself is at
|
|
5328
5288
|
* {@code s.substring(loc[0], loc[1])}.
|
|
5329
5289
|
*
|
|
5330
|
-
* <p>
|
|
5331
5290
|
* A return value of null indicates no match.
|
|
5332
5291
|
*/
|
|
5333
5292
|
// This is visible for testing.
|
|
@@ -5340,7 +5299,6 @@ class RE2 {
|
|
|
5340
5299
|
* {@code b} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5341
5300
|
* href='#submatch'>Submatch</a> description above.
|
|
5342
5301
|
*
|
|
5343
|
-
* <p>
|
|
5344
5302
|
* A return value of null indicates no match.
|
|
5345
5303
|
*/
|
|
5346
5304
|
// This is visible for testing.
|
|
@@ -5363,7 +5321,6 @@ class RE2 {
|
|
|
5363
5321
|
* expression in {@code b} and the matches, if any, of its subexpressions, as defined by the
|
|
5364
5322
|
* <a href='#submatch'>Submatch</a> and <a href='#index'>Index</a> descriptions above.
|
|
5365
5323
|
*
|
|
5366
|
-
* <p>
|
|
5367
5324
|
* A return value of null indicates no match.
|
|
5368
5325
|
*/
|
|
5369
5326
|
// This is visible for testing.
|
|
@@ -5376,7 +5333,6 @@ class RE2 {
|
|
|
5376
5333
|
* {@code s} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5377
5334
|
* href='#submatch'>Submatch</a> description above.
|
|
5378
5335
|
*
|
|
5379
|
-
* <p>
|
|
5380
5336
|
* A return value of null indicates no match.
|
|
5381
5337
|
*/
|
|
5382
5338
|
// This is visible for testing.
|
|
@@ -5399,7 +5355,6 @@ class RE2 {
|
|
|
5399
5355
|
* expression in {@code s} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5400
5356
|
* href='#submatch'>Submatch</a> description above.
|
|
5401
5357
|
*
|
|
5402
|
-
* <p>
|
|
5403
5358
|
* A return value of null indicates no match.
|
|
5404
5359
|
*/
|
|
5405
5360
|
// This is visible for testing.
|
|
@@ -5412,7 +5367,6 @@ class RE2 {
|
|
|
5412
5367
|
* list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5413
5368
|
* href='#all'>All</a> description above.
|
|
5414
5369
|
*
|
|
5415
|
-
* <p>
|
|
5416
5370
|
* A return value of null indicates no match.
|
|
5417
5371
|
*
|
|
5418
5372
|
* TODO(adonovan): think about defining a byte slice view class, like a read-only Go slice backed
|
|
@@ -5432,7 +5386,6 @@ class RE2 {
|
|
|
5432
5386
|
* returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5433
5387
|
* href='#all'>All</a> description above.
|
|
5434
5388
|
*
|
|
5435
|
-
* <p>
|
|
5436
5389
|
* A return value of null indicates no match.
|
|
5437
5390
|
*/
|
|
5438
5391
|
// This is visible for testing.
|
|
@@ -5449,7 +5402,6 @@ class RE2 {
|
|
|
5449
5402
|
* to {@code n} successive matches of the expression, as defined by the <a href='#all'>All</a>
|
|
5450
5403
|
* description above.
|
|
5451
5404
|
*
|
|
5452
|
-
* <p>
|
|
5453
5405
|
* A return value of null indicates no match.
|
|
5454
5406
|
*/
|
|
5455
5407
|
// This is visible for testing.
|
|
@@ -5466,7 +5418,6 @@ class RE2 {
|
|
|
5466
5418
|
* list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5467
5419
|
* href='#all'>All</a> description above.
|
|
5468
5420
|
*
|
|
5469
|
-
* <p>
|
|
5470
5421
|
* A return value of null indicates no match.
|
|
5471
5422
|
*/
|
|
5472
5423
|
// This is visible for testing.
|
|
@@ -5483,7 +5434,6 @@ class RE2 {
|
|
|
5483
5434
|
* it returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5484
5435
|
* href='#all'>All</a> description above.
|
|
5485
5436
|
*
|
|
5486
|
-
* <p>
|
|
5487
5437
|
* A return value of null indicates no match.
|
|
5488
5438
|
*/
|
|
5489
5439
|
// This is visible for testing.
|
|
@@ -5508,7 +5458,6 @@ class RE2 {
|
|
|
5508
5458
|
* {@link #findUTF8SubmatchIndex}; it returns a list of up to {@code n} successive matches of the
|
|
5509
5459
|
* expression, as defined by the <a href='#all'>All</a> description above.
|
|
5510
5460
|
*
|
|
5511
|
-
* <p>
|
|
5512
5461
|
* A return value of null indicates no match.
|
|
5513
5462
|
*/
|
|
5514
5463
|
// This is visible for testing.
|
|
@@ -5525,7 +5474,6 @@ class RE2 {
|
|
|
5525
5474
|
* returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5526
5475
|
* href='#all'>All</a> description above.
|
|
5527
5476
|
*
|
|
5528
|
-
* <p>
|
|
5529
5477
|
* A return value of null indicates no match.
|
|
5530
5478
|
*/
|
|
5531
5479
|
// This is visible for testing.
|
|
@@ -5550,7 +5498,6 @@ class RE2 {
|
|
|
5550
5498
|
* {@link #findSubmatchIndex}; it returns a list of up to {@code n} successive matches of the
|
|
5551
5499
|
* expression, as defined by the <a href='#all'>All</a> description above.
|
|
5552
5500
|
*
|
|
5553
|
-
* <p>
|
|
5554
5501
|
* A return value of null indicates no match.
|
|
5555
5502
|
*/
|
|
5556
5503
|
// This is visible for testing.
|
|
@@ -5566,7 +5513,6 @@ class RE2 {
|
|
|
5566
5513
|
/**
|
|
5567
5514
|
* A compiled representation of an RE2 regular expression
|
|
5568
5515
|
*
|
|
5569
|
-
* <p>
|
|
5570
5516
|
* The matching functions take {@code String} arguments instead of the more general Java
|
|
5571
5517
|
* {@code CharSequence} since the latter doesn't provide UTF-16 decoding.
|
|
5572
5518
|
*
|
|
@@ -5600,10 +5546,9 @@ class RE2JS {
|
|
|
5600
5546
|
/**
|
|
5601
5547
|
* Returns a literal pattern string for the specified string.
|
|
5602
5548
|
*
|
|
5603
|
-
* <p>
|
|
5604
5549
|
* This method produces a string that can be used to create a <code>RE2JS</code> that would
|
|
5605
5550
|
* match the string <code>s</code> as if it were a literal pattern.
|
|
5606
|
-
*
|
|
5551
|
+
*
|
|
5607
5552
|
* Metacharacters or escape sequences in the input sequence will be given no special meaning.
|
|
5608
5553
|
*
|
|
5609
5554
|
* @param {string} str The string to be literalized
|
|
@@ -5619,8 +5564,7 @@ class RE2JS {
|
|
|
5619
5564
|
* @param {number} [flags=0]
|
|
5620
5565
|
* @returns {RE2JS}
|
|
5621
5566
|
*/
|
|
5622
|
-
static compile(regex) {
|
|
5623
|
-
let flags = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
5567
|
+
static compile(regex, flags = 0) {
|
|
5624
5568
|
let fregex = regex;
|
|
5625
5569
|
if ((flags & RE2JS.CASE_INSENSITIVE) !== 0) {
|
|
5626
5570
|
fregex = `(?i)${fregex}`;
|
|
@@ -5739,7 +5683,6 @@ class RE2JS {
|
|
|
5739
5683
|
* Splits input around instances of the regular expression. It returns an array giving the strings
|
|
5740
5684
|
* that occur before, between, and after instances of the regular expression.
|
|
5741
5685
|
*
|
|
5742
|
-
* <p>
|
|
5743
5686
|
* If {@code limit <= 0}, there is no limit on the size of the returned array. If
|
|
5744
5687
|
* {@code limit == 0}, empty strings that would occur at the end of the array are omitted. If
|
|
5745
5688
|
* {@code limit > 0}, at most limit strings are returned. The final string contains the remainder
|
|
@@ -5749,8 +5692,7 @@ class RE2JS {
|
|
|
5749
5692
|
* @param {number} [limit=0] the limit
|
|
5750
5693
|
* @returns {string[]} the split strings
|
|
5751
5694
|
*/
|
|
5752
|
-
split(input) {
|
|
5753
|
-
let limit = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
5695
|
+
split(input, limit = 0) {
|
|
5754
5696
|
const m = this.matcher(input);
|
|
5755
5697
|
const result = [];
|
|
5756
5698
|
let emptiesSkipped = 0;
|