re2js 0.3.3 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/build/index.cjs.cjs +40 -98
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +12 -18
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +40 -98
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +40 -98
- package/build/index.umd.js.map +1 -1
- package/package.json +21 -21
package/build/index.esm.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v0.3.3
|
|
5
|
+
* @version v0.4.1
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -417,6 +417,8 @@ class Unicode {
|
|
|
417
417
|
// equalsIgnoreCase performs case-insensitive equality comparison
|
|
418
418
|
// on the given runes |r1| and |r2|, with special consideration
|
|
419
419
|
// for the likely scenario where both runes are ASCII characters.
|
|
420
|
+
// If non-ASCII, Unicode case folding will be performed on |r1|
|
|
421
|
+
// to compare it to |r2|.
|
|
420
422
|
// -1 is interpreted as the end-of-file mark.
|
|
421
423
|
static equalsIgnoreCase(r1, r2) {
|
|
422
424
|
// Runes already match, or one of them is EOF
|
|
@@ -675,9 +677,7 @@ class Utils {
|
|
|
675
677
|
// example
|
|
676
678
|
// Encoding[(Encoding['UTF_16'] = 0)] = 'UTF_16'
|
|
677
679
|
// Encoding[(Encoding['UTF_8'] = 1)] = 'UTF_8'
|
|
678
|
-
const createEnum = function () {
|
|
679
|
-
let values = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : [];
|
|
680
|
-
let initNum = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
680
|
+
const createEnum = (values = [], initNum = 0) => {
|
|
681
681
|
const enumObject = {};
|
|
682
682
|
for (let i = 0; i < values.length; i++) {
|
|
683
683
|
const val = values[i];
|
|
@@ -714,8 +714,7 @@ class MatcherInputBase {
|
|
|
714
714
|
}
|
|
715
715
|
}
|
|
716
716
|
class Utf8MatcherInput extends MatcherInputBase {
|
|
717
|
-
constructor() {
|
|
718
|
-
let bytes = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
717
|
+
constructor(bytes = null) {
|
|
719
718
|
super();
|
|
720
719
|
this.bytes = bytes;
|
|
721
720
|
}
|
|
@@ -747,8 +746,7 @@ class Utf8MatcherInput extends MatcherInputBase {
|
|
|
747
746
|
}
|
|
748
747
|
}
|
|
749
748
|
class Utf16MatcherInput extends MatcherInputBase {
|
|
750
|
-
constructor() {
|
|
751
|
-
let charSequence = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
749
|
+
constructor(charSequence = null) {
|
|
752
750
|
super();
|
|
753
751
|
this.charSequence = charSequence;
|
|
754
752
|
}
|
|
@@ -812,8 +810,7 @@ class RE2JSException extends Error {
|
|
|
812
810
|
* An exception thrown by the parser if the pattern was invalid.
|
|
813
811
|
*/
|
|
814
812
|
class RE2JSSyntaxException extends RE2JSException {
|
|
815
|
-
constructor(error) {
|
|
816
|
-
let input = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null;
|
|
813
|
+
constructor(error, input = null) {
|
|
817
814
|
let message = `error parsing regexp: ${error}`;
|
|
818
815
|
if (input) {
|
|
819
816
|
message += `: \`${input}\``;
|
|
@@ -873,7 +870,6 @@ class RE2JSFlagsException extends RE2JSException {
|
|
|
873
870
|
/**
|
|
874
871
|
* A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
|
|
875
872
|
*
|
|
876
|
-
* <p>
|
|
877
873
|
* Conceptually, a Matcher consists of four parts:
|
|
878
874
|
* <ol>
|
|
879
875
|
* <li>A compiled regular expression {@code RE2JS}, set at construction and fixed for the lifetime
|
|
@@ -986,8 +982,7 @@ class Matcher {
|
|
|
986
982
|
* @param {string|number} [group=0]
|
|
987
983
|
* @returns {string}
|
|
988
984
|
*/
|
|
989
|
-
start() {
|
|
990
|
-
let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
985
|
+
start(group = 0) {
|
|
991
986
|
if (typeof group === 'string') {
|
|
992
987
|
const groupInt = this.namedGroups[group];
|
|
993
988
|
if (!Number.isFinite(groupInt)) {
|
|
@@ -1005,8 +1000,7 @@ class Matcher {
|
|
|
1005
1000
|
* @param {string|number} [group=0]
|
|
1006
1001
|
* @returns {string}
|
|
1007
1002
|
*/
|
|
1008
|
-
end() {
|
|
1009
|
-
let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
1003
|
+
end(group = 0) {
|
|
1010
1004
|
if (typeof group === 'string') {
|
|
1011
1005
|
const groupInt = this.namedGroups[group];
|
|
1012
1006
|
if (!Number.isFinite(groupInt)) {
|
|
@@ -1023,8 +1017,7 @@ class Matcher {
|
|
|
1023
1017
|
* @param {string|number} [group=0]
|
|
1024
1018
|
* @returns {string}
|
|
1025
1019
|
*/
|
|
1026
|
-
group() {
|
|
1027
|
-
let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
1020
|
+
group(group = 0) {
|
|
1028
1021
|
if (typeof group === 'string') {
|
|
1029
1022
|
const groupInt = this.namedGroups[group];
|
|
1030
1023
|
if (!Number.isFinite(groupInt)) {
|
|
@@ -1104,8 +1097,7 @@ class Matcher {
|
|
|
1104
1097
|
* @returns {boolean} if it finds a match
|
|
1105
1098
|
* @throws IndexOutOfBoundsException if start is not a valid input position
|
|
1106
1099
|
*/
|
|
1107
|
-
find() {
|
|
1108
|
-
let start = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
1100
|
+
find(start = null) {
|
|
1109
1101
|
if (start !== null) {
|
|
1110
1102
|
if (start < 0 || start > this.matcherInputLength) {
|
|
1111
1103
|
throw new RE2JSGroupException(`start index out of bounds: ${start}`);
|
|
@@ -1171,12 +1163,10 @@ class Matcher {
|
|
|
1171
1163
|
* the form {@code $n}, where {@code n} is the group number in decimal. It advances the append
|
|
1172
1164
|
* position to where the most recent match ended.
|
|
1173
1165
|
*
|
|
1174
|
-
* <p>
|
|
1175
1166
|
* To embed a literal {@code $}, use \$ (actually {@code "\\$"} with string escapes). The escape
|
|
1176
1167
|
* is only necessary when {@code $} is followed by a digit, but it is always allowed. Only
|
|
1177
1168
|
* {@code $} and {@code \} need escaping, but any character can be escaped.
|
|
1178
1169
|
*
|
|
1179
|
-
* <p>
|
|
1180
1170
|
* The group number {@code n} in {@code $n} is always at least one digit and expands to use more
|
|
1181
1171
|
* digits as long as the resulting number is a valid group number for this pattern. To cut it off
|
|
1182
1172
|
* earlier, escape the first digit that should not be used.
|
|
@@ -1188,8 +1178,7 @@ class Matcher {
|
|
|
1188
1178
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group
|
|
1189
1179
|
* @private
|
|
1190
1180
|
*/
|
|
1191
|
-
appendReplacement(replacement) {
|
|
1192
|
-
let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
|
|
1181
|
+
appendReplacement(replacement, perlMode = false) {
|
|
1193
1182
|
let res = '';
|
|
1194
1183
|
const s = this.start();
|
|
1195
1184
|
const e = this.end();
|
|
@@ -1373,8 +1362,7 @@ class Matcher {
|
|
|
1373
1362
|
* @returns {string} the input string with the matches replaced
|
|
1374
1363
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and perlMode is false
|
|
1375
1364
|
*/
|
|
1376
|
-
replaceAll(replacement) {
|
|
1377
|
-
let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
|
|
1365
|
+
replaceAll(replacement, perlMode = false) {
|
|
1378
1366
|
return this.replace(replacement, true, perlMode);
|
|
1379
1367
|
}
|
|
1380
1368
|
|
|
@@ -1387,8 +1375,7 @@ class Matcher {
|
|
|
1387
1375
|
* @returns {string} the input string with the first match replaced
|
|
1388
1376
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and perlMode is false
|
|
1389
1377
|
*/
|
|
1390
|
-
replaceFirst(replacement) {
|
|
1391
|
-
let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
|
|
1378
|
+
replaceFirst(replacement, perlMode = false) {
|
|
1392
1379
|
return this.replace(replacement, false, perlMode);
|
|
1393
1380
|
}
|
|
1394
1381
|
|
|
@@ -1400,9 +1387,7 @@ class Matcher {
|
|
|
1400
1387
|
* @returns {string}
|
|
1401
1388
|
* @private
|
|
1402
1389
|
*/
|
|
1403
|
-
replace(replacement) {
|
|
1404
|
-
let all = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true;
|
|
1405
|
-
let perlMode = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
|
|
1390
|
+
replace(replacement, all = true, perlMode = false) {
|
|
1406
1391
|
let res = '';
|
|
1407
1392
|
this.reset();
|
|
1408
1393
|
while (this.find()) {
|
|
@@ -1439,9 +1424,7 @@ class MachineInputBase {
|
|
|
1439
1424
|
// An implementation of MachineInput for UTF-8 byte arrays.
|
|
1440
1425
|
// |pos| and |width| are byte indices.
|
|
1441
1426
|
class MachineUTF8Input extends MachineInputBase {
|
|
1442
|
-
constructor(bytes) {
|
|
1443
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1444
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : bytes.length;
|
|
1427
|
+
constructor(bytes, start = 0, end = bytes.length) {
|
|
1445
1428
|
super();
|
|
1446
1429
|
this.bytes = bytes;
|
|
1447
1430
|
this.start = start;
|
|
@@ -1524,8 +1507,7 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1524
1507
|
|
|
1525
1508
|
// Returns the index of the first occurrence of array |target| within
|
|
1526
1509
|
// array |source| after |fromIndex|, or -1 if not found.
|
|
1527
|
-
indexOf(source, target) {
|
|
1528
|
-
let fromIndex = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0;
|
|
1510
|
+
indexOf(source, target, fromIndex = 0) {
|
|
1529
1511
|
let targetLength = target.length;
|
|
1530
1512
|
if (targetLength === 0) {
|
|
1531
1513
|
return -1;
|
|
@@ -1546,9 +1528,7 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1546
1528
|
|
|
1547
1529
|
// |pos| and |width| are in JS "char" units.
|
|
1548
1530
|
class MachineUTF16Input extends MachineInputBase {
|
|
1549
|
-
constructor(charSequence) {
|
|
1550
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1551
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : charSequence.length;
|
|
1531
|
+
constructor(charSequence, start = 0, end = charSequence.length) {
|
|
1552
1532
|
super();
|
|
1553
1533
|
this.charSequence = charSequence;
|
|
1554
1534
|
this.start = start;
|
|
@@ -1588,14 +1568,10 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1588
1568
|
}
|
|
1589
1569
|
}
|
|
1590
1570
|
class MachineInput {
|
|
1591
|
-
static fromUTF8(bytes) {
|
|
1592
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1593
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : bytes.length;
|
|
1571
|
+
static fromUTF8(bytes, start = 0, end = bytes.length) {
|
|
1594
1572
|
return new MachineUTF8Input(bytes, start, end);
|
|
1595
1573
|
}
|
|
1596
|
-
static fromUTF16(charSequence) {
|
|
1597
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1598
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : charSequence.length;
|
|
1574
|
+
static fromUTF16(charSequence, start = 0, end = charSequence.length) {
|
|
1599
1575
|
return new MachineUTF16Input(charSequence, start, end);
|
|
1600
1576
|
}
|
|
1601
1577
|
}
|
|
@@ -1683,7 +1659,6 @@ class Regexp {
|
|
|
1683
1659
|
this.name = null; // capturing name, for CAPTURE
|
|
1684
1660
|
this.namedGroups = {}; // map of group name -> capturing index
|
|
1685
1661
|
}
|
|
1686
|
-
|
|
1687
1662
|
reinit() {
|
|
1688
1663
|
this.flags = 0;
|
|
1689
1664
|
this.subs = Regexp.emptySubs();
|
|
@@ -1998,6 +1973,10 @@ class Inst {
|
|
|
1998
1973
|
// class.
|
|
1999
1974
|
if (this.runes.length === 1) {
|
|
2000
1975
|
const r0 = this.runes[0];
|
|
1976
|
+
// If this pattern is case-insensitive, apply Unicode case folding to compare the two runes.
|
|
1977
|
+
// Note that this may result in a case-folding loop when executed,
|
|
1978
|
+
// so attempt to reduce the chance of that occurring
|
|
1979
|
+
// by performing case folding on |r0| from the pattern rather than |r| from the input.
|
|
2001
1980
|
if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
|
|
2002
1981
|
return Unicode.equalsIgnoreCase(r0, r);
|
|
2003
1982
|
}
|
|
@@ -2229,10 +2208,7 @@ class Prog {
|
|
|
2229
2208
|
* @class
|
|
2230
2209
|
*/
|
|
2231
2210
|
class Frag {
|
|
2232
|
-
constructor() {
|
|
2233
|
-
let i = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
2234
|
-
let out = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
2235
|
-
let nullable = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
|
|
2211
|
+
constructor(i = 0, out = 0, nullable = false) {
|
|
2236
2212
|
this.i = i; // an instruction address (pc).
|
|
2237
2213
|
this.out = out; // a patch list; see explanation in Prog.js
|
|
2238
2214
|
this.nullable = nullable; // whether the fragment can match the empty string
|
|
@@ -2725,8 +2701,7 @@ class CharClass {
|
|
|
2725
2701
|
CharClass.qsortIntPair(array, i, right);
|
|
2726
2702
|
}
|
|
2727
2703
|
}
|
|
2728
|
-
constructor() {
|
|
2729
|
-
let r = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : Utils.emptyInts();
|
|
2704
|
+
constructor(r = Utils.emptyInts()) {
|
|
2730
2705
|
this.r = r; // inclusive ranges, pairs of [lo,hi]. r.length is even.
|
|
2731
2706
|
this.len = r.length; // prefix of |r| that is defined. Even.
|
|
2732
2707
|
}
|
|
@@ -3400,8 +3375,7 @@ class Parser {
|
|
|
3400
3375
|
static concatRunes(x, y) {
|
|
3401
3376
|
return [...x, ...y];
|
|
3402
3377
|
}
|
|
3403
|
-
constructor(wholeRegexp) {
|
|
3404
|
-
let flags = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
3378
|
+
constructor(wholeRegexp, flags = 0) {
|
|
3405
3379
|
this.wholeRegexp = wholeRegexp;
|
|
3406
3380
|
// Flags control the behavior of the parser and record information about
|
|
3407
3381
|
// regexp context.
|
|
@@ -4114,7 +4088,6 @@ class Parser {
|
|
|
4114
4088
|
if (this.swapVerticalBar()) {
|
|
4115
4089
|
this.pop(); // pop vertical bar
|
|
4116
4090
|
}
|
|
4117
|
-
|
|
4118
4091
|
this.alternate();
|
|
4119
4092
|
const n = this.stack.length;
|
|
4120
4093
|
if (n !== 1) {
|
|
@@ -4143,21 +4116,22 @@ class Parser {
|
|
|
4143
4116
|
// support all three as well. EcmaScript 4 uses only the Python form.
|
|
4144
4117
|
//
|
|
4145
4118
|
// In both the open source world (via Code Search) and the
|
|
4146
|
-
// Google source tree, (?P<expr>
|
|
4147
|
-
//
|
|
4119
|
+
// Google source tree, (?P<name>expr) and (?<name>expr) are the
|
|
4120
|
+
// dominant forms of named captures and both are supported.
|
|
4148
4121
|
const s = t.rest();
|
|
4149
|
-
if (s.startsWith('(?P<')) {
|
|
4122
|
+
if (s.startsWith('(?P<') || s.startsWith('(?<')) {
|
|
4150
4123
|
// Pull out name.
|
|
4124
|
+
const begin = s.charAt(2) === 'P' ? 4 : 3;
|
|
4151
4125
|
const end = s.indexOf('>');
|
|
4152
4126
|
if (end < 0) {
|
|
4153
4127
|
throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s);
|
|
4154
4128
|
}
|
|
4155
|
-
const name = s.substring(4, end); // "name"
|
|
4129
|
+
const name = s.substring(begin, end); // "name"
|
|
4156
4130
|
t.skipString(name);
|
|
4157
|
-
t.skip(5); // "(?P<>"
|
|
4131
|
+
t.skip(begin + 1); // "(?P<>" or "(?<>"
|
|
4158
4132
|
if (!Parser.isValidCaptureName(name)) {
|
|
4159
4133
|
// "(?P<name>"
|
|
4160
|
-
throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s.substring(0, end));
|
|
4134
|
+
throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s.substring(0, end + 1)); // "(?P<name>" or "(?<name>"
|
|
4161
4135
|
}
|
|
4162
4136
|
// Like ordinary capture, but named.
|
|
4163
4137
|
const re = this.op(Regexp.Op.LEFT_PAREN);
|
|
@@ -4287,7 +4261,6 @@ class Parser {
|
|
|
4287
4261
|
if (this.swapVerticalBar()) {
|
|
4288
4262
|
this.pop(); // pop vertical bar
|
|
4289
4263
|
}
|
|
4290
|
-
|
|
4291
4264
|
this.alternate();
|
|
4292
4265
|
const n = this.stack.length;
|
|
4293
4266
|
if (n < 2) {
|
|
@@ -4629,8 +4602,7 @@ class Machine {
|
|
|
4629
4602
|
}
|
|
4630
4603
|
|
|
4631
4604
|
// Frees all threads on the thread queue, returning them to the free pool.
|
|
4632
|
-
freeQueue(queue) {
|
|
4633
|
-
let from = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
4605
|
+
freeQueue(queue, from = 0) {
|
|
4634
4606
|
const numberOfThread = queue.size - from;
|
|
4635
4607
|
const requiredPoolLength = this.poolSize + numberOfThread;
|
|
4636
4608
|
if (this.pool.length < requiredPoolLength) {
|
|
@@ -4874,10 +4846,8 @@ class AtomicReference {
|
|
|
4874
4846
|
* An RE2 class instance is a compiled representation of an RE2 regular expression, independent of
|
|
4875
4847
|
* the public Java-like Pattern/Matcher API.
|
|
4876
4848
|
*
|
|
4877
|
-
* <p>
|
|
4878
4849
|
* This class also contains various implementation helpers for RE2 regular expressions.
|
|
4879
4850
|
*
|
|
4880
|
-
* <p>
|
|
4881
4851
|
* Use the {@link #quoteMeta(String)} utility function to quote all regular expression
|
|
4882
4852
|
* metacharacters in an arbitrary string.
|
|
4883
4853
|
*
|
|
@@ -4899,7 +4869,6 @@ class RE2 {
|
|
|
4899
4869
|
* Parses a regular expression and returns, if successful, an {@code RE2} instance that can be
|
|
4900
4870
|
* used to match against text.
|
|
4901
4871
|
*
|
|
4902
|
-
* <p>
|
|
4903
4872
|
* When matching against text, the regexp returns a match that begins as early as possible in the
|
|
4904
4873
|
* input (leftmost), and among those it chooses the one that a backtracking search would have
|
|
4905
4874
|
* found first. This so-called leftmost-first matching is the same semantics that Perl, Python,
|
|
@@ -4914,13 +4883,11 @@ class RE2 {
|
|
|
4914
4883
|
* {@code compilePOSIX} is like {@link #compile} but restricts the regular expression to POSIX ERE
|
|
4915
4884
|
* (egrep) syntax and changes the match semantics to leftmost-longest.
|
|
4916
4885
|
*
|
|
4917
|
-
* <p>
|
|
4918
4886
|
* That is, when matching against text, the regexp returns a match that begins as early as
|
|
4919
4887
|
* possible in the input (leftmost), and among those it chooses a match that is as long as
|
|
4920
4888
|
* possible. This so-called leftmost-longest matching is the same semantics that early regular
|
|
4921
4889
|
* expression implementations used and that POSIX specifies.
|
|
4922
4890
|
*
|
|
4923
|
-
* <p>
|
|
4924
4891
|
* However, there can be multiple leftmost-longest matches, with different submatch choices, and
|
|
4925
4892
|
* here this package diverges from POSIX. Among the possible leftmost-longest matches, this
|
|
4926
4893
|
* package chooses the one that a backtracking search would have found first, while POSIX
|
|
@@ -4953,16 +4920,13 @@ class RE2 {
|
|
|
4953
4920
|
/**
|
|
4954
4921
|
* Returns true iff textual regular expression {@code pattern} matches string {@code s}.
|
|
4955
4922
|
*
|
|
4956
|
-
* <p>
|
|
4957
4923
|
* More complicated queries need to use {@link #compile} and the full {@code RE2} interface.
|
|
4958
4924
|
*/
|
|
4959
4925
|
// This is visible for testing.
|
|
4960
4926
|
static match(pattern, s) {
|
|
4961
4927
|
return RE2.compile(pattern).match(s);
|
|
4962
4928
|
}
|
|
4963
|
-
constructor(expr, prog) {
|
|
4964
|
-
let numSubexp = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0;
|
|
4965
|
-
let longest = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : 0;
|
|
4929
|
+
constructor(expr, prog, numSubexp = 0, longest = 0) {
|
|
4966
4930
|
this.expr = expr; // as passed to Compile
|
|
4967
4931
|
this.prog = prog; // compiled program
|
|
4968
4932
|
this.numSubexp = numSubexp;
|
|
@@ -5196,8 +5160,7 @@ class RE2 {
|
|
|
5196
5160
|
}
|
|
5197
5161
|
|
|
5198
5162
|
// Find matches in input.
|
|
5199
|
-
allMatches(input, n) {
|
|
5200
|
-
let deliverFun = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : v => v;
|
|
5163
|
+
allMatches(input, n, deliverFun = v => v) {
|
|
5201
5164
|
let result = [];
|
|
5202
5165
|
const end = input.endPos();
|
|
5203
5166
|
if (n < 0) {
|
|
@@ -5274,7 +5237,6 @@ class RE2 {
|
|
|
5274
5237
|
* Returns an array holding the text of the leftmost match in {@code b} of this regular
|
|
5275
5238
|
* expression.
|
|
5276
5239
|
*
|
|
5277
|
-
* <p>
|
|
5278
5240
|
* A return value of null indicates no match.
|
|
5279
5241
|
*/
|
|
5280
5242
|
// This is visible for testing.
|
|
@@ -5290,7 +5252,6 @@ class RE2 {
|
|
|
5290
5252
|
* Returns a two-element array of integers defining the location of the leftmost match in
|
|
5291
5253
|
* {@code b} of this regular expression. The match itself is at {@code b[loc[0]...loc[1]]}.
|
|
5292
5254
|
*
|
|
5293
|
-
* <p>
|
|
5294
5255
|
* A return value of null indicates no match.
|
|
5295
5256
|
*/
|
|
5296
5257
|
// This is visible for testing.
|
|
@@ -5306,7 +5267,6 @@ class RE2 {
|
|
|
5306
5267
|
* Returns a string holding the text of the leftmost match in {@code s} of this regular
|
|
5307
5268
|
* expression.
|
|
5308
5269
|
*
|
|
5309
|
-
* <p>
|
|
5310
5270
|
* If there is no match, the return value is an empty string, but it will also be empty if the
|
|
5311
5271
|
* regular expression successfully matches an empty string. Use {@link #findIndex} or
|
|
5312
5272
|
* {@link #findSubmatch} if it is necessary to distinguish these cases.
|
|
@@ -5325,7 +5285,6 @@ class RE2 {
|
|
|
5325
5285
|
* {@code s} of this regular expression. The match itself is at
|
|
5326
5286
|
* {@code s.substring(loc[0], loc[1])}.
|
|
5327
5287
|
*
|
|
5328
|
-
* <p>
|
|
5329
5288
|
* A return value of null indicates no match.
|
|
5330
5289
|
*/
|
|
5331
5290
|
// This is visible for testing.
|
|
@@ -5338,7 +5297,6 @@ class RE2 {
|
|
|
5338
5297
|
* {@code b} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5339
5298
|
* href='#submatch'>Submatch</a> description above.
|
|
5340
5299
|
*
|
|
5341
|
-
* <p>
|
|
5342
5300
|
* A return value of null indicates no match.
|
|
5343
5301
|
*/
|
|
5344
5302
|
// This is visible for testing.
|
|
@@ -5361,7 +5319,6 @@ class RE2 {
|
|
|
5361
5319
|
* expression in {@code b} and the matches, if any, of its subexpressions, as defined by the
|
|
5362
5320
|
* <a href='#submatch'>Submatch</a> and <a href='#index'>Index</a> descriptions above.
|
|
5363
5321
|
*
|
|
5364
|
-
* <p>
|
|
5365
5322
|
* A return value of null indicates no match.
|
|
5366
5323
|
*/
|
|
5367
5324
|
// This is visible for testing.
|
|
@@ -5374,7 +5331,6 @@ class RE2 {
|
|
|
5374
5331
|
* {@code s} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5375
5332
|
* href='#submatch'>Submatch</a> description above.
|
|
5376
5333
|
*
|
|
5377
|
-
* <p>
|
|
5378
5334
|
* A return value of null indicates no match.
|
|
5379
5335
|
*/
|
|
5380
5336
|
// This is visible for testing.
|
|
@@ -5397,7 +5353,6 @@ class RE2 {
|
|
|
5397
5353
|
* expression in {@code s} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5398
5354
|
* href='#submatch'>Submatch</a> description above.
|
|
5399
5355
|
*
|
|
5400
|
-
* <p>
|
|
5401
5356
|
* A return value of null indicates no match.
|
|
5402
5357
|
*/
|
|
5403
5358
|
// This is visible for testing.
|
|
@@ -5410,7 +5365,6 @@ class RE2 {
|
|
|
5410
5365
|
* list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5411
5366
|
* href='#all'>All</a> description above.
|
|
5412
5367
|
*
|
|
5413
|
-
* <p>
|
|
5414
5368
|
* A return value of null indicates no match.
|
|
5415
5369
|
*
|
|
5416
5370
|
* TODO(adonovan): think about defining a byte slice view class, like a read-only Go slice backed
|
|
@@ -5430,7 +5384,6 @@ class RE2 {
|
|
|
5430
5384
|
* returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5431
5385
|
* href='#all'>All</a> description above.
|
|
5432
5386
|
*
|
|
5433
|
-
* <p>
|
|
5434
5387
|
* A return value of null indicates no match.
|
|
5435
5388
|
*/
|
|
5436
5389
|
// This is visible for testing.
|
|
@@ -5447,7 +5400,6 @@ class RE2 {
|
|
|
5447
5400
|
* to {@code n} successive matches of the expression, as defined by the <a href='#all'>All</a>
|
|
5448
5401
|
* description above.
|
|
5449
5402
|
*
|
|
5450
|
-
* <p>
|
|
5451
5403
|
* A return value of null indicates no match.
|
|
5452
5404
|
*/
|
|
5453
5405
|
// This is visible for testing.
|
|
@@ -5464,7 +5416,6 @@ class RE2 {
|
|
|
5464
5416
|
* list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5465
5417
|
* href='#all'>All</a> description above.
|
|
5466
5418
|
*
|
|
5467
|
-
* <p>
|
|
5468
5419
|
* A return value of null indicates no match.
|
|
5469
5420
|
*/
|
|
5470
5421
|
// This is visible for testing.
|
|
@@ -5481,7 +5432,6 @@ class RE2 {
|
|
|
5481
5432
|
* it returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5482
5433
|
* href='#all'>All</a> description above.
|
|
5483
5434
|
*
|
|
5484
|
-
* <p>
|
|
5485
5435
|
* A return value of null indicates no match.
|
|
5486
5436
|
*/
|
|
5487
5437
|
// This is visible for testing.
|
|
@@ -5506,7 +5456,6 @@ class RE2 {
|
|
|
5506
5456
|
* {@link #findUTF8SubmatchIndex}; it returns a list of up to {@code n} successive matches of the
|
|
5507
5457
|
* expression, as defined by the <a href='#all'>All</a> description above.
|
|
5508
5458
|
*
|
|
5509
|
-
* <p>
|
|
5510
5459
|
* A return value of null indicates no match.
|
|
5511
5460
|
*/
|
|
5512
5461
|
// This is visible for testing.
|
|
@@ -5523,7 +5472,6 @@ class RE2 {
|
|
|
5523
5472
|
* returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5524
5473
|
* href='#all'>All</a> description above.
|
|
5525
5474
|
*
|
|
5526
|
-
* <p>
|
|
5527
5475
|
* A return value of null indicates no match.
|
|
5528
5476
|
*/
|
|
5529
5477
|
// This is visible for testing.
|
|
@@ -5548,7 +5496,6 @@ class RE2 {
|
|
|
5548
5496
|
* {@link #findSubmatchIndex}; it returns a list of up to {@code n} successive matches of the
|
|
5549
5497
|
* expression, as defined by the <a href='#all'>All</a> description above.
|
|
5550
5498
|
*
|
|
5551
|
-
* <p>
|
|
5552
5499
|
* A return value of null indicates no match.
|
|
5553
5500
|
*/
|
|
5554
5501
|
// This is visible for testing.
|
|
@@ -5564,7 +5511,6 @@ class RE2 {
|
|
|
5564
5511
|
/**
|
|
5565
5512
|
* A compiled representation of an RE2 regular expression
|
|
5566
5513
|
*
|
|
5567
|
-
* <p>
|
|
5568
5514
|
* The matching functions take {@code String} arguments instead of the more general Java
|
|
5569
5515
|
* {@code CharSequence} since the latter doesn't provide UTF-16 decoding.
|
|
5570
5516
|
*
|
|
@@ -5598,10 +5544,9 @@ class RE2JS {
|
|
|
5598
5544
|
/**
|
|
5599
5545
|
* Returns a literal pattern string for the specified string.
|
|
5600
5546
|
*
|
|
5601
|
-
* <p>
|
|
5602
5547
|
* This method produces a string that can be used to create a <code>RE2JS</code> that would
|
|
5603
5548
|
* match the string <code>s</code> as if it were a literal pattern.
|
|
5604
|
-
*
|
|
5549
|
+
*
|
|
5605
5550
|
* Metacharacters or escape sequences in the input sequence will be given no special meaning.
|
|
5606
5551
|
*
|
|
5607
5552
|
* @param {string} str The string to be literalized
|
|
@@ -5617,8 +5562,7 @@ class RE2JS {
|
|
|
5617
5562
|
* @param {number} [flags=0]
|
|
5618
5563
|
* @returns {RE2JS}
|
|
5619
5564
|
*/
|
|
5620
|
-
static compile(regex) {
|
|
5621
|
-
let flags = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
5565
|
+
static compile(regex, flags = 0) {
|
|
5622
5566
|
let fregex = regex;
|
|
5623
5567
|
if ((flags & RE2JS.CASE_INSENSITIVE) !== 0) {
|
|
5624
5568
|
fregex = `(?i)${fregex}`;
|
|
@@ -5737,7 +5681,6 @@ class RE2JS {
|
|
|
5737
5681
|
* Splits input around instances of the regular expression. It returns an array giving the strings
|
|
5738
5682
|
* that occur before, between, and after instances of the regular expression.
|
|
5739
5683
|
*
|
|
5740
|
-
* <p>
|
|
5741
5684
|
* If {@code limit <= 0}, there is no limit on the size of the returned array. If
|
|
5742
5685
|
* {@code limit == 0}, empty strings that would occur at the end of the array are omitted. If
|
|
5743
5686
|
* {@code limit > 0}, at most limit strings are returned. The final string contains the remainder
|
|
@@ -5747,8 +5690,7 @@ class RE2JS {
|
|
|
5747
5690
|
* @param {number} [limit=0] the limit
|
|
5748
5691
|
* @returns {string[]} the split strings
|
|
5749
5692
|
*/
|
|
5750
|
-
split(input) {
|
|
5751
|
-
let limit = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
5693
|
+
split(input, limit = 0) {
|
|
5752
5694
|
const m = this.matcher(input);
|
|
5753
5695
|
const result = [];
|
|
5754
5696
|
let emptiesSkipped = 0;
|