re2js 0.3.3 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/build/index.cjs.cjs +40 -98
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +12 -18
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +40 -98
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +40 -98
- package/build/index.umd.js.map +1 -1
- package/package.json +21 -21
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v0.3.3
|
|
5
|
+
* @version v0.4.1
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -423,6 +423,8 @@
|
|
|
423
423
|
// equalsIgnoreCase performs case-insensitive equality comparison
|
|
424
424
|
// on the given runes |r1| and |r2|, with special consideration
|
|
425
425
|
// for the likely scenario where both runes are ASCII characters.
|
|
426
|
+
// If non-ASCII, Unicode case folding will be performed on |r1|
|
|
427
|
+
// to compare it to |r2|.
|
|
426
428
|
// -1 is interpreted as the end-of-file mark.
|
|
427
429
|
static equalsIgnoreCase(r1, r2) {
|
|
428
430
|
// Runes already match, or one of them is EOF
|
|
@@ -681,9 +683,7 @@
|
|
|
681
683
|
// example
|
|
682
684
|
// Encoding[(Encoding['UTF_16'] = 0)] = 'UTF_16'
|
|
683
685
|
// Encoding[(Encoding['UTF_8'] = 1)] = 'UTF_8'
|
|
684
|
-
const createEnum = function () {
|
|
685
|
-
let values = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : [];
|
|
686
|
-
let initNum = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
686
|
+
const createEnum = (values = [], initNum = 0) => {
|
|
687
687
|
const enumObject = {};
|
|
688
688
|
for (let i = 0; i < values.length; i++) {
|
|
689
689
|
const val = values[i];
|
|
@@ -720,8 +720,7 @@
|
|
|
720
720
|
}
|
|
721
721
|
}
|
|
722
722
|
class Utf8MatcherInput extends MatcherInputBase {
|
|
723
|
-
constructor() {
|
|
724
|
-
let bytes = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
723
|
+
constructor(bytes = null) {
|
|
725
724
|
super();
|
|
726
725
|
this.bytes = bytes;
|
|
727
726
|
}
|
|
@@ -753,8 +752,7 @@
|
|
|
753
752
|
}
|
|
754
753
|
}
|
|
755
754
|
class Utf16MatcherInput extends MatcherInputBase {
|
|
756
|
-
constructor() {
|
|
757
|
-
let charSequence = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
755
|
+
constructor(charSequence = null) {
|
|
758
756
|
super();
|
|
759
757
|
this.charSequence = charSequence;
|
|
760
758
|
}
|
|
@@ -818,8 +816,7 @@
|
|
|
818
816
|
* An exception thrown by the parser if the pattern was invalid.
|
|
819
817
|
*/
|
|
820
818
|
class RE2JSSyntaxException extends RE2JSException {
|
|
821
|
-
constructor(error) {
|
|
822
|
-
let input = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null;
|
|
819
|
+
constructor(error, input = null) {
|
|
823
820
|
let message = `error parsing regexp: ${error}`;
|
|
824
821
|
if (input) {
|
|
825
822
|
message += `: \`${input}\``;
|
|
@@ -879,7 +876,6 @@
|
|
|
879
876
|
/**
|
|
880
877
|
* A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
|
|
881
878
|
*
|
|
882
|
-
* <p>
|
|
883
879
|
* Conceptually, a Matcher consists of four parts:
|
|
884
880
|
* <ol>
|
|
885
881
|
* <li>A compiled regular expression {@code RE2JS}, set at construction and fixed for the lifetime
|
|
@@ -992,8 +988,7 @@
|
|
|
992
988
|
* @param {string|number} [group=0]
|
|
993
989
|
* @returns {string}
|
|
994
990
|
*/
|
|
995
|
-
start() {
|
|
996
|
-
let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
991
|
+
start(group = 0) {
|
|
997
992
|
if (typeof group === 'string') {
|
|
998
993
|
const groupInt = this.namedGroups[group];
|
|
999
994
|
if (!Number.isFinite(groupInt)) {
|
|
@@ -1011,8 +1006,7 @@
|
|
|
1011
1006
|
* @param {string|number} [group=0]
|
|
1012
1007
|
* @returns {string}
|
|
1013
1008
|
*/
|
|
1014
|
-
end() {
|
|
1015
|
-
let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
1009
|
+
end(group = 0) {
|
|
1016
1010
|
if (typeof group === 'string') {
|
|
1017
1011
|
const groupInt = this.namedGroups[group];
|
|
1018
1012
|
if (!Number.isFinite(groupInt)) {
|
|
@@ -1029,8 +1023,7 @@
|
|
|
1029
1023
|
* @param {string|number} [group=0]
|
|
1030
1024
|
* @returns {string}
|
|
1031
1025
|
*/
|
|
1032
|
-
group() {
|
|
1033
|
-
let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
1026
|
+
group(group = 0) {
|
|
1034
1027
|
if (typeof group === 'string') {
|
|
1035
1028
|
const groupInt = this.namedGroups[group];
|
|
1036
1029
|
if (!Number.isFinite(groupInt)) {
|
|
@@ -1110,8 +1103,7 @@
|
|
|
1110
1103
|
* @returns {boolean} if it finds a match
|
|
1111
1104
|
* @throws RE2JSGroupException if start is not a valid input position
|
|
1112
1105
|
*/
|
|
1113
|
-
find() {
|
|
1114
|
-
let start = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
1106
|
+
find(start = null) {
|
|
1115
1107
|
if (start !== null) {
|
|
1116
1108
|
if (start < 0 || start > this.matcherInputLength) {
|
|
1117
1109
|
throw new RE2JSGroupException(`start index out of bounds: ${start}`);
|
|
@@ -1177,12 +1169,10 @@
|
|
|
1177
1169
|
* the form {@code $n}, where {@code n} is the group number in decimal. It advances the append
|
|
1178
1170
|
* position to where the most recent match ended.
|
|
1179
1171
|
*
|
|
1180
|
-
* <p>
|
|
1181
1172
|
* To embed a literal {@code $}, use \$ (actually {@code "\\$"} with string escapes). The escape
|
|
1182
1173
|
* is only necessary when {@code $} is followed by a digit, but it is always allowed. Only
|
|
1183
1174
|
* {@code $} and {@code \} need escaping, but any character can be escaped.
|
|
1184
1175
|
*
|
|
1185
|
-
* <p>
|
|
1186
1176
|
* The group number {@code n} in {@code $n} is always at least one digit and expands to use more
|
|
1187
1177
|
* digits as long as the resulting number is a valid group number for this pattern. To cut it off
|
|
1188
1178
|
* earlier, escape the first digit that should not be used.
|
|
@@ -1194,8 +1184,7 @@
|
|
|
1194
1184
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group
|
|
1195
1185
|
* @private
|
|
1196
1186
|
*/
|
|
1197
|
-
appendReplacement(replacement) {
|
|
1198
|
-
let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
|
|
1187
|
+
appendReplacement(replacement, perlMode = false) {
|
|
1199
1188
|
let res = '';
|
|
1200
1189
|
const s = this.start();
|
|
1201
1190
|
const e = this.end();
|
|
@@ -1379,8 +1368,7 @@
|
|
|
1379
1368
|
* @returns {string} the input string with the matches replaced
|
|
1380
1369
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and perlMode is false
|
|
1381
1370
|
*/
|
|
1382
|
-
replaceAll(replacement) {
|
|
1383
|
-
let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
|
|
1371
|
+
replaceAll(replacement, perlMode = false) {
|
|
1384
1372
|
return this.replace(replacement, true, perlMode);
|
|
1385
1373
|
}
|
|
1386
1374
|
|
|
@@ -1393,8 +1381,7 @@
|
|
|
1393
1381
|
* @returns {string} the input string with the first match replaced
|
|
1394
1382
|
* @throws IndexOutOfBoundsException if replacement refers to an invalid group and perlMode is false
|
|
1395
1383
|
*/
|
|
1396
|
-
replaceFirst(replacement) {
|
|
1397
|
-
let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
|
|
1384
|
+
replaceFirst(replacement, perlMode = false) {
|
|
1398
1385
|
return this.replace(replacement, false, perlMode);
|
|
1399
1386
|
}
|
|
1400
1387
|
|
|
@@ -1406,9 +1393,7 @@
|
|
|
1406
1393
|
* @returns {string}
|
|
1407
1394
|
* @private
|
|
1408
1395
|
*/
|
|
1409
|
-
replace(replacement) {
|
|
1410
|
-
let all = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true;
|
|
1411
|
-
let perlMode = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
|
|
1396
|
+
replace(replacement, all = true, perlMode = false) {
|
|
1412
1397
|
let res = '';
|
|
1413
1398
|
this.reset();
|
|
1414
1399
|
while (this.find()) {
|
|
@@ -1445,9 +1430,7 @@
|
|
|
1445
1430
|
// An implementation of MachineInput for UTF-8 byte arrays.
|
|
1446
1431
|
// |pos| and |width| are byte indices.
|
|
1447
1432
|
class MachineUTF8Input extends MachineInputBase {
|
|
1448
|
-
constructor(bytes) {
|
|
1449
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1450
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : bytes.length;
|
|
1433
|
+
constructor(bytes, start = 0, end = bytes.length) {
|
|
1451
1434
|
super();
|
|
1452
1435
|
this.bytes = bytes;
|
|
1453
1436
|
this.start = start;
|
|
@@ -1530,8 +1513,7 @@
|
|
|
1530
1513
|
|
|
1531
1514
|
// Returns the index of the first occurrence of array |target| within
|
|
1532
1515
|
// array |source| after |fromIndex|, or -1 if not found.
|
|
1533
|
-
indexOf(source, target) {
|
|
1534
|
-
let fromIndex = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0;
|
|
1516
|
+
indexOf(source, target, fromIndex = 0) {
|
|
1535
1517
|
let targetLength = target.length;
|
|
1536
1518
|
if (targetLength === 0) {
|
|
1537
1519
|
return -1;
|
|
@@ -1552,9 +1534,7 @@
|
|
|
1552
1534
|
|
|
1553
1535
|
// |pos| and |width| are in JS "char" units.
|
|
1554
1536
|
class MachineUTF16Input extends MachineInputBase {
|
|
1555
|
-
constructor(charSequence) {
|
|
1556
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1557
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : charSequence.length;
|
|
1537
|
+
constructor(charSequence, start = 0, end = charSequence.length) {
|
|
1558
1538
|
super();
|
|
1559
1539
|
this.charSequence = charSequence;
|
|
1560
1540
|
this.start = start;
|
|
@@ -1594,14 +1574,10 @@
|
|
|
1594
1574
|
}
|
|
1595
1575
|
}
|
|
1596
1576
|
class MachineInput {
|
|
1597
|
-
static fromUTF8(bytes) {
|
|
1598
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1599
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : bytes.length;
|
|
1577
|
+
static fromUTF8(bytes, start = 0, end = bytes.length) {
|
|
1600
1578
|
return new MachineUTF8Input(bytes, start, end);
|
|
1601
1579
|
}
|
|
1602
|
-
static fromUTF16(charSequence) {
|
|
1603
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1604
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : charSequence.length;
|
|
1580
|
+
static fromUTF16(charSequence, start = 0, end = charSequence.length) {
|
|
1605
1581
|
return new MachineUTF16Input(charSequence, start, end);
|
|
1606
1582
|
}
|
|
1607
1583
|
}
|
|
@@ -1689,7 +1665,6 @@
|
|
|
1689
1665
|
this.name = null; // capturing name, for CAPTURE
|
|
1690
1666
|
this.namedGroups = {}; // map of group name -> capturing index
|
|
1691
1667
|
}
|
|
1692
|
-
|
|
1693
1668
|
reinit() {
|
|
1694
1669
|
this.flags = 0;
|
|
1695
1670
|
this.subs = Regexp.emptySubs();
|
|
@@ -2004,6 +1979,10 @@
|
|
|
2004
1979
|
// class.
|
|
2005
1980
|
if (this.runes.length === 1) {
|
|
2006
1981
|
const r0 = this.runes[0];
|
|
1982
|
+
// If this pattern is case-insensitive, apply Unicode case folding to compare the two runes.
|
|
1983
|
+
// Note that this may result in a case-folding loop when executed,
|
|
1984
|
+
// so attempt to reduce the chance of that occurring
|
|
1985
|
+
// by performing case folding on |r0| from the pattern rather than |r| from the input.
|
|
2007
1986
|
if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
|
|
2008
1987
|
return Unicode.equalsIgnoreCase(r0, r);
|
|
2009
1988
|
}
|
|
@@ -2235,10 +2214,7 @@
|
|
|
2235
2214
|
* @class
|
|
2236
2215
|
*/
|
|
2237
2216
|
class Frag {
|
|
2238
|
-
constructor() {
|
|
2239
|
-
let i = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
|
|
2240
|
-
let out = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
2241
|
-
let nullable = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
|
|
2217
|
+
constructor(i = 0, out = 0, nullable = false) {
|
|
2242
2218
|
this.i = i; // an instruction address (pc).
|
|
2243
2219
|
this.out = out; // a patch list; see explanation in Prog.js
|
|
2244
2220
|
this.nullable = nullable; // whether the fragment can match the empty string
|
|
@@ -2731,8 +2707,7 @@
|
|
|
2731
2707
|
CharClass.qsortIntPair(array, i, right);
|
|
2732
2708
|
}
|
|
2733
2709
|
}
|
|
2734
|
-
constructor() {
|
|
2735
|
-
let r = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : Utils.emptyInts();
|
|
2710
|
+
constructor(r = Utils.emptyInts()) {
|
|
2736
2711
|
this.r = r; // inclusive ranges, pairs of [lo,hi]. r.length is even.
|
|
2737
2712
|
this.len = r.length; // prefix of |r| that is defined. Even.
|
|
2738
2713
|
}
|
|
@@ -3406,8 +3381,7 @@
|
|
|
3406
3381
|
static concatRunes(x, y) {
|
|
3407
3382
|
return [...x, ...y];
|
|
3408
3383
|
}
|
|
3409
|
-
constructor(wholeRegexp) {
|
|
3410
|
-
let flags = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
3384
|
+
constructor(wholeRegexp, flags = 0) {
|
|
3411
3385
|
this.wholeRegexp = wholeRegexp;
|
|
3412
3386
|
// Flags control the behavior of the parser and record information about
|
|
3413
3387
|
// regexp context.
|
|
@@ -4120,7 +4094,6 @@
|
|
|
4120
4094
|
if (this.swapVerticalBar()) {
|
|
4121
4095
|
this.pop(); // pop vertical bar
|
|
4122
4096
|
}
|
|
4123
|
-
|
|
4124
4097
|
this.alternate();
|
|
4125
4098
|
const n = this.stack.length;
|
|
4126
4099
|
if (n !== 1) {
|
|
@@ -4149,21 +4122,22 @@
|
|
|
4149
4122
|
// support all three as well. EcmaScript 4 uses only the Python form.
|
|
4150
4123
|
//
|
|
4151
4124
|
// In both the open source world (via Code Search) and the
|
|
4152
|
-
// Google source tree, (?P<expr>name) is the dominant form,
|
|
4153
|
-
// so that's the one we implement. One is enough.
|
|
4125
|
+
// Google source tree, (?P<name>expr) and (?<name>expr) are the
|
|
4126
|
+
// dominant forms of named captures and both are supported.
|
|
4154
4127
|
const s = t.rest();
|
|
4155
|
-
if (s.startsWith('(?P<')) {
|
|
4128
|
+
if (s.startsWith('(?P<') || s.startsWith('(?<')) {
|
|
4156
4129
|
// Pull out name.
|
|
4130
|
+
const begin = s.charAt(2) === 'P' ? 4 : 3;
|
|
4157
4131
|
const end = s.indexOf('>');
|
|
4158
4132
|
if (end < 0) {
|
|
4159
4133
|
throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s);
|
|
4160
4134
|
}
|
|
4161
|
-
const name = s.substring(4, end); // "name"
|
|
4135
|
+
const name = s.substring(begin, end); // "name"
|
|
4162
4136
|
t.skipString(name);
|
|
4163
|
-
t.skip(5); // "(?P<>"
|
|
4137
|
+
t.skip(begin + 1); // "(?P<>" or "(?<>"
|
|
4164
4138
|
if (!Parser.isValidCaptureName(name)) {
|
|
4165
4139
|
// "(?P<name>"
|
|
4166
|
-
throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s.substring(0, end));
|
|
4140
|
+
throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s.substring(0, end + 1)); // "(?P<name>" or "(?<name>"
|
|
4167
4141
|
}
|
|
4168
4142
|
// Like ordinary capture, but named.
|
|
4169
4143
|
const re = this.op(Regexp.Op.LEFT_PAREN);
|
|
@@ -4293,7 +4267,6 @@
|
|
|
4293
4267
|
if (this.swapVerticalBar()) {
|
|
4294
4268
|
this.pop(); // pop vertical bar
|
|
4295
4269
|
}
|
|
4296
|
-
|
|
4297
4270
|
this.alternate();
|
|
4298
4271
|
const n = this.stack.length;
|
|
4299
4272
|
if (n < 2) {
|
|
@@ -4635,8 +4608,7 @@
|
|
|
4635
4608
|
}
|
|
4636
4609
|
|
|
4637
4610
|
// Frees all threads on the thread queue, returning them to the free pool.
|
|
4638
|
-
freeQueue(queue) {
|
|
4639
|
-
let from = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
4611
|
+
freeQueue(queue, from = 0) {
|
|
4640
4612
|
const numberOfThread = queue.size - from;
|
|
4641
4613
|
const requiredPoolLength = this.poolSize + numberOfThread;
|
|
4642
4614
|
if (this.pool.length < requiredPoolLength) {
|
|
@@ -4880,10 +4852,8 @@
|
|
|
4880
4852
|
* An RE2 class instance is a compiled representation of an RE2 regular expression, independent of
|
|
4881
4853
|
* the public Java-like Pattern/Matcher API.
|
|
4882
4854
|
*
|
|
4883
|
-
* <p>
|
|
4884
4855
|
* This class also contains various implementation helpers for RE2 regular expressions.
|
|
4885
4856
|
*
|
|
4886
|
-
* <p>
|
|
4887
4857
|
* Use the {@link #quoteMeta(String)} utility function to quote all regular expression
|
|
4888
4858
|
* metacharacters in an arbitrary string.
|
|
4889
4859
|
*
|
|
@@ -4905,7 +4875,6 @@
|
|
|
4905
4875
|
* Parses a regular expression and returns, if successful, an {@code RE2} instance that can be
|
|
4906
4876
|
* used to match against text.
|
|
4907
4877
|
*
|
|
4908
|
-
* <p>
|
|
4909
4878
|
* When matching against text, the regexp returns a match that begins as early as possible in the
|
|
4910
4879
|
* input (leftmost), and among those it chooses the one that a backtracking search would have
|
|
4911
4880
|
* found first. This so-called leftmost-first matching is the same semantics that Perl, Python,
|
|
@@ -4920,13 +4889,11 @@
|
|
|
4920
4889
|
* {@code compilePOSIX} is like {@link #compile} but restricts the regular expression to POSIX ERE
|
|
4921
4890
|
* (egrep) syntax and changes the match semantics to leftmost-longest.
|
|
4922
4891
|
*
|
|
4923
|
-
* <p>
|
|
4924
4892
|
* That is, when matching against text, the regexp returns a match that begins as early as
|
|
4925
4893
|
* possible in the input (leftmost), and among those it chooses a match that is as long as
|
|
4926
4894
|
* possible. This so-called leftmost-longest matching is the same semantics that early regular
|
|
4927
4895
|
* expression implementations used and that POSIX specifies.
|
|
4928
4896
|
*
|
|
4929
|
-
* <p>
|
|
4930
4897
|
* However, there can be multiple leftmost-longest matches, with different submatch choices, and
|
|
4931
4898
|
* here this package diverges from POSIX. Among the possible leftmost-longest matches, this
|
|
4932
4899
|
* package chooses the one that a backtracking search would have found first, while POSIX
|
|
@@ -4959,16 +4926,13 @@
|
|
|
4959
4926
|
/**
|
|
4960
4927
|
* Returns true iff textual regular expression {@code pattern} matches string {@code s}.
|
|
4961
4928
|
*
|
|
4962
|
-
* <p>
|
|
4963
4929
|
* More complicated queries need to use {@link #compile} and the full {@code RE2} interface.
|
|
4964
4930
|
*/
|
|
4965
4931
|
// This is visible for testing.
|
|
4966
4932
|
static match(pattern, s) {
|
|
4967
4933
|
return RE2.compile(pattern).match(s);
|
|
4968
4934
|
}
|
|
4969
|
-
constructor(expr, prog) {
|
|
4970
|
-
let numSubexp = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0;
|
|
4971
|
-
let longest = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : 0;
|
|
4935
|
+
constructor(expr, prog, numSubexp = 0, longest = 0) {
|
|
4972
4936
|
this.expr = expr; // as passed to Compile
|
|
4973
4937
|
this.prog = prog; // compiled program
|
|
4974
4938
|
this.numSubexp = numSubexp;
|
|
@@ -5202,8 +5166,7 @@
|
|
|
5202
5166
|
}
|
|
5203
5167
|
|
|
5204
5168
|
// Find matches in input.
|
|
5205
|
-
allMatches(input, n) {
|
|
5206
|
-
let deliverFun = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : v => v;
|
|
5169
|
+
allMatches(input, n, deliverFun = v => v) {
|
|
5207
5170
|
let result = [];
|
|
5208
5171
|
const end = input.endPos();
|
|
5209
5172
|
if (n < 0) {
|
|
@@ -5280,7 +5243,6 @@
|
|
|
5280
5243
|
* Returns an array holding the text of the leftmost match in {@code b} of this regular
|
|
5281
5244
|
* expression.
|
|
5282
5245
|
*
|
|
5283
|
-
* <p>
|
|
5284
5246
|
* A return value of null indicates no match.
|
|
5285
5247
|
*/
|
|
5286
5248
|
// This is visible for testing.
|
|
@@ -5296,7 +5258,6 @@
|
|
|
5296
5258
|
* Returns a two-element array of integers defining the location of the leftmost match in
|
|
5297
5259
|
* {@code b} of this regular expression. The match itself is at {@code b[loc[0]...loc[1]]}.
|
|
5298
5260
|
*
|
|
5299
|
-
* <p>
|
|
5300
5261
|
* A return value of null indicates no match.
|
|
5301
5262
|
*/
|
|
5302
5263
|
// This is visible for testing.
|
|
@@ -5312,7 +5273,6 @@
|
|
|
5312
5273
|
* Returns a string holding the text of the leftmost match in {@code s} of this regular
|
|
5313
5274
|
* expression.
|
|
5314
5275
|
*
|
|
5315
|
-
* <p>
|
|
5316
5276
|
* If there is no match, the return value is an empty string, but it will also be empty if the
|
|
5317
5277
|
* regular expression successfully matches an empty string. Use {@link #findIndex} or
|
|
5318
5278
|
* {@link #findSubmatch} if it is necessary to distinguish these cases.
|
|
@@ -5331,7 +5291,6 @@
|
|
|
5331
5291
|
* {@code s} of this regular expression. The match itself is at
|
|
5332
5292
|
* {@code s.substring(loc[0], loc[1])}.
|
|
5333
5293
|
*
|
|
5334
|
-
* <p>
|
|
5335
5294
|
* A return value of null indicates no match.
|
|
5336
5295
|
*/
|
|
5337
5296
|
// This is visible for testing.
|
|
@@ -5344,7 +5303,6 @@
|
|
|
5344
5303
|
* {@code b} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5345
5304
|
* href='#submatch'>Submatch</a> description above.
|
|
5346
5305
|
*
|
|
5347
|
-
* <p>
|
|
5348
5306
|
* A return value of null indicates no match.
|
|
5349
5307
|
*/
|
|
5350
5308
|
// This is visible for testing.
|
|
@@ -5367,7 +5325,6 @@
|
|
|
5367
5325
|
* expression in {@code b} and the matches, if any, of its subexpressions, as defined by the
|
|
5368
5326
|
* <a href='#submatch'>Submatch</a> and <a href='#index'>Index</a> descriptions above.
|
|
5369
5327
|
*
|
|
5370
|
-
* <p>
|
|
5371
5328
|
* A return value of null indicates no match.
|
|
5372
5329
|
*/
|
|
5373
5330
|
// This is visible for testing.
|
|
@@ -5380,7 +5337,6 @@
|
|
|
5380
5337
|
* {@code s} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5381
5338
|
* href='#submatch'>Submatch</a> description above.
|
|
5382
5339
|
*
|
|
5383
|
-
* <p>
|
|
5384
5340
|
* A return value of null indicates no match.
|
|
5385
5341
|
*/
|
|
5386
5342
|
// This is visible for testing.
|
|
@@ -5403,7 +5359,6 @@
|
|
|
5403
5359
|
* expression in {@code s} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5404
5360
|
* href='#submatch'>Submatch</a> description above.
|
|
5405
5361
|
*
|
|
5406
|
-
* <p>
|
|
5407
5362
|
* A return value of null indicates no match.
|
|
5408
5363
|
*/
|
|
5409
5364
|
// This is visible for testing.
|
|
@@ -5416,7 +5371,6 @@
|
|
|
5416
5371
|
* list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5417
5372
|
* href='#all'>All</a> description above.
|
|
5418
5373
|
*
|
|
5419
|
-
* <p>
|
|
5420
5374
|
* A return value of null indicates no match.
|
|
5421
5375
|
*
|
|
5422
5376
|
* TODO(adonovan): think about defining a byte slice view class, like a read-only Go slice backed
|
|
@@ -5436,7 +5390,6 @@
|
|
|
5436
5390
|
* returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5437
5391
|
* href='#all'>All</a> description above.
|
|
5438
5392
|
*
|
|
5439
|
-
* <p>
|
|
5440
5393
|
* A return value of null indicates no match.
|
|
5441
5394
|
*/
|
|
5442
5395
|
// This is visible for testing.
|
|
@@ -5453,7 +5406,6 @@
|
|
|
5453
5406
|
* to {@code n} successive matches of the expression, as defined by the <a href='#all'>All</a>
|
|
5454
5407
|
* description above.
|
|
5455
5408
|
*
|
|
5456
|
-
* <p>
|
|
5457
5409
|
* A return value of null indicates no match.
|
|
5458
5410
|
*/
|
|
5459
5411
|
// This is visible for testing.
|
|
@@ -5470,7 +5422,6 @@
|
|
|
5470
5422
|
* list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5471
5423
|
* href='#all'>All</a> description above.
|
|
5472
5424
|
*
|
|
5473
|
-
* <p>
|
|
5474
5425
|
* A return value of null indicates no match.
|
|
5475
5426
|
*/
|
|
5476
5427
|
// This is visible for testing.
|
|
@@ -5487,7 +5438,6 @@
|
|
|
5487
5438
|
* it returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5488
5439
|
* href='#all'>All</a> description above.
|
|
5489
5440
|
*
|
|
5490
|
-
* <p>
|
|
5491
5441
|
* A return value of null indicates no match.
|
|
5492
5442
|
*/
|
|
5493
5443
|
// This is visible for testing.
|
|
@@ -5512,7 +5462,6 @@
|
|
|
5512
5462
|
* {@link #findUTF8SubmatchIndex}; it returns a list of up to {@code n} successive matches of the
|
|
5513
5463
|
* expression, as defined by the <a href='#all'>All</a> description above.
|
|
5514
5464
|
*
|
|
5515
|
-
* <p>
|
|
5516
5465
|
* A return value of null indicates no match.
|
|
5517
5466
|
*/
|
|
5518
5467
|
// This is visible for testing.
|
|
@@ -5529,7 +5478,6 @@
|
|
|
5529
5478
|
* returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5530
5479
|
* href='#all'>All</a> description above.
|
|
5531
5480
|
*
|
|
5532
|
-
* <p>
|
|
5533
5481
|
* A return value of null indicates no match.
|
|
5534
5482
|
*/
|
|
5535
5483
|
// This is visible for testing.
|
|
@@ -5554,7 +5502,6 @@
|
|
|
5554
5502
|
* {@link #findSubmatchIndex}; it returns a list of up to {@code n} successive matches of the
|
|
5555
5503
|
* expression, as defined by the <a href='#all'>All</a> description above.
|
|
5556
5504
|
*
|
|
5557
|
-
* <p>
|
|
5558
5505
|
* A return value of null indicates no match.
|
|
5559
5506
|
*/
|
|
5560
5507
|
// This is visible for testing.
|
|
@@ -5570,7 +5517,6 @@
|
|
|
5570
5517
|
/**
|
|
5571
5518
|
* A compiled representation of an RE2 regular expression
|
|
5572
5519
|
*
|
|
5573
|
-
* <p>
|
|
5574
5520
|
* The matching functions take {@code String} arguments instead of the more general Java
|
|
5575
5521
|
* {@code CharSequence} since the latter doesn't provide UTF-16 decoding.
|
|
5576
5522
|
*
|
|
@@ -5604,10 +5550,9 @@
|
|
|
5604
5550
|
/**
|
|
5605
5551
|
* Returns a literal pattern string for the specified string.
|
|
5606
5552
|
*
|
|
5607
|
-
* <p>
|
|
5608
5553
|
* This method produces a string that can be used to create a <code>RE2JS</code> that would
|
|
5609
5554
|
* match the string <code>s</code> as if it were a literal pattern.
|
|
5610
|
-
*
|
|
5555
|
+
*
|
|
5611
5556
|
* Metacharacters or escape sequences in the input sequence will be given no special meaning.
|
|
5612
5557
|
*
|
|
5613
5558
|
* @param {string} str The string to be literalized
|
|
@@ -5623,8 +5568,7 @@
|
|
|
5623
5568
|
* @param {number} [flags=0]
|
|
5624
5569
|
* @returns {RE2JS}
|
|
5625
5570
|
*/
|
|
5626
|
-
static compile(regex) {
|
|
5627
|
-
let flags = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
5571
|
+
static compile(regex, flags = 0) {
|
|
5628
5572
|
let fregex = regex;
|
|
5629
5573
|
if ((flags & RE2JS.CASE_INSENSITIVE) !== 0) {
|
|
5630
5574
|
fregex = `(?i)${fregex}`;
|
|
@@ -5743,7 +5687,6 @@
|
|
|
5743
5687
|
* Splits input around instances of the regular expression. It returns an array giving the strings
|
|
5744
5688
|
* that occur before, between, and after instances of the regular expression.
|
|
5745
5689
|
*
|
|
5746
|
-
* <p>
|
|
5747
5690
|
* If {@code limit <= 0}, there is no limit on the size of the returned array. If
|
|
5748
5691
|
* {@code limit == 0}, empty strings that would occur at the end of the array are omitted. If
|
|
5749
5692
|
* {@code limit > 0}, at most limit strings are returned. The final string contains the remainder
|
|
@@ -5753,8 +5696,7 @@
|
|
|
5753
5696
|
* @param {number} [limit=0] the limit
|
|
5754
5697
|
* @returns {string[]} the split strings
|
|
5755
5698
|
*/
|
|
5756
|
-
split(input) {
|
|
5757
|
-
let limit = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
5699
|
+
split(input, limit = 0) {
|
|
5758
5700
|
const m = this.matcher(input);
|
|
5759
5701
|
const result = [];
|
|
5760
5702
|
let emptiesSkipped = 0;
|