re2js 0.3.3 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,11 +24,14 @@ There are certain features of PCRE or Perl regular expressions that cannot be im
24
24
  To install RE2JS:
25
25
 
26
26
  ```bash
27
+ # npm
27
28
  npm install re2js
28
- # or
29
+ # yarn
29
30
  yarn add re2js
30
- # or
31
+ # pnpm
31
32
  pnpm add re2js
33
+ # bun
34
+ bun add re2js
32
35
  ```
33
36
 
34
37
  ## Usage
@@ -205,6 +208,7 @@ You can access the named groups in a pattern using the `namedGroups()` function
205
208
  import { RE2JS } from 're2js'
206
209
 
207
210
  RE2JS.compile('(?P<foo>\\d{2})').namedGroups() // { foo: 1 }
211
+ RE2JS.compile('(?<bar>\\d{2})').namedGroups() // { bar: 1 }
208
212
  RE2JS.compile('\\d{2}').namedGroups() // {}
209
213
  RE2JS.compile('(?P<foo>.*)(?P<bar>.*)').namedGroups() // { foo: 1, bar: 2 }
210
214
  ```
@@ -234,6 +238,7 @@ The `group()` method retrieves the content matched by a specific name of capturi
234
238
  ```js
235
239
  import { RE2JS } from 're2js'
236
240
 
241
+ // example with `(?P<name>expr)`
237
242
  const p = RE2JS.compile(
238
243
  '(?P<baz>f(?P<foo>b*a(?P<another>r+)){0,10})(?P<bag>bag)?(?P<nomatch>zzz)?'
239
244
  )
@@ -245,6 +250,16 @@ if (matchString.matches()) {
245
250
  matchString.group('bag') // 'bag'
246
251
  matchString.group('nomatch') // null
247
252
  }
253
+
254
+ // example with `(?<name>expr)`
255
+ const m = RE2JS.compile(
256
+ '(?<baz>f(?<foo>b*a))'
257
+ )
258
+ const mString = m.matcher('fbba')
259
+ if (mString.matches()) {
260
+ mString.group('baz') // 'fbba'
261
+ mString.group('foo') // 'bba'
262
+ }
248
263
  ```
249
264
 
250
265
  ### Replacing Matches
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v0.3.3
5
+ * @version v0.4.1
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -419,6 +419,8 @@ class Unicode {
419
419
  // equalsIgnoreCase performs case-insensitive equality comparison
420
420
  // on the given runes |r1| and |r2|, with special consideration
421
421
  // for the likely scenario where both runes are ASCII characters.
422
+ // If non-ASCII, Unicode case folding will be performed on |r1|
423
+ // to compare it to |r2|.
422
424
  // -1 is interpreted as the end-of-file mark.
423
425
  static equalsIgnoreCase(r1, r2) {
424
426
  // Runes already match, or one of them is EOF
@@ -677,9 +679,7 @@ class Utils {
677
679
  // example
678
680
  // Encoding[(Encoding['UTF_16'] = 0)] = 'UTF_16'
679
681
  // Encoding[(Encoding['UTF_8'] = 1)] = 'UTF_8'
680
- const createEnum = function () {
681
- let values = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : [];
682
- let initNum = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
682
+ const createEnum = (values = [], initNum = 0) => {
683
683
  const enumObject = {};
684
684
  for (let i = 0; i < values.length; i++) {
685
685
  const val = values[i];
@@ -716,8 +716,7 @@ class MatcherInputBase {
716
716
  }
717
717
  }
718
718
  class Utf8MatcherInput extends MatcherInputBase {
719
- constructor() {
720
- let bytes = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
719
+ constructor(bytes = null) {
721
720
  super();
722
721
  this.bytes = bytes;
723
722
  }
@@ -749,8 +748,7 @@ class Utf8MatcherInput extends MatcherInputBase {
749
748
  }
750
749
  }
751
750
  class Utf16MatcherInput extends MatcherInputBase {
752
- constructor() {
753
- let charSequence = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
751
+ constructor(charSequence = null) {
754
752
  super();
755
753
  this.charSequence = charSequence;
756
754
  }
@@ -814,8 +812,7 @@ class RE2JSException extends Error {
814
812
  * An exception thrown by the parser if the pattern was invalid.
815
813
  */
816
814
  class RE2JSSyntaxException extends RE2JSException {
817
- constructor(error) {
818
- let input = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null;
815
+ constructor(error, input = null) {
819
816
  let message = `error parsing regexp: ${error}`;
820
817
  if (input) {
821
818
  message += `: \`${input}\``;
@@ -875,7 +872,6 @@ class RE2JSFlagsException extends RE2JSException {
875
872
  /**
876
873
  * A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
877
874
  *
878
- * <p>
879
875
  * Conceptually, a Matcher consists of four parts:
880
876
  * <ol>
881
877
  * <li>A compiled regular expression {@code RE2JS}, set at construction and fixed for the lifetime
@@ -988,8 +984,7 @@ class Matcher {
988
984
  * @param {string|number} [group=0]
989
985
  * @returns {string}
990
986
  */
991
- start() {
992
- let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
987
+ start(group = 0) {
993
988
  if (typeof group === 'string') {
994
989
  const groupInt = this.namedGroups[group];
995
990
  if (!Number.isFinite(groupInt)) {
@@ -1007,8 +1002,7 @@ class Matcher {
1007
1002
  * @param {string|number} [group=0]
1008
1003
  * @returns {string}
1009
1004
  */
1010
- end() {
1011
- let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
1005
+ end(group = 0) {
1012
1006
  if (typeof group === 'string') {
1013
1007
  const groupInt = this.namedGroups[group];
1014
1008
  if (!Number.isFinite(groupInt)) {
@@ -1025,8 +1019,7 @@ class Matcher {
1025
1019
  * @param {string|number} [group=0]
1026
1020
  * @returns {string}
1027
1021
  */
1028
- group() {
1029
- let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
1022
+ group(group = 0) {
1030
1023
  if (typeof group === 'string') {
1031
1024
  const groupInt = this.namedGroups[group];
1032
1025
  if (!Number.isFinite(groupInt)) {
@@ -1106,8 +1099,7 @@ class Matcher {
1106
1099
  * @returns {boolean} if it finds a match
1107
1100
  * @throws RE2JSGroupException if start is not a valid input position
1108
1101
  */
1109
- find() {
1110
- let start = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
1102
+ find(start = null) {
1111
1103
  if (start !== null) {
1112
1104
  if (start < 0 || start > this.matcherInputLength) {
1113
1105
  throw new RE2JSGroupException(`start index out of bounds: ${start}`);
@@ -1173,12 +1165,10 @@ class Matcher {
1173
1165
  * the form {@code $n}, where {@code n} is the group number in decimal. It advances the append
1174
1166
  * position to where the most recent match ended.
1175
1167
  *
1176
- * <p>
1177
1168
  * To embed a literal {@code $}, use \$ (actually {@code "\\$"} with string escapes). The escape
1178
1169
  * is only necessary when {@code $} is followed by a digit, but it is always allowed. Only
1179
1170
  * {@code $} and {@code \} need escaping, but any character can be escaped.
1180
1171
  *
1181
- * <p>
1182
1172
  * The group number {@code n} in {@code $n} is always at least one digit and expands to use more
1183
1173
  * digits as long as the resulting number is a valid group number for this pattern. To cut it off
1184
1174
  * earlier, escape the first digit that should not be used.
@@ -1190,8 +1180,7 @@ class Matcher {
1190
1180
  * @throws IndexOutOfBoundsException if replacement refers to an invalid group
1191
1181
  * @private
1192
1182
  */
1193
- appendReplacement(replacement) {
1194
- let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
1183
+ appendReplacement(replacement, perlMode = false) {
1195
1184
  let res = '';
1196
1185
  const s = this.start();
1197
1186
  const e = this.end();
@@ -1375,8 +1364,7 @@ class Matcher {
1375
1364
  * @returns {string} the input string with the matches replaced
1376
1365
  * @throws IndexOutOfBoundsException if replacement refers to an invalid group and perlMode is false
1377
1366
  */
1378
- replaceAll(replacement) {
1379
- let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
1367
+ replaceAll(replacement, perlMode = false) {
1380
1368
  return this.replace(replacement, true, perlMode);
1381
1369
  }
1382
1370
 
@@ -1389,8 +1377,7 @@ class Matcher {
1389
1377
  * @returns {string} the input string with the first match replaced
1390
1378
  * @throws IndexOutOfBoundsException if replacement refers to an invalid group and perlMode is false
1391
1379
  */
1392
- replaceFirst(replacement) {
1393
- let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
1380
+ replaceFirst(replacement, perlMode = false) {
1394
1381
  return this.replace(replacement, false, perlMode);
1395
1382
  }
1396
1383
 
@@ -1402,9 +1389,7 @@ class Matcher {
1402
1389
  * @returns {string}
1403
1390
  * @private
1404
1391
  */
1405
- replace(replacement) {
1406
- let all = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true;
1407
- let perlMode = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
1392
+ replace(replacement, all = true, perlMode = false) {
1408
1393
  let res = '';
1409
1394
  this.reset();
1410
1395
  while (this.find()) {
@@ -1441,9 +1426,7 @@ class MachineInputBase {
1441
1426
  // An implementation of MachineInput for UTF-8 byte arrays.
1442
1427
  // |pos| and |width| are byte indices.
1443
1428
  class MachineUTF8Input extends MachineInputBase {
1444
- constructor(bytes) {
1445
- let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
1446
- let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : bytes.length;
1429
+ constructor(bytes, start = 0, end = bytes.length) {
1447
1430
  super();
1448
1431
  this.bytes = bytes;
1449
1432
  this.start = start;
@@ -1526,8 +1509,7 @@ class MachineUTF8Input extends MachineInputBase {
1526
1509
 
1527
1510
  // Returns the index of the first occurrence of array |target| within
1528
1511
  // array |source| after |fromIndex|, or -1 if not found.
1529
- indexOf(source, target) {
1530
- let fromIndex = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0;
1512
+ indexOf(source, target, fromIndex = 0) {
1531
1513
  let targetLength = target.length;
1532
1514
  if (targetLength === 0) {
1533
1515
  return -1;
@@ -1548,9 +1530,7 @@ class MachineUTF8Input extends MachineInputBase {
1548
1530
 
1549
1531
  // |pos| and |width| are in JS "char" units.
1550
1532
  class MachineUTF16Input extends MachineInputBase {
1551
- constructor(charSequence) {
1552
- let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
1553
- let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : charSequence.length;
1533
+ constructor(charSequence, start = 0, end = charSequence.length) {
1554
1534
  super();
1555
1535
  this.charSequence = charSequence;
1556
1536
  this.start = start;
@@ -1590,14 +1570,10 @@ class MachineUTF16Input extends MachineInputBase {
1590
1570
  }
1591
1571
  }
1592
1572
  class MachineInput {
1593
- static fromUTF8(bytes) {
1594
- let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
1595
- let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : bytes.length;
1573
+ static fromUTF8(bytes, start = 0, end = bytes.length) {
1596
1574
  return new MachineUTF8Input(bytes, start, end);
1597
1575
  }
1598
- static fromUTF16(charSequence) {
1599
- let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
1600
- let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : charSequence.length;
1576
+ static fromUTF16(charSequence, start = 0, end = charSequence.length) {
1601
1577
  return new MachineUTF16Input(charSequence, start, end);
1602
1578
  }
1603
1579
  }
@@ -1685,7 +1661,6 @@ class Regexp {
1685
1661
  this.name = null; // capturing name, for CAPTURE
1686
1662
  this.namedGroups = {}; // map of group name -> capturing index
1687
1663
  }
1688
-
1689
1664
  reinit() {
1690
1665
  this.flags = 0;
1691
1666
  this.subs = Regexp.emptySubs();
@@ -2000,6 +1975,10 @@ class Inst {
2000
1975
  // class.
2001
1976
  if (this.runes.length === 1) {
2002
1977
  const r0 = this.runes[0];
1978
+ // If this pattern is case-insensitive, apply Unicode case folding to compare the two runes.
1979
+ // Note that this may result in a case-folding loop when executed,
1980
+ // so attempt to reduce the chance of that occurring
1981
+ // by performing case folding on |r0| from the pattern rather than |r| from the input.
2003
1982
  if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
2004
1983
  return Unicode.equalsIgnoreCase(r0, r);
2005
1984
  }
@@ -2231,10 +2210,7 @@ class Prog {
2231
2210
  * @class
2232
2211
  */
2233
2212
  class Frag {
2234
- constructor() {
2235
- let i = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
2236
- let out = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
2237
- let nullable = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
2213
+ constructor(i = 0, out = 0, nullable = false) {
2238
2214
  this.i = i; // an instruction address (pc).
2239
2215
  this.out = out; // a patch list; see explanation in Prog.js
2240
2216
  this.nullable = nullable; // whether the fragment can match the empty string
@@ -2727,8 +2703,7 @@ class CharClass {
2727
2703
  CharClass.qsortIntPair(array, i, right);
2728
2704
  }
2729
2705
  }
2730
- constructor() {
2731
- let r = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : Utils.emptyInts();
2706
+ constructor(r = Utils.emptyInts()) {
2732
2707
  this.r = r; // inclusive ranges, pairs of [lo,hi]. r.length is even.
2733
2708
  this.len = r.length; // prefix of |r| that is defined. Even.
2734
2709
  }
@@ -3402,8 +3377,7 @@ class Parser {
3402
3377
  static concatRunes(x, y) {
3403
3378
  return [...x, ...y];
3404
3379
  }
3405
- constructor(wholeRegexp) {
3406
- let flags = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
3380
+ constructor(wholeRegexp, flags = 0) {
3407
3381
  this.wholeRegexp = wholeRegexp;
3408
3382
  // Flags control the behavior of the parser and record information about
3409
3383
  // regexp context.
@@ -4116,7 +4090,6 @@ class Parser {
4116
4090
  if (this.swapVerticalBar()) {
4117
4091
  this.pop(); // pop vertical bar
4118
4092
  }
4119
-
4120
4093
  this.alternate();
4121
4094
  const n = this.stack.length;
4122
4095
  if (n !== 1) {
@@ -4145,21 +4118,22 @@ class Parser {
4145
4118
  // support all three as well. EcmaScript 4 uses only the Python form.
4146
4119
  //
4147
4120
  // In both the open source world (via Code Search) and the
4148
- // Google source tree, (?P<expr>name) is the dominant form,
4149
- // so that's the one we implement. One is enough.
4121
+ // Google source tree, (?P<name>expr) and (?<name>expr) are the
4122
+ // dominant forms of named captures and both are supported.
4150
4123
  const s = t.rest();
4151
- if (s.startsWith('(?P<')) {
4124
+ if (s.startsWith('(?P<') || s.startsWith('(?<')) {
4152
4125
  // Pull out name.
4126
+ const begin = s.charAt(2) === 'P' ? 4 : 3;
4153
4127
  const end = s.indexOf('>');
4154
4128
  if (end < 0) {
4155
4129
  throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s);
4156
4130
  }
4157
- const name = s.substring(4, end); // "name"
4131
+ const name = s.substring(begin, end); // "name"
4158
4132
  t.skipString(name);
4159
- t.skip(5); // "(?P<>"
4133
+ t.skip(begin + 1); // "(?P<>" or "(?<>"
4160
4134
  if (!Parser.isValidCaptureName(name)) {
4161
4135
  // "(?P<name>"
4162
- throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s.substring(0, end));
4136
+ throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s.substring(0, end + 1)); // "(?P<name>" or "(?<name>"
4163
4137
  }
4164
4138
  // Like ordinary capture, but named.
4165
4139
  const re = this.op(Regexp.Op.LEFT_PAREN);
@@ -4289,7 +4263,6 @@ class Parser {
4289
4263
  if (this.swapVerticalBar()) {
4290
4264
  this.pop(); // pop vertical bar
4291
4265
  }
4292
-
4293
4266
  this.alternate();
4294
4267
  const n = this.stack.length;
4295
4268
  if (n < 2) {
@@ -4631,8 +4604,7 @@ class Machine {
4631
4604
  }
4632
4605
 
4633
4606
  // Frees all threads on the thread queue, returning them to the free pool.
4634
- freeQueue(queue) {
4635
- let from = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
4607
+ freeQueue(queue, from = 0) {
4636
4608
  const numberOfThread = queue.size - from;
4637
4609
  const requiredPoolLength = this.poolSize + numberOfThread;
4638
4610
  if (this.pool.length < requiredPoolLength) {
@@ -4876,10 +4848,8 @@ class AtomicReference {
4876
4848
  * An RE2 class instance is a compiled representation of an RE2 regular expression, independent of
4877
4849
  * the public Java-like Pattern/Matcher API.
4878
4850
  *
4879
- * <p>
4880
4851
  * This class also contains various implementation helpers for RE2 regular expressions.
4881
4852
  *
4882
- * <p>
4883
4853
  * Use the {@link #quoteMeta(String)} utility function to quote all regular expression
4884
4854
  * metacharacters in an arbitrary string.
4885
4855
  *
@@ -4901,7 +4871,6 @@ class RE2 {
4901
4871
  * Parses a regular expression and returns, if successful, an {@code RE2} instance that can be
4902
4872
  * used to match against text.
4903
4873
  *
4904
- * <p>
4905
4874
  * When matching against text, the regexp returns a match that begins as early as possible in the
4906
4875
  * input (leftmost), and among those it chooses the one that a backtracking search would have
4907
4876
  * found first. This so-called leftmost-first matching is the same semantics that Perl, Python,
@@ -4916,13 +4885,11 @@ class RE2 {
4916
4885
  * {@code compilePOSIX} is like {@link #compile} but restricts the regular expression to POSIX ERE
4917
4886
  * (egrep) syntax and changes the match semantics to leftmost-longest.
4918
4887
  *
4919
- * <p>
4920
4888
  * That is, when matching against text, the regexp returns a match that begins as early as
4921
4889
  * possible in the input (leftmost), and among those it chooses a match that is as long as
4922
4890
  * possible. This so-called leftmost-longest matching is the same semantics that early regular
4923
4891
  * expression implementations used and that POSIX specifies.
4924
4892
  *
4925
- * <p>
4926
4893
  * However, there can be multiple leftmost-longest matches, with different submatch choices, and
4927
4894
  * here this package diverges from POSIX. Among the possible leftmost-longest matches, this
4928
4895
  * package chooses the one that a backtracking search would have found first, while POSIX
@@ -4955,16 +4922,13 @@ class RE2 {
4955
4922
  /**
4956
4923
  * Returns true iff textual regular expression {@code pattern} matches string {@code s}.
4957
4924
  *
4958
- * <p>
4959
4925
  * More complicated queries need to use {@link #compile} and the full {@code RE2} interface.
4960
4926
  */
4961
4927
  // This is visible for testing.
4962
4928
  static match(pattern, s) {
4963
4929
  return RE2.compile(pattern).match(s);
4964
4930
  }
4965
- constructor(expr, prog) {
4966
- let numSubexp = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0;
4967
- let longest = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : 0;
4931
+ constructor(expr, prog, numSubexp = 0, longest = 0) {
4968
4932
  this.expr = expr; // as passed to Compile
4969
4933
  this.prog = prog; // compiled program
4970
4934
  this.numSubexp = numSubexp;
@@ -5198,8 +5162,7 @@ class RE2 {
5198
5162
  }
5199
5163
 
5200
5164
  // Find matches in input.
5201
- allMatches(input, n) {
5202
- let deliverFun = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : v => v;
5165
+ allMatches(input, n, deliverFun = v => v) {
5203
5166
  let result = [];
5204
5167
  const end = input.endPos();
5205
5168
  if (n < 0) {
@@ -5276,7 +5239,6 @@ class RE2 {
5276
5239
  * Returns an array holding the text of the leftmost match in {@code b} of this regular
5277
5240
  * expression.
5278
5241
  *
5279
- * <p>
5280
5242
  * A return value of null indicates no match.
5281
5243
  */
5282
5244
  // This is visible for testing.
@@ -5292,7 +5254,6 @@ class RE2 {
5292
5254
  * Returns a two-element array of integers defining the location of the leftmost match in
5293
5255
  * {@code b} of this regular expression. The match itself is at {@code b[loc[0]...loc[1]]}.
5294
5256
  *
5295
- * <p>
5296
5257
  * A return value of null indicates no match.
5297
5258
  */
5298
5259
  // This is visible for testing.
@@ -5308,7 +5269,6 @@ class RE2 {
5308
5269
  * Returns a string holding the text of the leftmost match in {@code s} of this regular
5309
5270
  * expression.
5310
5271
  *
5311
- * <p>
5312
5272
  * If there is no match, the return value is an empty string, but it will also be empty if the
5313
5273
  * regular expression successfully matches an empty string. Use {@link #findIndex} or
5314
5274
  * {@link #findSubmatch} if it is necessary to distinguish these cases.
@@ -5327,7 +5287,6 @@ class RE2 {
5327
5287
  * {@code s} of this regular expression. The match itself is at
5328
5288
  * {@code s.substring(loc[0], loc[1])}.
5329
5289
  *
5330
- * <p>
5331
5290
  * A return value of null indicates no match.
5332
5291
  */
5333
5292
  // This is visible for testing.
@@ -5340,7 +5299,6 @@ class RE2 {
5340
5299
  * {@code b} and the matches, if any, of its subexpressions, as defined by the <a
5341
5300
  * href='#submatch'>Submatch</a> description above.
5342
5301
  *
5343
- * <p>
5344
5302
  * A return value of null indicates no match.
5345
5303
  */
5346
5304
  // This is visible for testing.
@@ -5363,7 +5321,6 @@ class RE2 {
5363
5321
  * expression in {@code b} and the matches, if any, of its subexpressions, as defined by the
5364
5322
  * <a href='#submatch'>Submatch</a> and <a href='#index'>Index</a> descriptions above.
5365
5323
  *
5366
- * <p>
5367
5324
  * A return value of null indicates no match.
5368
5325
  */
5369
5326
  // This is visible for testing.
@@ -5376,7 +5333,6 @@ class RE2 {
5376
5333
  * {@code s} and the matches, if any, of its subexpressions, as defined by the <a
5377
5334
  * href='#submatch'>Submatch</a> description above.
5378
5335
  *
5379
- * <p>
5380
5336
  * A return value of null indicates no match.
5381
5337
  */
5382
5338
  // This is visible for testing.
@@ -5399,7 +5355,6 @@ class RE2 {
5399
5355
  * expression in {@code s} and the matches, if any, of its subexpressions, as defined by the <a
5400
5356
  * href='#submatch'>Submatch</a> description above.
5401
5357
  *
5402
- * <p>
5403
5358
  * A return value of null indicates no match.
5404
5359
  */
5405
5360
  // This is visible for testing.
@@ -5412,7 +5367,6 @@ class RE2 {
5412
5367
  * list of up to {@code n} successive matches of the expression, as defined by the <a
5413
5368
  * href='#all'>All</a> description above.
5414
5369
  *
5415
- * <p>
5416
5370
  * A return value of null indicates no match.
5417
5371
  *
5418
5372
  * TODO(adonovan): think about defining a byte slice view class, like a read-only Go slice backed
@@ -5432,7 +5386,6 @@ class RE2 {
5432
5386
  * returns a list of up to {@code n} successive matches of the expression, as defined by the <a
5433
5387
  * href='#all'>All</a> description above.
5434
5388
  *
5435
- * <p>
5436
5389
  * A return value of null indicates no match.
5437
5390
  */
5438
5391
  // This is visible for testing.
@@ -5449,7 +5402,6 @@ class RE2 {
5449
5402
  * to {@code n} successive matches of the expression, as defined by the <a href='#all'>All</a>
5450
5403
  * description above.
5451
5404
  *
5452
- * <p>
5453
5405
  * A return value of null indicates no match.
5454
5406
  */
5455
5407
  // This is visible for testing.
@@ -5466,7 +5418,6 @@ class RE2 {
5466
5418
  * list of up to {@code n} successive matches of the expression, as defined by the <a
5467
5419
  * href='#all'>All</a> description above.
5468
5420
  *
5469
- * <p>
5470
5421
  * A return value of null indicates no match.
5471
5422
  */
5472
5423
  // This is visible for testing.
@@ -5483,7 +5434,6 @@ class RE2 {
5483
5434
  * it returns a list of up to {@code n} successive matches of the expression, as defined by the <a
5484
5435
  * href='#all'>All</a> description above.
5485
5436
  *
5486
- * <p>
5487
5437
  * A return value of null indicates no match.
5488
5438
  */
5489
5439
  // This is visible for testing.
@@ -5508,7 +5458,6 @@ class RE2 {
5508
5458
  * {@link #findUTF8SubmatchIndex}; it returns a list of up to {@code n} successive matches of the
5509
5459
  * expression, as defined by the <a href='#all'>All</a> description above.
5510
5460
  *
5511
- * <p>
5512
5461
  * A return value of null indicates no match.
5513
5462
  */
5514
5463
  // This is visible for testing.
@@ -5525,7 +5474,6 @@ class RE2 {
5525
5474
  * returns a list of up to {@code n} successive matches of the expression, as defined by the <a
5526
5475
  * href='#all'>All</a> description above.
5527
5476
  *
5528
- * <p>
5529
5477
  * A return value of null indicates no match.
5530
5478
  */
5531
5479
  // This is visible for testing.
@@ -5550,7 +5498,6 @@ class RE2 {
5550
5498
  * {@link #findSubmatchIndex}; it returns a list of up to {@code n} successive matches of the
5551
5499
  * expression, as defined by the <a href='#all'>All</a> description above.
5552
5500
  *
5553
- * <p>
5554
5501
  * A return value of null indicates no match.
5555
5502
  */
5556
5503
  // This is visible for testing.
@@ -5566,7 +5513,6 @@ class RE2 {
5566
5513
  /**
5567
5514
  * A compiled representation of an RE2 regular expression
5568
5515
  *
5569
- * <p>
5570
5516
  * The matching functions take {@code String} arguments instead of the more general Java
5571
5517
  * {@code CharSequence} since the latter doesn't provide UTF-16 decoding.
5572
5518
  *
@@ -5600,10 +5546,9 @@ class RE2JS {
5600
5546
  /**
5601
5547
  * Returns a literal pattern string for the specified string.
5602
5548
  *
5603
- * <p>
5604
5549
  * This method produces a string that can be used to create a <code>RE2JS</code> that would
5605
5550
  * match the string <code>s</code> as if it were a literal pattern.
5606
- * </p>
5551
+ *
5607
5552
  * Metacharacters or escape sequences in the input sequence will be given no special meaning.
5608
5553
  *
5609
5554
  * @param {string} str The string to be literalized
@@ -5619,8 +5564,7 @@ class RE2JS {
5619
5564
  * @param {number} [flags=0]
5620
5565
  * @returns {RE2JS}
5621
5566
  */
5622
- static compile(regex) {
5623
- let flags = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
5567
+ static compile(regex, flags = 0) {
5624
5568
  let fregex = regex;
5625
5569
  if ((flags & RE2JS.CASE_INSENSITIVE) !== 0) {
5626
5570
  fregex = `(?i)${fregex}`;
@@ -5739,7 +5683,6 @@ class RE2JS {
5739
5683
  * Splits input around instances of the regular expression. It returns an array giving the strings
5740
5684
  * that occur before, between, and after instances of the regular expression.
5741
5685
  *
5742
- * <p>
5743
5686
  * If {@code limit <= 0}, there is no limit on the size of the returned array. If
5744
5687
  * {@code limit == 0}, empty strings that would occur at the end of the array are omitted. If
5745
5688
  * {@code limit > 0}, at most limit strings are returned. The final string contains the remainder
@@ -5749,8 +5692,7 @@ class RE2JS {
5749
5692
  * @param {number} [limit=0] the limit
5750
5693
  * @returns {string[]} the split strings
5751
5694
  */
5752
- split(input) {
5753
- let limit = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
5695
+ split(input, limit = 0) {
5754
5696
  const m = this.matcher(input);
5755
5697
  const result = [];
5756
5698
  let emptiesSkipped = 0;