re2js 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -205,6 +205,7 @@ You can access the named groups in a pattern using the `namedGroups()` function
205
205
  import { RE2JS } from 're2js'
206
206
 
207
207
  RE2JS.compile('(?P<foo>\\d{2})').namedGroups() // { foo: 1 }
208
+ RE2JS.compile('(?<bar>\\d{2})').namedGroups() // { bar: 1 }
208
209
  RE2JS.compile('\\d{2}').namedGroups() // {}
209
210
  RE2JS.compile('(?P<foo>.*)(?P<bar>.*)').namedGroups() // { foo: 1, bar: 2 }
210
211
  ```
@@ -234,6 +235,7 @@ The `group()` method retrieves the content matched by a specific name of capturi
234
235
  ```js
235
236
  import { RE2JS } from 're2js'
236
237
 
238
+ // example with `(?P<name>expr)`
237
239
  const p = RE2JS.compile(
238
240
  '(?P<baz>f(?P<foo>b*a(?P<another>r+)){0,10})(?P<bag>bag)?(?P<nomatch>zzz)?'
239
241
  )
@@ -245,6 +247,16 @@ if (matchString.matches()) {
245
247
  matchString.group('bag') // 'bag'
246
248
  matchString.group('nomatch') // null
247
249
  }
250
+
251
+ // example with `(?<name>expr)`
252
+ const m = RE2JS.compile(
253
+ '(?<baz>f(?<foo>b*a))'
254
+ )
255
+ const mString = m.matcher('fbba')
256
+ if (mString.matches()) {
257
+ mString.group('baz') // 'fbba'
258
+ mString.group('foo') // 'bba'
259
+ }
248
260
  ```
249
261
 
250
262
  ### Replacing Matches
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v0.3.3
5
+ * @version v0.4.0
6
6
  * @author Alexey Vasiliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -419,6 +419,8 @@ class Unicode {
419
419
  // equalsIgnoreCase performs case-insensitive equality comparison
420
420
  // on the given runes |r1| and |r2|, with special consideration
421
421
  // for the likely scenario where both runes are ASCII characters.
422
+ // If non-ASCII, Unicode case folding will be performed on |r1|
423
+ // to compare it to |r2|.
422
424
  // -1 is interpreted as the end-of-file mark.
423
425
  static equalsIgnoreCase(r1, r2) {
424
426
  // Runes already match, or one of them is EOF
@@ -677,9 +679,7 @@ class Utils {
677
679
  // example
678
680
  // Encoding[(Encoding['UTF_16'] = 0)] = 'UTF_16'
679
681
  // Encoding[(Encoding['UTF_8'] = 1)] = 'UTF_8'
680
- const createEnum = function () {
681
- let values = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : [];
682
- let initNum = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
682
+ const createEnum = (values = [], initNum = 0) => {
683
683
  const enumObject = {};
684
684
  for (let i = 0; i < values.length; i++) {
685
685
  const val = values[i];
@@ -814,8 +814,7 @@ class RE2JSException extends Error {
814
814
  * An exception thrown by the parser if the pattern was invalid.
815
815
  */
816
816
  class RE2JSSyntaxException extends RE2JSException {
817
- constructor(error) {
818
- let input = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null;
817
+ constructor(error, input = null) {
819
818
  let message = `error parsing regexp: ${error}`;
820
819
  if (input) {
821
820
  message += `: \`${input}\``;
@@ -988,8 +987,7 @@ class Matcher {
988
987
  * @param {string|number} [group=0]
989
988
  * @returns {string}
990
989
  */
991
- start() {
992
- let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
990
+ start(group = 0) {
993
991
  if (typeof group === 'string') {
994
992
  const groupInt = this.namedGroups[group];
995
993
  if (!Number.isFinite(groupInt)) {
@@ -1007,8 +1005,7 @@ class Matcher {
1007
1005
  * @param {string|number} [group=0]
1008
1006
  * @returns {string}
1009
1007
  */
1010
- end() {
1011
- let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
1008
+ end(group = 0) {
1012
1009
  if (typeof group === 'string') {
1013
1010
  const groupInt = this.namedGroups[group];
1014
1011
  if (!Number.isFinite(groupInt)) {
@@ -1025,8 +1022,7 @@ class Matcher {
1025
1022
  * @param {string|number} [group=0]
1026
1023
  * @returns {string}
1027
1024
  */
1028
- group() {
1029
- let group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
1025
+ group(group = 0) {
1030
1026
  if (typeof group === 'string') {
1031
1027
  const groupInt = this.namedGroups[group];
1032
1028
  if (!Number.isFinite(groupInt)) {
@@ -1106,8 +1102,7 @@ class Matcher {
1106
1102
  * @returns {boolean} if it finds a match
1107
1103
  * @throws RE2JSGroupException if start is not a valid input position
1108
1104
  */
1109
- find() {
1110
- let start = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
1105
+ find(start = null) {
1111
1106
  if (start !== null) {
1112
1107
  if (start < 0 || start > this.matcherInputLength) {
1113
1108
  throw new RE2JSGroupException(`start index out of bounds: ${start}`);
@@ -1190,8 +1185,7 @@ class Matcher {
1190
1185
  * @throws IndexOutOfBoundsException if replacement refers to an invalid group
1191
1186
  * @private
1192
1187
  */
1193
- appendReplacement(replacement) {
1194
- let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
1188
+ appendReplacement(replacement, perlMode = false) {
1195
1189
  let res = '';
1196
1190
  const s = this.start();
1197
1191
  const e = this.end();
@@ -1375,8 +1369,7 @@ class Matcher {
1375
1369
  * @returns {string} the input string with the matches replaced
1376
1370
  * @throws IndexOutOfBoundsException if replacement refers to an invalid group and perlMode is false
1377
1371
  */
1378
- replaceAll(replacement) {
1379
- let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
1372
+ replaceAll(replacement, perlMode = false) {
1380
1373
  return this.replace(replacement, true, perlMode);
1381
1374
  }
1382
1375
 
@@ -1389,8 +1382,7 @@ class Matcher {
1389
1382
  * @returns {string} the input string with the first match replaced
1390
1383
  * @throws IndexOutOfBoundsException if replacement refers to an invalid group and perlMode is false
1391
1384
  */
1392
- replaceFirst(replacement) {
1393
- let perlMode = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
1385
+ replaceFirst(replacement, perlMode = false) {
1394
1386
  return this.replace(replacement, false, perlMode);
1395
1387
  }
1396
1388
 
@@ -1402,9 +1394,7 @@ class Matcher {
1402
1394
  * @returns {string}
1403
1395
  * @private
1404
1396
  */
1405
- replace(replacement) {
1406
- let all = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true;
1407
- let perlMode = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
1397
+ replace(replacement, all = true, perlMode = false) {
1408
1398
  let res = '';
1409
1399
  this.reset();
1410
1400
  while (this.find()) {
@@ -2000,6 +1990,10 @@ class Inst {
2000
1990
  // class.
2001
1991
  if (this.runes.length === 1) {
2002
1992
  const r0 = this.runes[0];
1993
+ // If this pattern is case-insensitive, apply Unicode case folding to compare the two runes.
1994
+ // Note that this may result in a case-folding loop when executed,
1995
+ // so attempt to reduce the chance of that occurring
1996
+ // by performing case folding on |r0| from the pattern rather than |r| from the input.
2003
1997
  if ((this.arg & RE2Flags.FOLD_CASE) !== 0) {
2004
1998
  return Unicode.equalsIgnoreCase(r0, r);
2005
1999
  }
@@ -2231,10 +2225,7 @@ class Prog {
2231
2225
  * @class
2232
2226
  */
2233
2227
  class Frag {
2234
- constructor() {
2235
- let i = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 0;
2236
- let out = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
2237
- let nullable = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
2228
+ constructor(i = 0, out = 0, nullable = false) {
2238
2229
  this.i = i; // an instruction address (pc).
2239
2230
  this.out = out; // a patch list; see explanation in Prog.js
2240
2231
  this.nullable = nullable; // whether the fragment can match the empty string
@@ -2727,8 +2718,7 @@ class CharClass {
2727
2718
  CharClass.qsortIntPair(array, i, right);
2728
2719
  }
2729
2720
  }
2730
- constructor() {
2731
- let r = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : Utils.emptyInts();
2721
+ constructor(r = Utils.emptyInts()) {
2732
2722
  this.r = r; // inclusive ranges, pairs of [lo,hi]. r.length is even.
2733
2723
  this.len = r.length; // prefix of |r| that is defined. Even.
2734
2724
  }
@@ -3402,8 +3392,7 @@ class Parser {
3402
3392
  static concatRunes(x, y) {
3403
3393
  return [...x, ...y];
3404
3394
  }
3405
- constructor(wholeRegexp) {
3406
- let flags = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
3395
+ constructor(wholeRegexp, flags = 0) {
3407
3396
  this.wholeRegexp = wholeRegexp;
3408
3397
  // Flags control the behavior of the parser and record information about
3409
3398
  // regexp context.
@@ -4145,21 +4134,22 @@ class Parser {
4145
4134
  // support all three as well. EcmaScript 4 uses only the Python form.
4146
4135
  //
4147
4136
  // In both the open source world (via Code Search) and the
4148
- // Google source tree, (?P<expr>name) is the dominant form,
4149
- // so that's the one we implement. One is enough.
4137
+ // Google source tree, (?P<name>expr) and (?<name>expr) are the
4138
+ // dominant forms of named captures and both are supported.
4150
4139
  const s = t.rest();
4151
- if (s.startsWith('(?P<')) {
4140
+ if (s.startsWith('(?P<') || s.startsWith('(?<')) {
4152
4141
  // Pull out name.
4142
+ const begin = s.charAt(2) === 'P' ? 4 : 3;
4153
4143
  const end = s.indexOf('>');
4154
4144
  if (end < 0) {
4155
4145
  throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s);
4156
4146
  }
4157
- const name = s.substring(4, end); // "name"
4147
+ const name = s.substring(begin, end); // "name"
4158
4148
  t.skipString(name);
4159
- t.skip(5); // "(?P<>"
4149
+ t.skip(begin + 1); // "(?P<>" or "(?<>"
4160
4150
  if (!Parser.isValidCaptureName(name)) {
4161
4151
  // "(?P<name>" or "(?<name>"
4162
- throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s.substring(0, end));
4152
+ throw new RE2JSSyntaxException(Parser.ERR_INVALID_NAMED_CAPTURE, s.substring(0, end + 1)); // "(?P<name>" or "(?<name>"
4163
4153
  }
4164
4154
  // Like ordinary capture, but named.
4165
4155
  const re = this.op(Regexp.Op.LEFT_PAREN);
@@ -4631,8 +4621,7 @@ class Machine {
4631
4621
  }
4632
4622
 
4633
4623
  // Frees all threads on the thread queue, returning them to the free pool.
4634
- freeQueue(queue) {
4635
- let from = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
4624
+ freeQueue(queue, from = 0) {
4636
4625
  const numberOfThread = queue.size - from;
4637
4626
  const requiredPoolLength = this.poolSize + numberOfThread;
4638
4627
  if (this.pool.length < requiredPoolLength) {
@@ -4962,9 +4951,7 @@ class RE2 {
4962
4951
  static match(pattern, s) {
4963
4952
  return RE2.compile(pattern).match(s);
4964
4953
  }
4965
- constructor(expr, prog) {
4966
- let numSubexp = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0;
4967
- let longest = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : 0;
4954
+ constructor(expr, prog, numSubexp = 0, longest = 0) {
4968
4955
  this.expr = expr; // as passed to Compile
4969
4956
  this.prog = prog; // compiled program
4970
4957
  this.numSubexp = numSubexp;
@@ -5198,8 +5185,7 @@ class RE2 {
5198
5185
  }
5199
5186
 
5200
5187
  // Find matches in input.
5201
- allMatches(input, n) {
5202
- let deliverFun = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : v => v;
5188
+ allMatches(input, n, deliverFun = v => v) {
5203
5189
  let result = [];
5204
5190
  const end = input.endPos();
5205
5191
  if (n < 0) {