re2js 2.2.2 → 2.2.3

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.2.2
5
+ * @version v2.2.3
6
6
  * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -100,7 +100,7 @@
100
100
  }
101
101
  class Codepoint {
102
102
  // codePointAt(0)
103
- static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
103
+ static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ["'", 39], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['`', 96], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
104
104
 
105
105
  // convert unicode codepoint to upper case codepoint
106
106
  // return same codepoint, if cannot do it (or codepoint not have upper variation)
@@ -154,10 +154,6 @@
154
154
  getStride(index) {
155
155
  return this.isStride1 ? 1 : this.data[index * this.SIZE + 2];
156
156
  }
157
- get(index) {
158
- const i = index * this.SIZE;
159
- return [this.data[i], this.data[i + 1], this.getStride(index)];
160
- }
161
157
  get length() {
162
158
  return this.data.length / this.SIZE;
163
159
  }
@@ -654,6 +650,9 @@
654
650
  static emptyInts() {
655
651
  return [];
656
652
  }
653
+ static isByteArray(input) {
654
+ return Array.isArray(input) || input instanceof Uint8Array;
655
+ }
657
656
 
658
657
  // Returns true iff |c| is an ASCII letter or decimal digit.
659
658
  static isalnum(c) {
@@ -955,7 +954,7 @@
955
954
  * @returns {number[]}
956
955
  */
957
956
  asBytes() {
958
- return this.charSequence.toString().split('').map(s => s.codePointAt(0));
957
+ return Utils.stringToUtf8ByteArray(this.charSequence.toString());
959
958
  }
960
959
 
961
960
  /**
@@ -980,7 +979,7 @@
980
979
  * @returns {Utf8MatcherInput}
981
980
  */
982
981
  static utf8(input) {
983
- if (Array.isArray(input)) {
982
+ if (Utils.isByteArray(input)) {
984
983
  return new Utf8MatcherInput(input);
985
984
  }
986
985
  return new Utf8MatcherInput(Utils.stringToUtf8ByteArray(input));
@@ -1112,10 +1111,10 @@
1112
1111
  if (start < this.start) {
1113
1112
  start = this.start;
1114
1113
  }
1115
- r1 = this.step(start) >> 3;
1114
+ r1 = this.step(start - this.start) >> 3;
1116
1115
  }
1117
1116
  }
1118
- const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
1117
+ const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
1119
1118
  return Utils.emptyOpContext(r1, r2);
1120
1119
  }
1121
1120
 
@@ -1197,14 +1196,17 @@
1197
1196
  index(re2, pos) {
1198
1197
  pos += this.start;
1199
1198
  const i = this.charSequence.indexOf(re2.prefix, pos);
1200
- return i < 0 ? i : i - pos;
1199
+ if (i < 0 || i > this.end - re2.prefix.length) {
1200
+ return -1;
1201
+ }
1202
+ return i - pos;
1201
1203
  }
1202
1204
 
1203
1205
  // Returns a bitmask of EMPTY_* flags.
1204
1206
  context(pos) {
1205
1207
  pos += this.start;
1206
- const r1 = pos > 0 && pos <= this.charSequence.length ? this.charSequence.codePointAt(pos - 1) : -1;
1207
- const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
1208
+ const r1 = pos > this.start && pos <= this.end ? this.charSequence.codePointAt(pos - 1) : -1;
1209
+ const r2 = pos < this.end ? this.charSequence.codePointAt(pos) : -1;
1208
1210
  return Utils.emptyOpContext(r1, r2);
1209
1211
  }
1210
1212
  prefixLength(re2) {
@@ -1392,7 +1394,7 @@
1392
1394
  this.numberOfInstructions = re2.numberOfInstructions();
1393
1395
  if (input instanceof MatcherInputBase) {
1394
1396
  this.resetMatcherInput(input);
1395
- } else if (Array.isArray(input)) {
1397
+ } else if (Utils.isByteArray(input)) {
1396
1398
  this.resetMatcherInput(MatcherInput.utf8(input));
1397
1399
  } else {
1398
1400
  this.resetMatcherInput(MatcherInput.utf16(input));
@@ -1554,10 +1556,7 @@
1554
1556
  if (group === 0 || this.hasGroups) {
1555
1557
  return;
1556
1558
  }
1557
- let end = this.groups[1] + 1;
1558
- if (end > this.matcherInputLength) {
1559
- end = this.matcherInputLength;
1560
- }
1559
+ const end = this.matcherInputLength;
1561
1560
  const res = this.patternInput.re2().matchMachineInput(this.matcherInput, this.groups[0], end, this.anchorFlag, 1 + this.patternGroupCount);
1562
1561
  const ok = res[0];
1563
1562
  if (!ok) {
@@ -1753,7 +1752,10 @@
1753
1752
  throw new RE2JSGroupException("named capture group is missing trailing '}'");
1754
1753
  }
1755
1754
  const groupName = replacement.substring(i + 1, j);
1756
- res += this.group(groupName);
1755
+ const groupVal = this.group(groupName);
1756
+ if (groupVal !== null) {
1757
+ res += groupVal;
1758
+ }
1757
1759
  last = j + 1;
1758
1760
  i = j;
1759
1761
  continue;
@@ -1799,6 +1801,22 @@
1799
1801
  i++;
1800
1802
  last = i + 1;
1801
1803
  continue;
1804
+ } else if (Codepoint.CODES.get('`') === c) {
1805
+ if (last < i) {
1806
+ res += replacement.substring(last, i);
1807
+ }
1808
+ res += this.substring(0, this.start(0));
1809
+ i++;
1810
+ last = i + 1;
1811
+ continue;
1812
+ } else if (Codepoint.CODES.get("'") === c) {
1813
+ if (last < i) {
1814
+ res += replacement.substring(last, i);
1815
+ }
1816
+ res += this.substring(this.end(0), this.matcherInputLength);
1817
+ i++;
1818
+ last = i + 1;
1819
+ continue;
1802
1820
  } else if (Codepoint.CODES.get('1') <= c && c <= Codepoint.CODES.get('9')) {
1803
1821
  let n = c - Codepoint.CODES.get('0');
1804
1822
  if (last < i) {
@@ -1841,7 +1859,10 @@
1841
1859
  }
1842
1860
  const groupName = replacement.substring(i + 1, j);
1843
1861
  if (Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)) {
1844
- res += this.group(groupName);
1862
+ const groupVal = this.group(groupName);
1863
+ if (groupVal !== null) {
1864
+ res += groupVal;
1865
+ }
1845
1866
  } else {
1846
1867
  res += `$<${groupName}>`;
1847
1868
  }
@@ -4343,13 +4364,6 @@
4343
4364
  // start every program with a fail instruction, so we'll never want to point
4344
4365
  // at its output link.
4345
4366
 
4346
- next(l) {
4347
- const i = this.inst[l >> 1];
4348
- if ((l & 1) === 0) {
4349
- return i.out;
4350
- }
4351
- return i.arg;
4352
- }
4353
4367
  patch(l, val) {
4354
4368
  let head = l.head;
4355
4369
  while (head !== 0) {
@@ -5679,6 +5693,7 @@
5679
5693
  case Codepoint.CODES.get('6'):
5680
5694
  case Codepoint.CODES.get('7'):
5681
5695
  {
5696
+ // Single non-zero digit is a backreference; not supported
5682
5697
  if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
5683
5698
  break;
5684
5699
  }
@@ -5686,6 +5701,7 @@
5686
5701
  // eslint-disable-next-line no-fallthrough
5687
5702
  case Codepoint.CODES.get('0'):
5688
5703
  {
5704
+ // Consume up to three octal digits; already have one.
5689
5705
  let r = c - Codepoint.CODES.get('0');
5690
5706
  for (let i = 1; i < 3; i++) {
5691
5707
  if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
@@ -7466,7 +7482,11 @@
7466
7482
  */
7467
7483
  matchWithGroup(input, start, end, anchor, ngroup) {
7468
7484
  if (!(input instanceof MatcherInputBase)) {
7469
- input = MatcherInput.utf16(input);
7485
+ if (Utils.isByteArray(input)) {
7486
+ input = MatcherInput.utf8(input);
7487
+ } else {
7488
+ input = MatcherInput.utf16(input);
7489
+ }
7470
7490
  }
7471
7491
  return this.matchMachineInput(input, start, end, anchor, ngroup);
7472
7492
  }
@@ -7972,7 +7992,7 @@
7972
7992
  }
7973
7993
  match(input) {
7974
7994
  if (!this.prog) this.compile();
7975
- const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
7995
+ const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
7976
7996
  let internalAnchor = RE2Flags.UNANCHORED;
7977
7997
  if (this.anchor === RE2Set.ANCHOR_START) {
7978
7998
  internalAnchor = RE2Flags.ANCHOR_START;
@@ -8029,45 +8049,50 @@
8029
8049
  {
8030
8050
  if (i + 2 < size) {
8031
8051
  let nextCh = data[i + 2];
8032
- if (TranslateRegExpString.isUpperCaseAlpha(nextCh)) {
8052
+ let code = nextCh.charCodeAt(0);
8053
+ if (code >= 65 && code <= 90 || code >= 97 && code <= 122) {
8054
+ let val = code % 32;
8033
8055
  result += '\\x';
8034
- result += (nextCh.charCodeAt(0) - 64 >> 4).toString(16).toUpperCase();
8035
- result += (nextCh.charCodeAt(0) - 64 & 15).toString(16).toUpperCase();
8056
+ result += (val >> 4).toString(16).toUpperCase();
8057
+ result += (val & 15).toString(16).toUpperCase();
8036
8058
  i += 3;
8037
8059
  changed = true;
8038
8060
  continue;
8039
8061
  }
8040
8062
  }
8041
- result += '\\c';
8063
+ result += 'c';
8042
8064
  i += 2;
8065
+ changed = true;
8043
8066
  continue;
8044
8067
  }
8045
8068
  case 'u':
8046
8069
  {
8047
8070
  if (i + 2 < size) {
8048
8071
  let nextCh = data[i + 2];
8049
- if (TranslateRegExpString.isHexadecimal(nextCh)) {
8050
- result += '\\x{' + nextCh;
8051
- i += 3;
8052
- for (let j = 0; j < 3 && i < size; ++i, ++j) {
8053
- nextCh = data[i];
8054
- if (!TranslateRegExpString.isHexadecimal(nextCh)) {
8055
- break;
8056
- }
8057
- result += nextCh;
8058
- }
8059
- result += '}';
8060
- changed = true;
8061
- continue;
8062
- } else if (nextCh === '{') {
8072
+ if (nextCh === '{') {
8063
8073
  result += '\\x';
8064
8074
  i += 2;
8065
8075
  changed = true;
8066
8076
  continue;
8077
+ } else if (i + 5 < size) {
8078
+ let isHex4 = true;
8079
+ for (let j = 0; j < 4; j++) {
8080
+ if (!TranslateRegExpString.isHexadecimal(data[i + 2 + j])) {
8081
+ isHex4 = false;
8082
+ break;
8083
+ }
8084
+ }
8085
+ if (isHex4) {
8086
+ result += '\\x{' + data.substring(i + 2, i + 6) + '}';
8087
+ i += 6;
8088
+ changed = true;
8089
+ continue;
8090
+ }
8067
8091
  }
8068
8092
  }
8069
- result += '\\u';
8093
+ result += 'u';
8070
8094
  i += 2;
8095
+ changed = true;
8071
8096
  continue;
8072
8097
  }
8073
8098
  default:
@@ -8302,7 +8327,7 @@
8302
8327
  * @returns {Matcher}
8303
8328
  */
8304
8329
  matcher(input) {
8305
- if (Array.isArray(input)) {
8330
+ if (Utils.isByteArray(input)) {
8306
8331
  input = MatcherInput.utf8(input);
8307
8332
  }
8308
8333
  return new Matcher(this, input);
@@ -8318,7 +8343,7 @@
8318
8343
  * @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
8319
8344
  */
8320
8345
  test(input) {
8321
- if (Array.isArray(input)) {
8346
+ if (Utils.isByteArray(input)) {
8322
8347
  // Reuse the existing UTF-8 fast-path method
8323
8348
  return this.re2Input.matchUTF8(input);
8324
8349
  }
@@ -8337,7 +8362,7 @@
8337
8362
  * @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
8338
8363
  */
8339
8364
  testExact(input) {
8340
- const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
8365
+ const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
8341
8366
  return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
8342
8367
  }
8343
8368