re2js 2.2.1 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,8 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.2.1
6
- * @author Alexey Vasiliev
5
+ * @version v2.2.3
6
+ * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
9
9
  * @license MIT
@@ -100,7 +100,7 @@
100
100
  }
101
101
  class Codepoint {
102
102
  // codePointAt(0)
103
- static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
103
+ static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ["'", 39], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['`', 96], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
104
104
 
105
105
  // convert unicode codepoint to upper case codepoint
106
106
  // return same codepoint, if cannot do it (or codepoint not have upper variation)
@@ -154,10 +154,6 @@
154
154
  getStride(index) {
155
155
  return this.isStride1 ? 1 : this.data[index * this.SIZE + 2];
156
156
  }
157
- get(index) {
158
- const i = index * this.SIZE;
159
- return [this.data[i], this.data[i + 1], this.getStride(index)];
160
- }
161
157
  get length() {
162
158
  return this.data.length / this.SIZE;
163
159
  }
@@ -654,6 +650,9 @@
654
650
  static emptyInts() {
655
651
  return [];
656
652
  }
653
+ static isByteArray(input) {
654
+ return Array.isArray(input) || input instanceof Uint8Array;
655
+ }
657
656
 
658
657
  // Returns true iff |c| is an ASCII letter or decimal digit.
659
658
  static isalnum(c) {
@@ -955,7 +954,7 @@
955
954
  * @returns {number[]}
956
955
  */
957
956
  asBytes() {
958
- return this.charSequence.toString().split('').map(s => s.codePointAt(0));
957
+ return Utils.stringToUtf8ByteArray(this.charSequence.toString());
959
958
  }
960
959
 
961
960
  /**
@@ -980,7 +979,7 @@
980
979
  * @returns {Utf8MatcherInput}
981
980
  */
982
981
  static utf8(input) {
983
- if (Array.isArray(input)) {
982
+ if (Utils.isByteArray(input)) {
984
983
  return new Utf8MatcherInput(input);
985
984
  }
986
985
  return new Utf8MatcherInput(Utils.stringToUtf8ByteArray(input));
@@ -1112,10 +1111,10 @@
1112
1111
  if (start < this.start) {
1113
1112
  start = this.start;
1114
1113
  }
1115
- r1 = this.step(start) >> 3;
1114
+ r1 = this.step(start - this.start) >> 3;
1116
1115
  }
1117
1116
  }
1118
- const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
1117
+ const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
1119
1118
  return Utils.emptyOpContext(r1, r2);
1120
1119
  }
1121
1120
 
@@ -1197,14 +1196,17 @@
1197
1196
  index(re2, pos) {
1198
1197
  pos += this.start;
1199
1198
  const i = this.charSequence.indexOf(re2.prefix, pos);
1200
- return i < 0 ? i : i - pos;
1199
+ if (i < 0 || i > this.end - re2.prefix.length) {
1200
+ return -1;
1201
+ }
1202
+ return i - pos;
1201
1203
  }
1202
1204
 
1203
1205
  // Returns a bitmask of EMPTY_* flags.
1204
1206
  context(pos) {
1205
1207
  pos += this.start;
1206
- const r1 = pos > 0 && pos <= this.charSequence.length ? this.charSequence.codePointAt(pos - 1) : -1;
1207
- const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
1208
+ const r1 = pos > this.start && pos <= this.end ? this.charSequence.codePointAt(pos - 1) : -1;
1209
+ const r2 = pos < this.end ? this.charSequence.codePointAt(pos) : -1;
1208
1210
  return Utils.emptyOpContext(r1, r2);
1209
1211
  }
1210
1212
  prefixLength(re2) {
@@ -1392,7 +1394,7 @@
1392
1394
  this.numberOfInstructions = re2.numberOfInstructions();
1393
1395
  if (input instanceof MatcherInputBase) {
1394
1396
  this.resetMatcherInput(input);
1395
- } else if (Array.isArray(input)) {
1397
+ } else if (Utils.isByteArray(input)) {
1396
1398
  this.resetMatcherInput(MatcherInput.utf8(input));
1397
1399
  } else {
1398
1400
  this.resetMatcherInput(MatcherInput.utf16(input));
@@ -1554,10 +1556,7 @@
1554
1556
  if (group === 0 || this.hasGroups) {
1555
1557
  return;
1556
1558
  }
1557
- let end = this.groups[1] + 1;
1558
- if (end > this.matcherInputLength) {
1559
- end = this.matcherInputLength;
1560
- }
1559
+ const end = this.matcherInputLength;
1561
1560
  const res = this.patternInput.re2().matchMachineInput(this.matcherInput, this.groups[0], end, this.anchorFlag, 1 + this.patternGroupCount);
1562
1561
  const ok = res[0];
1563
1562
  if (!ok) {
@@ -1753,7 +1752,10 @@
1753
1752
  throw new RE2JSGroupException("named capture group is missing trailing '}'");
1754
1753
  }
1755
1754
  const groupName = replacement.substring(i + 1, j);
1756
- res += this.group(groupName);
1755
+ const groupVal = this.group(groupName);
1756
+ if (groupVal !== null) {
1757
+ res += groupVal;
1758
+ }
1757
1759
  last = j + 1;
1758
1760
  i = j;
1759
1761
  continue;
@@ -1799,6 +1801,22 @@
1799
1801
  i++;
1800
1802
  last = i + 1;
1801
1803
  continue;
1804
+ } else if (Codepoint.CODES.get('`') === c) {
1805
+ if (last < i) {
1806
+ res += replacement.substring(last, i);
1807
+ }
1808
+ res += this.substring(0, this.start(0));
1809
+ i++;
1810
+ last = i + 1;
1811
+ continue;
1812
+ } else if (Codepoint.CODES.get("'") === c) {
1813
+ if (last < i) {
1814
+ res += replacement.substring(last, i);
1815
+ }
1816
+ res += this.substring(this.end(0), this.matcherInputLength);
1817
+ i++;
1818
+ last = i + 1;
1819
+ continue;
1802
1820
  } else if (Codepoint.CODES.get('1') <= c && c <= Codepoint.CODES.get('9')) {
1803
1821
  let n = c - Codepoint.CODES.get('0');
1804
1822
  if (last < i) {
@@ -1841,7 +1859,10 @@
1841
1859
  }
1842
1860
  const groupName = replacement.substring(i + 1, j);
1843
1861
  if (Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)) {
1844
- res += this.group(groupName);
1862
+ const groupVal = this.group(groupName);
1863
+ if (groupVal !== null) {
1864
+ res += groupVal;
1865
+ }
1845
1866
  } else {
1846
1867
  res += `$<${groupName}>`;
1847
1868
  }
@@ -3347,7 +3368,9 @@
3347
3368
  }
3348
3369
  runes.sort((a, b) => a - b);
3349
3370
  } else {
3350
- runes.push(...inst.runes);
3371
+ for (let j = 0; j < inst.runes.length; j++) {
3372
+ runes.push(inst.runes[j]);
3373
+ }
3351
3374
  }
3352
3375
  onePassRunes[pc] = runes;
3353
3376
  inst.next = new Uint32Array(Math.floor(runes.length / 2) + 1).fill(inst.out);
@@ -4174,7 +4197,9 @@
4174
4197
  return new Prefilter(Prefilter.Type.NONE);
4175
4198
  }
4176
4199
  if (s.type === Prefilter.Type.OR) {
4177
- newSubs.push(...s.subs);
4200
+ for (let j = 0; j < s.subs.length; j++) {
4201
+ newSubs.push(s.subs[j]);
4202
+ }
4178
4203
  } else {
4179
4204
  newSubs.push(s);
4180
4205
  }
@@ -4339,13 +4364,6 @@
4339
4364
  // start every program with a fail instruction, so we'll never want to point
4340
4365
  // at its output link.
4341
4366
 
4342
- next(l) {
4343
- const i = this.inst[l >> 1];
4344
- if ((l & 1) === 0) {
4345
- return i.out;
4346
- }
4347
- return i.arg;
4348
- }
4349
4367
  patch(l, val) {
4350
4368
  let head = l.head;
4351
4369
  while (head !== 0) {
@@ -5675,6 +5693,7 @@
5675
5693
  case Codepoint.CODES.get('6'):
5676
5694
  case Codepoint.CODES.get('7'):
5677
5695
  {
5696
+ // Single non-zero digit is a backreference; not supported
5678
5697
  if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
5679
5698
  break;
5680
5699
  }
@@ -5682,6 +5701,7 @@
5682
5701
  // eslint-disable-next-line no-fallthrough
5683
5702
  case Codepoint.CODES.get('0'):
5684
5703
  {
5704
+ // Consume up to three octal digits; already have one.
5685
5705
  let r = c - Codepoint.CODES.get('0');
5686
5706
  for (let i = 1; i < 3; i++) {
5687
5707
  if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
@@ -7462,7 +7482,11 @@
7462
7482
  */
7463
7483
  matchWithGroup(input, start, end, anchor, ngroup) {
7464
7484
  if (!(input instanceof MatcherInputBase)) {
7465
- input = MatcherInput.utf16(input);
7485
+ if (Utils.isByteArray(input)) {
7486
+ input = MatcherInput.utf8(input);
7487
+ } else {
7488
+ input = MatcherInput.utf16(input);
7489
+ }
7466
7490
  }
7467
7491
  return this.matchMachineInput(input, start, end, anchor, ngroup);
7468
7492
  }
@@ -7968,7 +7992,7 @@
7968
7992
  }
7969
7993
  match(input) {
7970
7994
  if (!this.prog) this.compile();
7971
- const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
7995
+ const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
7972
7996
  let internalAnchor = RE2Flags.UNANCHORED;
7973
7997
  if (this.anchor === RE2Set.ANCHOR_START) {
7974
7998
  internalAnchor = RE2Flags.ANCHOR_START;
@@ -8025,45 +8049,50 @@
8025
8049
  {
8026
8050
  if (i + 2 < size) {
8027
8051
  let nextCh = data[i + 2];
8028
- if (TranslateRegExpString.isUpperCaseAlpha(nextCh)) {
8052
+ let code = nextCh.charCodeAt(0);
8053
+ if (code >= 65 && code <= 90 || code >= 97 && code <= 122) {
8054
+ let val = code % 32;
8029
8055
  result += '\\x';
8030
- result += (nextCh.charCodeAt(0) - 64 >> 4).toString(16).toUpperCase();
8031
- result += (nextCh.charCodeAt(0) - 64 & 15).toString(16).toUpperCase();
8056
+ result += (val >> 4).toString(16).toUpperCase();
8057
+ result += (val & 15).toString(16).toUpperCase();
8032
8058
  i += 3;
8033
8059
  changed = true;
8034
8060
  continue;
8035
8061
  }
8036
8062
  }
8037
- result += '\\c';
8063
+ result += 'c';
8038
8064
  i += 2;
8065
+ changed = true;
8039
8066
  continue;
8040
8067
  }
8041
8068
  case 'u':
8042
8069
  {
8043
8070
  if (i + 2 < size) {
8044
8071
  let nextCh = data[i + 2];
8045
- if (TranslateRegExpString.isHexadecimal(nextCh)) {
8046
- result += '\\x{' + nextCh;
8047
- i += 3;
8048
- for (let j = 0; j < 3 && i < size; ++i, ++j) {
8049
- nextCh = data[i];
8050
- if (!TranslateRegExpString.isHexadecimal(nextCh)) {
8051
- break;
8052
- }
8053
- result += nextCh;
8054
- }
8055
- result += '}';
8056
- changed = true;
8057
- continue;
8058
- } else if (nextCh === '{') {
8072
+ if (nextCh === '{') {
8059
8073
  result += '\\x';
8060
8074
  i += 2;
8061
8075
  changed = true;
8062
8076
  continue;
8077
+ } else if (i + 5 < size) {
8078
+ let isHex4 = true;
8079
+ for (let j = 0; j < 4; j++) {
8080
+ if (!TranslateRegExpString.isHexadecimal(data[i + 2 + j])) {
8081
+ isHex4 = false;
8082
+ break;
8083
+ }
8084
+ }
8085
+ if (isHex4) {
8086
+ result += '\\x{' + data.substring(i + 2, i + 6) + '}';
8087
+ i += 6;
8088
+ changed = true;
8089
+ continue;
8090
+ }
8063
8091
  }
8064
8092
  }
8065
- result += '\\u';
8093
+ result += 'u';
8066
8094
  i += 2;
8095
+ changed = true;
8067
8096
  continue;
8068
8097
  }
8069
8098
  default:
@@ -8298,7 +8327,7 @@
8298
8327
  * @returns {Matcher}
8299
8328
  */
8300
8329
  matcher(input) {
8301
- if (Array.isArray(input)) {
8330
+ if (Utils.isByteArray(input)) {
8302
8331
  input = MatcherInput.utf8(input);
8303
8332
  }
8304
8333
  return new Matcher(this, input);
@@ -8314,7 +8343,7 @@
8314
8343
  * @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
8315
8344
  */
8316
8345
  test(input) {
8317
- if (Array.isArray(input)) {
8346
+ if (Utils.isByteArray(input)) {
8318
8347
  // Reuse the existing UTF-8 fast-path method
8319
8348
  return this.re2Input.matchUTF8(input);
8320
8349
  }
@@ -8333,7 +8362,7 @@
8333
8362
  * @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
8334
8363
  */
8335
8364
  testExact(input) {
8336
- const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
8365
+ const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
8337
8366
  return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
8338
8367
  }
8339
8368