re2js 2.2.2 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.2.2
5
+ * @version v2.3.0
6
6
  * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -94,7 +94,7 @@ for (let i = 0; i < ASCII_SIZE; i++) {
94
94
  }
95
95
  class Codepoint {
96
96
  // codePointAt(0)
97
- static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
97
+ static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ["'", 39], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['`', 96], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
98
98
 
99
99
  // convert unicode codepoint to upper case codepoint
100
100
  // return same codepoint, if cannot do it (or codepoint not have upper variation)
@@ -148,10 +148,6 @@ class UnicodeRangeTable {
148
148
  getStride(index) {
149
149
  return this.isStride1 ? 1 : this.data[index * this.SIZE + 2];
150
150
  }
151
- get(index) {
152
- const i = index * this.SIZE;
153
- return [this.data[i], this.data[i + 1], this.getStride(index)];
154
- }
155
151
  get length() {
156
152
  return this.data.length / this.SIZE;
157
153
  }
@@ -648,6 +644,9 @@ class Utils {
648
644
  static emptyInts() {
649
645
  return [];
650
646
  }
647
+ static isByteArray(input) {
648
+ return Array.isArray(input) || input instanceof Uint8Array;
649
+ }
651
650
 
652
651
  // Returns true iff |c| is an ASCII letter or decimal digit.
653
652
  static isalnum(c) {
@@ -949,7 +948,7 @@ class Utf16MatcherInput extends MatcherInputBase {
949
948
  * @returns {number[]}
950
949
  */
951
950
  asBytes() {
952
- return this.charSequence.toString().split('').map(s => s.codePointAt(0));
951
+ return Utils.stringToUtf8ByteArray(this.charSequence.toString());
953
952
  }
954
953
 
955
954
  /**
@@ -974,7 +973,7 @@ class MatcherInput {
974
973
  * @returns {Utf8MatcherInput}
975
974
  */
976
975
  static utf8(input) {
977
- if (Array.isArray(input)) {
976
+ if (Utils.isByteArray(input)) {
978
977
  return new Utf8MatcherInput(input);
979
978
  }
980
979
  return new Utf8MatcherInput(Utils.stringToUtf8ByteArray(input));
@@ -1106,10 +1105,10 @@ class MachineUTF8Input extends MachineInputBase {
1106
1105
  if (start < this.start) {
1107
1106
  start = this.start;
1108
1107
  }
1109
- r1 = this.step(start) >> 3;
1108
+ r1 = this.step(start - this.start) >> 3;
1110
1109
  }
1111
1110
  }
1112
- const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
1111
+ const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
1113
1112
  return Utils.emptyOpContext(r1, r2);
1114
1113
  }
1115
1114
 
@@ -1191,14 +1190,17 @@ class MachineUTF16Input extends MachineInputBase {
1191
1190
  index(re2, pos) {
1192
1191
  pos += this.start;
1193
1192
  const i = this.charSequence.indexOf(re2.prefix, pos);
1194
- return i < 0 ? i : i - pos;
1193
+ if (i < 0 || i > this.end - re2.prefix.length) {
1194
+ return -1;
1195
+ }
1196
+ return i - pos;
1195
1197
  }
1196
1198
 
1197
1199
  // Returns a bitmask of EMPTY_* flags.
1198
1200
  context(pos) {
1199
1201
  pos += this.start;
1200
- const r1 = pos > 0 && pos <= this.charSequence.length ? this.charSequence.codePointAt(pos - 1) : -1;
1201
- const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
1202
+ const r1 = pos > this.start && pos <= this.end ? this.charSequence.codePointAt(pos - 1) : -1;
1203
+ const r2 = pos < this.end ? this.charSequence.codePointAt(pos) : -1;
1202
1204
  return Utils.emptyOpContext(r1, r2);
1203
1205
  }
1204
1206
  prefixLength(re2) {
@@ -1327,6 +1329,11 @@ class RE2JSInternalException extends RE2JSException {
1327
1329
  *
1328
1330
  * @author rsc@google.com (Russ Cox)
1329
1331
  */
1332
+
1333
+ /**
1334
+ * @typedef {import('./index').RE2JS} RE2JS_Pattern
1335
+ */
1336
+
1330
1337
  class Matcher {
1331
1338
  /**
1332
1339
  * Quotes '\' and '$' in {@code s}, so that the returned string could be used in
@@ -1364,14 +1371,17 @@ class Matcher {
1364
1371
  }
1365
1372
  /**
1366
1373
  *
1367
- * @param {RE2JS} pattern
1368
- * @param {Utf8MatcherInput|Utf16MatcherInput|number[]|string} input
1374
+ * @param {RE2JS_Pattern} pattern
1375
+ * @param {Uint8Array|number[]|string} input
1369
1376
  */
1370
1377
  constructor(pattern, input) {
1371
1378
  if (pattern === null) {
1372
1379
  throw new Error('pattern is null');
1373
1380
  }
1374
- // The pattern being matched.
1381
+ /**
1382
+ * The pattern being matched.
1383
+ * @type {RE2JS_Pattern}
1384
+ */
1375
1385
  this.patternInput = pattern;
1376
1386
  const re2 = this.patternInput.re2();
1377
1387
  // The number of submatches (groups) in the pattern.
@@ -1386,7 +1396,7 @@ class Matcher {
1386
1396
  this.numberOfInstructions = re2.numberOfInstructions();
1387
1397
  if (input instanceof MatcherInputBase) {
1388
1398
  this.resetMatcherInput(input);
1389
- } else if (Array.isArray(input)) {
1399
+ } else if (Utils.isByteArray(input)) {
1390
1400
  this.resetMatcherInput(MatcherInput.utf8(input));
1391
1401
  } else {
1392
1402
  this.resetMatcherInput(MatcherInput.utf16(input));
@@ -1395,7 +1405,7 @@ class Matcher {
1395
1405
 
1396
1406
  /**
1397
1407
  * Returns the {@code RE2JS} associated with this {@code Matcher}.
1398
- * @returns {RE2JS}
1408
+ * @returns {RE2JS_Pattern}
1399
1409
  */
1400
1410
  pattern() {
1401
1411
  return this.patternInput;
@@ -1425,7 +1435,7 @@ class Matcher {
1425
1435
 
1426
1436
  /**
1427
1437
  * Resets the {@code Matcher} and changes the input.
1428
- * @param {Utf8MatcherInput|Utf16MatcherInput} input
1438
+ * @param {import('./MatcherInput').MatcherInputBase} input
1429
1439
  * @returns {Matcher} the {@code Matcher} itself, for chained method calls
1430
1440
  */
1431
1441
  resetMatcherInput(input) {
@@ -1490,7 +1500,7 @@ class Matcher {
1490
1500
  /**
1491
1501
  * Returns the named group of the most recent match, or {@code null} if the group was not matched.
1492
1502
  * @param {string|number} [group=0]
1493
- * @returns {?string}
1503
+ * @returns {string|null}
1494
1504
  */
1495
1505
  group(group = 0) {
1496
1506
  if (typeof group === 'string') {
@@ -1548,10 +1558,7 @@ class Matcher {
1548
1558
  if (group === 0 || this.hasGroups) {
1549
1559
  return;
1550
1560
  }
1551
- let end = this.groups[1] + 1;
1552
- if (end > this.matcherInputLength) {
1553
- end = this.matcherInputLength;
1554
- }
1561
+ const end = this.matcherInputLength;
1555
1562
  const res = this.patternInput.re2().matchMachineInput(this.matcherInput, this.groups[0], end, this.anchorFlag, 1 + this.patternGroupCount);
1556
1563
  const ok = res[0];
1557
1564
  if (!ok) {
@@ -1585,7 +1592,7 @@ class Matcher {
1585
1592
  * Matches the input against the pattern (unanchored), starting at a specified position. If there
1586
1593
  * is a match, {@code find} sets the match state to describe it.
1587
1594
  *
1588
- * @param {number} [start=null] the input position where the search begins
1595
+ * @param {number|null} [start=null] the input position where the search begins
1589
1596
  * @returns {boolean} if it finds a match
1590
1597
  * @throws IndexOutOfBoundsException if start is not a valid input position
1591
1598
  */
@@ -1747,7 +1754,10 @@ class Matcher {
1747
1754
  throw new RE2JSGroupException("named capture group is missing trailing '}'");
1748
1755
  }
1749
1756
  const groupName = replacement.substring(i + 1, j);
1750
- res += this.group(groupName);
1757
+ const groupVal = this.group(groupName);
1758
+ if (groupVal !== null) {
1759
+ res += groupVal;
1760
+ }
1751
1761
  last = j + 1;
1752
1762
  i = j;
1753
1763
  continue;
@@ -1793,6 +1803,22 @@ class Matcher {
1793
1803
  i++;
1794
1804
  last = i + 1;
1795
1805
  continue;
1806
+ } else if (Codepoint.CODES.get('`') === c) {
1807
+ if (last < i) {
1808
+ res += replacement.substring(last, i);
1809
+ }
1810
+ res += this.substring(0, this.start(0));
1811
+ i++;
1812
+ last = i + 1;
1813
+ continue;
1814
+ } else if (Codepoint.CODES.get("'") === c) {
1815
+ if (last < i) {
1816
+ res += replacement.substring(last, i);
1817
+ }
1818
+ res += this.substring(this.end(0), this.matcherInputLength);
1819
+ i++;
1820
+ last = i + 1;
1821
+ continue;
1796
1822
  } else if (Codepoint.CODES.get('1') <= c && c <= Codepoint.CODES.get('9')) {
1797
1823
  let n = c - Codepoint.CODES.get('0');
1798
1824
  if (last < i) {
@@ -1835,7 +1861,10 @@ class Matcher {
1835
1861
  }
1836
1862
  const groupName = replacement.substring(i + 1, j);
1837
1863
  if (Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)) {
1838
- res += this.group(groupName);
1864
+ const groupVal = this.group(groupName);
1865
+ if (groupVal !== null) {
1866
+ res += groupVal;
1867
+ }
1839
1868
  } else {
1840
1869
  res += `$<${groupName}>`;
1841
1870
  }
@@ -4337,13 +4366,6 @@ class Prog {
4337
4366
  // start every program with a fail instruction, so we'll never want to point
4338
4367
  // at its output link.
4339
4368
 
4340
- next(l) {
4341
- const i = this.inst[l >> 1];
4342
- if ((l & 1) === 0) {
4343
- return i.out;
4344
- }
4345
- return i.arg;
4346
- }
4347
4369
  patch(l, val) {
4348
4370
  let head = l.head;
4349
4371
  while (head !== 0) {
@@ -5673,6 +5695,7 @@ class Parser {
5673
5695
  case Codepoint.CODES.get('6'):
5674
5696
  case Codepoint.CODES.get('7'):
5675
5697
  {
5698
+ // Single non-zero digit is a backreference; not supported
5676
5699
  if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
5677
5700
  break;
5678
5701
  }
@@ -5680,6 +5703,7 @@ class Parser {
5680
5703
  // eslint-disable-next-line no-fallthrough
5681
5704
  case Codepoint.CODES.get('0'):
5682
5705
  {
5706
+ // Consume up to three octal digits; already have one.
5683
5707
  let r = c - Codepoint.CODES.get('0');
5684
5708
  for (let i = 1; i < 3; i++) {
5685
5709
  if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
@@ -7460,7 +7484,11 @@ class RE2 {
7460
7484
  */
7461
7485
  matchWithGroup(input, start, end, anchor, ngroup) {
7462
7486
  if (!(input instanceof MatcherInputBase)) {
7463
- input = MatcherInput.utf16(input);
7487
+ if (Utils.isByteArray(input)) {
7488
+ input = MatcherInput.utf8(input);
7489
+ } else {
7490
+ input = MatcherInput.utf16(input);
7491
+ }
7464
7492
  }
7465
7493
  return this.matchMachineInput(input, start, end, anchor, ngroup);
7466
7494
  }
@@ -7915,9 +7943,18 @@ class RE2 {
7915
7943
  }
7916
7944
 
7917
7945
  class RE2Set {
7946
+ /** @type {number} */
7918
7947
  static UNANCHORED = RE2Flags.UNANCHORED;
7948
+ /** @type {number} */
7919
7949
  static ANCHOR_START = RE2Flags.ANCHOR_START;
7950
+ /** @type {number} */
7920
7951
  static ANCHOR_BOTH = RE2Flags.ANCHOR_BOTH;
7952
+
7953
+ /**
7954
+ * Constructs a new RE2Set with the specified anchor mode and flags.
7955
+ * @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
7956
+ * @param {number} [flags=0] - The public flags to apply to all patterns in the set.
7957
+ */
7921
7958
  constructor(anchor = RE2Set.UNANCHORED, flags = 0) {
7922
7959
  this.anchor = anchor;
7923
7960
  this.jsFlags = flags;
@@ -7934,6 +7971,14 @@ class RE2Set {
7934
7971
  this.dfa = null;
7935
7972
  this.dummyRe2 = null;
7936
7973
  }
7974
+
7975
+ /**
7976
+ * Adds a new regular expression pattern to the set.
7977
+ * Patterns cannot be added after the set has been compiled.
7978
+ * @param {string} pattern - The regular expression pattern to add.
7979
+ * @returns {number} The integer index assigned to the added pattern.
7980
+ * @throws {RE2JSCompileException} If patterns are added after compilation.
7981
+ */
7937
7982
  add(pattern) {
7938
7983
  if (this.prog) {
7939
7984
  throw new RE2JSCompileException('Cannot add patterns after compile');
@@ -7952,6 +7997,12 @@ class RE2Set {
7952
7997
  this.regexps.push(Simplify.simplify(re));
7953
7998
  return this.regexps.length - 1;
7954
7999
  }
8000
+
8001
+ /**
8002
+ * Compiles the added patterns into a single state machine.
8003
+ * This is automatically called on the first match if not called explicitly.
8004
+ * @returns {void}
8005
+ */
7955
8006
  compile() {
7956
8007
  if (this.prog) return;
7957
8008
  this.prog = Compiler.compileSet(this.regexps);
@@ -7964,9 +8015,15 @@ class RE2Set {
7964
8015
  longest: false
7965
8016
  };
7966
8017
  }
8018
+
8019
+ /**
8020
+ * Matches the input against the compiled set of regular expressions.
8021
+ * @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
8022
+ * @returns {number[]} An array of indices representing the patterns that successfully matched the input.
8023
+ */
7967
8024
  match(input) {
7968
8025
  if (!this.prog) this.compile();
7969
- const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
8026
+ const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
7970
8027
  let internalAnchor = RE2Flags.UNANCHORED;
7971
8028
  if (this.anchor === RE2Set.ANCHOR_START) {
7972
8029
  internalAnchor = RE2Flags.ANCHOR_START;
@@ -7989,13 +8046,19 @@ class RE2Set {
7989
8046
  * Transform JS regex string to RE2 regex string
7990
8047
  */
7991
8048
  class TranslateRegExpString {
7992
- static isUpperCaseAlpha(ch) {
7993
- return 'A' <= ch && ch <= 'Z';
7994
- }
7995
8049
  static isHexadecimal(ch) {
7996
8050
  return '0' <= ch && ch <= '9' || 'A' <= ch && ch <= 'F' || 'a' <= ch && ch <= 'f';
7997
8051
  }
7998
8052
  static translate(data) {
8053
+ let prefixFlags = '';
8054
+ if (data instanceof RegExp) {
8055
+ if (data.ignoreCase) prefixFlags += 'i';
8056
+ if (data.multiline) prefixFlags += 'm';
8057
+ if (data.dotAll) prefixFlags += 's';
8058
+
8059
+ // execution flags ('g', 'y') are safely ignored here.
8060
+ data = data.source;
8061
+ }
7999
8062
  if (typeof data !== 'string') {
8000
8063
  return data;
8001
8064
  }
@@ -8006,6 +8069,7 @@ class TranslateRegExpString {
8006
8069
  result = '(?:)';
8007
8070
  changed = true;
8008
8071
  }
8072
+ let inCharClass = false;
8009
8073
  let i = 0;
8010
8074
  while (i < size) {
8011
8075
  let ch = data[i];
@@ -8023,54 +8087,160 @@ class TranslateRegExpString {
8023
8087
  {
8024
8088
  if (i + 2 < size) {
8025
8089
  let nextCh = data[i + 2];
8026
- if (TranslateRegExpString.isUpperCaseAlpha(nextCh)) {
8090
+ let code = nextCh.charCodeAt(0);
8091
+ if (code >= 65 && code <= 90 || code >= 97 && code <= 122) {
8092
+ let val = code % 32;
8027
8093
  result += '\\x';
8028
- result += (nextCh.charCodeAt(0) - 64 >> 4).toString(16).toUpperCase();
8029
- result += (nextCh.charCodeAt(0) - 64 & 15).toString(16).toUpperCase();
8094
+ result += (val >> 4).toString(16).toUpperCase();
8095
+ result += (val & 15).toString(16).toUpperCase();
8030
8096
  i += 3;
8031
8097
  changed = true;
8032
8098
  continue;
8033
8099
  }
8034
8100
  }
8035
- result += '\\c';
8101
+ result += 'c';
8036
8102
  i += 2;
8103
+ changed = true;
8037
8104
  continue;
8038
8105
  }
8039
8106
  case 'u':
8040
8107
  {
8041
8108
  if (i + 2 < size) {
8042
8109
  let nextCh = data[i + 2];
8043
- if (TranslateRegExpString.isHexadecimal(nextCh)) {
8044
- result += '\\x{' + nextCh;
8045
- i += 3;
8046
- for (let j = 0; j < 3 && i < size; ++i, ++j) {
8047
- nextCh = data[i];
8048
- if (!TranslateRegExpString.isHexadecimal(nextCh)) {
8110
+ if (nextCh === '{') {
8111
+ // Must have a closing brace and at least one valid hex digit inside
8112
+ let j = i + 3;
8113
+ let hasHex = false;
8114
+ let closed = false;
8115
+ while (j < size) {
8116
+ const hexChar = data[j];
8117
+ if (hexChar === '}') {
8118
+ closed = true;
8119
+ break;
8120
+ }
8121
+ if (!TranslateRegExpString.isHexadecimal(hexChar)) {
8049
8122
  break;
8050
8123
  }
8051
- result += nextCh;
8124
+ hasHex = true;
8125
+ j++;
8126
+ }
8127
+ if (closed && hasHex) {
8128
+ result += '\\x';
8129
+ i += 2;
8130
+ changed = true;
8131
+ continue;
8132
+ }
8133
+ } else if (i + 5 < size) {
8134
+ let isHex4 = true;
8135
+ for (let j = 0; j < 4; j++) {
8136
+ if (!TranslateRegExpString.isHexadecimal(data[i + 2 + j])) {
8137
+ isHex4 = false;
8138
+ break;
8139
+ }
8140
+ }
8141
+ if (isHex4) {
8142
+ result += '\\x{' + data.substring(i + 2, i + 6) + '}';
8143
+ i += 6;
8144
+ changed = true;
8145
+ continue;
8052
8146
  }
8053
- result += '}';
8054
- changed = true;
8055
- continue;
8056
- } else if (nextCh === '{') {
8057
- result += '\\x';
8058
- i += 2;
8059
- changed = true;
8060
- continue;
8061
8147
  }
8062
8148
  }
8063
- result += '\\u';
8149
+
8150
+ // Graceful degradation for invalid/unclosed \u sequences
8151
+ result += 'u';
8152
+ i += 2;
8153
+ changed = true;
8154
+ continue;
8155
+ }
8156
+ case 'x':
8157
+ {
8158
+ let isValidHex = false;
8159
+ if (i + 2 < size && data[i + 2] === '{') {
8160
+ // Must have a closing brace and at least one valid hex digit inside
8161
+ let j = i + 3;
8162
+ let hasHex = false;
8163
+ let closed = false;
8164
+ while (j < size) {
8165
+ const hexChar = data[j];
8166
+ if (hexChar === '}') {
8167
+ closed = true;
8168
+ break;
8169
+ }
8170
+ if (!TranslateRegExpString.isHexadecimal(hexChar)) {
8171
+ break;
8172
+ }
8173
+ hasHex = true;
8174
+ j++;
8175
+ }
8176
+ if (closed && hasHex) {
8177
+ isValidHex = true;
8178
+ }
8179
+ } else if (i + 3 < size && TranslateRegExpString.isHexadecimal(data[i + 2]) && TranslateRegExpString.isHexadecimal(data[i + 3])) {
8180
+ isValidHex = true;
8181
+ }
8182
+ if (isValidHex) {
8183
+ result += '\\x';
8184
+ i += 2;
8185
+ } else {
8186
+ result += 'x';
8187
+ i += 2;
8188
+ changed = true;
8189
+ }
8190
+ continue;
8191
+ }
8192
+ // Whitelist of valid RE2/JS alphanumeric escapes
8193
+ case 'n':
8194
+ case 'r':
8195
+ case 't':
8196
+ case 'a':
8197
+ case 'f':
8198
+ case 'v':
8199
+ case 'd':
8200
+ case 'D':
8201
+ case 's':
8202
+ case 'S':
8203
+ case 'w':
8204
+ case 'W':
8205
+ case 'b':
8206
+ case 'B':
8207
+ case 'p':
8208
+ case 'P':
8209
+ case 'A':
8210
+ case 'z':
8211
+ case 'Q':
8212
+ case 'E':
8213
+ case '0':
8214
+ case '1':
8215
+ case '2':
8216
+ case '3':
8217
+ case '4':
8218
+ case '5':
8219
+ case '6':
8220
+ case '7':
8221
+ {
8222
+ result += '\\' + ch;
8064
8223
  i += 2;
8065
8224
  continue;
8066
8225
  }
8067
8226
  default:
8068
8227
  {
8069
- result += '\\';
8070
8228
  let cp = data.codePointAt(i + 1);
8071
- let symSize = Utils.charCount(cp);
8072
- result += data.substring(i + 1, i + 1 + symSize);
8073
- i += symSize + 1;
8229
+ let isAlphaNum = cp >= 48 && cp <= 57 || cp >= 65 && cp <= 90 || cp >= 97 && cp <= 122;
8230
+ if (isAlphaNum) {
8231
+ // Invalid JS alphanumeric escape sequence (e.g. \8, \9, \e, \K)
8232
+ // Gracefully degrade to the literal character to prevent RE2 syntax crashes
8233
+ let symSize = Utils.charCount(cp);
8234
+ result += data.substring(i + 1, i + 1 + symSize);
8235
+ i += symSize + 1;
8236
+ changed = true;
8237
+ } else {
8238
+ // Escaped symbol (e.g. \., \*, \])
8239
+ result += '\\';
8240
+ let symSize = Utils.charCount(cp);
8241
+ result += data.substring(i + 1, i + 1 + symSize);
8242
+ i += symSize + 1;
8243
+ }
8074
8244
  continue;
8075
8245
  }
8076
8246
  }
@@ -8080,7 +8250,13 @@ class TranslateRegExpString {
8080
8250
  i += 1;
8081
8251
  changed = true;
8082
8252
  continue;
8083
- } else if (ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
8253
+ } else if (ch === '[') {
8254
+ // Track entry into a character class (protects syntax inside)
8255
+ inCharClass = true;
8256
+ } else if (ch === ']') {
8257
+ // Track exit of a character class
8258
+ inCharClass = false;
8259
+ } else if (!inCharClass && ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
8084
8260
  if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
8085
8261
  result += '(?P<';
8086
8262
  i += 3;
@@ -8093,7 +8269,13 @@ class TranslateRegExpString {
8093
8269
  result += data.substring(i, i + symSize);
8094
8270
  i += symSize;
8095
8271
  }
8096
- return changed ? result : data;
8272
+ const finalResult = changed ? result : data;
8273
+
8274
+ // Append any extracted inline flags
8275
+ if (prefixFlags.length > 0) {
8276
+ return `(?${prefixFlags})${finalResult}`;
8277
+ }
8278
+ return finalResult;
8097
8279
  }
8098
8280
  }
8099
8281
 
@@ -8171,7 +8353,7 @@ class RE2JS {
8171
8353
  * RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
8172
8354
  * resulting regex is safe and properly formatted before compilation.
8173
8355
  *
8174
- * @param {string} expr - The regular expression string to be translated.
8356
+ * @param {string|RegExp} expr - The regular expression string to be translated.
8175
8357
  * @returns {string} - The transformed regular expression string, ready for compilation.
8176
8358
  */
8177
8359
  static translateRegExp(expr) {
@@ -8215,7 +8397,7 @@ class RE2JS {
8215
8397
  * Matches a string against a regular expression.
8216
8398
  *
8217
8399
  * @param {string} regex the regular expression
8218
- * @param {string|number[]} input the input
8400
+ * @param {string|number[]|Uint8Array} input the input
8219
8401
  * @returns {boolean} true if the regular expression matches the entire input
8220
8402
  * @throws RE2JSSyntaxException if the regular expression is malformed
8221
8403
  */
@@ -8282,7 +8464,7 @@ class RE2JS {
8282
8464
  /**
8283
8465
  * Matches a string against a regular expression.
8284
8466
  *
8285
- * @param {string|number[]} input the input
8467
+ * @param {string|number[]|Uint8Array} input the input
8286
8468
  * @returns {boolean} true if the regular expression matches the entire input
8287
8469
  */
8288
8470
  matches(input) {
@@ -8292,11 +8474,11 @@ class RE2JS {
8292
8474
  /**
8293
8475
  * Creates a new {@code Matcher} matching the pattern against the input.
8294
8476
  *
8295
- * @param {string|number[]} input the input string
8477
+ * @param {string|number[]|Uint8Array} input the input string
8296
8478
  * @returns {Matcher}
8297
8479
  */
8298
8480
  matcher(input) {
8299
- if (Array.isArray(input)) {
8481
+ if (Utils.isByteArray(input)) {
8300
8482
  input = MatcherInput.utf8(input);
8301
8483
  }
8302
8484
  return new Matcher(this, input);
@@ -8308,11 +8490,11 @@ class RE2JS {
8308
8490
  * a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
8309
8491
  * and guarantees execution on the high-speed DFA engine whenever possible.
8310
8492
  *
8311
- * @param {string|number[]} input - The input string or UTF-8 byte array to test against.
8493
+ * @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
8312
8494
  * @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
8313
8495
  */
8314
8496
  test(input) {
8315
- if (Array.isArray(input)) {
8497
+ if (Utils.isByteArray(input)) {
8316
8498
  // Reuse the existing UTF-8 fast-path method
8317
8499
  return this.re2Input.matchUTF8(input);
8318
8500
  }
@@ -8327,11 +8509,11 @@ class RE2JS {
8327
8509
  * faster because it does not request capture group data. By requesting 0 capture groups,
8328
8510
  * it securely routes execution through the DFA fast-path.
8329
8511
  *
8330
- * @param {string|number[]} input - The input string or UTF-8 byte array to test against.
8512
+ * @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
8331
8513
  * @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
8332
8514
  */
8333
8515
  testExact(input) {
8334
- const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
8516
+ const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
8335
8517
  return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
8336
8518
  }
8337
8519