re2js 2.2.2 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.2.2
5
+ * @version v2.3.0
6
6
  * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -100,7 +100,7 @@
100
100
  }
101
101
  class Codepoint {
102
102
  // codePointAt(0)
103
- static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
103
+ static CODES = new Map([['\x07', 7], ['\b', 8], ['\t', 9], ['\n', 10], ['\v', 11], ['\f', 12], ['\r', 13], [' ', 32], ['"', 34], ['$', 36], ['&', 38], ["'", 39], ['(', 40], [')', 41], ['*', 42], ['+', 43], ['-', 45], ['.', 46], ['0', 48], ['1', 49], ['2', 50], ['3', 51], ['4', 52], ['5', 53], ['6', 54], ['7', 55], ['8', 56], ['9', 57], [':', 58], ['<', 60], ['>', 62], ['?', 63], ['A', 65], ['B', 66], ['C', 67], ['F', 70], ['P', 80], ['Q', 81], ['U', 85], ['Z', 90], ['[', 91], ['\\', 92], [']', 93], ['^', 94], ['_', 95], ['`', 96], ['a', 97], ['b', 98], ['f', 102], ['i', 105], ['m', 109], ['n', 110], ['r', 114], ['s', 115], ['t', 116], ['v', 118], ['x', 120], ['z', 122], ['{', 123], ['|', 124], ['}', 125]]);
104
104
 
105
105
  // convert unicode codepoint to upper case codepoint
106
106
  // return same codepoint, if cannot do it (or codepoint not have upper variation)
@@ -154,10 +154,6 @@
154
154
  getStride(index) {
155
155
  return this.isStride1 ? 1 : this.data[index * this.SIZE + 2];
156
156
  }
157
- get(index) {
158
- const i = index * this.SIZE;
159
- return [this.data[i], this.data[i + 1], this.getStride(index)];
160
- }
161
157
  get length() {
162
158
  return this.data.length / this.SIZE;
163
159
  }
@@ -654,6 +650,9 @@
654
650
  static emptyInts() {
655
651
  return [];
656
652
  }
653
+ static isByteArray(input) {
654
+ return Array.isArray(input) || input instanceof Uint8Array;
655
+ }
657
656
 
658
657
  // Returns true iff |c| is an ASCII letter or decimal digit.
659
658
  static isalnum(c) {
@@ -955,7 +954,7 @@
955
954
  * @returns {number[]}
956
955
  */
957
956
  asBytes() {
958
- return this.charSequence.toString().split('').map(s => s.codePointAt(0));
957
+ return Utils.stringToUtf8ByteArray(this.charSequence.toString());
959
958
  }
960
959
 
961
960
  /**
@@ -980,7 +979,7 @@
980
979
  * @returns {Utf8MatcherInput}
981
980
  */
982
981
  static utf8(input) {
983
- if (Array.isArray(input)) {
982
+ if (Utils.isByteArray(input)) {
984
983
  return new Utf8MatcherInput(input);
985
984
  }
986
985
  return new Utf8MatcherInput(Utils.stringToUtf8ByteArray(input));
@@ -1112,10 +1111,10 @@
1112
1111
  if (start < this.start) {
1113
1112
  start = this.start;
1114
1113
  }
1115
- r1 = this.step(start) >> 3;
1114
+ r1 = this.step(start - this.start) >> 3;
1116
1115
  }
1117
1116
  }
1118
- const r2 = pos < this.end ? this.step(pos) >> 3 : -1;
1117
+ const r2 = pos < this.end ? this.step(pos - this.start) >> 3 : -1;
1119
1118
  return Utils.emptyOpContext(r1, r2);
1120
1119
  }
1121
1120
 
@@ -1197,14 +1196,17 @@
1197
1196
  index(re2, pos) {
1198
1197
  pos += this.start;
1199
1198
  const i = this.charSequence.indexOf(re2.prefix, pos);
1200
- return i < 0 ? i : i - pos;
1199
+ if (i < 0 || i > this.end - re2.prefix.length) {
1200
+ return -1;
1201
+ }
1202
+ return i - pos;
1201
1203
  }
1202
1204
 
1203
1205
  // Returns a bitmask of EMPTY_* flags.
1204
1206
  context(pos) {
1205
1207
  pos += this.start;
1206
- const r1 = pos > 0 && pos <= this.charSequence.length ? this.charSequence.codePointAt(pos - 1) : -1;
1207
- const r2 = pos < this.charSequence.length ? this.charSequence.codePointAt(pos) : -1;
1208
+ const r1 = pos > this.start && pos <= this.end ? this.charSequence.codePointAt(pos - 1) : -1;
1209
+ const r2 = pos < this.end ? this.charSequence.codePointAt(pos) : -1;
1208
1210
  return Utils.emptyOpContext(r1, r2);
1209
1211
  }
1210
1212
  prefixLength(re2) {
@@ -1333,6 +1335,11 @@
1333
1335
  *
1334
1336
  * @author rsc@google.com (Russ Cox)
1335
1337
  */
1338
+
1339
+ /**
1340
+ * @typedef {import('./index').RE2JS} RE2JS_Pattern
1341
+ */
1342
+
1336
1343
  class Matcher {
1337
1344
  /**
1338
1345
  * Quotes '\' and '$' in {@code s}, so that the returned string could be used in
@@ -1370,14 +1377,17 @@
1370
1377
  }
1371
1378
  /**
1372
1379
  *
1373
- * @param {RE2JS} pattern
1374
- * @param {Utf8MatcherInput|Utf16MatcherInput|number[]|string} input
1380
+ * @param {RE2JS_Pattern} pattern
1381
+ * @param {Uint8Array|number[]|string} input
1375
1382
  */
1376
1383
  constructor(pattern, input) {
1377
1384
  if (pattern === null) {
1378
1385
  throw new Error('pattern is null');
1379
1386
  }
1380
- // The pattern being matched.
1387
+ /**
1388
+ * The pattern being matched.
1389
+ * @type {RE2JS_Pattern}
1390
+ */
1381
1391
  this.patternInput = pattern;
1382
1392
  const re2 = this.patternInput.re2();
1383
1393
  // The number of submatches (groups) in the pattern.
@@ -1392,7 +1402,7 @@
1392
1402
  this.numberOfInstructions = re2.numberOfInstructions();
1393
1403
  if (input instanceof MatcherInputBase) {
1394
1404
  this.resetMatcherInput(input);
1395
- } else if (Array.isArray(input)) {
1405
+ } else if (Utils.isByteArray(input)) {
1396
1406
  this.resetMatcherInput(MatcherInput.utf8(input));
1397
1407
  } else {
1398
1408
  this.resetMatcherInput(MatcherInput.utf16(input));
@@ -1401,7 +1411,7 @@
1401
1411
 
1402
1412
  /**
1403
1413
  * Returns the {@code RE2JS} associated with this {@code Matcher}.
1404
- * @returns {RE2JS}
1414
+ * @returns {RE2JS_Pattern}
1405
1415
  */
1406
1416
  pattern() {
1407
1417
  return this.patternInput;
@@ -1431,7 +1441,7 @@
1431
1441
 
1432
1442
  /**
1433
1443
  * Resets the {@code Matcher} and changes the input.
1434
- * @param {Utf8MatcherInput|Utf16MatcherInput} input
1444
+ * @param {import('./MatcherInput').MatcherInputBase} input
1435
1445
  * @returns {Matcher} the {@code Matcher} itself, for chained method calls
1436
1446
  */
1437
1447
  resetMatcherInput(input) {
@@ -1496,7 +1506,7 @@
1496
1506
  /**
1497
1507
  * Returns the named group of the most recent match, or {@code null} if the group was not matched.
1498
1508
  * @param {string|number} [group=0]
1499
- * @returns {?string}
1509
+ * @returns {string|null}
1500
1510
  */
1501
1511
  group(group = 0) {
1502
1512
  if (typeof group === 'string') {
@@ -1554,10 +1564,7 @@
1554
1564
  if (group === 0 || this.hasGroups) {
1555
1565
  return;
1556
1566
  }
1557
- let end = this.groups[1] + 1;
1558
- if (end > this.matcherInputLength) {
1559
- end = this.matcherInputLength;
1560
- }
1567
+ const end = this.matcherInputLength;
1561
1568
  const res = this.patternInput.re2().matchMachineInput(this.matcherInput, this.groups[0], end, this.anchorFlag, 1 + this.patternGroupCount);
1562
1569
  const ok = res[0];
1563
1570
  if (!ok) {
@@ -1591,7 +1598,7 @@
1591
1598
  * Matches the input against the pattern (unanchored), starting at a specified position. If there
1592
1599
  * is a match, {@code find} sets the match state to describe it.
1593
1600
  *
1594
- * @param {number} [start=null] the input position where the search begins
1601
+ * @param {number|null} [start=null] the input position where the search begins
1595
1602
  * @returns {boolean} if it finds a match
1596
1603
  * @throws IndexOutOfBoundsException if start is not a valid input position
1597
1604
  */
@@ -1753,7 +1760,10 @@
1753
1760
  throw new RE2JSGroupException("named capture group is missing trailing '}'");
1754
1761
  }
1755
1762
  const groupName = replacement.substring(i + 1, j);
1756
- res += this.group(groupName);
1763
+ const groupVal = this.group(groupName);
1764
+ if (groupVal !== null) {
1765
+ res += groupVal;
1766
+ }
1757
1767
  last = j + 1;
1758
1768
  i = j;
1759
1769
  continue;
@@ -1799,6 +1809,22 @@
1799
1809
  i++;
1800
1810
  last = i + 1;
1801
1811
  continue;
1812
+ } else if (Codepoint.CODES.get('`') === c) {
1813
+ if (last < i) {
1814
+ res += replacement.substring(last, i);
1815
+ }
1816
+ res += this.substring(0, this.start(0));
1817
+ i++;
1818
+ last = i + 1;
1819
+ continue;
1820
+ } else if (Codepoint.CODES.get("'") === c) {
1821
+ if (last < i) {
1822
+ res += replacement.substring(last, i);
1823
+ }
1824
+ res += this.substring(this.end(0), this.matcherInputLength);
1825
+ i++;
1826
+ last = i + 1;
1827
+ continue;
1802
1828
  } else if (Codepoint.CODES.get('1') <= c && c <= Codepoint.CODES.get('9')) {
1803
1829
  let n = c - Codepoint.CODES.get('0');
1804
1830
  if (last < i) {
@@ -1841,7 +1867,10 @@
1841
1867
  }
1842
1868
  const groupName = replacement.substring(i + 1, j);
1843
1869
  if (Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)) {
1844
- res += this.group(groupName);
1870
+ const groupVal = this.group(groupName);
1871
+ if (groupVal !== null) {
1872
+ res += groupVal;
1873
+ }
1845
1874
  } else {
1846
1875
  res += `$<${groupName}>`;
1847
1876
  }
@@ -4343,13 +4372,6 @@
4343
4372
  // start every program with a fail instruction, so we'll never want to point
4344
4373
  // at its output link.
4345
4374
 
4346
- next(l) {
4347
- const i = this.inst[l >> 1];
4348
- if ((l & 1) === 0) {
4349
- return i.out;
4350
- }
4351
- return i.arg;
4352
- }
4353
4375
  patch(l, val) {
4354
4376
  let head = l.head;
4355
4377
  while (head !== 0) {
@@ -5679,6 +5701,7 @@
5679
5701
  case Codepoint.CODES.get('6'):
5680
5702
  case Codepoint.CODES.get('7'):
5681
5703
  {
5704
+ // Single non-zero digit is a backreference; not supported
5682
5705
  if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
5683
5706
  break;
5684
5707
  }
@@ -5686,6 +5709,7 @@
5686
5709
  // eslint-disable-next-line no-fallthrough
5687
5710
  case Codepoint.CODES.get('0'):
5688
5711
  {
5712
+ // Consume up to three octal digits; already have one.
5689
5713
  let r = c - Codepoint.CODES.get('0');
5690
5714
  for (let i = 1; i < 3; i++) {
5691
5715
  if (!t.more() || t.peek() < Codepoint.CODES.get('0') || t.peek() > Codepoint.CODES.get('7')) {
@@ -7466,7 +7490,11 @@
7466
7490
  */
7467
7491
  matchWithGroup(input, start, end, anchor, ngroup) {
7468
7492
  if (!(input instanceof MatcherInputBase)) {
7469
- input = MatcherInput.utf16(input);
7493
+ if (Utils.isByteArray(input)) {
7494
+ input = MatcherInput.utf8(input);
7495
+ } else {
7496
+ input = MatcherInput.utf16(input);
7497
+ }
7470
7498
  }
7471
7499
  return this.matchMachineInput(input, start, end, anchor, ngroup);
7472
7500
  }
@@ -7921,9 +7949,18 @@
7921
7949
  }
7922
7950
 
7923
7951
  class RE2Set {
7952
+ /** @type {number} */
7924
7953
  static UNANCHORED = RE2Flags.UNANCHORED;
7954
+ /** @type {number} */
7925
7955
  static ANCHOR_START = RE2Flags.ANCHOR_START;
7956
+ /** @type {number} */
7926
7957
  static ANCHOR_BOTH = RE2Flags.ANCHOR_BOTH;
7958
+
7959
+ /**
7960
+ * Constructs a new RE2Set with the specified anchor mode and flags.
7961
+ * @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
7962
+ * @param {number} [flags=0] - The public flags to apply to all patterns in the set.
7963
+ */
7927
7964
  constructor(anchor = RE2Set.UNANCHORED, flags = 0) {
7928
7965
  this.anchor = anchor;
7929
7966
  this.jsFlags = flags;
@@ -7940,6 +7977,14 @@
7940
7977
  this.dfa = null;
7941
7978
  this.dummyRe2 = null;
7942
7979
  }
7980
+
7981
+ /**
7982
+ * Adds a new regular expression pattern to the set.
7983
+ * Patterns cannot be added after the set has been compiled.
7984
+ * @param {string} pattern - The regular expression pattern to add.
7985
+ * @returns {number} The integer index assigned to the added pattern.
7986
+ * @throws {RE2JSCompileException} If patterns are added after compilation.
7987
+ */
7943
7988
  add(pattern) {
7944
7989
  if (this.prog) {
7945
7990
  throw new RE2JSCompileException('Cannot add patterns after compile');
@@ -7958,6 +8003,12 @@
7958
8003
  this.regexps.push(Simplify.simplify(re));
7959
8004
  return this.regexps.length - 1;
7960
8005
  }
8006
+
8007
+ /**
8008
+ * Compiles the added patterns into a single state machine.
8009
+ * This is automatically called on the first match if not called explicitly.
8010
+ * @returns {void}
8011
+ */
7961
8012
  compile() {
7962
8013
  if (this.prog) return;
7963
8014
  this.prog = Compiler.compileSet(this.regexps);
@@ -7970,9 +8021,15 @@
7970
8021
  longest: false
7971
8022
  };
7972
8023
  }
8024
+
8025
+ /**
8026
+ * Matches the input against the compiled set of regular expressions.
8027
+ * @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
8028
+ * @returns {number[]} An array of indices representing the patterns that successfully matched the input.
8029
+ */
7973
8030
  match(input) {
7974
8031
  if (!this.prog) this.compile();
7975
- const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
8032
+ const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
7976
8033
  let internalAnchor = RE2Flags.UNANCHORED;
7977
8034
  if (this.anchor === RE2Set.ANCHOR_START) {
7978
8035
  internalAnchor = RE2Flags.ANCHOR_START;
@@ -7995,13 +8052,19 @@
7995
8052
  * Transform JS regex string to RE2 regex string
7996
8053
  */
7997
8054
  class TranslateRegExpString {
7998
- static isUpperCaseAlpha(ch) {
7999
- return 'A' <= ch && ch <= 'Z';
8000
- }
8001
8055
  static isHexadecimal(ch) {
8002
8056
  return '0' <= ch && ch <= '9' || 'A' <= ch && ch <= 'F' || 'a' <= ch && ch <= 'f';
8003
8057
  }
8004
8058
  static translate(data) {
8059
+ let prefixFlags = '';
8060
+ if (data instanceof RegExp) {
8061
+ if (data.ignoreCase) prefixFlags += 'i';
8062
+ if (data.multiline) prefixFlags += 'm';
8063
+ if (data.dotAll) prefixFlags += 's';
8064
+
8065
+ // execution flags ('g', 'y') are safely ignored here.
8066
+ data = data.source;
8067
+ }
8005
8068
  if (typeof data !== 'string') {
8006
8069
  return data;
8007
8070
  }
@@ -8012,6 +8075,7 @@
8012
8075
  result = '(?:)';
8013
8076
  changed = true;
8014
8077
  }
8078
+ let inCharClass = false;
8015
8079
  let i = 0;
8016
8080
  while (i < size) {
8017
8081
  let ch = data[i];
@@ -8029,54 +8093,160 @@
8029
8093
  {
8030
8094
  if (i + 2 < size) {
8031
8095
  let nextCh = data[i + 2];
8032
- if (TranslateRegExpString.isUpperCaseAlpha(nextCh)) {
8096
+ let code = nextCh.charCodeAt(0);
8097
+ if (code >= 65 && code <= 90 || code >= 97 && code <= 122) {
8098
+ let val = code % 32;
8033
8099
  result += '\\x';
8034
- result += (nextCh.charCodeAt(0) - 64 >> 4).toString(16).toUpperCase();
8035
- result += (nextCh.charCodeAt(0) - 64 & 15).toString(16).toUpperCase();
8100
+ result += (val >> 4).toString(16).toUpperCase();
8101
+ result += (val & 15).toString(16).toUpperCase();
8036
8102
  i += 3;
8037
8103
  changed = true;
8038
8104
  continue;
8039
8105
  }
8040
8106
  }
8041
- result += '\\c';
8107
+ result += 'c';
8042
8108
  i += 2;
8109
+ changed = true;
8043
8110
  continue;
8044
8111
  }
8045
8112
  case 'u':
8046
8113
  {
8047
8114
  if (i + 2 < size) {
8048
8115
  let nextCh = data[i + 2];
8049
- if (TranslateRegExpString.isHexadecimal(nextCh)) {
8050
- result += '\\x{' + nextCh;
8051
- i += 3;
8052
- for (let j = 0; j < 3 && i < size; ++i, ++j) {
8053
- nextCh = data[i];
8054
- if (!TranslateRegExpString.isHexadecimal(nextCh)) {
8116
+ if (nextCh === '{') {
8117
+ // Must have a closing brace and at least one valid hex digit inside
8118
+ let j = i + 3;
8119
+ let hasHex = false;
8120
+ let closed = false;
8121
+ while (j < size) {
8122
+ const hexChar = data[j];
8123
+ if (hexChar === '}') {
8124
+ closed = true;
8125
+ break;
8126
+ }
8127
+ if (!TranslateRegExpString.isHexadecimal(hexChar)) {
8055
8128
  break;
8056
8129
  }
8057
- result += nextCh;
8130
+ hasHex = true;
8131
+ j++;
8132
+ }
8133
+ if (closed && hasHex) {
8134
+ result += '\\x';
8135
+ i += 2;
8136
+ changed = true;
8137
+ continue;
8138
+ }
8139
+ } else if (i + 5 < size) {
8140
+ let isHex4 = true;
8141
+ for (let j = 0; j < 4; j++) {
8142
+ if (!TranslateRegExpString.isHexadecimal(data[i + 2 + j])) {
8143
+ isHex4 = false;
8144
+ break;
8145
+ }
8146
+ }
8147
+ if (isHex4) {
8148
+ result += '\\x{' + data.substring(i + 2, i + 6) + '}';
8149
+ i += 6;
8150
+ changed = true;
8151
+ continue;
8058
8152
  }
8059
- result += '}';
8060
- changed = true;
8061
- continue;
8062
- } else if (nextCh === '{') {
8063
- result += '\\x';
8064
- i += 2;
8065
- changed = true;
8066
- continue;
8067
8153
  }
8068
8154
  }
8069
- result += '\\u';
8155
+
8156
+ // Graceful degradation for invalid/unclosed \u sequences
8157
+ result += 'u';
8158
+ i += 2;
8159
+ changed = true;
8160
+ continue;
8161
+ }
8162
+ case 'x':
8163
+ {
8164
+ let isValidHex = false;
8165
+ if (i + 2 < size && data[i + 2] === '{') {
8166
+ // Must have a closing brace and at least one valid hex digit inside
8167
+ let j = i + 3;
8168
+ let hasHex = false;
8169
+ let closed = false;
8170
+ while (j < size) {
8171
+ const hexChar = data[j];
8172
+ if (hexChar === '}') {
8173
+ closed = true;
8174
+ break;
8175
+ }
8176
+ if (!TranslateRegExpString.isHexadecimal(hexChar)) {
8177
+ break;
8178
+ }
8179
+ hasHex = true;
8180
+ j++;
8181
+ }
8182
+ if (closed && hasHex) {
8183
+ isValidHex = true;
8184
+ }
8185
+ } else if (i + 3 < size && TranslateRegExpString.isHexadecimal(data[i + 2]) && TranslateRegExpString.isHexadecimal(data[i + 3])) {
8186
+ isValidHex = true;
8187
+ }
8188
+ if (isValidHex) {
8189
+ result += '\\x';
8190
+ i += 2;
8191
+ } else {
8192
+ result += 'x';
8193
+ i += 2;
8194
+ changed = true;
8195
+ }
8196
+ continue;
8197
+ }
8198
+ // Whitelist of valid RE2/JS alphanumeric escapes
8199
+ case 'n':
8200
+ case 'r':
8201
+ case 't':
8202
+ case 'a':
8203
+ case 'f':
8204
+ case 'v':
8205
+ case 'd':
8206
+ case 'D':
8207
+ case 's':
8208
+ case 'S':
8209
+ case 'w':
8210
+ case 'W':
8211
+ case 'b':
8212
+ case 'B':
8213
+ case 'p':
8214
+ case 'P':
8215
+ case 'A':
8216
+ case 'z':
8217
+ case 'Q':
8218
+ case 'E':
8219
+ case '0':
8220
+ case '1':
8221
+ case '2':
8222
+ case '3':
8223
+ case '4':
8224
+ case '5':
8225
+ case '6':
8226
+ case '7':
8227
+ {
8228
+ result += '\\' + ch;
8070
8229
  i += 2;
8071
8230
  continue;
8072
8231
  }
8073
8232
  default:
8074
8233
  {
8075
- result += '\\';
8076
8234
  let cp = data.codePointAt(i + 1);
8077
- let symSize = Utils.charCount(cp);
8078
- result += data.substring(i + 1, i + 1 + symSize);
8079
- i += symSize + 1;
8235
+ let isAlphaNum = cp >= 48 && cp <= 57 || cp >= 65 && cp <= 90 || cp >= 97 && cp <= 122;
8236
+ if (isAlphaNum) {
8237
+ // Invalid JS alphanumeric escape sequence (e.g. \8, \9, \e, \K)
8238
+ // Gracefully degrade to the literal character to prevent RE2 syntax crashes
8239
+ let symSize = Utils.charCount(cp);
8240
+ result += data.substring(i + 1, i + 1 + symSize);
8241
+ i += symSize + 1;
8242
+ changed = true;
8243
+ } else {
8244
+ // Escaped symbol (e.g. \., \*, \])
8245
+ result += '\\';
8246
+ let symSize = Utils.charCount(cp);
8247
+ result += data.substring(i + 1, i + 1 + symSize);
8248
+ i += symSize + 1;
8249
+ }
8080
8250
  continue;
8081
8251
  }
8082
8252
  }
@@ -8086,7 +8256,13 @@
8086
8256
  i += 1;
8087
8257
  changed = true;
8088
8258
  continue;
8089
- } else if (ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
8259
+ } else if (ch === '[') {
8260
+ // Track entry into a character class (protects syntax inside)
8261
+ inCharClass = true;
8262
+ } else if (ch === ']') {
8263
+ // Track exit of a character class
8264
+ inCharClass = false;
8265
+ } else if (!inCharClass && ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
8090
8266
  if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
8091
8267
  result += '(?P<';
8092
8268
  i += 3;
@@ -8099,7 +8275,13 @@
8099
8275
  result += data.substring(i, i + symSize);
8100
8276
  i += symSize;
8101
8277
  }
8102
- return changed ? result : data;
8278
+ const finalResult = changed ? result : data;
8279
+
8280
+ // Append any extracted inline flags
8281
+ if (prefixFlags.length > 0) {
8282
+ return `(?${prefixFlags})${finalResult}`;
8283
+ }
8284
+ return finalResult;
8103
8285
  }
8104
8286
  }
8105
8287
 
@@ -8177,7 +8359,7 @@
8177
8359
  * RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
8178
8360
  * resulting regex is safe and properly formatted before compilation.
8179
8361
  *
8180
- * @param {string} expr - The regular expression string to be translated.
8362
+ * @param {string|RegExp} expr - The regular expression string to be translated.
8181
8363
  * @returns {string} - The transformed regular expression string, ready for compilation.
8182
8364
  */
8183
8365
  static translateRegExp(expr) {
@@ -8221,7 +8403,7 @@
8221
8403
  * Matches a string against a regular expression.
8222
8404
  *
8223
8405
  * @param {string} regex the regular expression
8224
- * @param {string|number[]} input the input
8406
+ * @param {string|number[]|Uint8Array} input the input
8225
8407
  * @returns {boolean} true if the regular expression matches the entire input
8226
8408
  * @throws RE2JSSyntaxException if the regular expression is malformed
8227
8409
  */
@@ -8288,7 +8470,7 @@
8288
8470
  /**
8289
8471
  * Matches a string against a regular expression.
8290
8472
  *
8291
- * @param {string|number[]} input the input
8473
+ * @param {string|number[]|Uint8Array} input the input
8292
8474
  * @returns {boolean} true if the regular expression matches the entire input
8293
8475
  */
8294
8476
  matches(input) {
@@ -8298,11 +8480,11 @@
8298
8480
  /**
8299
8481
  * Creates a new {@code Matcher} matching the pattern against the input.
8300
8482
  *
8301
- * @param {string|number[]} input the input string
8483
+ * @param {string|number[]|Uint8Array} input the input string
8302
8484
  * @returns {Matcher}
8303
8485
  */
8304
8486
  matcher(input) {
8305
- if (Array.isArray(input)) {
8487
+ if (Utils.isByteArray(input)) {
8306
8488
  input = MatcherInput.utf8(input);
8307
8489
  }
8308
8490
  return new Matcher(this, input);
@@ -8314,11 +8496,11 @@
8314
8496
  * a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
8315
8497
  * and guarantees execution on the high-speed DFA engine whenever possible.
8316
8498
  *
8317
- * @param {string|number[]} input - The input string or UTF-8 byte array to test against.
8499
+ * @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
8318
8500
  * @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
8319
8501
  */
8320
8502
  test(input) {
8321
- if (Array.isArray(input)) {
8503
+ if (Utils.isByteArray(input)) {
8322
8504
  // Reuse the existing UTF-8 fast-path method
8323
8505
  return this.re2Input.matchUTF8(input);
8324
8506
  }
@@ -8333,11 +8515,11 @@
8333
8515
  * faster because it does not request capture group data. By requesting 0 capture groups,
8334
8516
  * it securely routes execution through the DFA fast-path.
8335
8517
  *
8336
- * @param {string|number[]} input - The input string or UTF-8 byte array to test against.
8518
+ * @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
8337
8519
  * @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
8338
8520
  */
8339
8521
  testExact(input) {
8340
- const machineInput = Array.isArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
8522
+ const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
8341
8523
  return this.re2Input.executeEngine(machineInput, 0, RE2Flags.ANCHOR_BOTH, 0) !== null;
8342
8524
  }
8343
8525