re2js 2.3.0 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -18
- package/build/index.cjs.cjs +106 -36
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +27 -14
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +106 -36
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +106 -36
- package/build/index.umd.js.map +1 -1
- package/package.json +5 -4
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.3.0
|
|
5
|
+
* @version v2.3.2
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -633,6 +633,31 @@
|
|
|
633
633
|
}
|
|
634
634
|
}
|
|
635
635
|
|
|
636
|
+
/**
|
|
637
|
+
* Size of the precomputed single-byte lookup table.
|
|
638
|
+
* Covers standard ASCII and Latin-1 characters for fast-path execution.
|
|
639
|
+
*/
|
|
640
|
+
const FAST_PATH_TABLE_SIZE = 256;
|
|
641
|
+
/**
|
|
642
|
+
* Precomputed lookup table for Word Boundary (\b, \B) assertions.
|
|
643
|
+
* * By precomputing the boolean results for standard ASCII word ranges
|
|
644
|
+
* ('a'-'z', 'A'-'Z', '0'-'9', '_'), we completely eliminate 4 logical
|
|
645
|
+
* branches from the NFA's hot execution loop. This prevents costly
|
|
646
|
+
* CPU branch mispredictions when scanning large strings.
|
|
647
|
+
*/
|
|
648
|
+
const WORD_RUNE_TABLE = new Uint8Array(FAST_PATH_TABLE_SIZE);
|
|
649
|
+
for (let i = 0; i < FAST_PATH_TABLE_SIZE; i++) {
|
|
650
|
+
WORD_RUNE_TABLE[i] = 97 <= i && i <= 122 ||
|
|
651
|
+
// 'a' - 'z'
|
|
652
|
+
65 <= i && i <= 90 ||
|
|
653
|
+
// 'A' - 'Z'
|
|
654
|
+
48 <= i && i <= 57 ||
|
|
655
|
+
// '0' - '9'
|
|
656
|
+
i === 95 // '_'
|
|
657
|
+
? 1 : 0;
|
|
658
|
+
}
|
|
659
|
+
let cachedNativeEncoder = null;
|
|
660
|
+
let cachedNativeDecoder = null;
|
|
636
661
|
/**
|
|
637
662
|
* Various constants and helper utilities.
|
|
638
663
|
*/
|
|
@@ -731,12 +756,21 @@
|
|
|
731
756
|
return out;
|
|
732
757
|
}
|
|
733
758
|
|
|
734
|
-
// Returns the array of runes in the specified
|
|
759
|
+
// Returns the array of runes in the specified JS UTF-16 string.
|
|
735
760
|
static stringToRunes(str) {
|
|
736
|
-
|
|
761
|
+
const string = String(str);
|
|
762
|
+
const runes = [];
|
|
763
|
+
let i = 0;
|
|
764
|
+
while (i < string.length) {
|
|
765
|
+
const cp = string.codePointAt(i);
|
|
766
|
+
runes.push(cp);
|
|
767
|
+
// Surrogate pairs (Emojis, etc.) are > 0xFFFF
|
|
768
|
+
i += cp > Unicode.MAX_BMP ? 2 : 1;
|
|
769
|
+
}
|
|
770
|
+
return runes;
|
|
737
771
|
}
|
|
738
772
|
|
|
739
|
-
// Returns the
|
|
773
|
+
// Returns the JS UTF-16 string containing the single rune |r|.
|
|
740
774
|
static runeToString(r) {
|
|
741
775
|
return String.fromCodePoint(r);
|
|
742
776
|
}
|
|
@@ -745,7 +779,7 @@
|
|
|
745
779
|
// during the evaluation of the \b and \B zero-width assertions.
|
|
746
780
|
// These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
|
|
747
781
|
static isWordRune(r) {
|
|
748
|
-
return
|
|
782
|
+
return r < FAST_PATH_TABLE_SIZE ? WORD_RUNE_TABLE[r] === 1 : false;
|
|
749
783
|
}
|
|
750
784
|
|
|
751
785
|
// emptyOpContext returns the zero-width assertions satisfied at the position
|
|
@@ -758,21 +792,24 @@
|
|
|
758
792
|
static emptyOpContext(r1, r2) {
|
|
759
793
|
let op = 0;
|
|
760
794
|
if (r1 < 0) {
|
|
761
|
-
op |=
|
|
795
|
+
op |= Utils.EMPTY_BEGIN_TEXT | Utils.EMPTY_BEGIN_LINE;
|
|
762
796
|
}
|
|
763
|
-
|
|
764
|
-
|
|
797
|
+
// Hardcode 10 for '\n'
|
|
798
|
+
if (r1 === 10) {
|
|
799
|
+
op |= Utils.EMPTY_BEGIN_LINE;
|
|
765
800
|
}
|
|
766
801
|
if (r2 < 0) {
|
|
767
|
-
op |=
|
|
802
|
+
op |= Utils.EMPTY_END_TEXT | Utils.EMPTY_END_LINE;
|
|
768
803
|
}
|
|
769
|
-
|
|
770
|
-
|
|
804
|
+
|
|
805
|
+
// Hardcode 10 for '\n'
|
|
806
|
+
if (r2 === 10) {
|
|
807
|
+
op |= Utils.EMPTY_END_LINE;
|
|
771
808
|
}
|
|
772
|
-
if (
|
|
773
|
-
op |=
|
|
809
|
+
if (Utils.isWordRune(r1) !== Utils.isWordRune(r2)) {
|
|
810
|
+
op |= Utils.EMPTY_WORD_BOUNDARY;
|
|
774
811
|
} else {
|
|
775
|
-
op |=
|
|
812
|
+
op |= Utils.EMPTY_NO_WORD_BOUNDARY;
|
|
776
813
|
}
|
|
777
814
|
return op;
|
|
778
815
|
}
|
|
@@ -796,9 +833,23 @@
|
|
|
796
833
|
static charCount(codePoint) {
|
|
797
834
|
return codePoint > Unicode.MAX_BMP ? 2 : 1;
|
|
798
835
|
}
|
|
836
|
+
|
|
837
|
+
/**
|
|
838
|
+
* High-speed conversion from TypedArrays to standard JS Arrays.
|
|
839
|
+
* Bypasses the expensive Symbol.iterator overhead of Array.from()
|
|
840
|
+
*/
|
|
841
|
+
static toArray(typedArray) {
|
|
842
|
+
const len = typedArray.length;
|
|
843
|
+
const res = new Array(len);
|
|
844
|
+
for (let i = 0; i < len; i++) {
|
|
845
|
+
res[i] = typedArray[i];
|
|
846
|
+
}
|
|
847
|
+
return res;
|
|
848
|
+
}
|
|
799
849
|
static stringToUtf8ByteArray(str) {
|
|
800
850
|
if (globalThis.TextEncoder) {
|
|
801
|
-
|
|
851
|
+
if (!cachedNativeEncoder) cachedNativeEncoder = new TextEncoder();
|
|
852
|
+
return Utils.toArray(cachedNativeEncoder.encode(str));
|
|
802
853
|
} else {
|
|
803
854
|
// fallback, if no TextEncoder
|
|
804
855
|
let out = [],
|
|
@@ -828,7 +879,9 @@
|
|
|
828
879
|
}
|
|
829
880
|
static utf8ByteArrayToString(bytes) {
|
|
830
881
|
if (globalThis.TextDecoder) {
|
|
831
|
-
|
|
882
|
+
if (!cachedNativeDecoder) cachedNativeDecoder = new TextDecoder('utf-8');
|
|
883
|
+
const view = bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes);
|
|
884
|
+
return cachedNativeDecoder.decode(view);
|
|
832
885
|
} else {
|
|
833
886
|
// fallback, if no TextDecoder
|
|
834
887
|
let out = [],
|
|
@@ -1125,15 +1178,34 @@
|
|
|
1125
1178
|
if (targetLength === 0) {
|
|
1126
1179
|
return fromIndex <= this.end ? fromIndex : -1;
|
|
1127
1180
|
}
|
|
1181
|
+
const firstByte = target[0];
|
|
1128
1182
|
let limit = this.end - targetLength;
|
|
1129
|
-
|
|
1130
|
-
|
|
1183
|
+
// Feature detection: Native TypedArray indexOf (ES2015)
|
|
1184
|
+
const hasNativeIndexOf = typeof source.indexOf === 'function';
|
|
1185
|
+
let i = fromIndex;
|
|
1186
|
+
while (i <= limit) {
|
|
1187
|
+
// Fast-forward to the first matching byte using C++ bindings if available
|
|
1188
|
+
if (hasNativeIndexOf) {
|
|
1189
|
+
i = source.indexOf(firstByte, i);
|
|
1190
|
+
if (i === -1 || i > limit) return -1;
|
|
1191
|
+
} else {
|
|
1192
|
+
// Fallback: Manual loop
|
|
1193
|
+
while (i <= limit && source[i] !== firstByte) i++;
|
|
1194
|
+
if (i > limit) return -1;
|
|
1195
|
+
}
|
|
1196
|
+
|
|
1197
|
+
// First byte matches, verify the rest of the target sequence
|
|
1198
|
+
let match = true;
|
|
1199
|
+
for (let j = 1; j < targetLength; j++) {
|
|
1131
1200
|
if (source[i + j] !== target[j]) {
|
|
1201
|
+
match = false;
|
|
1132
1202
|
break;
|
|
1133
|
-
} else if (j === targetLength - 1) {
|
|
1134
|
-
return i;
|
|
1135
1203
|
}
|
|
1136
1204
|
}
|
|
1205
|
+
if (match) {
|
|
1206
|
+
return i;
|
|
1207
|
+
}
|
|
1208
|
+
i++;
|
|
1137
1209
|
}
|
|
1138
1210
|
return -1;
|
|
1139
1211
|
}
|
|
@@ -1205,8 +1277,10 @@
|
|
|
1205
1277
|
// Returns a bitmask of EMPTY_* flags.
|
|
1206
1278
|
context(pos) {
|
|
1207
1279
|
pos += this.start;
|
|
1208
|
-
|
|
1209
|
-
|
|
1280
|
+
|
|
1281
|
+
// We only care about ASCII word runes and newlines for context boundaries
|
|
1282
|
+
const r1 = pos > this.start && pos <= this.end ? this.charSequence.charCodeAt(pos - 1) : -1;
|
|
1283
|
+
const r2 = pos < this.end ? this.charSequence.charCodeAt(pos) : -1;
|
|
1210
1284
|
return Utils.emptyOpContext(r1, r2);
|
|
1211
1285
|
}
|
|
1212
1286
|
prefixLength(re2) {
|
|
@@ -1336,10 +1410,6 @@
|
|
|
1336
1410
|
* @author rsc@google.com (Russ Cox)
|
|
1337
1411
|
*/
|
|
1338
1412
|
|
|
1339
|
-
/**
|
|
1340
|
-
* @typedef {import('./index').RE2JS} RE2JS_Pattern
|
|
1341
|
-
*/
|
|
1342
|
-
|
|
1343
1413
|
class Matcher {
|
|
1344
1414
|
/**
|
|
1345
1415
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
@@ -1377,8 +1447,8 @@
|
|
|
1377
1447
|
}
|
|
1378
1448
|
/**
|
|
1379
1449
|
*
|
|
1380
|
-
* @param {
|
|
1381
|
-
* @param {
|
|
1450
|
+
* @param {RE2JS} pattern
|
|
1451
|
+
* @param {string|number[]|Uint8Array} input
|
|
1382
1452
|
*/
|
|
1383
1453
|
constructor(pattern, input) {
|
|
1384
1454
|
if (pattern === null) {
|
|
@@ -1386,7 +1456,7 @@
|
|
|
1386
1456
|
}
|
|
1387
1457
|
/**
|
|
1388
1458
|
* The pattern being matched.
|
|
1389
|
-
* @type {
|
|
1459
|
+
* @type {RE2JS}
|
|
1390
1460
|
*/
|
|
1391
1461
|
this.patternInput = pattern;
|
|
1392
1462
|
const re2 = this.patternInput.re2();
|
|
@@ -1411,7 +1481,7 @@
|
|
|
1411
1481
|
|
|
1412
1482
|
/**
|
|
1413
1483
|
* Returns the {@code RE2JS} associated with this {@code Matcher}.
|
|
1414
|
-
* @returns {
|
|
1484
|
+
* @returns {RE2JS}
|
|
1415
1485
|
*/
|
|
1416
1486
|
pattern() {
|
|
1417
1487
|
return this.patternInput;
|
|
@@ -1441,7 +1511,7 @@
|
|
|
1441
1511
|
|
|
1442
1512
|
/**
|
|
1443
1513
|
* Resets the {@code Matcher} and changes the input.
|
|
1444
|
-
* @param {
|
|
1514
|
+
* @param {MatcherInputBase} input
|
|
1445
1515
|
* @returns {Matcher} the {@code Matcher} itself, for chained method calls
|
|
1446
1516
|
*/
|
|
1447
1517
|
resetMatcherInput(input) {
|
|
@@ -2217,7 +2287,7 @@
|
|
|
2217
2287
|
return Utils.emptyInts();
|
|
2218
2288
|
}
|
|
2219
2289
|
// Use subarray() to create a zero-allocation view before converting
|
|
2220
|
-
return
|
|
2290
|
+
return Utils.toArray(this.matchcap.subarray(0, this.ncap));
|
|
2221
2291
|
}
|
|
2222
2292
|
|
|
2223
2293
|
// alloc() allocates a new thread with the given instruction.
|
|
@@ -3179,7 +3249,7 @@
|
|
|
3179
3249
|
}
|
|
3180
3250
|
|
|
3181
3251
|
// Must slice so we don't accidentally leak trailing arrays from previously recycled typed arrays
|
|
3182
|
-
const result = ncap === 0 ? [] :
|
|
3252
|
+
const result = ncap === 0 ? [] : Utils.toArray(b.matchcap.subarray(0, ncap));
|
|
3183
3253
|
bitStatePool.push(b);
|
|
3184
3254
|
return result;
|
|
3185
3255
|
}
|
|
@@ -3555,7 +3625,7 @@
|
|
|
3555
3625
|
matchcap[0] = 0;
|
|
3556
3626
|
matchcap[1] = pos;
|
|
3557
3627
|
}
|
|
3558
|
-
return ncap === 0 ? [] :
|
|
3628
|
+
return ncap === 0 ? [] : Utils.toArray(matchcap);
|
|
3559
3629
|
}
|
|
3560
3630
|
case Inst.RUNE:
|
|
3561
3631
|
if (!inst.matchRune(rune)) return null;
|
|
@@ -3604,7 +3674,7 @@
|
|
|
3604
3674
|
}
|
|
3605
3675
|
}
|
|
3606
3676
|
if (!matched) return null;
|
|
3607
|
-
return ncap === 0 ? [] :
|
|
3677
|
+
return ncap === 0 ? [] : Utils.toArray(matchcap);
|
|
3608
3678
|
}
|
|
3609
3679
|
}
|
|
3610
3680
|
|
|
@@ -5358,7 +5428,7 @@
|
|
|
5358
5428
|
return r;
|
|
5359
5429
|
}
|
|
5360
5430
|
lookingAt(s) {
|
|
5361
|
-
return this.
|
|
5431
|
+
return this.str.startsWith(s, this.position);
|
|
5362
5432
|
}
|
|
5363
5433
|
|
|
5364
5434
|
// Returns the rest of the pattern as a Java UTF-16 string.
|