re2js 2.3.1 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -15
- package/build/index.cjs.cjs +101 -27
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +101 -27
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +101 -27
- package/build/index.umd.js.map +1 -1
- package/package.json +5 -4
package/build/index.esm.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"AAu2CA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH;IACE;;;;;;;OAOG;IACH,6BAJW,MAAM,aACN,OAAO,GACL,MAAM,CA2BlB;IACD;;;;OAIG;IACH,qBAHW,KAAK,SACL,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,EA6BpC;IAvBC;;;OAGG;IACH,cAFU,KAAK,CAEY;IAG3B,qBAAqB;IACrB,mBADW,MAAM,CACqC;IAEtD,uBAAuB;IACvB,QADW,MAAM,EAAE,CACH;IAChB,qCAAqC;IACrC,aADW,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CACC;IAClC,qBAAqB;IACrB,sBADW,MAAM,CACqC;IAUxD;;;OAGG;IACH,WAFa,KAAK,CAIjB;IAED;;;;OAIG;IACH,SAFa,OAAO,CAiBnB;IAbC,qBAAqB;IACrB,uCAAoD;IAEpD,qBAAqB;IACrB,8BAAkB;IAElB,8BAAqB;IAGrB,+BAAsB;IAEtB,+BAAmB;IAIrB;;;;OAIG;IACH,yBAHW,gBAAgB,GACd,OAAO,CASnB;IAHC,2CAAyB;IAK3B;;;;;OAKG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;OAKG;IACH,YAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;;;;;OASG;IACH,eAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,GAAC,IAAI,CAgBvB;IAED;;;;OAIG;IACH,kBAFa,MAAM,CAAC,MAAM,EAAE,MAAM,GAAC,IAAI,CAAC,CAWvC;IAED;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,kBAkBC;IAED;;;;;OAKG;IACH,WAFa,OAAO,CAInB;IAED;;;;;OAKG;IACH,aAFa,OAAO,CAInB;IAED;;;;;;;OAOG;IACH,aAJW,MAAM,GAAC,IAAI,GACT,OAAO,CA4BnB;IAED;;;;;;OAMG;IACH,iBAaC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,OACN,MAAM,GACJ,MAAM,CAOlB;IAED;;;OAGG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,0BAUC;IAED;;;;OAIG;IACH,sCAgEC;IAED;;;;OAIG;IACH,oCAuGC;IAED;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;;;;;OAQG;IACH,wBALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;;OAQG;IACH,0BALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;OAOG;IACH,gBAWC;CACF;AAwsMD;;;;;;;;;GASG;AACH;IACE;;OAEG;IACH,gCAAuD;IACvD;;OAEG;IACH,sBAAmC;IACnC;;;OAGG;IACH,yBAAyC;IACzC;;OAEG;IACH,sCAAmE;IACnE;;OAEG;IACH,6BAAiD;IACjD;;OAEG;IACH,2BAA6C;IAE7C;;;;;;;;;;OAUG;IACH,kBAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;;;;;;OASG;IACH,6BAJW,MAAM,aACN,OAAO,GACL,MAAM,CAIlB;IAED;;;;;;;;;;OAUG;IACH,6BAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAIlB;IAED;;;;;OAKG;IACH,sBAJW,MAAM,UACN,MAAM,GACJ,KAAK,CA2BjB;IAED;;;;;;;OAOG;IACH,sBALW,MAAM,SACN,MAAM,GAAC
,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAKnB;IAED;;;OAGG;IACH,wBAWC;IAED;;;;OAIG;IACH,qBAHW,MAAM,SACN,MAAM,EAOhB;IAHC,qBAA2B;IAE3B,mBAAuB;IAGzB;;;OAGG;IACH,cAEC;IAED;;;OAGG;IACH,SAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,CAIlB;IACD,WAEC;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAInB;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAOnB;IAED;;;;;;;;OAQG;IACH,YAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAUnB;IAED;;;;;;;;OAQG;IACH,iBAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAKnB;IAED;;;;;;;;;;;;OAYG;IACH,aAJW,MAAM,UACN,MAAM,GACJ,MAAM,EAAE,CAgDpB;IAED;;;OAGG;IACH,YAFa,MAAM,CAIlB;IAED;;;;;;;;;OASG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;OAKG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,eAFa,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAIlC;IAED;;;;OAIG;IACH,cAHW,GAAC,GACC,OAAO,CAUnB;CACF;AAtsOD;;GAEG;AACH;CAMC;AAxDD;IACE,8BAA8B;IAC9B,qBADY,MAAM,EAIjB;CACF;AA+DD;;GAEG;AACH;CAMC;AApBD;;GAEG;AACH;CAMC;AAaD;;GAEG;AACH;CAMC;AAjFD;;GAEG;AACH;IACE;;;OAGG;IACH,mBAHW,MAAM,UACN,MAAM,GAAC,IAAI,EAcrB;IAJC,qBAAqB;IACrB,OADW,MAAM,CACC;IAClB,0BAA0B;IAC1B,OADW,MAAM,GAAC,IAAI,CACJ;IAGpB;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,cAFa,MAAM,GAAC,IAAI,CAIvB;CACF;AAqhND;IACE,qBAAqB;IACrB,mBADW,MAAM,CACuB;IACxC,qBAAqB;IACrB,qBADW,MAAM,CAC2B;IAC5C,qBAAqB;IACrB,oBADW,MAAM,CACyB;IAE1C;;;;OAIG;IACH,qBAHW,MAAM,UACN,MAAM,EAiBhB;IAdC,eAAoB;IACpB,gBAAoB;IAQpB,iBAAwB;IACxB,eAAiB;IACjB,kBAAgB;IAChB,gBAAe;IACf;;;;;;aAAoB;IAGtB;;;;;;OAMG;IACH,aAJW,MAAM,GACJ,MAAM,CAoBlB;IAED;;;;OAIG;IACH,WAFa,IAAI,CAahB;IAED;;;;OAIG;IACH,aAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,MAAM,EAAE,CAoBpB;CACF;AAphOD;;GAEG;AACH;IACE,qBAAkD;IAClD,oBAEC;IAED;;;OAGG;IACH,kBAFa,OAAO,CAInB;IAED;;;OAGG;IACH,mBAFa,OAAO,CAInB;CACF;AAm0GD;;GAEG;AACH;IAEI,YAAc;IACd,cAAc;IAGd,eAAe;IACf,gBAAkB;IAClB,cAAc;IAKhB,sBAEC;IAGD,kBAEC;IAID,uBAEC;IAID,sBAOC;IAKD,+BAWC;IAID,oBAoBC;IAeD,8BAYC;IACD,8BAYC;IACD;;;OAGG;IACH,YAFa,MAAM,CAelB;CACF;AArxDD;IACE,gCAA4B;IAC5B,uBASC;IARC,UAAgB;IAChB,0BAA2B;IAC3B,mBAAmB;IACnB,gBAAsB;IACtB,mBAAuB;IACvB,oBA
AoB;IACpB,gBAAmB;IACnB,cAAc;IAIhB;;;;aAuCC;IAGD,wBAyDC;IACD,mBAyCC;IAGD,kDA+CC;IAGD,yDA+CC;IAGD,0DAwCC;CACF"}
|
package/build/index.esm.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.3.1
|
|
5
|
+
* @version v2.3.2
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -627,6 +627,31 @@ class Unicode {
|
|
|
627
627
|
}
|
|
628
628
|
}
|
|
629
629
|
|
|
630
|
+
/**
|
|
631
|
+
* Size of the precomputed single-byte lookup table.
|
|
632
|
+
* Covers standard ASCII and Latin-1 characters for fast-path execution.
|
|
633
|
+
*/
|
|
634
|
+
const FAST_PATH_TABLE_SIZE = 256;
|
|
635
|
+
/**
|
|
636
|
+
* Precomputed lookup table for Word Boundary (\b, \B) assertions.
|
|
637
|
+
* By precomputing the boolean results for standard ASCII word ranges
|
|
638
|
+
* ('a'-'z', 'A'-'Z', '0'-'9', '_'), we completely eliminate 4 logical
|
|
639
|
+
* branches from the NFA's hot execution loop. This prevents costly
|
|
640
|
+
* CPU branch mispredictions when scanning large strings.
|
|
641
|
+
*/
|
|
642
|
+
const WORD_RUNE_TABLE = new Uint8Array(FAST_PATH_TABLE_SIZE);
|
|
643
|
+
for (let i = 0; i < FAST_PATH_TABLE_SIZE; i++) {
|
|
644
|
+
WORD_RUNE_TABLE[i] = 97 <= i && i <= 122 ||
|
|
645
|
+
// 'a' - 'z'
|
|
646
|
+
65 <= i && i <= 90 ||
|
|
647
|
+
// 'A' - 'Z'
|
|
648
|
+
48 <= i && i <= 57 ||
|
|
649
|
+
// '0' - '9'
|
|
650
|
+
i === 95 // '_'
|
|
651
|
+
? 1 : 0;
|
|
652
|
+
}
|
|
653
|
+
let cachedNativeEncoder = null;
|
|
654
|
+
let cachedNativeDecoder = null;
|
|
630
655
|
/**
|
|
631
656
|
* Various constants and helper utilities.
|
|
632
657
|
*/
|
|
@@ -725,12 +750,21 @@ class Utils {
|
|
|
725
750
|
return out;
|
|
726
751
|
}
|
|
727
752
|
|
|
728
|
-
// Returns the array of runes in the specified
|
|
753
|
+
// Returns the array of runes in the specified JS UTF-16 string.
|
|
729
754
|
static stringToRunes(str) {
|
|
730
|
-
|
|
755
|
+
const string = String(str);
|
|
756
|
+
const runes = [];
|
|
757
|
+
let i = 0;
|
|
758
|
+
while (i < string.length) {
|
|
759
|
+
const cp = string.codePointAt(i);
|
|
760
|
+
runes.push(cp);
|
|
761
|
+
// Surrogate pairs (Emojis, etc.) are > 0xFFFF
|
|
762
|
+
i += cp > Unicode.MAX_BMP ? 2 : 1;
|
|
763
|
+
}
|
|
764
|
+
return runes;
|
|
731
765
|
}
|
|
732
766
|
|
|
733
|
-
// Returns the
|
|
767
|
+
// Returns the JS UTF-16 string containing the single rune |r|.
|
|
734
768
|
static runeToString(r) {
|
|
735
769
|
return String.fromCodePoint(r);
|
|
736
770
|
}
|
|
@@ -739,7 +773,7 @@ class Utils {
|
|
|
739
773
|
// during the evaluation of the \b and \B zero-width assertions.
|
|
740
774
|
// These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
|
|
741
775
|
static isWordRune(r) {
|
|
742
|
-
return
|
|
776
|
+
return r < FAST_PATH_TABLE_SIZE ? WORD_RUNE_TABLE[r] === 1 : false;
|
|
743
777
|
}
|
|
744
778
|
|
|
745
779
|
// emptyOpContext returns the zero-width assertions satisfied at the position
|
|
@@ -752,21 +786,24 @@ class Utils {
|
|
|
752
786
|
static emptyOpContext(r1, r2) {
|
|
753
787
|
let op = 0;
|
|
754
788
|
if (r1 < 0) {
|
|
755
|
-
op |=
|
|
789
|
+
op |= Utils.EMPTY_BEGIN_TEXT | Utils.EMPTY_BEGIN_LINE;
|
|
756
790
|
}
|
|
757
|
-
|
|
758
|
-
|
|
791
|
+
// Hardcode 10 for '\n'
|
|
792
|
+
if (r1 === 10) {
|
|
793
|
+
op |= Utils.EMPTY_BEGIN_LINE;
|
|
759
794
|
}
|
|
760
795
|
if (r2 < 0) {
|
|
761
|
-
op |=
|
|
796
|
+
op |= Utils.EMPTY_END_TEXT | Utils.EMPTY_END_LINE;
|
|
762
797
|
}
|
|
763
|
-
|
|
764
|
-
|
|
798
|
+
|
|
799
|
+
// Hardcode 10 for '\n'
|
|
800
|
+
if (r2 === 10) {
|
|
801
|
+
op |= Utils.EMPTY_END_LINE;
|
|
765
802
|
}
|
|
766
|
-
if (
|
|
767
|
-
op |=
|
|
803
|
+
if (Utils.isWordRune(r1) !== Utils.isWordRune(r2)) {
|
|
804
|
+
op |= Utils.EMPTY_WORD_BOUNDARY;
|
|
768
805
|
} else {
|
|
769
|
-
op |=
|
|
806
|
+
op |= Utils.EMPTY_NO_WORD_BOUNDARY;
|
|
770
807
|
}
|
|
771
808
|
return op;
|
|
772
809
|
}
|
|
@@ -790,9 +827,23 @@ class Utils {
|
|
|
790
827
|
static charCount(codePoint) {
|
|
791
828
|
return codePoint > Unicode.MAX_BMP ? 2 : 1;
|
|
792
829
|
}
|
|
830
|
+
|
|
831
|
+
/**
|
|
832
|
+
* High-speed conversion from TypedArrays to standard JS Arrays.
|
|
833
|
+
* Bypasses the expensive Symbol.iterator overhead of Array.from()
|
|
834
|
+
*/
|
|
835
|
+
static toArray(typedArray) {
|
|
836
|
+
const len = typedArray.length;
|
|
837
|
+
const res = new Array(len);
|
|
838
|
+
for (let i = 0; i < len; i++) {
|
|
839
|
+
res[i] = typedArray[i];
|
|
840
|
+
}
|
|
841
|
+
return res;
|
|
842
|
+
}
|
|
793
843
|
static stringToUtf8ByteArray(str) {
|
|
794
844
|
if (globalThis.TextEncoder) {
|
|
795
|
-
|
|
845
|
+
if (!cachedNativeEncoder) cachedNativeEncoder = new TextEncoder();
|
|
846
|
+
return Utils.toArray(cachedNativeEncoder.encode(str));
|
|
796
847
|
} else {
|
|
797
848
|
// fallback, if no TextEncoder
|
|
798
849
|
let out = [],
|
|
@@ -822,7 +873,9 @@ class Utils {
|
|
|
822
873
|
}
|
|
823
874
|
static utf8ByteArrayToString(bytes) {
|
|
824
875
|
if (globalThis.TextDecoder) {
|
|
825
|
-
|
|
876
|
+
if (!cachedNativeDecoder) cachedNativeDecoder = new TextDecoder('utf-8');
|
|
877
|
+
const view = bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes);
|
|
878
|
+
return cachedNativeDecoder.decode(view);
|
|
826
879
|
} else {
|
|
827
880
|
// fallback, if no TextDecoder
|
|
828
881
|
let out = [],
|
|
@@ -1119,15 +1172,34 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1119
1172
|
if (targetLength === 0) {
|
|
1120
1173
|
return fromIndex <= this.end ? fromIndex : -1;
|
|
1121
1174
|
}
|
|
1175
|
+
const firstByte = target[0];
|
|
1122
1176
|
let limit = this.end - targetLength;
|
|
1123
|
-
|
|
1124
|
-
|
|
1177
|
+
// Feature detection: Native TypedArray indexOf (ES2015)
|
|
1178
|
+
const hasNativeIndexOf = typeof source.indexOf === 'function';
|
|
1179
|
+
let i = fromIndex;
|
|
1180
|
+
while (i <= limit) {
|
|
1181
|
+
// Fast-forward to the first matching byte using C++ bindings if available
|
|
1182
|
+
if (hasNativeIndexOf) {
|
|
1183
|
+
i = source.indexOf(firstByte, i);
|
|
1184
|
+
if (i === -1 || i > limit) return -1;
|
|
1185
|
+
} else {
|
|
1186
|
+
// Fallback: Manual loop
|
|
1187
|
+
while (i <= limit && source[i] !== firstByte) i++;
|
|
1188
|
+
if (i > limit) return -1;
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
// First byte matches, verify the rest of the target sequence
|
|
1192
|
+
let match = true;
|
|
1193
|
+
for (let j = 1; j < targetLength; j++) {
|
|
1125
1194
|
if (source[i + j] !== target[j]) {
|
|
1195
|
+
match = false;
|
|
1126
1196
|
break;
|
|
1127
|
-
} else if (j === targetLength - 1) {
|
|
1128
|
-
return i;
|
|
1129
1197
|
}
|
|
1130
1198
|
}
|
|
1199
|
+
if (match) {
|
|
1200
|
+
return i;
|
|
1201
|
+
}
|
|
1202
|
+
i++;
|
|
1131
1203
|
}
|
|
1132
1204
|
return -1;
|
|
1133
1205
|
}
|
|
@@ -1199,8 +1271,10 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1199
1271
|
// Returns a bitmask of EMPTY_* flags.
|
|
1200
1272
|
context(pos) {
|
|
1201
1273
|
pos += this.start;
|
|
1202
|
-
|
|
1203
|
-
|
|
1274
|
+
|
|
1275
|
+
// We only care about ASCII word runes and newlines for context boundaries
|
|
1276
|
+
const r1 = pos > this.start && pos <= this.end ? this.charSequence.charCodeAt(pos - 1) : -1;
|
|
1277
|
+
const r2 = pos < this.end ? this.charSequence.charCodeAt(pos) : -1;
|
|
1204
1278
|
return Utils.emptyOpContext(r1, r2);
|
|
1205
1279
|
}
|
|
1206
1280
|
prefixLength(re2) {
|
|
@@ -2207,7 +2281,7 @@ class Machine {
|
|
|
2207
2281
|
return Utils.emptyInts();
|
|
2208
2282
|
}
|
|
2209
2283
|
// Use subarray() to create a zero-allocation view before converting
|
|
2210
|
-
return
|
|
2284
|
+
return Utils.toArray(this.matchcap.subarray(0, this.ncap));
|
|
2211
2285
|
}
|
|
2212
2286
|
|
|
2213
2287
|
// alloc() allocates a new thread with the given instruction.
|
|
@@ -3169,7 +3243,7 @@ class Backtracker {
|
|
|
3169
3243
|
}
|
|
3170
3244
|
|
|
3171
3245
|
// Must slice so we don't accidentally leak trailing arrays from previously recycled typed arrays
|
|
3172
|
-
const result = ncap === 0 ? [] :
|
|
3246
|
+
const result = ncap === 0 ? [] : Utils.toArray(b.matchcap.subarray(0, ncap));
|
|
3173
3247
|
bitStatePool.push(b);
|
|
3174
3248
|
return result;
|
|
3175
3249
|
}
|
|
@@ -3545,7 +3619,7 @@ class OnePass {
|
|
|
3545
3619
|
matchcap[0] = 0;
|
|
3546
3620
|
matchcap[1] = pos;
|
|
3547
3621
|
}
|
|
3548
|
-
return ncap === 0 ? [] :
|
|
3622
|
+
return ncap === 0 ? [] : Utils.toArray(matchcap);
|
|
3549
3623
|
}
|
|
3550
3624
|
case Inst.RUNE:
|
|
3551
3625
|
if (!inst.matchRune(rune)) return null;
|
|
@@ -3594,7 +3668,7 @@ class OnePass {
|
|
|
3594
3668
|
}
|
|
3595
3669
|
}
|
|
3596
3670
|
if (!matched) return null;
|
|
3597
|
-
return ncap === 0 ? [] :
|
|
3671
|
+
return ncap === 0 ? [] : Utils.toArray(matchcap);
|
|
3598
3672
|
}
|
|
3599
3673
|
}
|
|
3600
3674
|
|
|
@@ -5348,7 +5422,7 @@ class StringIterator {
|
|
|
5348
5422
|
return r;
|
|
5349
5423
|
}
|
|
5350
5424
|
lookingAt(s) {
|
|
5351
|
-
return this.
|
|
5425
|
+
return this.str.startsWith(s, this.position);
|
|
5352
5426
|
}
|
|
5353
5427
|
|
|
5354
5428
|
// Returns the rest of the pattern as a JS UTF-16 string.
|