re2js 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -25
- package/build/index.cjs.cjs +59 -45
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +59 -45
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +59 -45
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/build/index.esm.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"AAwlCA;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH;IACE;;;;;;;OAOG;IACH,6BAJW,MAAM,aACN,OAAO,GACL,MAAM,CA2BlB;IACD;;;;OAIG;IACH,qBAHW,KAAK,SACL,gBAAgB,GAAC,iBAAiB,GAAC,MAAM,EAAE,GAAC,MAAM,EA0B5D;IAnBC,oBAA2B;IAG3B,qBAAqB;IACrB,mBADW,MAAM,CACqC;IAEtD,uBAAuB;IACvB,QADW,MAAM,EAAE,CACH;IAChB,qCAAqC;IACrC,aADW,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CACC;IAClC,qBAAqB;IACrB,sBADW,MAAM,CACqC;IAUxD;;;OAGG;IACH,WAFa,KAAK,CAIjB;IAED;;;;OAIG;IACH,SAFa,OAAO,CAiBnB;IAbC,qBAAqB;IACrB,uCAAoD;IAEpD,qBAAqB;IACrB,8BAAkB;IAElB,8BAAqB;IAGrB,+BAAsB;IAEtB,+BAAmB;IAIrB;;;;OAIG;IACH,yBAHW,gBAAgB,GAAC,iBAAiB,GAChC,OAAO,CASnB;IAHC,+DAAyB;IAK3B;;;;;OAKG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;OAKG;IACH,YAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;;;;;OASG;IACH,eAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,cAHW,MAAM,GAAC,MAAM,GACV,MAAM,OAAA,CAgBnB;IACD;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,kBAqBC;IAED;;;;;OAKG;IACH,WAFa,OAAO,CAInB;IAED;;;;;OAKG;IACH,aAFa,OAAO,CAInB;IAED;;;;;;;OAOG;IACH,aAJW,MAAM,GACJ,OAAO,CAoBnB;IAED;;;;;;OAMG;IACH,iBAWC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,OACN,MAAM,GACJ,MAAM,CAOlB;IAED;;;OAGG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,0BAUC;IAED;;;;OAIG;IACH,sCA2DC;IAED;;;;OAIG;IACH,oCAiFC;IAED;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;;;;;OAQG;IACH,wBALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;;OAQG;IACH,0BALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;OAOG;IACH,gBAWC;CACF;AAq5ID;;;;;;;;;GASG;AACH;IACE;;OAEG;IACH,gCAA4B;IAC5B;;OAEG;IACH,sBAAkB;IAClB;;;OAGG;IACH,yBAAqB;IACrB;;OAEG;IACH,sCAAkC;IAClC;;OAEG;IACH,6BAA0B;IAE1B;;;;;;;;;;OAUG;IACH,kBAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;;;;;;OASG;IACH,6BAJW,MAAM,aACN,OAAO,GACL,MAAM,CAIlB;IAED;;;;;;;;;;OAUG;IACH,6BAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;;OAKG;IACH,sBAJW,MAAM,UACN,MAAM,GACJ,KAAK,CAwBjB;IAED;;;;;;;OAOG;IACH,sBALW,MAAM,SACN,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAKnB;IAED;;;OAGG;IACH,wBAWC;IAED;;;;OAIG;IACH,qBAHW,MAAM,SACN,MAAM,EAOhB;IAHC,qBAA2B;IAE3B
,mBAAuB;IAGzB;;;OAGG;IACH,cAEC;IAED;;;OAGG;IACH,SAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,CAIlB;IACD,WAEC;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAInB;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAOnB;IAED;;;;;;;;OAQG;IACH,YAHW,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAUnB;IAED;;;;;;;;OAQG;IACH,iBAHW,MAAM,GAAC,MAAM,EAAE,GACb,OAAO,CAKnB;IAED;;;;;;;;;;;;OAYG;IACH,aAJW,MAAM,UACN,MAAM,GACJ,MAAM,EAAE,CAgDpB;IAED;;;OAGG;IACH,YAFa,MAAM,CAIlB;IAED;;;;;;;;;OASG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;OAKG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,eAFa,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAIlC;IAED;;;;OAIG;IACH,cAHW,GAAC,GACC,OAAO,CAUnB;CACF;AA10KD;;GAEG;AACH;CAMC;AAxDD;IACE,8BAA8B;IAC9B,qBADY,MAAM,EAIjB;CACF;AA+DD;;GAEG;AACH;CAMC;AApBD;;GAEG;AACH;CAMC;AA3DD;;GAEG;AACH;IACE;;;OAGG;IACH,mBAHW,MAAM,UACN,MAAM,GAAC,IAAI,EAcrB;IAJC,qBAAqB;IACrB,OADW,MAAM,CACC;IAClB,0BAA0B;IAC1B,OADW,MAAM,GAAC,IAAI,CACJ;IAGpB;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,cAFa,MAAM,GAAC,IAAI,CAIvB;CACF;AA3TD;IACE,0BAGC;IADC,WAAkB;IAEpB,mBAEC;IACD;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,EAAE,CAIpB;IAED;;;OAGG;IACH,UAFa,MAAM,CAIlB;CACF;AACD;IACE,iCAGC;IADC,kBAAgC;IAElC,mBAEC;IAED;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,EAAE,CAIpB;IAED;;;OAGG;IACH,UAFa,MAAM,CAIlB;CACF;AAzFD;;GAEG;AACH;IACE,8BAAkD;IAClD,oBAEC;IAED;;;OAGG;IACH,kBAFa,OAAO,CAInB;IAED;;;OAGG;IACH,mBAFa,OAAO,CAInB;CACF"}
|
package/build/index.esm.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.0.0
|
|
5
|
+
* @version v2.0.1
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -359,6 +359,11 @@ class Unicode {
|
|
|
359
359
|
// Checked during test.
|
|
360
360
|
static MIN_FOLD = 0x0041;
|
|
361
361
|
static MAX_FOLD = 0x1e943;
|
|
362
|
+
static MIN_HIGH_SURROGATE = 0xd800;
|
|
363
|
+
static MAX_HIGH_SURROGATE = 0xdbff;
|
|
364
|
+
static MIN_LOW_SURROGATE = 0xdc00;
|
|
365
|
+
static MAX_LOW_SURROGATE = 0xdfff;
|
|
366
|
+
static MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
|
|
362
367
|
|
|
363
368
|
// is32 uses binary search to test whether rune is in the specified
|
|
364
369
|
// slice of 32-bit ranges.
|
|
@@ -665,9 +670,9 @@ class Utils {
|
|
|
665
670
|
} else if (c < 2048) {
|
|
666
671
|
out[p++] = c >> 6 | 192;
|
|
667
672
|
out[p++] = c & 63 | 128;
|
|
668
|
-
} else if ((c & 0xfc00) ===
|
|
673
|
+
} else if ((c & 0xfc00) === Unicode.MIN_HIGH_SURROGATE && i + 1 < str.length && (str.charCodeAt(i + 1) & 0xfc00) === Unicode.MIN_LOW_SURROGATE) {
|
|
669
674
|
// Surrogate Pair
|
|
670
|
-
c =
|
|
675
|
+
c = Unicode.MIN_SUPPLEMENTARY_CODE_POINT + ((c & 0x03ff) << 10) + (str.charCodeAt(++i) & 0x03ff);
|
|
671
676
|
out[p++] = c >> 18 | 240;
|
|
672
677
|
out[p++] = c >> 12 & 63 | 128;
|
|
673
678
|
out[p++] = c >> 6 & 63 | 128;
|
|
@@ -701,9 +706,9 @@ class Utils {
|
|
|
701
706
|
let c2 = bytes[pos++];
|
|
702
707
|
let c3 = bytes[pos++];
|
|
703
708
|
let c4 = bytes[pos++];
|
|
704
|
-
let u = ((c1 & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63) -
|
|
705
|
-
out[c++] = String.fromCharCode(
|
|
706
|
-
out[c++] = String.fromCharCode(
|
|
709
|
+
let u = ((c1 & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63) - Unicode.MIN_SUPPLEMENTARY_CODE_POINT;
|
|
710
|
+
out[c++] = String.fromCharCode(Unicode.MIN_HIGH_SURROGATE + (u >> 10));
|
|
711
|
+
out[c++] = String.fromCharCode(Unicode.MIN_LOW_SURROGATE + (u & 1023));
|
|
707
712
|
} else {
|
|
708
713
|
let c2 = bytes[pos++];
|
|
709
714
|
let c3 = bytes[pos++];
|
|
@@ -877,38 +882,34 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
877
882
|
// the lower 3 bits, and the rune (Unicode code point) in the high
|
|
878
883
|
// bits. Never negative, except for EOF which is represented as -1
|
|
879
884
|
// << 3 | 0.
|
|
880
|
-
step(
|
|
881
|
-
|
|
882
|
-
if (
|
|
885
|
+
step(pos) {
|
|
886
|
+
pos += this.start;
|
|
887
|
+
if (pos >= this.end) {
|
|
883
888
|
return MachineInputBase.EOF();
|
|
884
889
|
}
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
890
|
+
|
|
891
|
+
// Read UTF-8 bytes to extract the Rune and its width
|
|
892
|
+
const c = this.bytes[pos] & 0xff;
|
|
893
|
+
if (c < 0x80) {
|
|
894
|
+
return c << 3 | 1;
|
|
895
|
+
} else if (c >= 0xc2 && c <= 0xdf && pos + 1 < this.end) {
|
|
896
|
+
const c1 = this.bytes[pos + 1] & 0xff;
|
|
897
|
+
const rune = (c & 0x1f) << 6 | c1 & 0x3f;
|
|
898
|
+
return rune << 3 | 2;
|
|
899
|
+
} else if (c >= 0xe0 && c <= 0xef && pos + 2 < this.end) {
|
|
900
|
+
const c1 = this.bytes[pos + 1] & 0xff;
|
|
901
|
+
const c2 = this.bytes[pos + 2] & 0xff;
|
|
902
|
+
const rune = (c & 0x0f) << 12 | (c1 & 0x3f) << 6 | c2 & 0x3f;
|
|
903
|
+
return rune << 3 | 3;
|
|
904
|
+
} else if (c >= 0xf0 && c <= 0xf4 && pos + 3 < this.end) {
|
|
905
|
+
const c1 = this.bytes[pos + 1] & 0xff;
|
|
906
|
+
const c2 = this.bytes[pos + 2] & 0xff;
|
|
907
|
+
const c3 = this.bytes[pos + 3] & 0xff;
|
|
908
|
+
const rune = (c & 0x07) << 18 | (c1 & 0x3f) << 12 | (c2 & 0x3f) << 6 | c3 & 0x3f;
|
|
909
|
+
return rune << 3 | 4;
|
|
903
910
|
} else {
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
return MachineInputBase.EOF();
|
|
907
|
-
}
|
|
908
|
-
x = x << 6 | this.bytes[i++] & 63;
|
|
909
|
-
x = x << 6 | this.bytes[i++] & 63;
|
|
910
|
-
x = x << 6 | this.bytes[i++] & 63;
|
|
911
|
-
return x << 3 | 4;
|
|
911
|
+
// Invalid sequence fallback
|
|
912
|
+
return c << 3 | 1;
|
|
912
913
|
}
|
|
913
914
|
}
|
|
914
915
|
|
|
@@ -983,12 +984,25 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
983
984
|
// << 3 | 0.
|
|
984
985
|
step(pos) {
|
|
985
986
|
pos += this.start;
|
|
986
|
-
if (pos
|
|
987
|
-
const rune = this.charSequence.codePointAt(pos);
|
|
988
|
-
return rune << 3 | Utils.charCount(rune);
|
|
989
|
-
} else {
|
|
987
|
+
if (pos >= this.end) {
|
|
990
988
|
return MachineInputBase.EOF();
|
|
991
989
|
}
|
|
990
|
+
const c1 = this.charSequence.charCodeAt(pos);
|
|
991
|
+
|
|
992
|
+
// Fast path: standard BMP character (not a high surrogate)
|
|
993
|
+
if (c1 < Unicode.MIN_HIGH_SURROGATE || c1 > Unicode.MAX_HIGH_SURROGATE || pos + 1 >= this.end) {
|
|
994
|
+
return c1 << 3 | 1;
|
|
995
|
+
}
|
|
996
|
+
|
|
997
|
+
// Slow path: Calculate surrogate pair manually
|
|
998
|
+
const c2 = this.charSequence.charCodeAt(pos + 1);
|
|
999
|
+
if (c2 >= Unicode.MIN_LOW_SURROGATE && c2 <= Unicode.MAX_LOW_SURROGATE) {
|
|
1000
|
+
const rune = (c1 - Unicode.MIN_HIGH_SURROGATE) * 0x400 + (c2 - Unicode.MIN_LOW_SURROGATE) + Unicode.MIN_SUPPLEMENTARY_CODE_POINT;
|
|
1001
|
+
return rune << 3 | 2;
|
|
1002
|
+
}
|
|
1003
|
+
|
|
1004
|
+
// Invalid surrogate pair fallback
|
|
1005
|
+
return c1 << 3 | 1;
|
|
992
1006
|
}
|
|
993
1007
|
|
|
994
1008
|
// Returns the index relative to |pos| at which |re2.prefix| is found
|
|
@@ -1736,7 +1750,7 @@ class Inst {
|
|
|
1736
1750
|
let lo = 0;
|
|
1737
1751
|
let hi = this.runes.length / 2 | 0;
|
|
1738
1752
|
while (lo < hi) {
|
|
1739
|
-
const m = lo + ((hi - lo) / 2 | 0)
|
|
1753
|
+
const m = lo + hi >> 1; // native cpu instruction for "lo + (((hi - lo) / 2) | 0)"
|
|
1740
1754
|
const c = this.runes[2 * m];
|
|
1741
1755
|
if (c <= r) {
|
|
1742
1756
|
if (r <= this.runes[2 * m + 1]) {
|
|
@@ -1797,10 +1811,10 @@ class Thread {
|
|
|
1797
1811
|
// A queue is a 'sparse array' holding pending threads of execution. See:
|
|
1798
1812
|
// research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html
|
|
1799
1813
|
class Queue {
|
|
1800
|
-
constructor() {
|
|
1801
|
-
this.sparse =
|
|
1802
|
-
this.densePcs =
|
|
1803
|
-
this.denseThreads =
|
|
1814
|
+
constructor(numInst) {
|
|
1815
|
+
this.sparse = new Int32Array(numInst); // may contain stale but in-bounds values.
|
|
1816
|
+
this.densePcs = new Int32Array(numInst); // may contain stale pc in slots >= size
|
|
1817
|
+
this.denseThreads = new Array(numInst); // may contain stale Thread in slots >= size
|
|
1804
1818
|
this.size = 0;
|
|
1805
1819
|
}
|
|
1806
1820
|
contains(pc) {
|
|
@@ -2301,7 +2315,7 @@ class DFA {
|
|
|
2301
2315
|
if (width === 0) {
|
|
2302
2316
|
break;
|
|
2303
2317
|
}
|
|
2304
|
-
currentState = this.step(currentState, rune, anchor);
|
|
2318
|
+
currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
|
|
2305
2319
|
|
|
2306
2320
|
// If we hit an unrecoverable DFA error or bailout, signal fallback
|
|
2307
2321
|
if (currentState === null) return null;
|