re2js 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -25
- package/build/index.cjs.cjs +59 -45
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +59 -45
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +59 -45
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/build/index.umd.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.0.0
|
|
5
|
+
* @version v2.0.1
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -365,6 +365,11 @@
|
|
|
365
365
|
// Checked during test.
|
|
366
366
|
static MIN_FOLD = 0x0041;
|
|
367
367
|
static MAX_FOLD = 0x1e943;
|
|
368
|
+
static MIN_HIGH_SURROGATE = 0xd800;
|
|
369
|
+
static MAX_HIGH_SURROGATE = 0xdbff;
|
|
370
|
+
static MIN_LOW_SURROGATE = 0xdc00;
|
|
371
|
+
static MAX_LOW_SURROGATE = 0xdfff;
|
|
372
|
+
static MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
|
|
368
373
|
|
|
369
374
|
// is32 uses binary search to test whether rune is in the specified
|
|
370
375
|
// slice of 32-bit ranges.
|
|
@@ -671,9 +676,9 @@
|
|
|
671
676
|
} else if (c < 2048) {
|
|
672
677
|
out[p++] = c >> 6 | 192;
|
|
673
678
|
out[p++] = c & 63 | 128;
|
|
674
|
-
} else if ((c & 0xfc00) === 0xd800 && i + 1 < str.length && (str.charCodeAt(i + 1) & 0xfc00) === 0xdc00) {
|
|
679
|
+
} else if ((c & 0xfc00) === Unicode.MIN_HIGH_SURROGATE && i + 1 < str.length && (str.charCodeAt(i + 1) & 0xfc00) === Unicode.MIN_LOW_SURROGATE) {
|
|
675
680
|
// Surrogate Pair
|
|
676
|
-
c = 0x10000 + ((c & 0x03ff) << 10) + (str.charCodeAt(++i) & 0x03ff);
|
|
681
|
+
c = Unicode.MIN_SUPPLEMENTARY_CODE_POINT + ((c & 0x03ff) << 10) + (str.charCodeAt(++i) & 0x03ff);
|
|
677
682
|
out[p++] = c >> 18 | 240;
|
|
678
683
|
out[p++] = c >> 12 & 63 | 128;
|
|
679
684
|
out[p++] = c >> 6 & 63 | 128;
|
|
@@ -707,9 +712,9 @@
|
|
|
707
712
|
let c2 = bytes[pos++];
|
|
708
713
|
let c3 = bytes[pos++];
|
|
709
714
|
let c4 = bytes[pos++];
|
|
710
|
-
let u = ((c1 & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63) - 0x10000;
|
|
711
|
-
out[c++] = String.fromCharCode(0xd800 + (u >> 10));
|
|
712
|
-
out[c++] = String.fromCharCode(0xdc00 + (u & 1023));
|
|
715
|
+
let u = ((c1 & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63) - Unicode.MIN_SUPPLEMENTARY_CODE_POINT;
|
|
716
|
+
out[c++] = String.fromCharCode(Unicode.MIN_HIGH_SURROGATE + (u >> 10));
|
|
717
|
+
out[c++] = String.fromCharCode(Unicode.MIN_LOW_SURROGATE + (u & 1023));
|
|
713
718
|
} else {
|
|
714
719
|
let c2 = bytes[pos++];
|
|
715
720
|
let c3 = bytes[pos++];
|
|
@@ -883,38 +888,34 @@
|
|
|
883
888
|
// the lower 3 bits, and the rune (Unicode code point) in the high
|
|
884
889
|
// bits. Never negative, except for EOF which is represented as -1
|
|
885
890
|
// << 3 | 0.
|
|
886
|
-
step(
|
|
887
|
-
|
|
888
|
-
if (
|
|
891
|
+
step(pos) {
|
|
892
|
+
pos += this.start;
|
|
893
|
+
if (pos >= this.end) {
|
|
889
894
|
return MachineInputBase.EOF();
|
|
890
895
|
}
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
896
|
+
|
|
897
|
+
// Read UTF-8 bytes to extract the Rune and its width
|
|
898
|
+
const c = this.bytes[pos] & 0xff;
|
|
899
|
+
if (c < 0x80) {
|
|
900
|
+
return c << 3 | 1;
|
|
901
|
+
} else if (c >= 0xc2 && c <= 0xdf && pos + 1 < this.end) {
|
|
902
|
+
const c1 = this.bytes[pos + 1] & 0xff;
|
|
903
|
+
const rune = (c & 0x1f) << 6 | c1 & 0x3f;
|
|
904
|
+
return rune << 3 | 2;
|
|
905
|
+
} else if (c >= 0xe0 && c <= 0xef && pos + 2 < this.end) {
|
|
906
|
+
const c1 = this.bytes[pos + 1] & 0xff;
|
|
907
|
+
const c2 = this.bytes[pos + 2] & 0xff;
|
|
908
|
+
const rune = (c & 0x0f) << 12 | (c1 & 0x3f) << 6 | c2 & 0x3f;
|
|
909
|
+
return rune << 3 | 3;
|
|
910
|
+
} else if (c >= 0xf0 && c <= 0xf4 && pos + 3 < this.end) {
|
|
911
|
+
const c1 = this.bytes[pos + 1] & 0xff;
|
|
912
|
+
const c2 = this.bytes[pos + 2] & 0xff;
|
|
913
|
+
const c3 = this.bytes[pos + 3] & 0xff;
|
|
914
|
+
const rune = (c & 0x07) << 18 | (c1 & 0x3f) << 12 | (c2 & 0x3f) << 6 | c3 & 0x3f;
|
|
915
|
+
return rune << 3 | 4;
|
|
909
916
|
} else {
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
return MachineInputBase.EOF();
|
|
913
|
-
}
|
|
914
|
-
x = x << 6 | this.bytes[i++] & 63;
|
|
915
|
-
x = x << 6 | this.bytes[i++] & 63;
|
|
916
|
-
x = x << 6 | this.bytes[i++] & 63;
|
|
917
|
-
return x << 3 | 4;
|
|
917
|
+
// Invalid sequence fallback
|
|
918
|
+
return c << 3 | 1;
|
|
918
919
|
}
|
|
919
920
|
}
|
|
920
921
|
|
|
@@ -989,12 +990,25 @@
|
|
|
989
990
|
// << 3 | 0.
|
|
990
991
|
step(pos) {
|
|
991
992
|
pos += this.start;
|
|
992
|
-
if (pos < this.end) {
|
|
993
|
-
const rune = this.charSequence.codePointAt(pos);
|
|
994
|
-
return rune << 3 | Utils.charCount(rune);
|
|
995
|
-
} else {
|
|
993
|
+
if (pos >= this.end) {
|
|
996
994
|
return MachineInputBase.EOF();
|
|
997
995
|
}
|
|
996
|
+
const c1 = this.charSequence.charCodeAt(pos);
|
|
997
|
+
|
|
998
|
+
// Fast path: standard BMP character (not a high surrogate)
|
|
999
|
+
if (c1 < Unicode.MIN_HIGH_SURROGATE || c1 > Unicode.MAX_HIGH_SURROGATE || pos + 1 >= this.end) {
|
|
1000
|
+
return c1 << 3 | 1;
|
|
1001
|
+
}
|
|
1002
|
+
|
|
1003
|
+
// Slow path: Calculate surrogate pair manually
|
|
1004
|
+
const c2 = this.charSequence.charCodeAt(pos + 1);
|
|
1005
|
+
if (c2 >= Unicode.MIN_LOW_SURROGATE && c2 <= Unicode.MAX_LOW_SURROGATE) {
|
|
1006
|
+
const rune = (c1 - Unicode.MIN_HIGH_SURROGATE) * 0x400 + (c2 - Unicode.MIN_LOW_SURROGATE) + Unicode.MIN_SUPPLEMENTARY_CODE_POINT;
|
|
1007
|
+
return rune << 3 | 2;
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
// Invalid surrogate pair fallback
|
|
1011
|
+
return c1 << 3 | 1;
|
|
998
1012
|
}
|
|
999
1013
|
|
|
1000
1014
|
// Returns the index relative to |pos| at which |re2.prefix| is found
|
|
@@ -1742,7 +1756,7 @@
|
|
|
1742
1756
|
let lo = 0;
|
|
1743
1757
|
let hi = this.runes.length / 2 | 0;
|
|
1744
1758
|
while (lo < hi) {
|
|
1745
|
-
const m = lo + ((hi - lo) / 2 | 0)
|
|
1759
|
+
const m = lo + hi >> 1; // native cpu instruction for "lo + (((hi - lo) / 2) | 0)"
|
|
1746
1760
|
const c = this.runes[2 * m];
|
|
1747
1761
|
if (c <= r) {
|
|
1748
1762
|
if (r <= this.runes[2 * m + 1]) {
|
|
@@ -1803,10 +1817,10 @@
|
|
|
1803
1817
|
// A queue is a 'sparse array' holding pending threads of execution. See:
|
|
1804
1818
|
// research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html
|
|
1805
1819
|
class Queue {
|
|
1806
|
-
constructor() {
|
|
1807
|
-
this.sparse =
|
|
1808
|
-
this.densePcs =
|
|
1809
|
-
this.denseThreads =
|
|
1820
|
+
constructor(numInst) {
|
|
1821
|
+
this.sparse = new Int32Array(numInst); // may contain stale but in-bounds values.
|
|
1822
|
+
this.densePcs = new Int32Array(numInst); // may contain stale pc in slots >= size
|
|
1823
|
+
this.denseThreads = new Array(numInst); // may contain stale Thread in slots >= size
|
|
1810
1824
|
this.size = 0;
|
|
1811
1825
|
}
|
|
1812
1826
|
contains(pc) {
|
|
@@ -2307,7 +2321,7 @@
|
|
|
2307
2321
|
if (width === 0) {
|
|
2308
2322
|
break;
|
|
2309
2323
|
}
|
|
2310
|
-
currentState = this.step(currentState, rune, anchor);
|
|
2324
|
+
currentState = anchor === RE2Flags.UNANCHORED && rune <= Unicode.MAX_ASCII && currentState.nextAscii[rune] || this.step(currentState, rune, anchor);
|
|
2311
2325
|
|
|
2312
2326
|
// If we hit an unrecoverable DFA error or bailout, signal fallback
|
|
2313
2327
|
if (currentState === null) return null;
|