tldts 7.0.32 → 7.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +168 -59
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/src/data/trie.js +9 -11
- package/dist/cjs/src/data/trie.js.map +1 -1
- package/dist/cjs/src/suffix-trie.js +143 -40
- package/dist/cjs/src/suffix-trie.js.map +1 -1
- package/dist/cjs/tsconfig.tsbuildinfo +1 -1
- package/dist/es6/index.js +6 -6
- package/dist/es6/index.js.map +1 -1
- package/dist/es6/src/data/trie.js +8 -10
- package/dist/es6/src/data/trie.js.map +1 -1
- package/dist/es6/src/suffix-trie.js +144 -41
- package/dist/es6/src/suffix-trie.js.map +1 -1
- package/dist/es6/tsconfig.bundle.tsbuildinfo +1 -1
- package/dist/index.cjs.min.js +1 -1
- package/dist/index.cjs.min.js.map +1 -1
- package/dist/index.esm.min.js +1 -1
- package/dist/index.esm.min.js.map +1 -1
- package/dist/index.umd.min.js +1 -1
- package/dist/index.umd.min.js.map +1 -1
- package/dist/types/src/data/trie.d.ts +7 -5
- package/dist/types/src/suffix-trie.d.ts +1 -1
- package/index.ts +6 -6
- package/package.json +4 -4
- package/src/data/trie.ts +8 -14
- package/src/suffix-trie.ts +160 -63
package/src/suffix-trie.ts
CHANGED
|
@@ -1,60 +1,174 @@
|
|
|
1
|
+
// NOTE: kept (intentionally) near-identical to packages/tldts-icann/src/suffix-trie.ts.
|
|
2
|
+
// They are separate copies rather than a shared helper because the lookup is
|
|
3
|
+
// only fast when the typed arrays are module-scope monomorphic globals —
|
|
4
|
+
// closing over them (a shared factory) measured ~20% slower. The ICANN build
|
|
5
|
+
// also specializes (constant mask, no isIcann/isPrivate). Keep the two in sync.
|
|
1
6
|
import {
|
|
2
7
|
fastPathLookup,
|
|
3
8
|
IPublicSuffix,
|
|
4
9
|
ISuffixLookupOptions,
|
|
5
10
|
} from 'tldts-core';
|
|
6
|
-
import {
|
|
11
|
+
import {
|
|
12
|
+
edgeChild,
|
|
13
|
+
edgeLength,
|
|
14
|
+
edgeStart,
|
|
15
|
+
exceptionsRoot,
|
|
16
|
+
labelText,
|
|
17
|
+
nodeFlags,
|
|
18
|
+
rulesRoot,
|
|
19
|
+
} from './data/trie';
|
|
7
20
|
|
|
8
|
-
// Flags used to know if a rule is ICANN or Private
|
|
9
21
|
/**
 * Bit flags stored per trie node (see `nodeFlags`) telling which section of
 * the public suffix list a rule comes from. They are OR-ed together to build
 * the `allowedMask` used during lookup.
 */
const enum RULE_TYPE {
  ICANN = 1 << 0,
  PRIVATE = 1 << 1,
}
|
|
13
25
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
26
|
+
// `edgeOffset` (where each label starts in `labelText`), `edgeHash` (djb2 of
|
|
27
|
+
// each label) and `wildcardEdge` (each node's '*' edge, or -1) are derived once
|
|
28
|
+
// at load instead of being shipped: the bundle then carries only the
|
|
29
|
+
// compressible `labelText` + structure, while the lookup binary-searches
|
|
30
|
+
// integer hashes. The cost is a single ~1ms pass at first import — cheaper than
|
|
31
|
+
// the object trie it replaces. Kept at module scope (not captured in a closure)
|
|
32
|
+
// so V8 treats the typed arrays as fast monomorphic globals.
|
|
33
|
+
const numberOfNodes = nodeFlags.length;
const numberOfEdges = edgeLength.length;
const edgeOffset = new Uint32Array(numberOfEdges);
const edgeHash = new Uint32Array(numberOfEdges);
const wildcardEdge = new Int32Array(numberOfNodes).fill(-1);

// One pass over every node's edge range [edgeStart[node], edgeStart[node + 1]).
// Labels are laid out back-to-back in `labelText`, in edge order, so a running
// cursor is enough to recover where each label begins.
{
  let cursor = 0;
  let node = 0;
  while (node < numberOfNodes) {
    const edgeLimit = edgeStart[node + 1]!;
    let edge = edgeStart[node]!;
    while (edge < edgeLimit) {
      const labelStart = cursor;
      const labelLength = edgeLength[edge]!;
      cursor = labelStart + labelLength;
      edgeOffset[edge] = labelStart;

      // Hash the label from its last character to its first, i.e. in the same
      // order `walk` consumes hostname characters, so both sides agree.
      let digest = 5381;
      let index = cursor;
      while (index > labelStart) {
        index -= 1;
        digest = (digest * 33) ^ labelText.charCodeAt(index);
      }
      // Store as unsigned 32-bit so hashes compare consistently in `findEdge`.
      edgeHash[edge] = digest >>> 0;

      // A one-character label equal to '*' is this node's wildcard edge.
      const isWildcard =
        labelLength === 1 && labelText.charCodeAt(labelStart) === 42; /* '*' */
      if (isWildcard) {
        wildcardEdge[node] = edge;
      }

      edge += 1;
    }
    node += 1;
  }
}
|
|
19
56
|
|
|
57
|
+
// Result of the last `walk`, kept in module scope to avoid allocating a match
|
|
58
|
+
// object. Safe because lookups are synchronous and read right after `walk`.
|
|
59
|
+
let matchNode = -1;
|
|
60
|
+
let matchStart = 0;
|
|
61
|
+
let matchEnd = 0;
|
|
62
|
+
|
|
20
63
|
/**
|
|
21
|
-
*
|
|
64
|
+
* True if edge `edge`'s label equals `hostname[start, start + length)`.
|
|
22
65
|
*/
|
|
23
|
-
function
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
):
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
isIcann: (node[0] & RULE_TYPE.ICANN) !== 0,
|
|
37
|
-
isPrivate: (node[0] & RULE_TYPE.PRIVATE) !== 0,
|
|
38
|
-
};
|
|
66
|
+
function labelEquals(
  edge: number,
  hostname: string,
  start: number,
  length: number,
): boolean {
  // Labels of different lengths can never be equal; only compare characters
  // when the stored label has exactly `length` characters.
  if (edgeLength[edge] === length) {
    const base = edgeOffset[edge]!;
    let matched = 0;
    // Advance while the stored label and the hostname slice agree.
    while (
      matched < length &&
      labelText.charCodeAt(base + matched) ===
        hostname.charCodeAt(start + matched)
    ) {
      matched += 1;
    }
    // Equal only if the scan reached the end without hitting a mismatch.
    return matched === length;
  }
  return false;
}
|
|
40
83
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
84
|
+
/**
 * Locate the outgoing edge of `node` labelled `hostname[start, start + length)`.
 *
 * The edges of a node occupy `[edgeStart[node], edgeStart[node + 1])` and are
 * binary-searched by `hash`, which relies on them being ordered by ascending
 * hash. Because distinct labels may share a hash, a hit is confirmed with
 * `labelEquals`, probing the neighbouring edges that carry the same hash.
 *
 * @param node - index of the node whose children are searched
 * @param hash - unsigned 32-bit hash of the label being looked up
 * @param hostname - string containing the label
 * @param start - index of the label's first character in `hostname`
 * @param length - number of characters in the label
 * @returns the matching edge index, or -1 when no edge carries that label
 */
function findEdge(
  node: number,
  hash: number,
  hostname: string,
  start: number,
  length: number,
): number {
  let low = edgeStart[node]!;
  let high = edgeStart[node + 1]!;

  while (low < high) {
    const middle = (low + high) >>> 1;
    const candidate = edgeHash[middle]!;

    if (candidate < hash) {
      low = middle + 1;
      continue;
    }
    if (candidate > hash) {
      high = middle;
      continue;
    }

    // Hash hit: check `middle` and any equal-hash edges to its left...
    let probe = middle;
    while (probe >= low && edgeHash[probe] === hash) {
      if (labelEquals(probe, hostname, start, length)) {
        return probe;
      }
      probe -= 1;
    }
    // ...then the equal-hash edges to its right.
    probe = middle + 1;
    while (probe < high && edgeHash[probe] === hash) {
      if (labelEquals(probe, hostname, start, length)) {
        return probe;
      }
      probe += 1;
    }
    // Same hash but no identical label: the label is not a child of `node`.
    break;
  }

  return -1;
}
|
|
45
117
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
118
|
+
/**
 * Descend the trie from `root`, consuming the labels of `hostname` from the
 * right-most to the left-most. At each step the exact label edge is preferred
 * and the node's wildcard edge is used as a fallback.
 *
 * Every time the reached node has a flag included in `allowedMask`, the match
 * is recorded in the module-level `matchNode`, with `matchStart`/`matchEnd`
 * set to the bounds of the label that led to it; the deepest such node wins.
 *
 * @param hostname - hostname to look up
 * @param root - index of the trie node to start from
 * @param allowedMask - bit mask of the node flags that count as a match
 * @returns true if at least one matching node was recorded
 */
function walk(hostname: string, root: number, allowedMask: number): boolean {
  matchNode = -1;

  let current = root;
  let labelEnd = hostname.length;
  let labelHash = 5381;

  for (let index = hostname.length - 1; index >= 0; index -= 1) {
    const charCode = hostname.charCodeAt(index);

    // Common case: still inside a label, keep folding characters in the hash.
    if (charCode !== 46 /* '.' */) {
      labelHash = (labelHash * 33) ^ charCode;
      continue;
    }

    // Reached a separator: the label just scanned is hostname[labelStart, labelEnd).
    const labelStart = index + 1;
    const exactEdge = findEdge(
      current,
      labelHash >>> 0,
      hostname,
      labelStart,
      labelEnd - labelStart,
    );
    const chosenEdge = exactEdge !== -1 ? exactEdge : wildcardEdge[current]!;
    if (chosenEdge === -1) {
      // Dead end: whatever was recorded so far is the final answer.
      return matchNode !== -1;
    }

    current = edgeChild[chosenEdge]!;
    if ((nodeFlags[current]! & allowedMask) !== 0) {
      matchNode = current;
      matchStart = labelStart;
      matchEnd = labelEnd;
    }

    // Start accumulating the next label, which ends at this separator.
    labelEnd = index;
    labelHash = 5381;
  }

  // The left-most label, hostname[0, labelEnd), has no leading '.' to trigger
  // the step above. It is handled here on purpose instead of extending the
  // loop, which would add a branch to the per-character hot path.
  const exactEdge = findEdge(current, labelHash >>> 0, hostname, 0, labelEnd);
  const chosenEdge = exactEdge !== -1 ? exactEdge : wildcardEdge[current]!;
  if (chosenEdge !== -1) {
    const child = edgeChild[chosenEdge]!;
    if ((nodeFlags[child]! & allowedMask) !== 0) {
      matchNode = child;
      matchStart = 0;
      matchEnd = labelEnd;
    }
  }

  return matchNode !== -1;
}
|
|
55
169
|
|
|
56
170
|
/**
|
|
57
|
-
* Check if `hostname` has a valid public suffix in
|
|
171
|
+
* Check if `hostname` has a valid public suffix in the trie.
|
|
58
172
|
*/
|
|
59
173
|
export default function suffixLookup(
|
|
60
174
|
hostname: string,
|
|
@@ -65,46 +179,29 @@ export default function suffixLookup(
|
|
|
65
179
|
return;
|
|
66
180
|
}
|
|
67
181
|
|
|
68
|
-
const hostnameParts = hostname.split('.');
|
|
69
|
-
|
|
70
182
|
const allowedMask =
|
|
71
183
|
(options.allowPrivateDomains ? RULE_TYPE.PRIVATE : 0) |
|
|
72
184
|
(options.allowIcannDomains ? RULE_TYPE.ICANN : 0);
|
|
73
185
|
|
|
74
|
-
//
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
);
|
|
81
|
-
|
|
82
|
-
if (exceptionMatch !== null) {
|
|
83
|
-
out.isIcann = exceptionMatch.isIcann;
|
|
84
|
-
out.isPrivate = exceptionMatch.isPrivate;
|
|
85
|
-
out.publicSuffix = hostnameParts.slice(exceptionMatch.index + 1).join('.');
|
|
186
|
+
// Exceptions have priority and strip their own left-most label (e.g. the
|
|
187
|
+
// rule '!www.ck' makes the suffix of 'www.ck' be 'ck').
|
|
188
|
+
if (walk(hostname, exceptionsRoot, allowedMask)) {
|
|
189
|
+
out.isIcann = (nodeFlags[matchNode]! & RULE_TYPE.ICANN) !== 0;
|
|
190
|
+
out.isPrivate = (nodeFlags[matchNode]! & RULE_TYPE.PRIVATE) !== 0;
|
|
191
|
+
out.publicSuffix = hostname.slice(matchEnd + 1);
|
|
86
192
|
return;
|
|
87
193
|
}
|
|
88
194
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
hostnameParts.length - 1,
|
|
94
|
-
allowedMask,
|
|
95
|
-
);
|
|
96
|
-
|
|
97
|
-
if (rulesMatch !== null) {
|
|
98
|
-
out.isIcann = rulesMatch.isIcann;
|
|
99
|
-
out.isPrivate = rulesMatch.isPrivate;
|
|
100
|
-
out.publicSuffix = hostnameParts.slice(rulesMatch.index).join('.');
|
|
195
|
+
if (walk(hostname, rulesRoot, allowedMask)) {
|
|
196
|
+
out.isIcann = (nodeFlags[matchNode]! & RULE_TYPE.ICANN) !== 0;
|
|
197
|
+
out.isPrivate = (nodeFlags[matchNode]! & RULE_TYPE.PRIVATE) !== 0;
|
|
198
|
+
out.publicSuffix = hostname.slice(matchStart);
|
|
101
199
|
return;
|
|
102
200
|
}
|
|
103
201
|
|
|
104
|
-
// No match
|
|
105
|
-
// Prevailing rule is '*' so we consider the top-level domain to be the
|
|
106
|
-
// public suffix of `hostname` (e.g.: 'example.org' => 'org').
|
|
202
|
+
// No match: the prevailing '*' rule makes the right-most label the suffix.
|
|
107
203
|
out.isIcann = false;
|
|
108
204
|
out.isPrivate = false;
|
|
109
|
-
|
|
205
|
+
const lastDot = hostname.lastIndexOf('.');
|
|
206
|
+
out.publicSuffix = lastDot === -1 ? hostname : hostname.slice(lastDot + 1);
|
|
110
207
|
}
|