aeorank 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -10
- package/dist/browser.js +90 -64
- package/dist/browser.js.map +1 -1
- package/dist/{chunk-RYV25AUV.js → chunk-DW7MPQ4X.js} +188 -30
- package/dist/chunk-DW7MPQ4X.js.map +1 -0
- package/dist/chunk-PYV5JVTC.js +179 -0
- package/dist/chunk-PYV5JVTC.js.map +1 -0
- package/dist/cli.js +83 -59
- package/dist/cli.js.map +1 -1
- package/dist/{full-site-crawler-TQ35TB2X.js → full-site-crawler-HAF2X2X3.js} +2 -2
- package/dist/{full-site-crawler-OBECS7AT.js → full-site-crawler-W3WSE6WT.js} +18 -30
- package/dist/full-site-crawler-W3WSE6WT.js.map +1 -0
- package/dist/index.cjs +277 -90
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +90 -64
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-RYV25AUV.js.map +0 -1
- package/dist/full-site-crawler-OBECS7AT.js.map +0 -1
- package/dist/{full-site-crawler-TQ35TB2X.js.map → full-site-crawler-HAF2X2X3.js.map} +0 -0
|
@@ -1,3 +1,173 @@
|
|
|
1
|
+
// src/network-guard.ts
|
|
2
|
+
// User-Agent header value that safeFetch sends when the caller supplies none.
var DEFAULT_USER_AGENT = "AEO-Visibility-Bot/1.0";

// Hostname suffixes that isBlockedHostname always rejects.
var LOCAL_HOST_SUFFIXES = [
  ".localhost",
  ".local",
  ".localdomain",
  ".internal",
  ".home.arpa",
  ".test"
];

// Normalized hostname -> boolean verdict recorded by isDnsResolvedHostSafe.
var dnsSafetyCache = /* @__PURE__ */ new Map();

// When non-null, loadDnsLookup returns this function instead of loading node:dns/promises.
var dnsLookupOverride = null;
|
|
6
|
+
/**
 * Remove one leading "[" and one trailing "]" from a hostname, if present.
 *
 * @param {string} hostname - Hostname, possibly a bracketed IPv6 literal.
 * @returns {string} The hostname without the surrounding brackets.
 */
function stripIpv6Brackets(hostname) {
  const withoutOpening = hostname.startsWith("[") ? hostname.slice(1) : hostname;
  return withoutOpening.endsWith("]") ? withoutOpening.slice(0, -1) : withoutOpening;
}
|
|
9
|
+
/**
 * Canonicalise a hostname for comparison: trim, lower-case, drop IPv6
 * brackets, drop one leading "www." and drop one trailing root dot.
 *
 * Trimming and lower-casing happen first because the patterns below are
 * anchored and case-sensitive; done afterwards, "WWW.Example.com" or
 * " www.example.com" would keep its "www." prefix. The trailing dot is
 * removed so the absolute form of a name ("localhost.") compares equal to
 * the relative form ("localhost").
 *
 * @param {string} hostname - Raw hostname or bare domain.
 * @returns {string} The canonical hostname (possibly empty).
 */
function normalizeHostname(hostname) {
  const cleaned = stripIpv6Brackets(hostname.trim().toLowerCase());
  return cleaned.replace(/^www\./, "").replace(/\.$/, "");
}
|
|
12
|
+
/**
 * Parse a dotted-quad IPv4 literal.
 *
 * @param {string} hostname - Candidate string such as "192.168.0.1".
 * @returns {number[] | null} The four octets, or null when the input is not
 *   four dot-separated groups of 1-3 digits or any group exceeds 255.
 */
function parseIpv4(hostname) {
  const dottedQuad = /^\d{1,3}(?:\.\d{1,3}){3}$/;
  if (!dottedQuad.test(hostname)) return null;

  const octets = [];
  for (const segment of hostname.split(".")) {
    const value = Number(segment);
    const inRange = Number.isInteger(value) && value >= 0 && value <= 255;
    if (!inRange) return null;
    octets.push(value);
  }
  return octets;
}
|
|
17
|
+
/**
 * Decide whether an IPv4 address falls in a private, loopback, link-local,
 * documentation, benchmarking, multicast or otherwise reserved range.
 *
 * Only 192.0.0.0/24 (IETF protocol assignments) and 192.0.2.0/24 (TEST-NET-1)
 * are blocked inside 192.0.0.0/16. The rest of that /16 is ordinary public
 * address space, so blocking all of it would reject legitimately hosted sites.
 *
 * @param {number[]} parts - The four octets of the address.
 * @returns {boolean} True when the address must not be fetched.
 */
function isPrivateOrReservedIpv4(parts) {
  const [a, b, c] = parts;
  if (a === 0 || a === 10 || a === 127) return true; // "this" network, RFC 1918, loopback
  if (a === 100 && b >= 64 && b <= 127) return true; // 100.64.0.0/10 shared address space
  if (a === 169 && b === 254) return true; // 169.254.0.0/16 link-local
  if (a === 172 && b >= 16 && b <= 31) return true; // 172.16.0.0/12 RFC 1918
  if (a === 192 && b === 0 && (c === 0 || c === 2)) return true; // 192.0.0.0/24, 192.0.2.0/24
  if (a === 192 && b === 88 && c === 99) return true; // 192.88.99.0/24 6to4 relay anycast
  if (a === 192 && b === 168) return true; // 192.168.0.0/16 RFC 1918
  if (a === 198 && (b === 18 || b === 19)) return true; // 198.18.0.0/15 benchmarking
  if (a === 198 && b === 51 && c === 100) return true; // 198.51.100.0/24 TEST-NET-2
  if (a === 203 && b === 0 && c === 113) return true; // 203.0.113.0/24 TEST-NET-3
  if (a >= 224) return true; // multicast and reserved (224.0.0.0 and above)
  return false;
}
|
|
32
|
+
/**
 * Decide whether an IPv6 literal points at a local, private or reserved
 * destination.
 *
 * Blocks the unspecified and loopback addresses, literals starting with
 * "fc"/"fd" (unique-local), "fe8"-"feb" (link-local) and "2001:db8"
 * (documentation), plus IPv4-mapped addresses whose embedded IPv4 address is
 * private or reserved.
 *
 * IPv4-mapped addresses are recognised in both spellings: dotted
 * ("::ffff:127.0.0.1") and hexadecimal ("::ffff:7f00:1"). The hexadecimal
 * form matters because the URL parser serialises every IPv6 host that way,
 * so a dotted-only check never fires for hostnames taken from `new URL()`.
 *
 * @param {string} hostname - Hostname, optionally wrapped in brackets.
 * @returns {boolean} True when the literal must not be fetched; false for
 *   anything that is not an IPv6 literal.
 */
function isBlockedIpv6(hostname) {
  const host = stripIpv6Brackets(hostname).toLowerCase();
  if (!host.includes(":")) return false;
  if (host === "::" || host === "::1") return true;
  if (host.startsWith("fc") || host.startsWith("fd")) return true;
  if (/^fe[89ab]/.test(host)) return true;
  if (host.startsWith("2001:db8")) return true;

  const mapped = /^::ffff:(?:(\d{1,3}(?:\.\d{1,3}){3})|([0-9a-f]{1,4}):([0-9a-f]{1,4}))$/.exec(host);
  if (mapped === null) return false;

  const [, dotted, highGroup, lowGroup] = mapped;
  if (dotted !== undefined) {
    const parts = parseIpv4(dotted);
    // An unparseable embedded address (e.g. an octet above 255) is treated as blocked.
    return parts ? isPrivateOrReservedIpv4(parts) : true;
  }

  // Hex form: the last two 16-bit groups carry the four IPv4 octets.
  const high = Number.parseInt(highGroup, 16);
  const low = Number.parseInt(lowGroup, 16);
  return isPrivateOrReservedIpv4([high >> 8, high & 255, low >> 8, low & 255]);
}
|
|
46
|
+
/**
 * Decide whether a hostname must never be fetched, judging by the name or
 * IP literal alone (no DNS resolution).
 *
 * @param {string} hostname - Hostname to evaluate.
 * @returns {boolean} True for an empty host, "localhost",
 *   "metadata.google.internal", any name ending in a LOCAL_HOST_SUFFIXES
 *   entry, and blocked IPv4 or IPv6 literals.
 */
function isBlockedHostname(hostname) {
  const host = normalizeHostname(hostname);
  if (!host) return true;

  const isNamedLocalHost = host === "localhost" || host === "metadata.google.internal";
  if (isNamedLocalHost) return true;

  for (const suffix of LOCAL_HOST_SUFFIXES) {
    if (host.endsWith(suffix)) return true;
  }

  // An IPv4 literal is decided entirely by its range; it never reaches the IPv6 check.
  const octets = parseIpv4(host);
  if (octets) return isPrivateOrReservedIpv4(octets);

  return isBlockedIpv6(host) ? true : false;
}
|
|
56
|
+
/**
 * Check whether a hostname is the given domain or one of its subdomains,
 * comparing both after normalizeHostname.
 *
 * @param {string} hostname - Hostname to test.
 * @param {string} domain - Site domain to compare against.
 * @returns {boolean} True on an exact match or when the hostname ends with
 *   "." followed by the domain; false when either value normalizes to empty.
 */
function isSameSiteHost(hostname, domain) {
  const candidate = normalizeHostname(hostname);
  const site = normalizeHostname(domain);
  if (candidate && site) {
    const subdomainSuffix = `.${site}`;
    return candidate === site || candidate.endsWith(subdomainSuffix);
  }
  return false;
}
|
|
62
|
+
/**
 * Synchronous URL screen: http(s) only, hostname not blocked, and, when an
 * expected domain is given, hostname on that site.
 *
 * @param {string} url - Absolute URL to check.
 * @param {string | null} [expectedDomain] - Optional domain the URL must belong to.
 * @returns {boolean} True when every check passes; false otherwise, including
 *   when the URL cannot be parsed or any check throws.
 */
function isSafePublicUrl(url, expectedDomain) {
  try {
    const { protocol, hostname } = new URL(url);
    const isHttp = protocol === "http:" || protocol === "https:";
    if (!isHttp || isBlockedHostname(hostname)) return false;
    return !expectedDomain || isSameSiteHost(hostname, expectedDomain);
  } catch {
    return false;
  }
}
|
|
73
|
+
/**
 * Detect a Node.js runtime by the presence of `process.versions.node`.
 *
 * @returns {boolean} True when a `process` global with a truthy
 *   `versions.node` exists.
 */
function isNodeRuntime() {
  if (typeof process === "undefined") return false;
  return Boolean(process.versions?.node);
}
|
|
76
|
+
/**
 * Report whether safeFetch should follow redirects itself (`redirect: "manual"`).
 * Currently this is simply "are we running on Node.js".
 *
 * @returns {boolean} The result of isNodeRuntime().
 */
function canUseManualRedirects() {
  const runningOnNode = isNodeRuntime();
  return runningOnNode;
}
|
|
79
|
+
/**
 * Check whether an HTTP status code is one of the redirect codes this module
 * follows: 301, 302, 303, 307 or 308.
 *
 * @param {number} status - HTTP status code.
 * @returns {boolean} True for the five codes above.
 */
function isRedirectStatus(status) {
  switch (status) {
    case 301:
    case 302:
    case 303:
    case 307:
    case 308:
      return true;
    default:
      return false;
  }
}
|
|
82
|
+
/**
 * Check whether a hostname is an IP literal rather than a DNS name.
 *
 * @param {string} hostname - Hostname, optionally wrapped in brackets.
 * @returns {boolean} True when it parses as dotted IPv4 or contains ":".
 */
function isIpLiteral(hostname) {
  const bare = stripIpv6Brackets(hostname);
  if (parseIpv4(bare)) return true;
  return bare.includes(":");
}
|
|
86
|
+
/**
 * Decide whether an address returned by a DNS lookup is private or reserved.
 *
 * @param {string} address - IPv4 or IPv6 address string.
 * @returns {boolean} The IPv4 range verdict when the address parses as
 *   dotted IPv4, otherwise the IPv6 verdict.
 */
function isBlockedResolvedAddress(address) {
  const candidate = stripIpv6Brackets(address).toLowerCase();
  const octets = parseIpv4(candidate);
  return octets ? isPrivateOrReservedIpv4(octets) : isBlockedIpv6(candidate);
}
|
|
92
|
+
/**
 * Obtain the function used to resolve hostnames.
 *
 * @returns {Promise<((hostname: string) => Promise<Array<{address: string}>>) | null>}
 *   The override when one is set; otherwise a wrapper around
 *   `dns.lookup(hostname, { all: true, verbatim: true })` from
 *   "node:dns/promises"; or null outside Node.js or when the import fails.
 */
async function loadDnsLookup() {
  if (dnsLookupOverride) return dnsLookupOverride;
  if (!isNodeRuntime()) return null;

  let dns;
  try {
    // The specifier is held in a variable and the import is marked
    // @vite-ignore, so the module is only resolved when this line runs.
    const mod = "node:dns/promises";
    dns = await import(
      /* @vite-ignore */
      mod
    );
  } catch {
    return null;
  }
  return async (hostname) => dns.lookup(hostname, { all: true, verbatim: true });
}
|
|
106
|
+
/**
 * Resolve a hostname and confirm that every address it maps to is public.
 *
 * Returns true without resolving for empty hosts and IP literals, and when
 * no DNS lookup function is available. Successful verdicts are stored in
 * dnsSafetyCache under the normalized hostname.
 *
 * A failed lookup still answers false (fail closed) but is deliberately NOT
 * cached: lookup errors are often transient, and caching them would keep the
 * host blocked until the cache is cleared.
 *
 * @param {string} hostname - Hostname to resolve.
 * @returns {Promise<boolean>} True when the host is considered safe to fetch.
 */
async function isDnsResolvedHostSafe(hostname) {
  const host = normalizeHostname(hostname);
  if (!host || isIpLiteral(host)) return true;

  const cached = dnsSafetyCache.get(host);
  if (cached !== undefined) return cached;

  const lookup = await loadDnsLookup();
  if (!lookup) return true;

  try {
    const records = await lookup(host);
    // A host with no records is unsafe; otherwise every record must be public.
    const safe = records.length > 0 && records.every((record) => !isBlockedResolvedAddress(record.address));
    dnsSafetyCache.set(host, safe);
    return safe;
  } catch {
    return false;
  }
}
|
|
123
|
+
/**
 * Full pre-fetch check: the synchronous URL screen followed by DNS
 * resolution of the hostname.
 *
 * @param {string} url - Absolute URL about to be fetched.
 * @param {string | null} [expectedDomain] - Optional domain the URL must belong to.
 * @returns {Promise<boolean>} True only when both checks pass; any error
 *   yields false.
 */
async function isSafeFetchTarget(url, expectedDomain) {
  const passesStaticChecks = isSafePublicUrl(url, expectedDomain);
  if (passesStaticChecks) {
    try {
      const { hostname } = new URL(url);
      return await isDnsResolvedHostSafe(hostname);
    } catch {
      // Fall through to the rejecting return below.
    }
  }
  return false;
}
|
|
132
|
+
/**
 * Fetch a URL while re-validating every hop against the network guard.
 *
 * When manual redirects are available, redirects are followed here one hop
 * at a time so each Location target passes isSafeFetchTarget before it is
 * requested. Otherwise the request uses `redirect: "error"`, so any redirect
 * makes the fetch fail.
 *
 * Responses that are not handed back to the caller (redirect hops and
 * responses rejected by the final check) have their bodies cancelled so the
 * underlying connection is released instead of waiting on an unread stream.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=15000] - Timeout applied to each request.
 * @param {string} [options.userAgent] - User-Agent header; defaults to DEFAULT_USER_AGENT.
 * @param {number} [options.maxRedirects=5] - Maximum redirect hops to follow.
 * @param {string | null} [options.expectedDomain=null] - Domain every hop must stay on.
 * @returns {Promise<Response | null>} The final response, or null when a
 *   target is unsafe, the request fails, a redirect lacks a usable Location,
 *   or the redirect limit is exceeded.
 */
async function safeFetch(url, options = {}) {
  const timeoutMs = options.timeoutMs ?? 15000;
  const userAgent = options.userAgent ?? DEFAULT_USER_AGENT;
  const maxRedirects = options.maxRedirects ?? 5;
  const expectedDomain = options.expectedDomain ?? null;
  const manualRedirects = canUseManualRedirects();
  const redirectMode = manualRedirects ? "manual" : "error";

  // Best-effort release of a response we are not going to return.
  const discardBody = async (response) => {
    try {
      await response.body?.cancel();
    } catch {
      // The stream may already be locked or closed; nothing left to release.
    }
  };

  let currentUrl = url;
  for (let redirects = 0; redirects <= maxRedirects; redirects++) {
    if (!(await isSafeFetchTarget(currentUrl, expectedDomain))) return null;

    let res;
    try {
      res = await fetch(currentUrl, {
        signal: AbortSignal.timeout(timeoutMs),
        headers: { "User-Agent": userAgent },
        redirect: redirectMode
      });
    } catch {
      return null;
    }

    if (manualRedirects && isRedirectStatus(res.status)) {
      const location = res.headers.get("location");
      await discardBody(res);
      if (redirects === maxRedirects || !location) return null;
      try {
        // Location may be relative; resolve it against the URL just requested.
        currentUrl = new URL(location, currentUrl).toString();
      } catch {
        return null;
      }
      continue;
    }

    // Re-check the URL the response reports, in case it differs from the request.
    if (await isSafeFetchTarget(res.url || currentUrl, expectedDomain)) return res;
    await discardBody(res);
    return null;
  }
  return null;
}
|
|
170
|
+
|
|
1
171
|
// src/full-site-crawler.ts
|
|
2
172
|
// Case-insensitive match for strings ending in a static-asset or data-file extension.
var RESOURCE_EXTENSIONS = /\.(js|css|png|jpg|jpeg|gif|svg|ico|pdf|xml|txt|woff|woff2|ttf|eot|mp4|mp3|webp|avif|zip|gz|tar|json)$/i;
|
|
3
173
|
// Case-insensitive match for paths whose first segment starts with one of the listed
// names (API, admin, auth, cart, feed, taxonomy/archive routes, ...), ending at a word boundary.
var SKIP_PATH_PATTERNS = /^\/(api|wp-admin|wp-json|wp-content|wp-includes|static|assets|_next|auth|login|signup|cart|checkout|admin|feed|xmlrpc|tag|tags|author|authors|category|categories|attachment|embed|trackback|comments|search|print|amp)\b/i;
|
|
@@ -46,33 +216,17 @@ function isDisallowedByRobots(urlPath, rules) {
|
|
|
46
216
|
if (longestAllow === 0 && longestDisallow === 0) return false;
|
|
47
217
|
return longestDisallow > longestAllow;
|
|
48
218
|
}
|
|
49
|
-
async function fetchPage(url, timeoutMs = 1e4) {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
});
|
|
56
|
-
if (res.status !== 200) return null;
|
|
57
|
-
const text = await res.text();
|
|
58
|
-
if (text.length < 200) return null;
|
|
59
|
-
return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
|
|
60
|
-
} catch {
|
|
61
|
-
return null;
|
|
62
|
-
}
|
|
219
|
+
/**
 * Fetch one HTML page through the network guard.
 *
 * Reading the body is wrapped in try/catch because `res.text()` can still
 * reject after the headers arrive (for example when the request timeout
 * fires mid-download). Without it a single bad page would reject the
 * caller's whole `Promise.all` batch instead of being skipped.
 *
 * @param {string} url - Page URL to fetch.
 * @param {string} domain - Site domain the request and its redirects must stay on.
 * @param {number} [timeoutMs=10000] - Request timeout in milliseconds.
 * @returns {Promise<{text: string, status: number, finalUrl: string} | null>}
 *   The body (capped at 500,000 characters), status and final URL, or null
 *   when the fetch is refused or fails, the status is not 200, the body
 *   cannot be read, or the body is shorter than 200 characters.
 */
async function fetchPage(url, domain, timeoutMs = 1e4) {
  const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
  if (!res || res.status !== 200) return null;

  let text;
  try {
    text = await res.text();
  } catch {
    return null;
  }

  if (text.length < 200) return null;
  return { text: text.slice(0, 5e5), status: res.status, finalUrl: res.url };
}
|
|
64
|
-
async function fetchSitemapXml(url, timeoutMs = 1e4) {
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
headers: { "User-Agent": "AEO-Visibility-Bot/1.0" },
|
|
69
|
-
redirect: "follow"
|
|
70
|
-
});
|
|
71
|
-
if (res.status !== 200) return null;
|
|
72
|
-
return await res.text();
|
|
73
|
-
} catch {
|
|
74
|
-
return null;
|
|
75
|
-
}
|
|
226
|
+
/**
 * Fetch a sitemap document through the network guard.
 *
 * Reading the body is wrapped in try/catch because `res.text()` can reject
 * after the headers arrive. The caller awaits several of these with
 * `Promise.all`, so one unreadable sub-sitemap must resolve to null rather
 * than reject the whole group.
 *
 * @param {string} url - Sitemap URL to fetch.
 * @param {string} domain - Site domain the request and its redirects must stay on.
 * @param {number} [timeoutMs=10000] - Request timeout in milliseconds.
 * @returns {Promise<string | null>} The response body, or null when the
 *   fetch is refused or fails, the status is not 200, or the body cannot be read.
 */
async function fetchSitemapXml(url, domain, timeoutMs = 1e4) {
  const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });
  if (!res || res.status !== 200) return null;

  try {
    return await res.text();
  } catch {
    return null;
  }
}
|
|
77
231
|
async function extractAllUrlsFromSitemap(sitemapText, domain, timeoutMs = 1e4) {
|
|
78
232
|
const cleanDomain = domain.replace(/^www\./, "").toLowerCase();
|
|
@@ -82,9 +236,9 @@ async function extractAllUrlsFromSitemap(sitemapText, domain, timeoutMs = 1e4) {
|
|
|
82
236
|
const subUrls = [];
|
|
83
237
|
for (const block of subSitemapLocs) {
|
|
84
238
|
const locMatch = block.match(/<loc>([^<]+)<\/loc>/i);
|
|
85
|
-
if (locMatch) subUrls.push(locMatch[1].trim());
|
|
239
|
+
if (locMatch && isSafePublicUrl(locMatch[1].trim(), cleanDomain)) subUrls.push(locMatch[1].trim());
|
|
86
240
|
}
|
|
87
|
-
const fetches = subUrls.slice(0, 10).map((u) => fetchSitemapXml(u, timeoutMs));
|
|
241
|
+
const fetches = subUrls.slice(0, 10).map((u) => fetchSitemapXml(u, cleanDomain, timeoutMs));
|
|
88
242
|
const results = await Promise.all(fetches);
|
|
89
243
|
for (const text of results) {
|
|
90
244
|
if (text) {
|
|
@@ -242,7 +396,7 @@ async function crawlFullSite(siteData, options) {
|
|
|
242
396
|
batch.push(url);
|
|
243
397
|
}
|
|
244
398
|
if (batch.length === 0) continue;
|
|
245
|
-
const fetchResults = await Promise.all(batch.map((url) => fetchPage(url, timeoutMs)));
|
|
399
|
+
const fetchResults = await Promise.all(batch.map((url) => fetchPage(url, siteData.domain, timeoutMs)));
|
|
246
400
|
const batchResults = [];
|
|
247
401
|
for (let i = 0; i < fetchResults.length; i++) {
|
|
248
402
|
const result = fetchResults[i];
|
|
@@ -294,6 +448,10 @@ function normalizeUrl(url) {
|
|
|
294
448
|
}
|
|
295
449
|
|
|
296
450
|
export {
|
|
451
|
+
normalizeHostname,
|
|
452
|
+
isSafePublicUrl,
|
|
453
|
+
isSafeFetchTarget,
|
|
454
|
+
safeFetch,
|
|
297
455
|
parseRobotsTxt,
|
|
298
456
|
isDisallowedByRobots,
|
|
299
457
|
extractAllUrlsFromSitemap,
|
|
@@ -301,4 +459,4 @@ export {
|
|
|
301
459
|
inferCategory,
|
|
302
460
|
crawlFullSite
|
|
303
461
|
};
|
|
304
|
-
//# sourceMappingURL=chunk-RYV25AUV.js.map
|
|
462
|
+
//# sourceMappingURL=chunk-DW7MPQ4X.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/network-guard.ts","../src/full-site-crawler.ts"],"sourcesContent":["export interface SafeFetchOptions {\n timeoutMs?: number;\n userAgent?: string;\n expectedDomain?: string | null;\n maxRedirects?: number;\n}\n\ninterface DnsLookupResult {\n address: string;\n family?: number;\n}\n\ntype DnsLookupFn = (hostname: string) => Promise<DnsLookupResult[]>;\n\nconst DEFAULT_USER_AGENT = 'AEO-Visibility-Bot/1.0';\nconst LOCAL_HOST_SUFFIXES = ['.localhost', '.local', '.localdomain', '.internal', '.home.arpa', '.test'];\nconst dnsSafetyCache = new Map<string, boolean>();\nlet dnsLookupOverride: DnsLookupFn | null = null;\n\nfunction stripIpv6Brackets(hostname: string): string {\n return hostname.replace(/^\\[/, '').replace(/\\]$/, '');\n}\n\nexport function normalizeHostname(hostname: string): string {\n return stripIpv6Brackets(hostname).replace(/^www\\./, '').trim().toLowerCase();\n}\n\nfunction parseIpv4(hostname: string): number[] | null {\n if (!/^\\d{1,3}(?:\\.\\d{1,3}){3}$/.test(hostname)) return null;\n const parts = hostname.split('.').map(Number);\n return parts.every(part => Number.isInteger(part) && part >= 0 && part <= 255) ? 
parts : null;\n}\n\nfunction isPrivateOrReservedIpv4(parts: number[]): boolean {\n const [a, b, c] = parts;\n\n if (a === 0 || a === 10 || a === 127) return true;\n if (a === 100 && b >= 64 && b <= 127) return true;\n if (a === 169 && b === 254) return true;\n if (a === 172 && b >= 16 && b <= 31) return true;\n if (a === 192 && b === 0) return true;\n if (a === 192 && b === 88 && c === 99) return true;\n if (a === 192 && b === 168) return true;\n if (a === 198 && (b === 18 || b === 19)) return true;\n if (a === 198 && b === 51 && c === 100) return true;\n if (a === 203 && b === 0 && c === 113) return true;\n if (a >= 224) return true;\n\n return false;\n}\n\nfunction isBlockedIpv6(hostname: string): boolean {\n const host = stripIpv6Brackets(hostname).toLowerCase();\n if (!host.includes(':')) return false;\n\n if (host === '::' || host === '::1') return true;\n if (host.startsWith('fc') || host.startsWith('fd')) return true;\n if (/^fe[89ab]/.test(host)) return true;\n if (host.startsWith('2001:db8')) return true;\n\n const mappedIpv4 = host.match(/^::ffff:(\\d{1,3}(?:\\.\\d{1,3}){3})$/);\n if (mappedIpv4) {\n const parts = parseIpv4(mappedIpv4[1]);\n return parts ? 
isPrivateOrReservedIpv4(parts) : true;\n }\n\n return false;\n}\n\nexport function isBlockedHostname(hostname: string): boolean {\n const host = normalizeHostname(hostname);\n if (!host) return true;\n\n if (host === 'localhost' || host === 'metadata.google.internal') return true;\n if (LOCAL_HOST_SUFFIXES.some(suffix => host.endsWith(suffix))) return true;\n\n const ipv4 = parseIpv4(host);\n if (ipv4) return isPrivateOrReservedIpv4(ipv4);\n\n if (isBlockedIpv6(host)) return true;\n\n return false;\n}\n\nexport function isSameSiteHost(hostname: string, domain: string): boolean {\n const host = normalizeHostname(hostname);\n const base = normalizeHostname(domain);\n if (!host || !base) return false;\n return host === base || host.endsWith(`.${base}`);\n}\n\nexport function isSafePublicUrl(url: string, expectedDomain?: string | null): boolean {\n try {\n const parsed = new URL(url);\n if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;\n if (isBlockedHostname(parsed.hostname)) return false;\n if (expectedDomain && !isSameSiteHost(parsed.hostname, expectedDomain)) return false;\n return true;\n } catch {\n return false;\n }\n}\n\nfunction isNodeRuntime(): boolean {\n return typeof process !== 'undefined' && !!process.versions?.node;\n}\n\nfunction canUseManualRedirects(): boolean {\n return isNodeRuntime();\n}\n\nfunction isRedirectStatus(status: number): boolean {\n return status === 301 || status === 302 || status === 303 || status === 307 || status === 308;\n}\n\nfunction isIpLiteral(hostname: string): boolean {\n const host = stripIpv6Brackets(hostname);\n return !!parseIpv4(host) || host.includes(':');\n}\n\nfunction isBlockedResolvedAddress(address: string): boolean {\n const normalizedAddress = stripIpv6Brackets(address).toLowerCase();\n const ipv4 = parseIpv4(normalizedAddress);\n if (ipv4) return isPrivateOrReservedIpv4(ipv4);\n return isBlockedIpv6(normalizedAddress);\n}\n\nasync function loadDnsLookup(): Promise<DnsLookupFn | 
null> {\n if (dnsLookupOverride) return dnsLookupOverride;\n if (!isNodeRuntime()) return null;\n\n try {\n const mod = 'node:dns/promises';\n const dns = await import(/* @vite-ignore */ mod);\n return async (hostname: string) => dns.lookup(hostname, { all: true, verbatim: true });\n } catch {\n return null;\n }\n}\n\nasync function isDnsResolvedHostSafe(hostname: string): Promise<boolean> {\n const host = normalizeHostname(hostname);\n if (!host || isIpLiteral(host)) return true;\n\n const cached = dnsSafetyCache.get(host);\n if (cached !== undefined) return cached;\n\n const lookup = await loadDnsLookup();\n if (!lookup) return true;\n\n try {\n const records = await lookup(host);\n const safe = records.length > 0 && records.every(record => !isBlockedResolvedAddress(record.address));\n dnsSafetyCache.set(host, safe);\n return safe;\n } catch {\n dnsSafetyCache.set(host, false);\n return false;\n }\n}\n\nexport async function isSafeFetchTarget(url: string, expectedDomain?: string | null): Promise<boolean> {\n if (!isSafePublicUrl(url, expectedDomain)) return false;\n\n try {\n const parsed = new URL(url);\n return await isDnsResolvedHostSafe(parsed.hostname);\n } catch {\n return false;\n }\n}\n\nexport function __setDnsLookupForTests(lookup: DnsLookupFn | null): void {\n dnsLookupOverride = lookup;\n dnsSafetyCache.clear();\n}\n\nexport function __clearDnsSafetyCacheForTests(): void {\n dnsSafetyCache.clear();\n}\n\nexport async function safeFetch(url: string, options: SafeFetchOptions = {}): Promise<Response | null> {\n const timeoutMs = options.timeoutMs ?? 15000;\n const userAgent = options.userAgent ?? DEFAULT_USER_AGENT;\n const maxRedirects = options.maxRedirects ?? 5;\n const expectedDomain = options.expectedDomain ?? null;\n const manualRedirects = canUseManualRedirects();\n const redirectMode = manualRedirects ? 
'manual' : 'error';\n\n let currentUrl = url;\n\n for (let redirects = 0; redirects <= maxRedirects; redirects++) {\n if (!(await isSafeFetchTarget(currentUrl, expectedDomain))) return null;\n\n let res: Response;\n try {\n res = await fetch(currentUrl, {\n signal: AbortSignal.timeout(timeoutMs),\n headers: { 'User-Agent': userAgent },\n redirect: redirectMode,\n });\n } catch {\n return null;\n }\n\n if (!manualRedirects) {\n return (await isSafeFetchTarget(res.url || currentUrl, expectedDomain)) ? res : null;\n }\n\n if (isRedirectStatus(res.status)) {\n if (redirects === maxRedirects) return null;\n const location = res.headers.get('location');\n if (!location) return null;\n try {\n currentUrl = new URL(location, currentUrl).toString();\n } catch {\n return null;\n }\n continue;\n }\n\n return (await isSafeFetchTarget(res.url || currentUrl, expectedDomain)) ? res : null;\n }\n\n return null;\n}\n","/**\n * Full-site crawler for deep AEO audits.\n * BFS crawl that discovers all internal pages up to a configurable limit.\n */\n\nimport type { FetchResult, SiteData, PageCategory } from './site-crawler.js';\nimport { isSafePublicUrl, safeFetch } from './network-guard.js';\n\n// ─── Types ──────────────────────────────────────────────────────────────────\n\nexport interface CrawlOptions {\n /** Maximum pages to fetch (default 200) */\n maxPages?: number;\n /** Per-page fetch timeout in ms (default 10000) */\n timeoutMs?: number;\n /** Parallel fetches (default 5) */\n concurrency?: number;\n /** Honor robots.txt Disallow rules (default true) */\n respectRobots?: boolean;\n /** Include asset files — skipped by default */\n includeAssets?: boolean;\n /** Called after each batch with per-URL results */\n onProgress?: (event: CrawlProgressEvent) => void;\n}\n\nexport interface CrawlProgressEvent {\n /** URLs attempted in this batch */\n urls: string[];\n /** Whether each URL succeeded */\n results: Array<{ url: string; ok: boolean; status?: number }>;\n /** Total 
fetched so far */\n fetched: number;\n /** Total discovered so far */\n discovered: number;\n /** Max pages limit */\n maxPages: number;\n}\n\nexport interface CrawlResult {\n pages: FetchResult[];\n discoveredUrls: string[];\n fetchedUrls: string[];\n skippedUrls: string[];\n elapsed: number;\n}\n\n// ─── Resource file extensions to skip ────────────────────────────────────────\n\nconst RESOURCE_EXTENSIONS = /\\.(js|css|png|jpg|jpeg|gif|svg|ico|pdf|xml|txt|woff|woff2|ttf|eot|mp4|mp3|webp|avif|zip|gz|tar|json)$/i;\n\nconst SKIP_PATH_PATTERNS = /^\\/(api|wp-admin|wp-json|wp-content|wp-includes|static|assets|_next|auth|login|signup|cart|checkout|admin|feed|xmlrpc|tag|tags|author|authors|category|categories|attachment|embed|trackback|comments|search|print|amp)\\b/i;\n\n/** WordPress pagination and query-string junk */\nconst SKIP_URL_PATTERNS = /\\/page\\/\\d+\\/?$|[?&](s|replytocom|p|preview|share|like|amp)=/i;\n\n// ─── Robots.txt parsing ─────────────────────────────────────────────────────\n\ninterface RobotsRules {\n disallow: string[];\n allow: string[];\n}\n\nexport function parseRobotsTxt(robotsText: string): RobotsRules {\n const lines = robotsText.split('\\n');\n const rules: RobotsRules = { disallow: [], allow: [] };\n\n // Collect rules for User-agent: * and User-agent: AEO-Visibility-Bot\n let inRelevantSection = false;\n\n for (const rawLine of lines) {\n const line = rawLine.trim();\n if (!line || line.startsWith('#')) continue;\n\n const uaMatch = line.match(/^user-agent:\\s*(.+)/i);\n if (uaMatch) {\n const agent = uaMatch[1].trim().toLowerCase();\n inRelevantSection = agent === '*' || agent === 'aeo-visibility-bot';\n continue;\n }\n\n if (!inRelevantSection) continue;\n\n const disallowMatch = line.match(/^disallow:\\s*(.*)/i);\n if (disallowMatch) {\n const path = disallowMatch[1].trim();\n if (path) rules.disallow.push(path);\n continue;\n }\n\n const allowMatch = line.match(/^allow:\\s*(.*)/i);\n if (allowMatch) {\n const path = 
allowMatch[1].trim();\n if (path) rules.allow.push(path);\n }\n }\n\n return rules;\n}\n\nexport function isDisallowedByRobots(urlPath: string, rules: RobotsRules): boolean {\n // Check allow rules first — more specific (longer) rules take precedence\n let longestAllow = 0;\n let longestDisallow = 0;\n\n for (const pattern of rules.allow) {\n if (urlPath.startsWith(pattern) && pattern.length > longestAllow) {\n longestAllow = pattern.length;\n }\n }\n\n for (const pattern of rules.disallow) {\n if (urlPath.startsWith(pattern) && pattern.length > longestDisallow) {\n longestDisallow = pattern.length;\n }\n }\n\n // More specific (longer) rule wins; if equal length, allow wins\n if (longestAllow === 0 && longestDisallow === 0) return false;\n return longestDisallow > longestAllow;\n}\n\n// ─── Fetch helper (matches multi-page-fetcher.ts fetchPage) ──────────────────\n\nasync function fetchPage(url: string, domain: string, timeoutMs = 10000): Promise<FetchResult | null> {\n const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });\n if (!res || res.status !== 200) return null;\n const text = await res.text();\n if (text.length < 200) return null;\n return { text: text.slice(0, 500_000), status: res.status, finalUrl: res.url };\n}\n\nasync function fetchSitemapXml(url: string, domain: string, timeoutMs = 10000): Promise<string | null> {\n const res = await safeFetch(url, { timeoutMs, expectedDomain: domain });\n if (!res || res.status !== 200) return null;\n return await res.text();\n}\n\n// ─── Sitemap parsing ────────────────────────────────────────────────────────\n\n/**\n * Extract all page URLs from sitemap XML (handles sitemapindex with sub-sitemaps).\n * Filters to same domain only, skips resource files.\n */\nexport async function extractAllUrlsFromSitemap(\n sitemapText: string,\n domain: string,\n timeoutMs = 10000,\n): Promise<string[]> {\n const cleanDomain = domain.replace(/^www\\./, '').toLowerCase();\n const urls = new Set<string>();\n\n 
// Check for sitemapindex — fetch sub-sitemaps\n const subSitemapLocs = sitemapText.match(/<sitemap>[\\s\\S]*?<loc>([^<]+)<\\/loc>[\\s\\S]*?<\\/sitemap>/gi) || [];\n if (subSitemapLocs.length > 0) {\n const subUrls: string[] = [];\n for (const block of subSitemapLocs) {\n const locMatch = block.match(/<loc>([^<]+)<\\/loc>/i);\n if (locMatch && isSafePublicUrl(locMatch[1].trim(), cleanDomain)) subUrls.push(locMatch[1].trim());\n }\n\n // Fetch sub-sitemaps in parallel (limit to 10)\n const fetches = subUrls.slice(0, 10).map(u => fetchSitemapXml(u, cleanDomain, timeoutMs));\n const results = await Promise.all(fetches);\n for (const text of results) {\n if (text) {\n extractLocsFromXml(text, cleanDomain, urls);\n }\n }\n }\n\n // Also extract <url><loc> from the main sitemap text (could be a regular sitemap)\n extractLocsFromXml(sitemapText, cleanDomain, urls);\n\n return Array.from(urls);\n}\n\nfunction extractLocsFromXml(xml: string, cleanDomain: string, urls: Set<string>): void {\n const locMatches = xml.match(/<url>[\\s\\S]*?<loc>([^<]+)<\\/loc>[\\s\\S]*?<\\/url>/gi) || [];\n for (const block of locMatches) {\n const locMatch = block.match(/<loc>([^<]+)<\\/loc>/i);\n if (!locMatch) continue;\n const url = locMatch[1].trim();\n\n try {\n const parsed = new URL(url);\n const urlDomain = parsed.hostname.replace(/^www\\./, '').toLowerCase();\n if (urlDomain !== cleanDomain) continue;\n if (RESOURCE_EXTENSIONS.test(parsed.pathname)) continue;\n urls.add(url);\n } catch {\n continue;\n }\n }\n}\n\n// ─── Internal link extraction ───────────────────────────────────────────────\n\n/**\n * Extract ALL internal links from HTML (not just nav).\n * Returns deduplicated full URLs for the same domain.\n */\nexport function extractInternalLinks(html: string, domain: string): string[] {\n const cleanDomain = domain.replace(/^www\\./, '').toLowerCase();\n const hrefMatches = html.match(/href=\"([^\"]*)\"/gi) || [];\n const urls = new Set<string>();\n\n for (const match of 
hrefMatches) {\n const href = match.match(/href=\"([^\"]*)\"/i)?.[1];\n if (!href || !href.trim()) continue;\n\n let fullUrl: string;\n\n if (href.startsWith('//')) {\n fullUrl = `https:${href}`;\n } else if (href.startsWith('/')) {\n // Skip fragment-only, query-only, and anchor links\n if (href === '/' || href.startsWith('/#')) continue;\n fullUrl = `https://${domain}${href}`;\n } else if (href.startsWith('http')) {\n fullUrl = href;\n } else if (href.startsWith('#') || href.startsWith('?') || href.startsWith('mailto:') || href.startsWith('tel:') || href.startsWith('javascript:')) {\n continue;\n } else {\n // Relative path\n fullUrl = `https://${domain}/${href}`;\n }\n\n try {\n const parsed = new URL(fullUrl);\n const linkDomain = parsed.hostname.replace(/^www\\./, '').toLowerCase();\n if (linkDomain !== cleanDomain) continue;\n\n // Strip hash and normalize\n parsed.hash = '';\n const path = parsed.pathname;\n\n if (path === '/' || path === '') continue;\n if (RESOURCE_EXTENSIONS.test(path)) continue;\n if (SKIP_PATH_PATTERNS.test(path)) continue;\n\n // Normalize: strip trailing slash\n const normalized = parsed.origin + path.replace(/\\/+$/, '') + parsed.search;\n if (SKIP_URL_PATTERNS.test(normalized)) continue;\n urls.add(normalized);\n } catch {\n continue;\n }\n }\n\n return Array.from(urls);\n}\n\n// ─── Category inference ─────────────────────────────────────────────────────\n\nconst CATEGORY_PATTERNS: Array<[RegExp, PageCategory]> = [\n [/\\/([^/]*-?)?(blog|articles?|posts?|news|insights|guides)\\b/i, 'blog'],\n [/\\/(about|about-us|company|who-we-are)\\b/i, 'about'],\n [/\\/(pricing|plans|packages)\\b/i, 'pricing'],\n [/\\/(services?|features?|solutions?|products?|what-we-do|offerings?)\\b/i, 'services'],\n [/\\/(contact|contact-us|get-in-touch)\\b/i, 'contact'],\n [/\\/(team|our-team|authors?|people|leadership|staff)\\b/i, 'team'],\n [/\\/(resources?|resource-center|library|downloads?)\\b/i, 'resources'],\n 
[/\\/(docs?|documentation|help|help-center|support|knowledge-base)\\b/i, 'docs'],\n [/\\/(case-stud\\w*|cases|customers?|success-stor\\w*|testimonials?)\\b/i, 'cases'],\n [/\\/(faq|frequently-asked|questions)\\b/i, 'faq'],\n];\n\n/**\n * Infer PageCategory from URL path patterns.\n */\nexport function inferCategory(url: string): PageCategory {\n try {\n const path = new URL(url).pathname;\n for (const [pattern, category] of CATEGORY_PATTERNS) {\n if (pattern.test(path)) return category;\n }\n } catch {\n // Fall through to default\n }\n return 'content';\n}\n\n// ─── Main crawler ───────────────────────────────────────────────────────────\n\n/**\n * BFS crawl of a site, discovering all internal pages up to maxPages.\n * Seeds from sitemap URLs + homepage internal links.\n * Skips URLs already in siteData.blogSample and homepage.\n */\nexport async function crawlFullSite(\n siteData: SiteData,\n options?: CrawlOptions,\n): Promise<CrawlResult> {\n const startTime = Date.now();\n const maxPages = options?.maxPages ?? 200;\n const timeoutMs = options?.timeoutMs ?? 10000;\n const concurrency = options?.concurrency ?? 5;\n const respectRobots = options?.respectRobots ?? 
true;\n\n const pages: FetchResult[] = [];\n const discoveredUrls = new Set<string>();\n const fetchedUrls = new Set<string>();\n const skippedUrls = new Set<string>();\n const visited = new Set<string>();\n\n // Parse robots.txt rules\n let robotsRules: RobotsRules = { disallow: [], allow: [] };\n if (respectRobots && siteData.robotsTxt?.text) {\n robotsRules = parseRobotsTxt(siteData.robotsTxt.text);\n }\n\n const baseUrl = `${siteData.protocol}://${siteData.domain}`;\n\n // Mark already-fetched URLs as visited\n visited.add(normalizeUrl(baseUrl));\n visited.add(normalizeUrl(baseUrl + '/'));\n if (siteData.blogSample) {\n for (const page of siteData.blogSample) {\n if (page.finalUrl) visited.add(normalizeUrl(page.finalUrl));\n }\n }\n\n // Seed the queue from sitemap\n const queue: string[] = [];\n if (siteData.sitemapXml?.text) {\n const sitemapUrls = await extractAllUrlsFromSitemap(\n siteData.sitemapXml.text,\n siteData.domain,\n timeoutMs,\n );\n for (const url of sitemapUrls) {\n const norm = normalizeUrl(url);\n if (!visited.has(norm)) {\n discoveredUrls.add(url);\n if (!queue.includes(url)) queue.push(url);\n }\n }\n }\n\n // Seed from homepage internal links\n if (siteData.homepage?.text) {\n const homeLinks = extractInternalLinks(siteData.homepage.text, siteData.domain);\n for (const url of homeLinks) {\n const norm = normalizeUrl(url);\n if (!visited.has(norm) && !discoveredUrls.has(url)) {\n discoveredUrls.add(url);\n if (!queue.includes(url)) queue.push(url);\n }\n }\n }\n\n // BFS loop\n while (queue.length > 0 && fetchedUrls.size < maxPages) {\n // Take a batch\n const batchSize = Math.min(concurrency, maxPages - fetchedUrls.size, queue.length);\n const batch: string[] = [];\n\n while (batch.length < batchSize && queue.length > 0) {\n const url = queue.shift()!;\n const norm = normalizeUrl(url);\n\n if (visited.has(norm)) continue;\n visited.add(norm);\n\n // Check robots.txt\n if (respectRobots) {\n try {\n const path = new URL(url).pathname;\n if 
(isDisallowedByRobots(path, robotsRules)) {\n skippedUrls.add(url);\n continue;\n }\n } catch {\n // Skip malformed URLs\n continue;\n }\n }\n\n batch.push(url);\n }\n\n if (batch.length === 0) continue;\n\n // Fetch batch in parallel\n const fetchResults = await Promise.all(batch.map(url => fetchPage(url, siteData.domain, timeoutMs)));\n\n const batchResults: Array<{ url: string; ok: boolean; status?: number }> = [];\n\n for (let i = 0; i < fetchResults.length; i++) {\n const result = fetchResults[i];\n const url = batch[i];\n fetchedUrls.add(url);\n batchResults.push({ url, ok: !!result, status: result?.status });\n\n if (!result) continue;\n\n result.category = inferCategory(url);\n pages.push(result);\n\n // Extract new internal links from fetched page\n const newLinks = extractInternalLinks(result.text, siteData.domain);\n for (const link of newLinks) {\n const norm = normalizeUrl(link);\n if (!visited.has(norm) && !discoveredUrls.has(link)) {\n discoveredUrls.add(link);\n queue.push(link);\n }\n }\n }\n\n // Notify caller of per-batch progress\n if (options?.onProgress) {\n options.onProgress({\n urls: batch,\n results: batchResults,\n fetched: fetchedUrls.size,\n discovered: discoveredUrls.size,\n maxPages,\n });\n }\n }\n\n // Any remaining queued URLs count as discovered but skipped\n for (const url of queue) {\n if (!fetchedUrls.has(url)) {\n skippedUrls.add(url);\n }\n }\n\n return {\n pages,\n discoveredUrls: Array.from(discoveredUrls),\n fetchedUrls: Array.from(fetchedUrls),\n skippedUrls: Array.from(skippedUrls),\n elapsed: Math.round((Date.now() - startTime) / 100) / 10,\n };\n}\n\nfunction normalizeUrl(url: string): string {\n try {\n const parsed = new URL(url);\n // Normalize: lowercase host, strip trailing slash, strip hash\n return (parsed.origin + parsed.pathname.replace(/\\/+$/, '') + parsed.search).toLowerCase();\n } catch {\n return url.toLowerCase();\n 
}\n}\n"],"mappings":";AAcA,IAAM,qBAAqB;AAC3B,IAAM,sBAAsB,CAAC,cAAc,UAAU,gBAAgB,aAAa,cAAc,OAAO;AACvG,IAAM,iBAAiB,oBAAI,IAAqB;AAChD,IAAI,oBAAwC;AAE5C,SAAS,kBAAkB,UAA0B;AACnD,SAAO,SAAS,QAAQ,OAAO,EAAE,EAAE,QAAQ,OAAO,EAAE;AACtD;AAEO,SAAS,kBAAkB,UAA0B;AAC1D,SAAO,kBAAkB,QAAQ,EAAE,QAAQ,UAAU,EAAE,EAAE,KAAK,EAAE,YAAY;AAC9E;AAEA,SAAS,UAAU,UAAmC;AACpD,MAAI,CAAC,4BAA4B,KAAK,QAAQ,EAAG,QAAO;AACxD,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAC5C,SAAO,MAAM,MAAM,UAAQ,OAAO,UAAU,IAAI,KAAK,QAAQ,KAAK,QAAQ,GAAG,IAAI,QAAQ;AAC3F;AAEA,SAAS,wBAAwB,OAA0B;AACzD,QAAM,CAAC,GAAG,GAAG,CAAC,IAAI;AAElB,MAAI,MAAM,KAAK,MAAM,MAAM,MAAM,IAAK,QAAO;AAC7C,MAAI,MAAM,OAAO,KAAK,MAAM,KAAK,IAAK,QAAO;AAC7C,MAAI,MAAM,OAAO,MAAM,IAAK,QAAO;AACnC,MAAI,MAAM,OAAO,KAAK,MAAM,KAAK,GAAI,QAAO;AAC5C,MAAI,MAAM,OAAO,MAAM,EAAG,QAAO;AACjC,MAAI,MAAM,OAAO,MAAM,MAAM,MAAM,GAAI,QAAO;AAC9C,MAAI,MAAM,OAAO,MAAM,IAAK,QAAO;AACnC,MAAI,MAAM,QAAQ,MAAM,MAAM,MAAM,IAAK,QAAO;AAChD,MAAI,MAAM,OAAO,MAAM,MAAM,MAAM,IAAK,QAAO;AAC/C,MAAI,MAAM,OAAO,MAAM,KAAK,MAAM,IAAK,QAAO;AAC9C,MAAI,KAAK,IAAK,QAAO;AAErB,SAAO;AACT;AAEA,SAAS,cAAc,UAA2B;AAChD,QAAM,OAAO,kBAAkB,QAAQ,EAAE,YAAY;AACrD,MAAI,CAAC,KAAK,SAAS,GAAG,EAAG,QAAO;AAEhC,MAAI,SAAS,QAAQ,SAAS,MAAO,QAAO;AAC5C,MAAI,KAAK,WAAW,IAAI,KAAK,KAAK,WAAW,IAAI,EAAG,QAAO;AAC3D,MAAI,YAAY,KAAK,IAAI,EAAG,QAAO;AACnC,MAAI,KAAK,WAAW,UAAU,EAAG,QAAO;AAExC,QAAM,aAAa,KAAK,MAAM,oCAAoC;AAClE,MAAI,YAAY;AACd,UAAM,QAAQ,UAAU,WAAW,CAAC,CAAC;AACrC,WAAO,QAAQ,wBAAwB,KAAK,IAAI;AAAA,EAClD;AAEA,SAAO;AACT;AAEO,SAAS,kBAAkB,UAA2B;AAC3D,QAAM,OAAO,kBAAkB,QAAQ;AACvC,MAAI,CAAC,KAAM,QAAO;AAElB,MAAI,SAAS,eAAe,SAAS,2BAA4B,QAAO;AACxE,MAAI,oBAAoB,KAAK,YAAU,KAAK,SAAS,MAAM,CAAC,EAAG,QAAO;AAEtE,QAAM,OAAO,UAAU,IAAI;AAC3B,MAAI,KAAM,QAAO,wBAAwB,IAAI;AAE7C,MAAI,cAAc,IAAI,EAAG,QAAO;AAEhC,SAAO;AACT;AAEO,SAAS,eAAe,UAAkB,QAAyB;AACxE,QAAM,OAAO,kBAAkB,QAAQ;AACvC,QAAM,OAAO,kBAAkB,MAAM;AACrC,MAAI,CAAC,QAAQ,CAAC,KAAM,QAAO;AAC3B,SAAO,SAAS,QAAQ,KAAK,SAAS,IAAI,IAAI,EAAE;AAClD;AAEO,SAAS,gBAAgB,KAAa,gBAAyC;AACpF,MAAI;AACF,UAAM,SAAS,IAAI,IAAI,GAAG;AAC1B,QAAI,OAAO,aAAa,WAAW,OAAO,aAAa
,SAAU,QAAO;AACxE,QAAI,kBAAkB,OAAO,QAAQ,EAAG,QAAO;AAC/C,QAAI,kBAAkB,CAAC,eAAe,OAAO,UAAU,cAAc,EAAG,QAAO;AAC/E,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,gBAAyB;AAChC,SAAO,OAAO,YAAY,eAAe,CAAC,CAAC,QAAQ,UAAU;AAC/D;AAEA,SAAS,wBAAiC;AACxC,SAAO,cAAc;AACvB;AAEA,SAAS,iBAAiB,QAAyB;AACjD,SAAO,WAAW,OAAO,WAAW,OAAO,WAAW,OAAO,WAAW,OAAO,WAAW;AAC5F;AAEA,SAAS,YAAY,UAA2B;AAC9C,QAAM,OAAO,kBAAkB,QAAQ;AACvC,SAAO,CAAC,CAAC,UAAU,IAAI,KAAK,KAAK,SAAS,GAAG;AAC/C;AAEA,SAAS,yBAAyB,SAA0B;AAC1D,QAAM,oBAAoB,kBAAkB,OAAO,EAAE,YAAY;AACjE,QAAM,OAAO,UAAU,iBAAiB;AACxC,MAAI,KAAM,QAAO,wBAAwB,IAAI;AAC7C,SAAO,cAAc,iBAAiB;AACxC;AAEA,eAAe,gBAA6C;AAC1D,MAAI,kBAAmB,QAAO;AAC9B,MAAI,CAAC,cAAc,EAAG,QAAO;AAE7B,MAAI;AACF,UAAM,MAAM;AACZ,UAAM,MAAM,MAAM;AAAA;AAAA,MAA0B;AAAA;AAC5C,WAAO,OAAO,aAAqB,IAAI,OAAO,UAAU,EAAE,KAAK,MAAM,UAAU,KAAK,CAAC;AAAA,EACvF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAe,sBAAsB,UAAoC;AACvE,QAAM,OAAO,kBAAkB,QAAQ;AACvC,MAAI,CAAC,QAAQ,YAAY,IAAI,EAAG,QAAO;AAEvC,QAAM,SAAS,eAAe,IAAI,IAAI;AACtC,MAAI,WAAW,OAAW,QAAO;AAEjC,QAAM,SAAS,MAAM,cAAc;AACnC,MAAI,CAAC,OAAQ,QAAO;AAEpB,MAAI;AACF,UAAM,UAAU,MAAM,OAAO,IAAI;AACjC,UAAM,OAAO,QAAQ,SAAS,KAAK,QAAQ,MAAM,YAAU,CAAC,yBAAyB,OAAO,OAAO,CAAC;AACpG,mBAAe,IAAI,MAAM,IAAI;AAC7B,WAAO;AAAA,EACT,QAAQ;AACN,mBAAe,IAAI,MAAM,KAAK;AAC9B,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,kBAAkB,KAAa,gBAAkD;AACrG,MAAI,CAAC,gBAAgB,KAAK,cAAc,EAAG,QAAO;AAElD,MAAI;AACF,UAAM,SAAS,IAAI,IAAI,GAAG;AAC1B,WAAO,MAAM,sBAAsB,OAAO,QAAQ;AAAA,EACpD,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAWA,eAAsB,UAAU,KAAa,UAA4B,CAAC,GAA6B;AACrG,QAAM,YAAY,QAAQ,aAAa;AACvC,QAAM,YAAY,QAAQ,aAAa;AACvC,QAAM,eAAe,QAAQ,gBAAgB;AAC7C,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,QAAM,kBAAkB,sBAAsB;AAC9C,QAAM,eAAe,kBAAkB,WAAW;AAElD,MAAI,aAAa;AAEjB,WAAS,YAAY,GAAG,aAAa,cAAc,aAAa;AAC9D,QAAI,CAAE,MAAM,kBAAkB,YAAY,cAAc,EAAI,QAAO;AAEnE,QAAI;AACJ,QAAI;AACF,YAAM,MAAM,MAAM,YAAY;AAAA,QAC5B,QAAQ,YAAY,QAAQ,SAAS;AAAA,QACrC,SAAS,EAAE,cAAc,UAAU;AAAA,QACnC,UAAU;AAAA,MACZ,CAAC;AAAA,IACH,QAAQ;AACN,aAAO;AAAA,IACT;AAEA,QAAI,CAAC,iBAAiB;AACpB,aAAQ,MAAM,kBAAkB,IAAI,OAAO,YAAY,cAAc,I
AAK,MAAM;AAAA,IAClF;AAEA,QAAI,iBAAiB,IAAI,MAAM,GAAG;AAChC,UAAI,cAAc,aAAc,QAAO;AACvC,YAAM,WAAW,IAAI,QAAQ,IAAI,UAAU;AAC3C,UAAI,CAAC,SAAU,QAAO;AACtB,UAAI;AACF,qBAAa,IAAI,IAAI,UAAU,UAAU,EAAE,SAAS;AAAA,MACtD,QAAQ;AACN,eAAO;AAAA,MACT;AACA;AAAA,IACF;AAEA,WAAQ,MAAM,kBAAkB,IAAI,OAAO,YAAY,cAAc,IAAK,MAAM;AAAA,EAClF;AAEA,SAAO;AACT;;;ACjLA,IAAM,sBAAsB;AAE5B,IAAM,qBAAqB;AAG3B,IAAM,oBAAoB;AASnB,SAAS,eAAe,YAAiC;AAC9D,QAAM,QAAQ,WAAW,MAAM,IAAI;AACnC,QAAM,QAAqB,EAAE,UAAU,CAAC,GAAG,OAAO,CAAC,EAAE;AAGrD,MAAI,oBAAoB;AAExB,aAAW,WAAW,OAAO;AAC3B,UAAM,OAAO,QAAQ,KAAK;AAC1B,QAAI,CAAC,QAAQ,KAAK,WAAW,GAAG,EAAG;AAEnC,UAAM,UAAU,KAAK,MAAM,sBAAsB;AACjD,QAAI,SAAS;AACX,YAAM,QAAQ,QAAQ,CAAC,EAAE,KAAK,EAAE,YAAY;AAC5C,0BAAoB,UAAU,OAAO,UAAU;AAC/C;AAAA,IACF;AAEA,QAAI,CAAC,kBAAmB;AAExB,UAAM,gBAAgB,KAAK,MAAM,oBAAoB;AACrD,QAAI,eAAe;AACjB,YAAM,OAAO,cAAc,CAAC,EAAE,KAAK;AACnC,UAAI,KAAM,OAAM,SAAS,KAAK,IAAI;AAClC;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,MAAM,iBAAiB;AAC/C,QAAI,YAAY;AACd,YAAM,OAAO,WAAW,CAAC,EAAE,KAAK;AAChC,UAAI,KAAM,OAAM,MAAM,KAAK,IAAI;AAAA,IACjC;AAAA,EACF;AAEA,SAAO;AACT;AAEO,SAAS,qBAAqB,SAAiB,OAA6B;AAEjF,MAAI,eAAe;AACnB,MAAI,kBAAkB;AAEtB,aAAW,WAAW,MAAM,OAAO;AACjC,QAAI,QAAQ,WAAW,OAAO,KAAK,QAAQ,SAAS,cAAc;AAChE,qBAAe,QAAQ;AAAA,IACzB;AAAA,EACF;AAEA,aAAW,WAAW,MAAM,UAAU;AACpC,QAAI,QAAQ,WAAW,OAAO,KAAK,QAAQ,SAAS,iBAAiB;AACnE,wBAAkB,QAAQ;AAAA,IAC5B;AAAA,EACF;AAGA,MAAI,iBAAiB,KAAK,oBAAoB,EAAG,QAAO;AACxD,SAAO,kBAAkB;AAC3B;AAIA,eAAe,UAAU,KAAa,QAAgB,YAAY,KAAoC;AACpG,QAAM,MAAM,MAAM,UAAU,KAAK,EAAE,WAAW,gBAAgB,OAAO,CAAC;AACtE,MAAI,CAAC,OAAO,IAAI,WAAW,IAAK,QAAO;AACvC,QAAM,OAAO,MAAM,IAAI,KAAK;AAC5B,MAAI,KAAK,SAAS,IAAK,QAAO;AAC9B,SAAO,EAAE,MAAM,KAAK,MAAM,GAAG,GAAO,GAAG,QAAQ,IAAI,QAAQ,UAAU,IAAI,IAAI;AAC/E;AAEA,eAAe,gBAAgB,KAAa,QAAgB,YAAY,KAA+B;AACrG,QAAM,MAAM,MAAM,UAAU,KAAK,EAAE,WAAW,gBAAgB,OAAO,CAAC;AACtE,MAAI,CAAC,OAAO,IAAI,WAAW,IAAK,QAAO;AACvC,SAAO,MAAM,IAAI,KAAK;AACxB;AAQA,eAAsB,0BACpB,aACA,QACA,YAAY,KACO;AACnB,QAAM,cAAc,OAAO,QAAQ,UAAU,EAAE,EAAE,YAAY;AAC7D,QAAM,OAAO,oBAAI,IAAY;AAG7B,QAAM,iBAAiB,YAAY,MAAM,2DAA2D,KAAK,CAAC;AAC1
G,MAAI,eAAe,SAAS,GAAG;AAC7B,UAAM,UAAoB,CAAC;AAC3B,eAAW,SAAS,gBAAgB;AAClC,YAAM,WAAW,MAAM,MAAM,sBAAsB;AACnD,UAAI,YAAY,gBAAgB,SAAS,CAAC,EAAE,KAAK,GAAG,WAAW,EAAG,SAAQ,KAAK,SAAS,CAAC,EAAE,KAAK,CAAC;AAAA,IACnG;AAGA,UAAM,UAAU,QAAQ,MAAM,GAAG,EAAE,EAAE,IAAI,OAAK,gBAAgB,GAAG,aAAa,SAAS,CAAC;AACxF,UAAM,UAAU,MAAM,QAAQ,IAAI,OAAO;AACzC,eAAW,QAAQ,SAAS;AAC1B,UAAI,MAAM;AACR,2BAAmB,MAAM,aAAa,IAAI;AAAA,MAC5C;AAAA,IACF;AAAA,EACF;AAGA,qBAAmB,aAAa,aAAa,IAAI;AAEjD,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,SAAS,mBAAmB,KAAa,aAAqB,MAAyB;AACrF,QAAM,aAAa,IAAI,MAAM,mDAAmD,KAAK,CAAC;AACtF,aAAW,SAAS,YAAY;AAC9B,UAAM,WAAW,MAAM,MAAM,sBAAsB;AACnD,QAAI,CAAC,SAAU;AACf,UAAM,MAAM,SAAS,CAAC,EAAE,KAAK;AAE7B,QAAI;AACF,YAAM,SAAS,IAAI,IAAI,GAAG;AAC1B,YAAM,YAAY,OAAO,SAAS,QAAQ,UAAU,EAAE,EAAE,YAAY;AACpE,UAAI,cAAc,YAAa;AAC/B,UAAI,oBAAoB,KAAK,OAAO,QAAQ,EAAG;AAC/C,WAAK,IAAI,GAAG;AAAA,IACd,QAAQ;AACN;AAAA,IACF;AAAA,EACF;AACF;AAQO,SAAS,qBAAqB,MAAc,QAA0B;AAC3E,QAAM,cAAc,OAAO,QAAQ,UAAU,EAAE,EAAE,YAAY;AAC7D,QAAM,cAAc,KAAK,MAAM,kBAAkB,KAAK,CAAC;AACvD,QAAM,OAAO,oBAAI,IAAY;AAE7B,aAAW,SAAS,aAAa;AAC/B,UAAM,OAAO,MAAM,MAAM,iBAAiB,IAAI,CAAC;AAC/C,QAAI,CAAC,QAAQ,CAAC,KAAK,KAAK,EAAG;AAE3B,QAAI;AAEJ,QAAI,KAAK,WAAW,IAAI,GAAG;AACzB,gBAAU,SAAS,IAAI;AAAA,IACzB,WAAW,KAAK,WAAW,GAAG,GAAG;AAE/B,UAAI,SAAS,OAAO,KAAK,WAAW,IAAI,EAAG;AAC3C,gBAAU,WAAW,MAAM,GAAG,IAAI;AAAA,IACpC,WAAW,KAAK,WAAW,MAAM,GAAG;AAClC,gBAAU;AAAA,IACZ,WAAW,KAAK,WAAW,GAAG,KAAK,KAAK,WAAW,GAAG,KAAK,KAAK,WAAW,SAAS,KAAK,KAAK,WAAW,MAAM,KAAK,KAAK,WAAW,aAAa,GAAG;AAClJ;AAAA,IACF,OAAO;AAEL,gBAAU,WAAW,MAAM,IAAI,IAAI;AAAA,IACrC;AAEA,QAAI;AACF,YAAM,SAAS,IAAI,IAAI,OAAO;AAC9B,YAAM,aAAa,OAAO,SAAS,QAAQ,UAAU,EAAE,EAAE,YAAY;AACrE,UAAI,eAAe,YAAa;AAGhC,aAAO,OAAO;AACd,YAAM,OAAO,OAAO;AAEpB,UAAI,SAAS,OAAO,SAAS,GAAI;AACjC,UAAI,oBAAoB,KAAK,IAAI,EAAG;AACpC,UAAI,mBAAmB,KAAK,IAAI,EAAG;AAGnC,YAAM,aAAa,OAAO,SAAS,KAAK,QAAQ,QAAQ,EAAE,IAAI,OAAO;AACrE,UAAI,kBAAkB,KAAK,UAAU,EAAG;AACxC,WAAK,IAAI,UAAU;AAAA,IACrB,QAAQ;AACN;AAAA,IACF;AAAA,EACF;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;AAIA,IAAM,oBAAmD;AAAA,EACvD,CAAC,+DAA+D,MAAM;AAAA,E
ACtE,CAAC,4CAA4C,OAAO;AAAA,EACpD,CAAC,iCAAiC,SAAS;AAAA,EAC3C,CAAC,yEAAyE,UAAU;AAAA,EACpF,CAAC,0CAA0C,SAAS;AAAA,EACpD,CAAC,yDAAyD,MAAM;AAAA,EAChE,CAAC,wDAAwD,WAAW;AAAA,EACpE,CAAC,sEAAsE,MAAM;AAAA,EAC7E,CAAC,sEAAsE,OAAO;AAAA,EAC9E,CAAC,yCAAyC,KAAK;AACjD;AAKO,SAAS,cAAc,KAA2B;AACvD,MAAI;AACF,UAAM,OAAO,IAAI,IAAI,GAAG,EAAE;AAC1B,eAAW,CAAC,SAAS,QAAQ,KAAK,mBAAmB;AACnD,UAAI,QAAQ,KAAK,IAAI,EAAG,QAAO;AAAA,IACjC;AAAA,EACF,QAAQ;AAAA,EAER;AACA,SAAO;AACT;AASA,eAAsB,cACpB,UACA,SACsB;AACtB,QAAM,YAAY,KAAK,IAAI;AAC3B,QAAM,WAAW,SAAS,YAAY;AACtC,QAAM,YAAY,SAAS,aAAa;AACxC,QAAM,cAAc,SAAS,eAAe;AAC5C,QAAM,gBAAgB,SAAS,iBAAiB;AAEhD,QAAM,QAAuB,CAAC;AAC9B,QAAM,iBAAiB,oBAAI,IAAY;AACvC,QAAM,cAAc,oBAAI,IAAY;AACpC,QAAM,cAAc,oBAAI,IAAY;AACpC,QAAM,UAAU,oBAAI,IAAY;AAGhC,MAAI,cAA2B,EAAE,UAAU,CAAC,GAAG,OAAO,CAAC,EAAE;AACzD,MAAI,iBAAiB,SAAS,WAAW,MAAM;AAC7C,kBAAc,eAAe,SAAS,UAAU,IAAI;AAAA,EACtD;AAEA,QAAM,UAAU,GAAG,SAAS,QAAQ,MAAM,SAAS,MAAM;AAGzD,UAAQ,IAAI,aAAa,OAAO,CAAC;AACjC,UAAQ,IAAI,aAAa,UAAU,GAAG,CAAC;AACvC,MAAI,SAAS,YAAY;AACvB,eAAW,QAAQ,SAAS,YAAY;AACtC,UAAI,KAAK,SAAU,SAAQ,IAAI,aAAa,KAAK,QAAQ,CAAC;AAAA,IAC5D;AAAA,EACF;AAGA,QAAM,QAAkB,CAAC;AACzB,MAAI,SAAS,YAAY,MAAM;AAC7B,UAAM,cAAc,MAAM;AAAA,MACxB,SAAS,WAAW;AAAA,MACpB,SAAS;AAAA,MACT;AAAA,IACF;AACA,eAAW,OAAO,aAAa;AAC7B,YAAM,OAAO,aAAa,GAAG;AAC7B,UAAI,CAAC,QAAQ,IAAI,IAAI,GAAG;AACtB,uBAAe,IAAI,GAAG;AACtB,YAAI,CAAC,MAAM,SAAS,GAAG,EAAG,OAAM,KAAK,GAAG;AAAA,MAC1C;AAAA,IACF;AAAA,EACF;AAGA,MAAI,SAAS,UAAU,MAAM;AAC3B,UAAM,YAAY,qBAAqB,SAAS,SAAS,MAAM,SAAS,MAAM;AAC9E,eAAW,OAAO,WAAW;AAC3B,YAAM,OAAO,aAAa,GAAG;AAC7B,UAAI,CAAC,QAAQ,IAAI,IAAI,KAAK,CAAC,eAAe,IAAI,GAAG,GAAG;AAClD,uBAAe,IAAI,GAAG;AACtB,YAAI,CAAC,MAAM,SAAS,GAAG,EAAG,OAAM,KAAK,GAAG;AAAA,MAC1C;AAAA,IACF;AAAA,EACF;AAGA,SAAO,MAAM,SAAS,KAAK,YAAY,OAAO,UAAU;AAEtD,UAAM,YAAY,KAAK,IAAI,aAAa,WAAW,YAAY,MAAM,MAAM,MAAM;AACjF,UAAM,QAAkB,CAAC;AAEzB,WAAO,MAAM,SAAS,aAAa,MAAM,SAAS,GAAG;AACnD,YAAM,MAAM,MAAM,MAAM;AACxB,YAAM,OAAO,aAAa,GAAG;AAE7B,UAAI,QAAQ,IAAI,IAAI,EAAG;AACvB,cAAQ,IAAI,IAAI;AAGhB,UAAI,eAAe;AACjB,YAAI;AACF,gBAAM,OAAO,I
AAI,IAAI,GAAG,EAAE;AAC1B,cAAI,qBAAqB,MAAM,WAAW,GAAG;AAC3C,wBAAY,IAAI,GAAG;AACnB;AAAA,UACF;AAAA,QACF,QAAQ;AAEN;AAAA,QACF;AAAA,MACF;AAEA,YAAM,KAAK,GAAG;AAAA,IAChB;AAEA,QAAI,MAAM,WAAW,EAAG;AAGxB,UAAM,eAAe,MAAM,QAAQ,IAAI,MAAM,IAAI,SAAO,UAAU,KAAK,SAAS,QAAQ,SAAS,CAAC,CAAC;AAEnG,UAAM,eAAqE,CAAC;AAE5E,aAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,YAAM,SAAS,aAAa,CAAC;AAC7B,YAAM,MAAM,MAAM,CAAC;AACnB,kBAAY,IAAI,GAAG;AACnB,mBAAa,KAAK,EAAE,KAAK,IAAI,CAAC,CAAC,QAAQ,QAAQ,QAAQ,OAAO,CAAC;AAE/D,UAAI,CAAC,OAAQ;AAEb,aAAO,WAAW,cAAc,GAAG;AACnC,YAAM,KAAK,MAAM;AAGjB,YAAM,WAAW,qBAAqB,OAAO,MAAM,SAAS,MAAM;AAClE,iBAAW,QAAQ,UAAU;AAC3B,cAAM,OAAO,aAAa,IAAI;AAC9B,YAAI,CAAC,QAAQ,IAAI,IAAI,KAAK,CAAC,eAAe,IAAI,IAAI,GAAG;AACnD,yBAAe,IAAI,IAAI;AACvB,gBAAM,KAAK,IAAI;AAAA,QACjB;AAAA,MACF;AAAA,IACF;AAGA,QAAI,SAAS,YAAY;AACvB,cAAQ,WAAW;AAAA,QACjB,MAAM;AAAA,QACN,SAAS;AAAA,QACT,SAAS,YAAY;AAAA,QACrB,YAAY,eAAe;AAAA,QAC3B;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAGA,aAAW,OAAO,OAAO;AACvB,QAAI,CAAC,YAAY,IAAI,GAAG,GAAG;AACzB,kBAAY,IAAI,GAAG;AAAA,IACrB;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,gBAAgB,MAAM,KAAK,cAAc;AAAA,IACzC,aAAa,MAAM,KAAK,WAAW;AAAA,IACnC,aAAa,MAAM,KAAK,WAAW;AAAA,IACnC,SAAS,KAAK,OAAO,KAAK,IAAI,IAAI,aAAa,GAAG,IAAI;AAAA,EACxD;AACF;AAEA,SAAS,aAAa,KAAqB;AACzC,MAAI;AACF,UAAM,SAAS,IAAI,IAAI,GAAG;AAE1B,YAAQ,OAAO,SAAS,OAAO,SAAS,QAAQ,QAAQ,EAAE,IAAI,OAAO,QAAQ,YAAY;AAAA,EAC3F,QAAQ;AACN,WAAO,IAAI,YAAY;AAAA,EACzB;AACF;","names":[]}
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/network-guard.ts
|
|
4
|
+
// User-Agent header value used by safeFetch when the caller supplies none.
var DEFAULT_USER_AGENT = "AEO-Visibility-Bot/1.0";

// Hostname suffixes that are always rejected by the hostname blocklist.
var LOCAL_HOST_SUFFIXES = [
  ".localhost",
  ".local",
  ".localdomain",
  ".internal",
  ".home.arpa",
  ".test"
];

// Memo of DNS safety verdicts, keyed by hostname (value: boolean).
var dnsSafetyCache = /* @__PURE__ */ new Map();

// When truthy, loadDnsLookup returns this function instead of loading node:dns.
var dnsLookupOverride = null;
|
|
8
|
+
/**
 * Remove a single leading "[" and a single trailing "]" from a hostname,
 * which is how IPv6 literals appear inside URLs.
 *
 * @param {string} hostname - Hostname, possibly bracketed.
 * @returns {string} The hostname without the outer brackets.
 */
function stripIpv6Brackets(hostname) {
  const withoutOpening = hostname.replace(/^\[/, "");
  const withoutClosing = withoutOpening.replace(/\]$/, "");
  return withoutClosing;
}
|
|
11
|
+
/**
 * Canonicalise a hostname for comparison: trim surrounding whitespace,
 * lowercase, drop IPv6 brackets, then drop a leading "www." label.
 *
 * Trimming and lowercasing happen first so that inputs such as
 * "WWW.Example.com" or " www.example.com " also lose their "www." prefix;
 * previously the case-sensitive, anchored prefix match ran before
 * normalisation and missed them.
 *
 * @param {string} hostname - Raw hostname or domain.
 * @returns {string} Normalised hostname (may be empty).
 */
function normalizeHostname(hostname) {
  const cleaned = stripIpv6Brackets(hostname.trim().toLowerCase());
  return cleaned.replace(/^www\./, "");
}
|
|
14
|
+
/**
 * Parse a dotted-quad IPv4 literal.
 *
 * @param {string} hostname - Candidate string such as "192.168.0.1".
 * @returns {number[] | null} The four octets, or null when the input is not
 *   four groups of 1-3 digits or any group falls outside 0-255.
 */
function parseIpv4(hostname) {
  const dottedQuad = /^\d{1,3}(?:\.\d{1,3}){3}$/;
  if (!dottedQuad.test(hostname)) {
    return null;
  }

  const octets = [];
  for (const piece of hostname.split(".")) {
    const value = Number(piece);
    if (!Number.isInteger(value) || value < 0 || value > 255) {
      return null;
    }
    octets.push(value);
  }
  return octets;
}
|
|
19
|
+
/**
 * Decide whether an IPv4 address lies in a private, loopback, link-local,
 * documentation, benchmarking, multicast or otherwise reserved range.
 *
 * The 192.0.x.x check is limited to 192.0.0.0/24 and 192.0.2.0/24. Those are
 * the only special-purpose blocks inside 192.0.0.0/16; the rest of that /16
 * is ordinary publicly routed space and was previously rejected by mistake.
 *
 * @param {number[]} parts - The four octets of the address.
 * @returns {boolean} True when the address must not be fetched.
 */
function isPrivateOrReservedIpv4(parts) {
  const [a, b, c] = parts;
  // 0.0.0.0/8 "this network", 10.0.0.0/8 private, 127.0.0.0/8 loopback.
  if (a === 0 || a === 10 || a === 127) return true;
  // 100.64.0.0/10 shared address space (carrier-grade NAT).
  if (a === 100 && b >= 64 && b <= 127) return true;
  // 169.254.0.0/16 link-local.
  if (a === 169 && b === 254) return true;
  // 172.16.0.0/12 private.
  if (a === 172 && b >= 16 && b <= 31) return true;
  // 192.0.0.0/24 IETF protocol assignments and 192.0.2.0/24 TEST-NET-1.
  if (a === 192 && b === 0 && (c === 0 || c === 2)) return true;
  // 192.88.99.0/24 6to4 relay anycast.
  if (a === 192 && b === 88 && c === 99) return true;
  // 192.168.0.0/16 private.
  if (a === 192 && b === 168) return true;
  // 198.18.0.0/15 benchmarking.
  if (a === 198 && (b === 18 || b === 19)) return true;
  // 198.51.100.0/24 TEST-NET-2.
  if (a === 198 && b === 51 && c === 100) return true;
  // 203.0.113.0/24 TEST-NET-3.
  if (a === 203 && b === 0 && c === 113) return true;
  // 224.0.0.0/4 multicast and 240.0.0.0/4 reserved, including broadcast.
  if (a >= 224) return true;
  return false;
}
|
|
34
|
+
/**
 * Decide whether an IPv6 literal points at a loopback, unspecified,
 * unique-local, link-local/site-local, documentation, or IPv4-mapped
 * private address.
 *
 * IPv4-mapped addresses are recognised in both spellings: the dotted form
 * ("::ffff:127.0.0.1") and the hexadecimal form ("::ffff:7f00:1"). The
 * hexadecimal form is what `new URL(...).hostname` yields for a mapped
 * address, so checking only the dotted form let mapped loopback and private
 * literals through.
 *
 * @param {string} hostname - Hostname or address, with or without brackets.
 * @returns {boolean} True when the value is an IPv6 literal that must not be
 *   fetched; false for non-IPv6 input or an allowed address.
 */
function isBlockedIpv6(hostname) {
  const host = stripIpv6Brackets(hostname).toLowerCase();
  // Anything without a colon is not an IPv6 literal; leave it to other checks.
  if (!host.includes(":")) return false;

  // Unspecified address and loopback.
  if (host === "::" || host === "::1") return true;
  // fc00::/7 unique-local.
  if (host.startsWith("fc") || host.startsWith("fd")) return true;
  // fe80::/10 link-local (fe8x-febx).
  if (/^fe[89ab]/.test(host)) return true;
  // 2001:db8::/32 documentation range.
  if (host.startsWith("2001:db8")) return true;

  // IPv4-mapped, dotted spelling.
  const dotted = host.match(/^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/);
  if (dotted) {
    const parts = parseIpv4(dotted[1]);
    // An unparsable embedded IPv4 is treated as blocked.
    return parts ? isPrivateOrReservedIpv4(parts) : true;
  }

  // IPv4-mapped, hexadecimal spelling: two 16-bit groups hold the 4 octets.
  const hex = host.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
  if (hex) {
    const high = Number.parseInt(hex[1], 16);
    const low = Number.parseInt(hex[2], 16);
    return isPrivateOrReservedIpv4([high >> 8, high & 255, low >> 8, low & 255]);
  }

  return false;
}
|
|
48
|
+
/**
 * Decide whether a hostname is on the local/private blocklist: empty,
 * "localhost", the GCP metadata host, a local-only suffix, a private or
 * reserved IPv4 literal, or a blocked IPv6 literal.
 *
 * A single trailing dot (fully-qualified form, e.g. "localhost." or
 * "db.internal.") is removed before matching, so that spelling cannot
 * sidestep the exact-name and suffix checks.
 *
 * @param {string} hostname - Hostname as found in a URL.
 * @returns {boolean} True when the host must not be fetched.
 */
function isBlockedHostname(hostname) {
  const host = normalizeHostname(hostname).replace(/\.$/, "");
  if (!host) return true;
  if (host === "localhost" || host === "metadata.google.internal") return true;
  if (LOCAL_HOST_SUFFIXES.some((suffix) => host.endsWith(suffix))) return true;

  const ipv4 = parseIpv4(host);
  if (ipv4) return isPrivateOrReservedIpv4(ipv4);

  return isBlockedIpv6(host);
}
|
|
58
|
+
/**
 * Check whether a hostname is the given domain or one of its subdomains,
 * comparing both after normalisation.
 *
 * @param {string} hostname - Host being tested.
 * @param {string} domain - Site domain to compare against.
 * @returns {boolean} False when either value normalises to an empty string.
 */
function isSameSiteHost(hostname, domain) {
  const candidate = normalizeHostname(hostname);
  const site = normalizeHostname(domain);
  if (candidate && site) {
    // Exact match, or a label boundary followed by the site domain.
    return candidate === site || candidate.endsWith(`.${site}`);
  }
  return false;
}
|
|
64
|
+
/**
 * Syntactic safety check for a URL: it must parse, use http or https, not
 * name a blocked host, and, when an expected domain is given, belong to
 * that site.
 *
 * @param {string} url - URL to validate.
 * @param {string | null} [expectedDomain] - Site the URL must belong to;
 *   a falsy value skips the same-site check.
 * @returns {boolean} True when every check passes; false otherwise,
 *   including when parsing or a check throws.
 */
function isSafePublicUrl(url, expectedDomain) {
  try {
    const { protocol, hostname } = new URL(url);
    const isHttp = protocol === "http:" || protocol === "https:";
    if (!isHttp) {
      return false;
    }
    if (isBlockedHostname(hostname)) {
      return false;
    }
    const offSite = Boolean(expectedDomain) && !isSameSiteHost(hostname, expectedDomain);
    return !offSite;
  } catch {
    return false;
  }
}
|
|
75
|
+
/**
 * Detect a Node.js runtime by looking for `process.versions.node`.
 *
 * @returns {boolean} True when a global `process` with a Node version exists.
 */
function isNodeRuntime() {
  if (typeof process === "undefined") {
    return false;
  }
  return Boolean(process.versions?.node);
}
|
|
78
|
+
/**
 * Report whether safeFetch may follow redirects itself (fetch option
 * `redirect: "manual"`). This is currently the same as running on Node.js.
 *
 * @returns {boolean} The result of isNodeRuntime().
 */
function canUseManualRedirects() {
  const runningOnNode = isNodeRuntime();
  return runningOnNode;
}
|
|
81
|
+
/**
 * Tell whether an HTTP status code is one of the redirect codes that
 * safeFetch follows: 301, 302, 303, 307 or 308.
 *
 * @param {number} status - HTTP response status.
 * @returns {boolean} True for the five redirect codes listed above.
 */
function isRedirectStatus(status) {
  switch (status) {
    case 301:
    case 302:
    case 303:
    case 307:
    case 308:
      return true;
    default:
      return false;
  }
}
|
|
84
|
+
/**
 * Tell whether a hostname is an IP literal rather than a DNS name: either a
 * valid dotted-quad IPv4 address or anything containing ":".
 *
 * @param {string} hostname - Hostname, optionally bracketed.
 * @returns {boolean} True for IPv4 literals and colon-containing hosts.
 */
function isIpLiteral(hostname) {
  const bare = stripIpv6Brackets(hostname);
  if (parseIpv4(bare)) {
    return true;
  }
  return bare.includes(":");
}
|
|
88
|
+
/**
 * Classify one address returned by a DNS lookup.
 *
 * @param {string} address - IPv4 or IPv6 address text.
 * @returns {boolean} For a parsable IPv4 address, the verdict of
 *   isPrivateOrReservedIpv4; for anything else, the verdict of isBlockedIpv6.
 */
function isBlockedResolvedAddress(address) {
  const candidate = stripIpv6Brackets(address).toLowerCase();
  const octets = parseIpv4(candidate);
  return octets ? isPrivateOrReservedIpv4(octets) : isBlockedIpv6(candidate);
}
|
|
94
|
+
/**
 * Obtain the function used to resolve hostnames.
 *
 * @returns {Promise<((hostname: string) => Promise<Array<{address: string}>>) | null>}
 *   The override when one is set; otherwise a wrapper around
 *   `dns.lookup(hostname, { all: true, verbatim: true })` from
 *   "node:dns/promises"; or null outside Node.js or when that module
 *   cannot be imported.
 */
async function loadDnsLookup() {
  if (dnsLookupOverride) {
    return dnsLookupOverride;
  }
  if (!isNodeRuntime()) {
    return null;
  }

  // The specifier is held in a variable rather than written inline —
  // presumably so bundlers do not try to resolve it statically.
  const specifier = "node:dns/promises";
  let dnsModule;
  try {
    dnsModule = await import(
      /* @vite-ignore */
      specifier
    );
  } catch {
    return null;
  }
  return async (hostname) => dnsModule.lookup(hostname, { all: true, verbatim: true });
}
|
|
108
|
+
/**
 * Resolve a hostname and check that every address it maps to is public.
 *
 * The name that is resolved and cached is the host exactly as it will be
 * fetched (brackets removed, trimmed, lowercased). It is deliberately NOT
 * passed through normalizeHostname: that helper strips a leading "www.", so
 * "www.example.com" would be judged by the DNS records of "example.com".
 * A "www" record pointing at a private address would then go unnoticed.
 *
 * @param {string} hostname - Hostname taken from the URL about to be fetched.
 * @returns {Promise<boolean>} True when the host is empty or an IP literal
 *   (those are handled by the syntactic checks), when no DNS lookup is
 *   available, or when all resolved addresses are allowed. False when any
 *   address is blocked, when there are no records, or when the lookup fails.
 */
async function isDnsResolvedHostSafe(hostname) {
  const host = stripIpv6Brackets(hostname).trim().toLowerCase();
  if (!host || isIpLiteral(host)) return true;

  const cached = dnsSafetyCache.get(host);
  if (cached !== undefined) return cached;

  const lookup = await loadDnsLookup();
  // No resolver (e.g. non-Node runtime): the DNS check cannot be performed.
  if (!lookup) return true;

  try {
    const records = await lookup(host);
    const safe = records.length > 0 && records.every((record) => !isBlockedResolvedAddress(record.address));
    dnsSafetyCache.set(host, safe);
    return safe;
  } catch {
    // Fail closed: an unresolvable host is remembered as unsafe.
    dnsSafetyCache.set(host, false);
    return false;
  }
}
|
|
125
|
+
/**
 * Full safety check for a URL about to be fetched: the syntactic
 * isSafePublicUrl check followed by the DNS resolution check on its host.
 *
 * @param {string} url - URL to validate.
 * @param {string | null} [expectedDomain] - Site the URL must belong to.
 * @returns {Promise<boolean>} True only when both checks pass.
 */
async function isSafeFetchTarget(url, expectedDomain) {
  if (isSafePublicUrl(url, expectedDomain)) {
    try {
      const { hostname } = new URL(url);
      return await isDnsResolvedHostSafe(hostname);
    } catch {
      return false;
    }
  }
  return false;
}
|
|
134
|
+
/**
 * Fetch a URL while re-validating every hop with isSafeFetchTarget.
 *
 * When manual redirect handling is available, redirects are followed by
 * this function (up to `maxRedirects`) and each Location target is checked
 * before it is requested. Otherwise the request is made with
 * `redirect: "error"`, so any redirect fails the fetch.
 *
 * The body of each intermediate redirect response is cancelled before
 * moving on; previously those bodies were left unread, so their resources
 * were not released promptly.
 *
 * @param {string} url - URL to fetch.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=15000] - Per-request timeout.
 * @param {string} [options.userAgent] - User-Agent header value; defaults
 *   to DEFAULT_USER_AGENT.
 * @param {number} [options.maxRedirects=5] - Maximum redirects to follow.
 * @param {string | null} [options.expectedDomain=null] - Site every hop
 *   must belong to.
 * @returns {Promise<Response | null>} The final response, or null when a
 *   target is unsafe, the request fails or times out, a redirect has no
 *   usable Location, or the redirect limit is exceeded.
 */
async function safeFetch(url, options = {}) {
  const timeoutMs = options.timeoutMs ?? 15e3;
  const userAgent = options.userAgent ?? DEFAULT_USER_AGENT;
  const maxRedirects = options.maxRedirects ?? 5;
  const expectedDomain = options.expectedDomain ?? null;
  const manualRedirects = canUseManualRedirects();
  const redirectMode = manualRedirects ? "manual" : "error";

  let currentUrl = url;
  for (let redirects = 0; redirects <= maxRedirects; redirects++) {
    if (!(await isSafeFetchTarget(currentUrl, expectedDomain))) return null;

    let res;
    try {
      res = await fetch(currentUrl, {
        signal: AbortSignal.timeout(timeoutMs),
        headers: { "User-Agent": userAgent },
        redirect: redirectMode
      });
    } catch {
      // Network error, timeout, or a redirect under `redirect: "error"`.
      return null;
    }

    if (!manualRedirects) {
      return (await isSafeFetchTarget(res.url || currentUrl, expectedDomain)) ? res : null;
    }

    if (isRedirectStatus(res.status)) {
      const location = res.headers.get("location");
      try {
        // This response is never handed to the caller; release its body.
        await res.body?.cancel();
      } catch {
        // Best effort only: failing to cancel must not fail the fetch.
      }
      if (redirects === maxRedirects) return null;
      if (!location) return null;
      try {
        // Resolve relative Location values against the URL just requested.
        currentUrl = new URL(location, currentUrl).toString();
      } catch {
        return null;
      }
      continue;
    }

    return (await isSafeFetchTarget(res.url || currentUrl, expectedDomain)) ? res : null;
  }
  return null;
}
|
|
172
|
+
|
|
173
|
+
export {
|
|
174
|
+
normalizeHostname,
|
|
175
|
+
isSafePublicUrl,
|
|
176
|
+
isSafeFetchTarget,
|
|
177
|
+
safeFetch
|
|
178
|
+
};
|
|
179
|
+
//# sourceMappingURL=chunk-PYV5JVTC.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/network-guard.ts"],"sourcesContent":["export interface SafeFetchOptions {\n timeoutMs?: number;\n userAgent?: string;\n expectedDomain?: string | null;\n maxRedirects?: number;\n}\n\ninterface DnsLookupResult {\n address: string;\n family?: number;\n}\n\ntype DnsLookupFn = (hostname: string) => Promise<DnsLookupResult[]>;\n\nconst DEFAULT_USER_AGENT = 'AEO-Visibility-Bot/1.0';\nconst LOCAL_HOST_SUFFIXES = ['.localhost', '.local', '.localdomain', '.internal', '.home.arpa', '.test'];\nconst dnsSafetyCache = new Map<string, boolean>();\nlet dnsLookupOverride: DnsLookupFn | null = null;\n\nfunction stripIpv6Brackets(hostname: string): string {\n return hostname.replace(/^\\[/, '').replace(/\\]$/, '');\n}\n\nexport function normalizeHostname(hostname: string): string {\n return stripIpv6Brackets(hostname).replace(/^www\\./, '').trim().toLowerCase();\n}\n\nfunction parseIpv4(hostname: string): number[] | null {\n if (!/^\\d{1,3}(?:\\.\\d{1,3}){3}$/.test(hostname)) return null;\n const parts = hostname.split('.').map(Number);\n return parts.every(part => Number.isInteger(part) && part >= 0 && part <= 255) ? 
parts : null;\n}\n\nfunction isPrivateOrReservedIpv4(parts: number[]): boolean {\n const [a, b, c] = parts;\n\n if (a === 0 || a === 10 || a === 127) return true;\n if (a === 100 && b >= 64 && b <= 127) return true;\n if (a === 169 && b === 254) return true;\n if (a === 172 && b >= 16 && b <= 31) return true;\n if (a === 192 && b === 0) return true;\n if (a === 192 && b === 88 && c === 99) return true;\n if (a === 192 && b === 168) return true;\n if (a === 198 && (b === 18 || b === 19)) return true;\n if (a === 198 && b === 51 && c === 100) return true;\n if (a === 203 && b === 0 && c === 113) return true;\n if (a >= 224) return true;\n\n return false;\n}\n\nfunction isBlockedIpv6(hostname: string): boolean {\n const host = stripIpv6Brackets(hostname).toLowerCase();\n if (!host.includes(':')) return false;\n\n if (host === '::' || host === '::1') return true;\n if (host.startsWith('fc') || host.startsWith('fd')) return true;\n if (/^fe[89ab]/.test(host)) return true;\n if (host.startsWith('2001:db8')) return true;\n\n const mappedIpv4 = host.match(/^::ffff:(\\d{1,3}(?:\\.\\d{1,3}){3})$/);\n if (mappedIpv4) {\n const parts = parseIpv4(mappedIpv4[1]);\n return parts ? 
isPrivateOrReservedIpv4(parts) : true;\n }\n\n return false;\n}\n\nexport function isBlockedHostname(hostname: string): boolean {\n const host = normalizeHostname(hostname);\n if (!host) return true;\n\n if (host === 'localhost' || host === 'metadata.google.internal') return true;\n if (LOCAL_HOST_SUFFIXES.some(suffix => host.endsWith(suffix))) return true;\n\n const ipv4 = parseIpv4(host);\n if (ipv4) return isPrivateOrReservedIpv4(ipv4);\n\n if (isBlockedIpv6(host)) return true;\n\n return false;\n}\n\nexport function isSameSiteHost(hostname: string, domain: string): boolean {\n const host = normalizeHostname(hostname);\n const base = normalizeHostname(domain);\n if (!host || !base) return false;\n return host === base || host.endsWith(`.${base}`);\n}\n\nexport function isSafePublicUrl(url: string, expectedDomain?: string | null): boolean {\n try {\n const parsed = new URL(url);\n if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;\n if (isBlockedHostname(parsed.hostname)) return false;\n if (expectedDomain && !isSameSiteHost(parsed.hostname, expectedDomain)) return false;\n return true;\n } catch {\n return false;\n }\n}\n\nfunction isNodeRuntime(): boolean {\n return typeof process !== 'undefined' && !!process.versions?.node;\n}\n\nfunction canUseManualRedirects(): boolean {\n return isNodeRuntime();\n}\n\nfunction isRedirectStatus(status: number): boolean {\n return status === 301 || status === 302 || status === 303 || status === 307 || status === 308;\n}\n\nfunction isIpLiteral(hostname: string): boolean {\n const host = stripIpv6Brackets(hostname);\n return !!parseIpv4(host) || host.includes(':');\n}\n\nfunction isBlockedResolvedAddress(address: string): boolean {\n const normalizedAddress = stripIpv6Brackets(address).toLowerCase();\n const ipv4 = parseIpv4(normalizedAddress);\n if (ipv4) return isPrivateOrReservedIpv4(ipv4);\n return isBlockedIpv6(normalizedAddress);\n}\n\nasync function loadDnsLookup(): Promise<DnsLookupFn | 
null> {\n if (dnsLookupOverride) return dnsLookupOverride;\n if (!isNodeRuntime()) return null;\n\n try {\n const mod = 'node:dns/promises';\n const dns = await import(/* @vite-ignore */ mod);\n return async (hostname: string) => dns.lookup(hostname, { all: true, verbatim: true });\n } catch {\n return null;\n }\n}\n\nasync function isDnsResolvedHostSafe(hostname: string): Promise<boolean> {\n const host = normalizeHostname(hostname);\n if (!host || isIpLiteral(host)) return true;\n\n const cached = dnsSafetyCache.get(host);\n if (cached !== undefined) return cached;\n\n const lookup = await loadDnsLookup();\n if (!lookup) return true;\n\n try {\n const records = await lookup(host);\n const safe = records.length > 0 && records.every(record => !isBlockedResolvedAddress(record.address));\n dnsSafetyCache.set(host, safe);\n return safe;\n } catch {\n dnsSafetyCache.set(host, false);\n return false;\n }\n}\n\nexport async function isSafeFetchTarget(url: string, expectedDomain?: string | null): Promise<boolean> {\n if (!isSafePublicUrl(url, expectedDomain)) return false;\n\n try {\n const parsed = new URL(url);\n return await isDnsResolvedHostSafe(parsed.hostname);\n } catch {\n return false;\n }\n}\n\nexport function __setDnsLookupForTests(lookup: DnsLookupFn | null): void {\n dnsLookupOverride = lookup;\n dnsSafetyCache.clear();\n}\n\nexport function __clearDnsSafetyCacheForTests(): void {\n dnsSafetyCache.clear();\n}\n\nexport async function safeFetch(url: string, options: SafeFetchOptions = {}): Promise<Response | null> {\n const timeoutMs = options.timeoutMs ?? 15000;\n const userAgent = options.userAgent ?? DEFAULT_USER_AGENT;\n const maxRedirects = options.maxRedirects ?? 5;\n const expectedDomain = options.expectedDomain ?? null;\n const manualRedirects = canUseManualRedirects();\n const redirectMode = manualRedirects ? 
'manual' : 'error';\n\n let currentUrl = url;\n\n for (let redirects = 0; redirects <= maxRedirects; redirects++) {\n if (!(await isSafeFetchTarget(currentUrl, expectedDomain))) return null;\n\n let res: Response;\n try {\n res = await fetch(currentUrl, {\n signal: AbortSignal.timeout(timeoutMs),\n headers: { 'User-Agent': userAgent },\n redirect: redirectMode,\n });\n } catch {\n return null;\n }\n\n if (!manualRedirects) {\n return (await isSafeFetchTarget(res.url || currentUrl, expectedDomain)) ? res : null;\n }\n\n if (isRedirectStatus(res.status)) {\n if (redirects === maxRedirects) return null;\n const location = res.headers.get('location');\n if (!location) return null;\n try {\n currentUrl = new URL(location, currentUrl).toString();\n } catch {\n return null;\n }\n continue;\n }\n\n return (await isSafeFetchTarget(res.url || currentUrl, expectedDomain)) ? res : null;\n }\n\n return null;\n}\n"],"mappings":";;;AAcA,IAAM,qBAAqB;AAC3B,IAAM,sBAAsB,CAAC,cAAc,UAAU,gBAAgB,aAAa,cAAc,OAAO;AACvG,IAAM,iBAAiB,oBAAI,IAAqB;AAChD,IAAI,oBAAwC;AAE5C,SAAS,kBAAkB,UAA0B;AACnD,SAAO,SAAS,QAAQ,OAAO,EAAE,EAAE,QAAQ,OAAO,EAAE;AACtD;AAEO,SAAS,kBAAkB,UAA0B;AAC1D,SAAO,kBAAkB,QAAQ,EAAE,QAAQ,UAAU,EAAE,EAAE,KAAK,EAAE,YAAY;AAC9E;AAEA,SAAS,UAAU,UAAmC;AACpD,MAAI,CAAC,4BAA4B,KAAK,QAAQ,EAAG,QAAO;AACxD,QAAM,QAAQ,SAAS,MAAM,GAAG,EAAE,IAAI,MAAM;AAC5C,SAAO,MAAM,MAAM,UAAQ,OAAO,UAAU,IAAI,KAAK,QAAQ,KAAK,QAAQ,GAAG,IAAI,QAAQ;AAC3F;AAEA,SAAS,wBAAwB,OAA0B;AACzD,QAAM,CAAC,GAAG,GAAG,CAAC,IAAI;AAElB,MAAI,MAAM,KAAK,MAAM,MAAM,MAAM,IAAK,QAAO;AAC7C,MAAI,MAAM,OAAO,KAAK,MAAM,KAAK,IAAK,QAAO;AAC7C,MAAI,MAAM,OAAO,MAAM,IAAK,QAAO;AACnC,MAAI,MAAM,OAAO,KAAK,MAAM,KAAK,GAAI,QAAO;AAC5C,MAAI,MAAM,OAAO,MAAM,EAAG,QAAO;AACjC,MAAI,MAAM,OAAO,MAAM,MAAM,MAAM,GAAI,QAAO;AAC9C,MAAI,MAAM,OAAO,MAAM,IAAK,QAAO;AACnC,MAAI,MAAM,QAAQ,MAAM,MAAM,MAAM,IAAK,QAAO;AAChD,MAAI,MAAM,OAAO,MAAM,MAAM,MAAM,IAAK,QAAO;AAC/C,MAAI,MAAM,OAAO,MAAM,KAAK,MAAM,IAAK,QAAO;AAC9C,MAAI,KAAK,IAAK,QAAO;AAErB,SAAO;AACT;AAEA,SAAS,cAAc,UAA2B;AAChD,QAAM,OAAO,kBAAkB,QAAQ,EAAE
,YAAY;AACrD,MAAI,CAAC,KAAK,SAAS,GAAG,EAAG,QAAO;AAEhC,MAAI,SAAS,QAAQ,SAAS,MAAO,QAAO;AAC5C,MAAI,KAAK,WAAW,IAAI,KAAK,KAAK,WAAW,IAAI,EAAG,QAAO;AAC3D,MAAI,YAAY,KAAK,IAAI,EAAG,QAAO;AACnC,MAAI,KAAK,WAAW,UAAU,EAAG,QAAO;AAExC,QAAM,aAAa,KAAK,MAAM,oCAAoC;AAClE,MAAI,YAAY;AACd,UAAM,QAAQ,UAAU,WAAW,CAAC,CAAC;AACrC,WAAO,QAAQ,wBAAwB,KAAK,IAAI;AAAA,EAClD;AAEA,SAAO;AACT;AAEO,SAAS,kBAAkB,UAA2B;AAC3D,QAAM,OAAO,kBAAkB,QAAQ;AACvC,MAAI,CAAC,KAAM,QAAO;AAElB,MAAI,SAAS,eAAe,SAAS,2BAA4B,QAAO;AACxE,MAAI,oBAAoB,KAAK,YAAU,KAAK,SAAS,MAAM,CAAC,EAAG,QAAO;AAEtE,QAAM,OAAO,UAAU,IAAI;AAC3B,MAAI,KAAM,QAAO,wBAAwB,IAAI;AAE7C,MAAI,cAAc,IAAI,EAAG,QAAO;AAEhC,SAAO;AACT;AAEO,SAAS,eAAe,UAAkB,QAAyB;AACxE,QAAM,OAAO,kBAAkB,QAAQ;AACvC,QAAM,OAAO,kBAAkB,MAAM;AACrC,MAAI,CAAC,QAAQ,CAAC,KAAM,QAAO;AAC3B,SAAO,SAAS,QAAQ,KAAK,SAAS,IAAI,IAAI,EAAE;AAClD;AAEO,SAAS,gBAAgB,KAAa,gBAAyC;AACpF,MAAI;AACF,UAAM,SAAS,IAAI,IAAI,GAAG;AAC1B,QAAI,OAAO,aAAa,WAAW,OAAO,aAAa,SAAU,QAAO;AACxE,QAAI,kBAAkB,OAAO,QAAQ,EAAG,QAAO;AAC/C,QAAI,kBAAkB,CAAC,eAAe,OAAO,UAAU,cAAc,EAAG,QAAO;AAC/E,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,gBAAyB;AAChC,SAAO,OAAO,YAAY,eAAe,CAAC,CAAC,QAAQ,UAAU;AAC/D;AAEA,SAAS,wBAAiC;AACxC,SAAO,cAAc;AACvB;AAEA,SAAS,iBAAiB,QAAyB;AACjD,SAAO,WAAW,OAAO,WAAW,OAAO,WAAW,OAAO,WAAW,OAAO,WAAW;AAC5F;AAEA,SAAS,YAAY,UAA2B;AAC9C,QAAM,OAAO,kBAAkB,QAAQ;AACvC,SAAO,CAAC,CAAC,UAAU,IAAI,KAAK,KAAK,SAAS,GAAG;AAC/C;AAEA,SAAS,yBAAyB,SAA0B;AAC1D,QAAM,oBAAoB,kBAAkB,OAAO,EAAE,YAAY;AACjE,QAAM,OAAO,UAAU,iBAAiB;AACxC,MAAI,KAAM,QAAO,wBAAwB,IAAI;AAC7C,SAAO,cAAc,iBAAiB;AACxC;AAEA,eAAe,gBAA6C;AAC1D,MAAI,kBAAmB,QAAO;AAC9B,MAAI,CAAC,cAAc,EAAG,QAAO;AAE7B,MAAI;AACF,UAAM,MAAM;AACZ,UAAM,MAAM,MAAM;AAAA;AAAA,MAA0B;AAAA;AAC5C,WAAO,OAAO,aAAqB,IAAI,OAAO,UAAU,EAAE,KAAK,MAAM,UAAU,KAAK,CAAC;AAAA,EACvF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAe,sBAAsB,UAAoC;AACvE,QAAM,OAAO,kBAAkB,QAAQ;AACvC,MAAI,CAAC,QAAQ,YAAY,IAAI,EAAG,QAAO;AAEvC,QAAM,SAAS,eAAe,IAAI,IAAI;AACtC,MAAI,WAAW,OAAW,QAAO;AAEjC,QAAM,SAAS,MAAM,cAAc;AACnC,MAAI,CAAC,OAAQ,QAAO;AAEpB,MAAI;AACF,UAAM,UAAU,MAAM,OAAO,IAA
I;AACjC,UAAM,OAAO,QAAQ,SAAS,KAAK,QAAQ,MAAM,YAAU,CAAC,yBAAyB,OAAO,OAAO,CAAC;AACpG,mBAAe,IAAI,MAAM,IAAI;AAC7B,WAAO;AAAA,EACT,QAAQ;AACN,mBAAe,IAAI,MAAM,KAAK;AAC9B,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,kBAAkB,KAAa,gBAAkD;AACrG,MAAI,CAAC,gBAAgB,KAAK,cAAc,EAAG,QAAO;AAElD,MAAI;AACF,UAAM,SAAS,IAAI,IAAI,GAAG;AAC1B,WAAO,MAAM,sBAAsB,OAAO,QAAQ;AAAA,EACpD,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAWA,eAAsB,UAAU,KAAa,UAA4B,CAAC,GAA6B;AACrG,QAAM,YAAY,QAAQ,aAAa;AACvC,QAAM,YAAY,QAAQ,aAAa;AACvC,QAAM,eAAe,QAAQ,gBAAgB;AAC7C,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,QAAM,kBAAkB,sBAAsB;AAC9C,QAAM,eAAe,kBAAkB,WAAW;AAElD,MAAI,aAAa;AAEjB,WAAS,YAAY,GAAG,aAAa,cAAc,aAAa;AAC9D,QAAI,CAAE,MAAM,kBAAkB,YAAY,cAAc,EAAI,QAAO;AAEnE,QAAI;AACJ,QAAI;AACF,YAAM,MAAM,MAAM,YAAY;AAAA,QAC5B,QAAQ,YAAY,QAAQ,SAAS;AAAA,QACrC,SAAS,EAAE,cAAc,UAAU;AAAA,QACnC,UAAU;AAAA,MACZ,CAAC;AAAA,IACH,QAAQ;AACN,aAAO;AAAA,IACT;AAEA,QAAI,CAAC,iBAAiB;AACpB,aAAQ,MAAM,kBAAkB,IAAI,OAAO,YAAY,cAAc,IAAK,MAAM;AAAA,IAClF;AAEA,QAAI,iBAAiB,IAAI,MAAM,GAAG;AAChC,UAAI,cAAc,aAAc,QAAO;AACvC,YAAM,WAAW,IAAI,QAAQ,IAAI,UAAU;AAC3C,UAAI,CAAC,SAAU,QAAO;AACtB,UAAI;AACF,qBAAa,IAAI,IAAI,UAAU,UAAU,EAAE,SAAS;AAAA,MACtD,QAAQ;AACN,eAAO;AAAA,MACT;AACA;AAAA,IACF;AAEA,WAAQ,MAAM,kBAAkB,IAAI,OAAO,YAAY,cAAc,IAAK,MAAM;AAAA,EAClF;AAEA,SAAO;AACT;","names":[]}
|