@j0hanz/fetch-url-mcp 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.d.ts +2 -3
- package/dist/cli.js +1 -2
- package/dist/http/auth.d.ts +5 -3
- package/dist/http/auth.js +64 -15
- package/dist/http/health.d.ts +1 -2
- package/dist/http/health.js +7 -18
- package/dist/http/helpers.d.ts +3 -4
- package/dist/http/helpers.js +21 -21
- package/dist/http/native.d.ts +0 -1
- package/dist/http/native.js +34 -26
- package/dist/http/rate-limit.d.ts +0 -1
- package/dist/http/rate-limit.js +3 -4
- package/dist/index.d.ts +0 -1
- package/dist/index.js +17 -18
- package/dist/lib/{markdown-cleanup.d.ts → content.d.ts} +4 -2
- package/dist/lib/content.js +1356 -0
- package/dist/lib/core.d.ts +253 -0
- package/dist/lib/core.js +1228 -0
- package/dist/lib/{tool-pipeline.d.ts → fetch-pipeline.d.ts} +1 -2
- package/dist/lib/{tool-pipeline.js → fetch-pipeline.js} +10 -19
- package/dist/lib/{fetch.d.ts → http.d.ts} +7 -9
- package/dist/lib/{fetch.js → http.js} +706 -944
- package/dist/lib/mcp-tools.d.ts +28 -0
- package/dist/lib/mcp-tools.js +107 -0
- package/dist/lib/{tool-progress.d.ts → progress.d.ts} +0 -1
- package/dist/lib/{tool-progress.js → progress.js} +8 -13
- package/dist/lib/task-handlers.d.ts +5 -0
- package/dist/lib/{mcp.js → task-handlers.js} +56 -12
- package/dist/lib/url.d.ts +70 -0
- package/dist/lib/url.js +686 -0
- package/dist/lib/utils.d.ts +58 -0
- package/dist/lib/utils.js +304 -0
- package/dist/prompts/index.d.ts +0 -1
- package/dist/prompts/index.js +0 -1
- package/dist/resources/index.d.ts +0 -1
- package/dist/resources/index.js +74 -33
- package/dist/resources/instructions.d.ts +0 -1
- package/dist/resources/instructions.js +2 -2
- package/dist/schemas/inputs.d.ts +0 -1
- package/dist/schemas/inputs.js +2 -3
- package/dist/schemas/outputs.d.ts +0 -1
- package/dist/schemas/outputs.js +1 -2
- package/dist/server.d.ts +0 -1
- package/dist/server.js +16 -26
- package/dist/tasks/execution.d.ts +0 -1
- package/dist/tasks/execution.js +27 -24
- package/dist/tasks/manager.d.ts +7 -3
- package/dist/tasks/manager.js +53 -34
- package/dist/tasks/owner.d.ts +1 -2
- package/dist/tasks/owner.js +1 -2
- package/dist/tasks/tool-registry.d.ts +1 -2
- package/dist/tasks/tool-registry.js +0 -1
- package/dist/tools/fetch-url.d.ts +1 -2
- package/dist/tools/fetch-url.js +39 -31
- package/dist/tools/index.d.ts +0 -1
- package/dist/tools/index.js +0 -1
- package/dist/transform/html-translators.d.ts +1 -0
- package/dist/transform/html-translators.js +454 -0
- package/dist/transform/metadata.d.ts +4 -0
- package/dist/transform/metadata.js +183 -0
- package/dist/transform/transform.d.ts +0 -1
- package/dist/transform/transform.js +24 -641
- package/dist/transform/types.d.ts +9 -11
- package/dist/transform/types.js +0 -1
- package/dist/transform/worker-pool.d.ts +0 -1
- package/dist/transform/worker-pool.js +7 -16
- package/dist/transform/workers/shared.d.ts +0 -1
- package/dist/transform/workers/shared.js +1 -2
- package/dist/transform/workers/transform-child.d.ts +0 -1
- package/dist/transform/workers/transform-child.js +0 -1
- package/dist/transform/workers/transform-worker.d.ts +0 -1
- package/dist/transform/workers/transform-worker.js +0 -1
- package/package.json +6 -3
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/http/auth.d.ts.map +0 -1
- package/dist/http/auth.js.map +0 -1
- package/dist/http/health.d.ts.map +0 -1
- package/dist/http/health.js.map +0 -1
- package/dist/http/helpers.d.ts.map +0 -1
- package/dist/http/helpers.js.map +0 -1
- package/dist/http/native.d.ts.map +0 -1
- package/dist/http/native.js.map +0 -1
- package/dist/http/rate-limit.d.ts.map +0 -1
- package/dist/http/rate-limit.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/lib/cache.d.ts +0 -54
- package/dist/lib/cache.d.ts.map +0 -1
- package/dist/lib/cache.js +0 -264
- package/dist/lib/cache.js.map +0 -1
- package/dist/lib/config.d.ts +0 -143
- package/dist/lib/config.d.ts.map +0 -1
- package/dist/lib/config.js +0 -476
- package/dist/lib/config.js.map +0 -1
- package/dist/lib/crypto.d.ts +0 -4
- package/dist/lib/crypto.d.ts.map +0 -1
- package/dist/lib/crypto.js +0 -56
- package/dist/lib/crypto.js.map +0 -1
- package/dist/lib/dom-noise-removal.d.ts +0 -2
- package/dist/lib/dom-noise-removal.d.ts.map +0 -1
- package/dist/lib/dom-noise-removal.js +0 -494
- package/dist/lib/dom-noise-removal.js.map +0 -1
- package/dist/lib/download.d.ts +0 -4
- package/dist/lib/download.d.ts.map +0 -1
- package/dist/lib/download.js +0 -106
- package/dist/lib/download.js.map +0 -1
- package/dist/lib/errors.d.ts +0 -14
- package/dist/lib/errors.d.ts.map +0 -1
- package/dist/lib/errors.js +0 -72
- package/dist/lib/errors.js.map +0 -1
- package/dist/lib/fetch-content.d.ts +0 -5
- package/dist/lib/fetch-content.d.ts.map +0 -1
- package/dist/lib/fetch-content.js +0 -164
- package/dist/lib/fetch-content.js.map +0 -1
- package/dist/lib/fetch-stream.d.ts +0 -5
- package/dist/lib/fetch-stream.d.ts.map +0 -1
- package/dist/lib/fetch-stream.js +0 -29
- package/dist/lib/fetch-stream.js.map +0 -1
- package/dist/lib/fetch.d.ts.map +0 -1
- package/dist/lib/fetch.js.map +0 -1
- package/dist/lib/host-normalization.d.ts +0 -2
- package/dist/lib/host-normalization.d.ts.map +0 -1
- package/dist/lib/host-normalization.js +0 -91
- package/dist/lib/host-normalization.js.map +0 -1
- package/dist/lib/ip-blocklist.d.ts +0 -9
- package/dist/lib/ip-blocklist.d.ts.map +0 -1
- package/dist/lib/ip-blocklist.js +0 -79
- package/dist/lib/ip-blocklist.js.map +0 -1
- package/dist/lib/json.d.ts +0 -2
- package/dist/lib/json.d.ts.map +0 -1
- package/dist/lib/json.js +0 -45
- package/dist/lib/json.js.map +0 -1
- package/dist/lib/language-detection.d.ts +0 -3
- package/dist/lib/language-detection.d.ts.map +0 -1
- package/dist/lib/language-detection.js +0 -355
- package/dist/lib/language-detection.js.map +0 -1
- package/dist/lib/markdown-cleanup.d.ts.map +0 -1
- package/dist/lib/markdown-cleanup.js +0 -532
- package/dist/lib/markdown-cleanup.js.map +0 -1
- package/dist/lib/mcp-lifecycle.d.ts +0 -5
- package/dist/lib/mcp-lifecycle.d.ts.map +0 -1
- package/dist/lib/mcp-lifecycle.js +0 -51
- package/dist/lib/mcp-lifecycle.js.map +0 -1
- package/dist/lib/mcp-validator.d.ts +0 -17
- package/dist/lib/mcp-validator.d.ts.map +0 -1
- package/dist/lib/mcp-validator.js +0 -45
- package/dist/lib/mcp-validator.js.map +0 -1
- package/dist/lib/mcp.d.ts +0 -4
- package/dist/lib/mcp.d.ts.map +0 -1
- package/dist/lib/mcp.js.map +0 -1
- package/dist/lib/observability.d.ts +0 -23
- package/dist/lib/observability.d.ts.map +0 -1
- package/dist/lib/observability.js +0 -238
- package/dist/lib/observability.js.map +0 -1
- package/dist/lib/server-tuning.d.ts +0 -15
- package/dist/lib/server-tuning.d.ts.map +0 -1
- package/dist/lib/server-tuning.js +0 -49
- package/dist/lib/server-tuning.js.map +0 -1
- package/dist/lib/session.d.ts +0 -45
- package/dist/lib/session.d.ts.map +0 -1
- package/dist/lib/session.js +0 -263
- package/dist/lib/session.js.map +0 -1
- package/dist/lib/timer-utils.d.ts +0 -13
- package/dist/lib/timer-utils.d.ts.map +0 -1
- package/dist/lib/timer-utils.js +0 -44
- package/dist/lib/timer-utils.js.map +0 -1
- package/dist/lib/tool-errors.d.ts +0 -12
- package/dist/lib/tool-errors.d.ts.map +0 -1
- package/dist/lib/tool-errors.js +0 -55
- package/dist/lib/tool-errors.js.map +0 -1
- package/dist/lib/tool-pipeline.d.ts.map +0 -1
- package/dist/lib/tool-pipeline.js.map +0 -1
- package/dist/lib/tool-progress.d.ts.map +0 -1
- package/dist/lib/tool-progress.js.map +0 -1
- package/dist/lib/type-guards.d.ts +0 -16
- package/dist/lib/type-guards.d.ts.map +0 -1
- package/dist/lib/type-guards.js +0 -13
- package/dist/lib/type-guards.js.map +0 -1
- package/dist/prompts/index.d.ts.map +0 -1
- package/dist/prompts/index.js.map +0 -1
- package/dist/resources/index.d.ts.map +0 -1
- package/dist/resources/index.js.map +0 -1
- package/dist/resources/instructions.d.ts.map +0 -1
- package/dist/resources/instructions.js.map +0 -1
- package/dist/schemas/inputs.d.ts.map +0 -1
- package/dist/schemas/inputs.js.map +0 -1
- package/dist/schemas/outputs.d.ts.map +0 -1
- package/dist/schemas/outputs.js.map +0 -1
- package/dist/server.d.ts.map +0 -1
- package/dist/server.js.map +0 -1
- package/dist/tasks/execution.d.ts.map +0 -1
- package/dist/tasks/execution.js.map +0 -1
- package/dist/tasks/manager.d.ts.map +0 -1
- package/dist/tasks/manager.js.map +0 -1
- package/dist/tasks/owner.d.ts.map +0 -1
- package/dist/tasks/owner.js.map +0 -1
- package/dist/tasks/tool-registry.d.ts.map +0 -1
- package/dist/tasks/tool-registry.js.map +0 -1
- package/dist/tools/fetch-url.d.ts.map +0 -1
- package/dist/tools/fetch-url.js.map +0 -1
- package/dist/tools/index.d.ts.map +0 -1
- package/dist/tools/index.js.map +0 -1
- package/dist/transform/transform.d.ts.map +0 -1
- package/dist/transform/transform.js.map +0 -1
- package/dist/transform/types.d.ts.map +0 -1
- package/dist/transform/types.js.map +0 -1
- package/dist/transform/worker-pool.d.ts.map +0 -1
- package/dist/transform/worker-pool.js.map +0 -1
- package/dist/transform/workers/shared.d.ts.map +0 -1
- package/dist/transform/workers/shared.js.map +0 -1
- package/dist/transform/workers/transform-child.d.ts.map +0 -1
- package/dist/transform/workers/transform-child.js.map +0 -1
- package/dist/transform/workers/transform-worker.d.ts.map +0 -1
- package/dist/transform/workers/transform-worker.js.map +0 -1
package/dist/lib/url.js
ADDED
|
@@ -0,0 +1,686 @@
|
|
|
1
|
+
import dns from 'node:dns';
|
|
2
|
+
import { BlockList, isIP, SocketAddress } from 'node:net';
|
|
3
|
+
import { domainToASCII } from 'node:url';
|
|
4
|
+
import { logDebug } from './core.js';
|
|
5
|
+
import { createErrorWithCode, isError, isSystemError } from './utils.js';
|
|
6
|
+
/** Time budget, in milliseconds, handed to withTimeout for each DNS query issued in this file. */
const DNS_LOOKUP_TIMEOUT_MS = 5000;
|
|
7
|
+
/** Upper bound on the number of CNAME hops SafeDnsResolver.assertNoBlockedCname will follow. */
const CNAME_LOOKUP_MAX_DEPTH = 5;
|
|
8
|
+
/**
 * Canonicalizes a DNS name for comparison: surrounding whitespace is trimmed,
 * the name is lower-cased, and every trailing dot is removed.
 *
 * @param {string} value - Raw DNS name.
 * @returns {string} The canonical name (may be empty).
 */
function normalizeDnsName(value) {
    const lowered = value.trim().toLowerCase();
    let end = lowered.length;
    // Walk back over the run of trailing dots (root-label separators).
    while (end > 0 && lowered[end - 1] === '.') {
        end -= 1;
    }
    return lowered.slice(0, end);
}
|
|
12
|
+
/**
 * Builds a promise that rejects as soon as `signal` aborts, plus a cleanup
 * hook that detaches the abort listener again.
 *
 * @param {AbortSignal} signal - Signal to observe.
 * @param {() => boolean} isAbort - Decides, at abort time, which error factory to use.
 * @param {() => unknown} onTimeout - Produces the rejection reason when `isAbort()` is false.
 * @param {() => unknown} onAbort - Produces the rejection reason when `isAbort()` is true.
 * @returns {{ abortPromise: Promise<never>, cleanup: () => void }}
 */
function createSignalAbortRace(signal, isAbort, onTimeout, onAbort) {
    let listener = null;
    const abortPromise = new Promise((_resolve, reject) => {
        listener = () => {
            const reason = isAbort() ? onAbort() : onTimeout();
            reject(reason);
        };
        signal.addEventListener('abort', listener, { once: true });
        // An already-aborted signal never fires the event again, so reject right away.
        if (signal.aborted) {
            listener();
        }
    });
    const cleanup = () => {
        if (!listener) {
            return;
        }
        try {
            signal.removeEventListener('abort', listener);
        }
        catch {
            // Ignore listener cleanup failures; they are non-fatal by design.
        }
        finally {
            listener = null;
        }
    };
    return { abortPromise, cleanup };
}
|
|
35
|
+
/**
 * Awaits `promise`, but gives up early when either the timeout elapses or the
 * caller's signal aborts.
 *
 * @param {Promise<T>} promise - Work to wait for.
 * @param {number} timeoutMs - Timeout in milliseconds; values that are not > 0 disable the timeout.
 * @param {() => unknown} onTimeout - Builds the rejection reason used when the timeout fires.
 * @param {AbortSignal} [signal] - Optional caller cancellation signal.
 * @param {() => unknown} [onAbort] - Builds the rejection reason used when `signal` aborted;
 *   defaults to a plain `Error('Request was canceled')`.
 * @returns {Promise<T>} The settled value of `promise` if it wins the race.
 * @template T
 */
async function withTimeout(promise, timeoutMs, onTimeout, signal, onAbort) {
    let timeoutSignal;
    if (timeoutMs > 0) {
        timeoutSignal = AbortSignal.timeout(timeoutMs);
    }
    let raceSignal;
    if (signal && timeoutSignal) {
        raceSignal = AbortSignal.any([signal, timeoutSignal]);
    }
    else {
        raceSignal = signal ?? timeoutSignal;
    }
    // Nothing to race against: hand the promise back untouched.
    if (!raceSignal) {
        return promise;
    }
    const callerAborted = () => signal?.aborted === true;
    const abortFactory = onAbort ?? (() => new Error('Request was canceled'));
    const { abortPromise, cleanup } = createSignalAbortRace(raceSignal, callerAborted, onTimeout, abortFactory);
    try {
        return await Promise.race([promise, abortPromise]);
    }
    finally {
        // Always detach the abort listener, whichever side won.
        cleanup();
    }
}
|
|
50
|
+
/**
 * Creates the error used to signal caller-initiated cancellation.
 *
 * @returns {Error} An Error named `AbortError` with the message `Request was canceled`.
 */
function createAbortSignalError() {
    return Object.assign(new Error('Request was canceled'), { name: 'AbortError' });
}
|
|
55
|
+
/**
 * Resolves hostnames to IP addresses while rejecting blocked destinations.
 *
 * Collaborator shapes, as used by this class:
 * - `ipBlocker` exposes `isBlockedIp(ip)`.
 * - `security` exposes `blockedHosts` with a `has(hostname)` method.
 * - `blockedHostSuffixes` is an array of hostname suffix strings.
 */
export class SafeDnsResolver {
    ipBlocker;
    security;
    blockedHostSuffixes;
    constructor(ipBlocker, security, blockedHostSuffixes) {
        this.ipBlocker = ipBlocker;
        this.security = security;
        this.blockedHostSuffixes = blockedHostSuffixes;
    }
    /**
     * Validates `hostname` and returns the address to connect to.
     *
     * IP literals are checked and returned as-is; names are screened through
     * their CNAME chain and then resolved, and every returned address must
     * pass the block checks.
     *
     * @param {string} hostname - Hostname or IP literal (surrounding `[` `]` are stripped).
     * @param {AbortSignal} [signal] - Optional cancellation signal.
     * @returns {Promise<string>} The IP literal itself, or the first resolved address.
     * @throws {Error} With `code` `EINVAL`, `EBLOCKED`, `ETIMEOUT` or `ENODATA`, or an
     *   `AbortError` when `signal` is aborted.
     */
    async resolveAndValidate(hostname, signal) {
        const normalizedHostname = normalizeDnsName(hostname.replace(/^\[|\]$/g, ''));
        if (!normalizedHostname) {
            throw createErrorWithCode('Invalid hostname provided', 'EINVAL');
        }
        if (signal?.aborted) {
            throw createAbortSignalError();
        }
        if (this.isBlockedHostname(normalizedHostname)) {
            throw createErrorWithCode(`Blocked host: ${normalizedHostname}. Internal hosts are not allowed`, 'EBLOCKED');
        }
        if (isIP(normalizedHostname)) {
            // Cloud metadata endpoints stay blocked even when local fetch is allowed.
            if (isCloudMetadataHost(normalizedHostname)) {
                throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Cloud metadata endpoints are not allowed`, 'EBLOCKED');
            }
            // Use the shared helper, exactly like isBlockedHostname and the
            // resolved-address loop below, so the local-fetch override is
            // decided in one place for literals, names and DNS answers.
            if (!isLocalFetchAllowed() &&
                this.ipBlocker.isBlockedIp(normalizedHostname)) {
                throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
            }
            return normalizedHostname;
        }
        await this.assertNoBlockedCname(normalizedHostname, signal);
        const resultPromise = dns.promises.lookup(normalizedHostname, {
            all: true,
            order: 'verbatim',
        });
        const addresses = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS lookup timed out for ${normalizedHostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
        if (addresses.length === 0 || !addresses[0]) {
            throw createErrorWithCode(`No DNS results returned for ${normalizedHostname}`, 'ENODATA');
        }
        // Every answer must be acceptable, not just the one that is returned.
        for (const addr of addresses) {
            if (addr.family !== 4 && addr.family !== 6) {
                throw createErrorWithCode(`Invalid address family returned for ${normalizedHostname}`, 'EINVAL');
            }
            if (isCloudMetadataHost(addr.address)) {
                throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
            }
            if (!isLocalFetchAllowed() && this.ipBlocker.isBlockedIp(addr.address)) {
                throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
            }
        }
        return addresses[0].address;
    }
    /**
     * Reports whether a (normalized) hostname is blocked by name.
     *
     * Cloud metadata hosts are always blocked; the configured host set and
     * suffix list are only consulted when local fetch is not allowed.
     *
     * @param {string} hostname - Normalized hostname.
     * @returns {boolean}
     */
    isBlockedHostname(hostname) {
        if (isCloudMetadataHost(hostname))
            return true;
        if (isLocalFetchAllowed())
            return false;
        if (this.security.blockedHosts.has(hostname))
            return true;
        return this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
    }
    /**
     * Follows the CNAME chain of `hostname` (first alias of each hop, at most
     * CNAME_LOOKUP_MAX_DEPTH hops) and throws if any alias is a blocked hostname.
     *
     * @param {string} hostname - Normalized hostname to screen.
     * @param {AbortSignal} [signal] - Optional cancellation signal.
     * @returns {Promise<void>}
     * @throws {Error} With `code` `EBLOCKED` when a blocked alias is found.
     */
    async assertNoBlockedCname(hostname, signal) {
        let current = hostname;
        // Guards against CNAME loops.
        const seen = new Set();
        for (let depth = 0; depth < CNAME_LOOKUP_MAX_DEPTH; depth += 1) {
            if (!current || seen.has(current))
                return;
            seen.add(current);
            const cnames = await this.resolveCname(current, signal);
            if (cnames.length === 0)
                return;
            for (const cname of cnames) {
                if (this.isBlockedHostname(cname)) {
                    throw createErrorWithCode(`Blocked DNS CNAME detected for ${hostname}: ${cname}`, 'EBLOCKED');
                }
            }
            current = cnames[0] ?? '';
        }
    }
    /**
     * Looks up the CNAME records of `hostname` and returns them normalized.
     *
     * Only an `AbortError` is propagated. Every other failure (including a
     * timeout) yields an empty list so the caller can continue with the
     * address lookup; failures other than ENODATA/ENOTFOUND/ENODOMAIN are
     * logged at debug level first.
     *
     * @param {string} hostname - Name to query.
     * @param {AbortSignal} [signal] - Optional cancellation signal.
     * @returns {Promise<string[]>} Normalized, non-empty CNAME targets.
     */
    async resolveCname(hostname, signal) {
        try {
            const resultPromise = dns.promises.resolveCname(hostname);
            const cnames = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS CNAME lookup timed out for ${hostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
            return cnames
                .map((value) => normalizeDnsName(value))
                .filter((value) => value.length > 0);
        }
        catch (error) {
            if (isError(error) && error.name === 'AbortError') {
                throw error;
            }
            if (isSystemError(error) &&
                (error.code === 'ENODATA' ||
                    error.code === 'ENOTFOUND' ||
                    error.code === 'ENODOMAIN')) {
                return [];
            }
            logDebug('DNS CNAME lookup failed; continuing with address lookup', {
                hostname,
                ...(isSystemError(error) ? { code: error.code } : {}),
            });
            return [];
        }
    }
}
|
|
160
|
+
/**
 * Parses `url` and returns its hostname component.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} The hostname as reported by the WHATWG URL parser.
 * @throws {Error} With code `EINVAL` and message `Invalid URL` when parsing fails.
 */
function extractHostname(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        throw createErrorWithCode('Invalid URL', 'EINVAL');
    }
    return parsed.hostname;
}
|
|
168
|
+
/**
 * Creates a preflight function that extracts the hostname of a URL and runs it
 * through `dnsResolver.resolveAndValidate`.
 *
 * @param {{ resolveAndValidate(hostname: string, signal?: AbortSignal): Promise<string> }} dnsResolver
 * @returns {(url: string, signal?: AbortSignal) => Promise<string>} Async function resolving to
 *   whatever `resolveAndValidate` yields; rejects if the URL is invalid or validation fails.
 */
export function createDnsPreflight(dnsResolver) {
    const preflight = async (url, signal) => dnsResolver.resolveAndValidate(extractHostname(url), signal);
    return preflight;
}
|
|
174
|
+
/**
 * Normalizes a host value (possibly a comma-separated list, possibly carrying
 * a port or IPv6 brackets) down to a bare, lower-cased hostname or IP.
 *
 * Only the first comma-separated entry is considered. Parsing strategies are
 * tried in order: socket address, URL authority, bracketed IPv6. If none
 * produce a result, the value is treated as a bare IPv6 literal or as
 * `host[:port]`.
 *
 * @param {string} value - Raw host value.
 * @returns {string | null} Normalized host, or null when nothing usable remains.
 */
export function normalizeHost(value) {
    const lowered = trimToNull(value)?.toLowerCase();
    if (!lowered) {
        return null;
    }
    const first = takeFirstHostValue(lowered);
    if (!first) {
        return null;
    }
    // Each strategy yields a string or null; the first non-null result wins.
    const structured = normalizeSocketAddress(first) ??
        parseHostWithUrl(first) ??
        normalizeBracketedIpv6(first);
    if (structured !== null) {
        return structured;
    }
    // A bare IPv6 literal contains colons that must not be mistaken for a port separator.
    const bare = isIpV6Literal(first) ? first : stripPortIfPresent(first);
    return normalizeHostname(bare);
}
|
|
195
|
+
/**
 * Returns the first entry of a comma-separated host list, trimmed.
 *
 * @param {string} value - Possibly comma-separated host list.
 * @returns {string | null} The trimmed first entry, or null if it is empty.
 */
function takeFirstHostValue(value) {
    // indexOf + substring avoids allocating an array for long forwarded headers.
    const cut = value.indexOf(',');
    const head = cut < 0 ? value : value.substring(0, cut);
    if (head === '') {
        return null;
    }
    return trimToNull(head);
}
|
|
201
|
+
/**
 * Extracts the text between a leading `[` and the first `]`.
 *
 * @param {string} value - Candidate such as `[::1]:8080`.
 * @returns {string | null} The bracket contents, or null when the value does
 *   not start with `[` or has no closing `]`.
 */
function stripIpv6Brackets(value) {
    if (value[0] !== '[') {
        return null;
    }
    const close = value.indexOf(']');
    return close === -1 ? null : value.slice(1, close);
}
|
|
209
|
+
/**
 * Drops everything from the first `:` onward.
 *
 * @param {string} value - Host value, optionally followed by `:port`.
 * @returns {string} The part before the first colon, or the value unchanged.
 */
function stripPortIfPresent(value) {
    const colon = value.indexOf(':');
    return colon === -1 ? value : value.slice(0, colon);
}
|
|
215
|
+
/**
 * Tells whether `value` is an IPv6 address literal according to `net.isIP`.
 *
 * @param {string} value - Candidate address.
 * @returns {boolean}
 */
function isIpV6Literal(value) {
    const family = isIP(value);
    return family === 6;
}
|
|
218
|
+
/**
 * Tries to read `value` as a socket address via `SocketAddress.parse` and
 * returns its normalized address part.
 *
 * @param {string} value - Candidate such as `127.0.0.1:8080`.
 * @returns {string | null} Normalized address, or null when parsing yields nothing.
 */
function normalizeSocketAddress(value) {
    const parsed = SocketAddress.parse(value);
    return parsed ? normalizeHostname(parsed.address) : null;
}
|
|
224
|
+
/**
 * Normalizes the address inside a bracketed IPv6 literal.
 *
 * @param {string} value - Candidate such as `[::1]:443`.
 * @returns {string | null} Normalized bracket contents, or null when the value
 *   is not bracketed or the brackets are empty.
 */
function normalizeBracketedIpv6(value) {
    const inner = stripIpv6Brackets(value);
    return inner ? normalizeHostname(inner) : null;
}
|
|
230
|
+
/**
 * Normalizes a hostname or IP literal: trims, lower-cases, converts domain
 * names to their ASCII form and strips trailing dots.
 *
 * @param {string} value - Hostname or IP literal.
 * @returns {string | null} Normalized value, or null when the input is blank
 *   or `domainToASCII` yields an empty string.
 */
function normalizeHostname(value) {
    const lowered = trimToNull(value)?.toLowerCase();
    if (!lowered) {
        return null;
    }
    // IP literals are kept verbatim; only domain names go through IDNA conversion.
    const host = isIP(lowered) ? lowered : domainToASCII(lowered);
    return host ? stripTrailingDots(host) : null;
}
|
|
239
|
+
/**
 * Parses `value` as the authority of an `http://` URL and returns the
 * normalized hostname.
 *
 * @param {string} value - Candidate such as `example.com:8080`.
 * @returns {string | null} Normalized hostname, or null when the synthetic
 *   URL cannot be parsed or normalization fails.
 */
function parseHostWithUrl(value) {
    const candidate = `http://${value}`;
    if (URL.canParse(candidate)) {
        try {
            const { hostname } = new URL(candidate);
            return normalizeHostname(hostname);
        }
        catch {
            return null;
        }
    }
    return null;
}
|
|
251
|
+
/**
 * Trims `value` and maps an empty result to null.
 *
 * @param {string} value - Text to trim.
 * @returns {string | null} The trimmed text, or null if nothing is left.
 */
function trimToNull(value) {
    return value.trim() || null;
}
|
|
255
|
+
/**
 * Removes every trailing `.` from `value`.
 *
 * @param {string} value - Hostname, possibly fully qualified.
 * @returns {string} The value without trailing dots.
 */
function stripTrailingDots(value) {
    const DOT = 46; // char code of '.'
    let end = value.length;
    // Plain index scan instead of a regex; a single slice happens at the end.
    while (end > 0 && value.charCodeAt(end - 1) === DOT) {
        end -= 1;
    }
    return end === value.length ? value : value.slice(0, end);
}
|
|
262
|
+
/**
 * Joins the given parts with dots to form dotted-quad IPv4 text.
 *
 * @param {Array<number | string>} parts - The address parts, in order.
 * @returns {string}
 */
function buildIpv4(parts) {
    const SEPARATOR = '.';
    return parts.join(SEPARATOR);
}
|
|
265
|
+
/**
 * Stringifies each part and joins them with colons to form IPv6 text.
 *
 * @param {Array<number | string>} parts - The address groups, in order.
 * @returns {string}
 */
function buildIpv6(parts) {
    const groups = parts.map((part) => String(part));
    return groups.join(':');
}
|
|
268
|
+
// IPv6 network addresses in fully expanded textual form.
const IPV6_ZERO = buildIpv6([0, 0, 0, 0, 0, 0, 0, 0]); // unspecified address
const IPV6_LOOPBACK = buildIpv6([0, 0, 0, 0, 0, 0, 0, 1]); // loopback
const IPV6_64_FF9B = buildIpv6(['64', 'ff9b', 0, 0, 0, 0, 0, 0]); // NAT64 well-known prefix
const IPV6_64_FF9B_1 = buildIpv6(['64', 'ff9b', 1, 0, 0, 0, 0, 0]); // NAT64 local-use prefix
const IPV6_2001 = buildIpv6(['2001', 0, 0, 0, 0, 0, 0, 0]); // Teredo
const IPV6_2002 = buildIpv6(['2002', 0, 0, 0, 0, 0, 0, 0]); // 6to4
const IPV6_FC00 = buildIpv6(['fc00', 0, 0, 0, 0, 0, 0, 0]); // unique local
const IPV6_FE80 = buildIpv6(['fe80', 0, 0, 0, 0, 0, 0, 0]); // link-local
const IPV6_FF00 = buildIpv6(['ff00', 0, 0, 0, 0, 0, 0, 0]); // multicast
// Textual prefix of an IPv4-mapped IPv6 address written with a dotted-quad tail.
const IPV6_MAPPED_PREFIX = '::ffff:';
// Subnets loaded into the default BlockList, as [network, prefix length] rows
// grouped by address family. Order matches the order they are registered in.
const BLOCKED_SUBNETS = [
    ...[
        [buildIpv4([0, 0, 0, 0]), 8], // "this network"
        [buildIpv4([10, 0, 0, 0]), 8], // private
        [buildIpv4([100, 64, 0, 0]), 10], // carrier-grade NAT shared space
        [buildIpv4([127, 0, 0, 0]), 8], // loopback
        [buildIpv4([169, 254, 0, 0]), 16], // link-local
        [buildIpv4([172, 16, 0, 0]), 12], // private
        [buildIpv4([192, 168, 0, 0]), 16], // private
        [buildIpv4([224, 0, 0, 0]), 4], // multicast
        [buildIpv4([240, 0, 0, 0]), 4], // reserved
    ].map(([subnet, prefix]) => ({ subnet, prefix, family: 'ipv4' })),
    ...[
        [IPV6_ZERO, 128],
        [IPV6_LOOPBACK, 128],
        [IPV6_64_FF9B, 96],
        [IPV6_64_FF9B_1, 48],
        [IPV6_2001, 32],
        [IPV6_2002, 16],
        [IPV6_FC00, 7],
        [IPV6_FE80, 10],
        [IPV6_FF00, 8],
        ['::', 96], // IPv4-compatible addresses
    ].map(([subnet, prefix]) => ({ subnet, prefix, family: 'ipv6' })),
];
|
|
299
|
+
/**
 * Builds a fresh `net.BlockList` containing every entry of BLOCKED_SUBNETS.
 *
 * @returns {BlockList} A new block list; callers own the returned instance.
 */
export function createDefaultBlockList() {
    const blockList = new BlockList();
    BLOCKED_SUBNETS.forEach(({ subnet, prefix, family }) => {
        blockList.addSubnet(subnet, prefix, family);
    });
    return blockList;
}
|
|
306
|
+
/**
 * Pulls the dotted-quad IPv4 address out of an IPv4-mapped IPv6 literal
 * written as `::ffff:a.b.c.d`.
 *
 * @param {string} ip - Lower-cased IPv6 literal.
 * @returns {string | null} The embedded IPv4 address, or null when the prefix
 *   is missing or the tail is not a valid IPv4 literal.
 */
function extractMappedIpv4(ip) {
    if (ip.startsWith(IPV6_MAPPED_PREFIX)) {
        const tail = ip.substring(IPV6_MAPPED_PREFIX.length);
        if (isIP(tail) === 4) {
            return tail;
        }
    }
    return null;
}
|
|
312
|
+
/**
 * Removes an IPv6 zone identifier (`%...`) from an address.
 *
 * @param {string} ip - Address text, e.g. `fe80::1%eth0`.
 * @returns {string} The text before the first `%`; unchanged when there is no
 *   `%` or when `%` is the very first character.
 */
function stripIpv6ZoneId(ip) {
    const percentAt = ip.indexOf('%');
    return percentAt > 0 ? ip.slice(0, percentAt) : ip;
}
|
|
318
|
+
/**
 * Prepares an IP literal for a `BlockList` lookup.
 *
 * The input is trimmed, lower-cased and stripped of any IPv6 zone id. IPv4
 * literals and IPv4-mapped IPv6 literals (dotted-quad form) are reported as
 * `ipv4`; every other IPv6 literal as `ipv6`.
 *
 * @param {string} input - Raw address text.
 * @returns {{ ip: string, family: 'ipv4' | 'ipv6' } | null} The normalized
 *   address and family, or null when the input is not an IP literal.
 */
export function normalizeIpForBlockList(input) {
    const lowered = input.trim().toLowerCase();
    if (lowered === '') {
        return null;
    }
    const address = stripIpv6ZoneId(lowered);
    if (!address) {
        return null;
    }
    const version = isIP(address);
    if (version === 4) {
        return { ip: address, family: 'ipv4' };
    }
    if (version !== 6) {
        return null;
    }
    // Check mapped addresses against IPv4 rules.
    const embedded = extractMappedIpv4(address);
    if (embedded) {
        return { ip: embedded, family: 'ipv4' };
    }
    return { ip: address, family: 'ipv6' };
}
|
|
339
|
+
/**
 * Reads a named group from a pattern match result.
 *
 * @param {Record<string, string | undefined>} groups - Captured groups.
 * @param {string} key - Group name.
 * @returns {string | null} The captured text, or null when the group is
 *   missing or captured an empty string.
 */
function getPatternGroup(groups, key) {
    const captured = groups[key];
    return captured === undefined || captured === '' ? null : captured;
}
|
|
347
|
+
const GITHUB_BLOB_PATTERN = new URLPattern({
|
|
348
|
+
protocol: 'http{s}?',
|
|
349
|
+
hostname: '{:sub.}?github.com',
|
|
350
|
+
pathname: '/:owner/:repo/blob/:branch/:path+',
|
|
351
|
+
});
|
|
352
|
+
const GITHUB_GIST_PATTERN = new URLPattern({
|
|
353
|
+
protocol: 'http{s}?',
|
|
354
|
+
hostname: 'gist.github.com',
|
|
355
|
+
pathname: '/:user/:gistId',
|
|
356
|
+
});
|
|
357
|
+
const GITHUB_GIST_RAW_PATTERN = new URLPattern({
|
|
358
|
+
protocol: 'http{s}?',
|
|
359
|
+
hostname: 'gist.github.com',
|
|
360
|
+
pathname: '/:user/:gistId/raw/:filePath+',
|
|
361
|
+
});
|
|
362
|
+
const GITLAB_BLOB_PATTERNS = [
|
|
363
|
+
new URLPattern({
|
|
364
|
+
protocol: 'http{s}?',
|
|
365
|
+
hostname: 'gitlab.com',
|
|
366
|
+
pathname: '/:base+/-/blob/:branch/:path+',
|
|
367
|
+
}),
|
|
368
|
+
new URLPattern({
|
|
369
|
+
protocol: 'http{s}?',
|
|
370
|
+
hostname: '*:sub.gitlab.com',
|
|
371
|
+
pathname: '/:base+/-/blob/:branch/:path+',
|
|
372
|
+
}),
|
|
373
|
+
];
|
|
374
|
+
const BITBUCKET_SRC_PATTERN = new URLPattern({
|
|
375
|
+
protocol: 'http{s}?',
|
|
376
|
+
hostname: '{:sub.}?bitbucket.org',
|
|
377
|
+
pathname: '/:owner/:repo/src/:branch/:path+',
|
|
378
|
+
});
|
|
379
|
+
// Matches a Bitbucket raw-file path: bitbucket.org/<owner>/<repo>/raw/
const BITBUCKET_RAW_RE = /bitbucket\.org\/[^/]+\/[^/]+\/raw\//;
// Lower-case file extensions (leading dot included) looked up by
// RawUrlTransformer.isRawTextContentUrl.
const RAW_TEXT_EXTENSIONS = new Set([
    '.md', '.markdown', '.txt',
    '.json', '.yaml', '.yml',
    '.toml', '.xml', '.csv',
    '.rst', '.adoc', '.org',
]);
|
|
394
|
+
/**
 * Rewrites code-hosting "view" URLs (GitHub blobs and gists, GitLab blobs,
 * Bitbucket sources) into the matching raw-content URLs, and classifies URLs
 * that already point at raw text.
 *
 * `logger` must expose a `debug(message, meta)` method.
 */
export class RawUrlTransformer {
    logger;
    constructor(logger) {
        this.logger = logger;
    }
    /**
     * Converts `url` to its raw-content equivalent when a known platform
     * pattern matches. The query string is not carried over to the result.
     *
     * @param {string} url - URL to inspect.
     * @returns {{ url: string, transformed: boolean, platform?: string }}
     *   The rewritten URL with its platform, or the input URL with
     *   `transformed: false`.
     */
    transformToRawUrl(url) {
        const unchanged = { url, transformed: false };
        if (!url || this.isRawUrl(url)) {
            return unchanged;
        }
        let parsed;
        try {
            parsed = new URL(url);
        }
        catch {
            parsed = undefined;
        }
        // Unparseable input falls back to plain string splitting.
        const { base, hash } = parsed
            ? { base: parsed.origin + parsed.pathname, hash: parsed.hash }
            : this.splitParams(url);
        const match = this.tryTransformWithUrl(base, hash, parsed);
        if (!match) {
            return unchanged;
        }
        const { url: rawUrl, platform } = match;
        this.logger.debug('URL transformed to raw content URL', {
            platform,
            original: url.substring(0, 100),
            transformed: rawUrl.substring(0, 100),
        });
        return { url: rawUrl, transformed: true, platform };
    }
    /**
     * Reports whether a URL points at raw text: either a known raw-content
     * URL, or a path whose final extension is in RAW_TEXT_EXTENSIONS.
     *
     * @param {string} urlString - URL to classify.
     * @returns {boolean}
     */
    isRawTextContentUrl(urlString) {
        if (!urlString) {
            return false;
        }
        if (this.isRawUrl(urlString)) {
            return true;
        }
        let target;
        try {
            target = new URL(urlString).pathname;
        }
        catch {
            target = this.splitParams(urlString).base;
        }
        const lowered = target.toLowerCase();
        const dotAt = lowered.lastIndexOf('.');
        return dotAt !== -1 && RAW_TEXT_EXTENSIONS.has(lowered.slice(dotAt));
    }
    /**
     * Substring-based check for URLs that already address raw content.
     *
     * @param {string} url - URL text.
     * @returns {boolean}
     */
    isRawUrl(url) {
        const lower = url.toLowerCase();
        if (lower.includes('raw.githubusercontent.com')) {
            return true;
        }
        if (lower.includes('gist.githubusercontent.com')) {
            return true;
        }
        if (lower.includes('/-/raw/')) {
            return true;
        }
        return BITBUCKET_RAW_RE.test(lower);
    }
    /**
     * Splits URL text into the part before the first `?` or `#`, and the
     * fragment (from the first `#` to the end, `#` included).
     *
     * @param {string} urlString - URL text.
     * @returns {{ base: string, hash: string }}
     */
    splitParams(urlString) {
        const hashIndex = urlString.indexOf('#');
        const queryIndex = urlString.indexOf('?');
        const cutPoints = [queryIndex, hashIndex].filter((index) => index !== -1);
        const endIndex = cutPoints.length > 0 ? Math.min(...cutPoints) : urlString.length;
        const hash = hashIndex === -1 ? '' : urlString.slice(hashIndex);
        return { base: urlString.slice(0, endIndex), hash };
    }
    /**
     * Runs the platform-specific transforms in order (gist, GitHub blob,
     * GitLab, Bitbucket) against an http(s) URL.
     *
     * @param {string} base - URL without query string and fragment.
     * @param {string} hash - Fragment including the leading `#`, or ''.
     * @param {URL} [preParsed] - Already parsed URL, if the caller has one.
     * @returns {{ url: string, platform: string } | null}
     */
    tryTransformWithUrl(base, hash, preParsed) {
        let parsed = preParsed ?? null;
        if (!parsed) {
            try {
                parsed = new URL(base);
            }
            catch {
                // Ignore invalid URLs
            }
        }
        if (!parsed) {
            return null;
        }
        if (!['http:', 'https:'].includes(parsed.protocol)) {
            return null;
        }
        const { origin } = parsed;
        // Each transform yields an object or null; the first hit wins.
        return (this.transformGithubGist(base, hash) ??
            this.transformGithubBlob(base) ??
            this.transformGitLab(base, origin) ??
            this.transformBitbucket(base, origin));
    }
    /**
     * GitHub blob URL -> raw.githubusercontent.com URL.
     *
     * @param {string} url - URL without query string and fragment.
     * @returns {{ url: string, platform: 'github' } | null}
     */
    transformGithubBlob(url) {
        const match = GITHUB_BLOB_PATTERN.exec(url);
        if (!match) {
            return null;
        }
        const { groups } = match.pathname;
        const [owner, repo, branch, path] = ['owner', 'repo', 'branch', 'path'].map((key) => getPatternGroup(groups, key));
        if (!owner || !repo || !branch || !path) {
            return null;
        }
        return {
            url: `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${path}`,
            platform: 'github',
        };
    }
    /**
     * Gist URL -> gist.githubusercontent.com raw URL. A `#file-...` fragment on
     * a gist page selects a file; every `-` in it is turned into `.`.
     *
     * @param {string} url - URL without query string and fragment.
     * @param {string} hash - Fragment including the leading `#`, or ''.
     * @returns {{ url: string, platform: 'github-gist' } | null}
     */
    transformGithubGist(url, hash) {
        const rawMatch = GITHUB_GIST_RAW_PATTERN.exec(url);
        if (rawMatch) {
            const { groups } = rawMatch.pathname;
            const user = getPatternGroup(groups, 'user');
            const gistId = getPatternGroup(groups, 'gistId');
            const filePath = getPatternGroup(groups, 'filePath');
            if (!user || !gistId) {
                return null;
            }
            const suffix = filePath ? `/${filePath}` : '';
            return {
                url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${suffix}`,
                platform: 'github-gist',
            };
        }
        const pageMatch = GITHUB_GIST_PATTERN.exec(url);
        if (!pageMatch) {
            return null;
        }
        const { groups } = pageMatch.pathname;
        const user = getPatternGroup(groups, 'user');
        const gistId = getPatternGroup(groups, 'gistId');
        if (!user || !gistId) {
            return null;
        }
        const FILE_ANCHOR = '#file-';
        let filePath = '';
        if (hash.startsWith(FILE_ANCHOR)) {
            const filename = hash.slice(FILE_ANCHOR.length).replace(/-/g, '.');
            if (filename) {
                filePath = `/${filename}`;
            }
        }
        return {
            url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${filePath}`,
            platform: 'github-gist',
        };
    }
    /**
     * GitLab blob URL -> `/-/raw/` URL on the same origin.
     *
     * @param {string} url - URL without query string and fragment.
     * @param {string} origin - Origin of the parsed URL.
     * @returns {{ url: string, platform: 'gitlab' } | null}
     */
    transformGitLab(url, origin) {
        for (const pattern of GITLAB_BLOB_PATTERNS) {
            const match = pattern.exec(url);
            if (!match) {
                continue;
            }
            const { groups } = match.pathname;
            const [base, branch, path] = ['base', 'branch', 'path'].map((key) => getPatternGroup(groups, key));
            // The first matching pattern decides the outcome.
            if (!base || !branch || !path) {
                return null;
            }
            return {
                url: `${origin}/${base}/-/raw/${branch}/${path}`,
                platform: 'gitlab',
            };
        }
        return null;
    }
    /**
     * Bitbucket `src` URL -> `raw` URL on the same origin.
     *
     * @param {string} url - URL without query string and fragment.
     * @param {string} origin - Origin of the parsed URL.
     * @returns {{ url: string, platform: 'bitbucket' } | null}
     */
    transformBitbucket(url, origin) {
        const match = BITBUCKET_SRC_PATTERN.exec(url);
        if (!match) {
            return null;
        }
        const { groups } = match.pathname;
        const [owner, repo, branch, path] = ['owner', 'repo', 'branch', 'path'].map((key) => getPatternGroup(groups, key));
        if (!owner || !repo || !branch || !path) {
            return null;
        }
        return {
            url: `${origin}/${owner}/${repo}/raw/${branch}/${path}`,
            platform: 'bitbucket',
        };
    }
}
|
|
574
|
+
/** Error code that createValidationError passes to createErrorWithCode. */
export const VALIDATION_ERROR_CODE = 'VALIDATION_ERROR';
|
|
575
|
+
/**
 * Build an error tagged with VALIDATION_ERROR_CODE.
 *
 * @param {string} message - Human-readable description of the validation failure.
 * @returns {*} Whatever createErrorWithCode returns for the message and code.
 */
function createValidationError(message) {
    const validationError = createErrorWithCode(message, VALIDATION_ERROR_CODE);
    return validationError;
}
|
|
578
|
+
/**
 * Hostname suffixes treated as internal-only.
 * NOTE(review): presumably supplied to UrlNormalizer as `blockedHostSuffixes` — confirm at the call site.
 */
export const BLOCKED_HOST_SUFFIXES = ['.local', '.internal'];
|
|
579
|
+
/**
 * Hosts that serve cloud-provider instance metadata. Membership is tested by
 * isCloudMetadataHost, both on the raw lowercased hostname and on its
 * normalized IP form.
 */
const CLOUD_METADATA_HOSTS = new Set([
    // AWS / GCP / Azure
    '169.254.169.254',
    // GCP
    'metadata.google.internal',
    // Alibaba Cloud
    '100.100.100.200',
    // AWS IPv6
    'fd00:ec2::254',
]);
|
|
585
|
+
/**
 * Report whether a hostname refers to a known cloud metadata endpoint.
 *
 * The lowercased hostname is checked against CLOUD_METADATA_HOSTS directly,
 * then again after normalizeIpForBlockList has normalized it.
 *
 * @param {string} hostname - Hostname or IP literal to test.
 * @returns {boolean} True when the host is listed in CLOUD_METADATA_HOSTS.
 */
function isCloudMetadataHost(hostname) {
    const candidate = hostname.toLowerCase();
    if (CLOUD_METADATA_HOSTS.has(candidate)) {
        return true;
    }
    const parsed = normalizeIpForBlockList(candidate);
    if (parsed === null) {
        return false;
    }
    return CLOUD_METADATA_HOSTS.has(parsed.ip);
}
|
|
592
|
+
/**
 * Report whether the ALLOW_LOCAL_FETCH environment variable opts in to
 * fetching local/internal hosts.
 *
 * @returns {boolean} True only when the variable is exactly the string 'true'.
 */
function isLocalFetchAllowed() {
    const { ALLOW_LOCAL_FETCH: flag } = process.env;
    return flag === 'true';
}
|
|
595
|
+
/**
 * Decides whether a host or IP literal must be refused.
 *
 * Combines the cloud-metadata list, the configured `security.blockedHosts`
 * set and a block list obtained from createDefaultBlockList().
 */
export class IpBlocker {
    security;
    blockList = createDefaultBlockList();

    /**
     * @param {{blockedHosts: {has(host: string): boolean}}} security -
     *   Security settings; only `blockedHosts.has` is used by this class.
     */
    constructor(security) {
        this.security = security;
    }

    /**
     * Check a candidate host/IP against every blocking rule.
     *
     * Cloud metadata hosts are always blocked. When local fetching is allowed
     * (see isLocalFetchAllowed) nothing else is blocked.
     *
     * @param {string} candidate - Hostname or IP literal; trimmed and lowercased before checking.
     * @returns {boolean} True when the candidate must be refused.
     */
    isBlockedIp(candidate) {
        const host = candidate.trim().toLowerCase();
        if (isCloudMetadataHost(host)) {
            return true;
        }
        // Local-fetch opt-in and empty input both short-circuit to "not blocked".
        if (isLocalFetchAllowed() || !host) {
            return false;
        }
        if (this.security.blockedHosts.has(host)) {
            return true;
        }
        const parsed = normalizeIpForBlockList(host);
        if (!parsed) {
            // Not an IP literal the block list can evaluate.
            return false;
        }
        return this.blockList.check(parsed.ip, parsed.family);
    }
}
|
|
617
|
+
/**
 * Validates user-supplied URLs and produces a normalized form.
 *
 * Rejects over-long or unparsable input, non-http(s) protocols, embedded
 * credentials, cloud metadata hosts, configured blocked hosts / IPs and
 * blocked hostname suffixes. Every rejection is raised through
 * createValidationError.
 */
export class UrlNormalizer {
    constants;
    security;
    ipBlocker;
    blockedHostSuffixes;

    /**
     * @param {{maxUrlLength: number}} constants - Limits; only `maxUrlLength` is read.
     * @param {{blockedHosts: {has(host: string): boolean}}} security - Blocked-host settings.
     * @param {{isBlockedIp(host: string): boolean}} ipBlocker - IP/host block checker.
     * @param {string[]} blockedHostSuffixes - Hostname suffixes that are always refused.
     */
    constructor(constants, security, ipBlocker, blockedHostSuffixes) {
        this.constants = constants;
        this.security = security;
        this.ipBlocker = ipBlocker;
        this.blockedHostSuffixes = blockedHostSuffixes;
    }

    /**
     * Validate a URL string and return its normalized href and hostname.
     *
     * @param {string} urlString - Raw URL supplied by the caller.
     * @returns {{normalizedUrl: string, hostname: string}} The href after the
     *   hostname has been lowercased and stripped of trailing dots, plus that hostname.
     * @throws A validation error when any check fails.
     */
    normalize(urlString) {
        const candidate = this.requireTrimmedUrl(urlString);
        const { maxUrlLength } = this.constants;
        if (candidate.length > maxUrlLength) {
            throw createValidationError(`URL exceeds maximum length of ${maxUrlLength} characters`);
        }

        let parsed;
        try {
            parsed = new URL(candidate);
        }
        catch {
            throw createValidationError('Invalid URL format');
        }

        const allowedProtocols = ['http:', 'https:'];
        if (!allowedProtocols.includes(parsed.protocol)) {
            throw createValidationError(`Invalid protocol: ${parsed.protocol}. Only http: and https: are allowed`);
        }

        const hasCredentials = Boolean(parsed.username || parsed.password);
        if (hasCredentials) {
            throw createValidationError('URLs with embedded credentials are not allowed');
        }

        const hostname = this.normalizeHostname(parsed);
        this.assertHostnameAllowed(hostname);
        // Write the cleaned hostname back so href reflects the normalized host.
        parsed.hostname = hostname;
        return { normalizedUrl: parsed.href, hostname };
    }

    /**
     * Convenience wrapper around normalize() returning only the URL string.
     *
     * @param {string} urlString - Raw URL supplied by the caller.
     * @returns {string} The normalized href.
     * @throws A validation error when any check fails.
     */
    validateAndNormalize(urlString) {
        const { normalizedUrl } = this.normalize(urlString);
        return normalizedUrl;
    }

    /**
     * Ensure the input is a non-blank string and return it trimmed.
     *
     * @param {*} urlString - Value to check.
     * @returns {string} The trimmed string.
     * @throws A validation error when the value is not a non-empty string, or
     *   is only whitespace.
     */
    requireTrimmedUrl(urlString) {
        if (typeof urlString !== 'string' || urlString === '') {
            throw createValidationError('URL is required');
        }
        const trimmed = urlString.trim();
        if (trimmed === '') {
            throw createValidationError('URL cannot be empty');
        }
        return trimmed;
    }

    /**
     * Lowercase the URL's hostname and drop any trailing dots.
     *
     * @param {URL} url - Parsed URL.
     * @returns {string} The cleaned hostname.
     * @throws A validation error when nothing is left after cleaning.
     */
    normalizeHostname(url) {
        const lowered = url.hostname.toLowerCase();
        const hostname = lowered.replace(/\.+$/, '');
        if (hostname === '') {
            throw createValidationError('URL must have a valid hostname');
        }
        return hostname;
    }

    /**
     * Throw when the hostname is not permitted.
     *
     * Cloud metadata hosts and blocked suffixes are refused unconditionally;
     * the blocked-host and blocked-IP checks are skipped when local fetching
     * is allowed (see isLocalFetchAllowed).
     *
     * @param {string} hostname - Normalized hostname to check.
     * @throws A validation error describing which rule rejected the host.
     */
    assertHostnameAllowed(hostname) {
        if (isCloudMetadataHost(hostname)) {
            throw createValidationError(`Blocked host: ${hostname}. Cloud metadata endpoints are not allowed`);
        }

        const localRestricted = !isLocalFetchAllowed();
        if (localRestricted && this.security.blockedHosts.has(hostname)) {
            throw createValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`);
        }
        if (localRestricted && this.ipBlocker.isBlockedIp(hostname)) {
            throw createValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
        }

        const hasBlockedSuffix = this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
        if (hasBlockedSuffix) {
            throw createValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
        }
    }
}
|