@j0hanz/fetch-url-mcp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +570 -0
- package/dist/AGENTS.md +115 -0
- package/dist/assets/logo.svg +24837 -0
- package/dist/cache.d.ts +47 -0
- package/dist/cache.js +316 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +48 -0
- package/dist/config.d.ts +142 -0
- package/dist/config.js +480 -0
- package/dist/crypto.d.ts +3 -0
- package/dist/crypto.js +49 -0
- package/dist/dom-noise-removal.d.ts +1 -0
- package/dist/dom-noise-removal.js +488 -0
- package/dist/errors.d.ts +10 -0
- package/dist/errors.js +61 -0
- package/dist/fetch.d.ts +42 -0
- package/dist/fetch.js +1544 -0
- package/dist/host-normalization.d.ts +1 -0
- package/dist/host-normalization.js +77 -0
- package/dist/http-native.d.ts +5 -0
- package/dist/http-native.js +1313 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +91 -0
- package/dist/instructions.md +57 -0
- package/dist/ip-blocklist.d.ts +8 -0
- package/dist/ip-blocklist.js +74 -0
- package/dist/json.d.ts +1 -0
- package/dist/json.js +34 -0
- package/dist/language-detection.d.ts +2 -0
- package/dist/language-detection.js +364 -0
- package/dist/markdown-cleanup.d.ts +6 -0
- package/dist/markdown-cleanup.js +474 -0
- package/dist/mcp-validator.d.ts +15 -0
- package/dist/mcp-validator.js +44 -0
- package/dist/mcp.d.ts +4 -0
- package/dist/mcp.js +421 -0
- package/dist/observability.d.ts +21 -0
- package/dist/observability.js +211 -0
- package/dist/prompts.d.ts +7 -0
- package/dist/prompts.js +28 -0
- package/dist/resources.d.ts +8 -0
- package/dist/resources.js +216 -0
- package/dist/server-tuning.d.ts +13 -0
- package/dist/server-tuning.js +47 -0
- package/dist/server.d.ts +4 -0
- package/dist/server.js +174 -0
- package/dist/session.d.ts +39 -0
- package/dist/session.js +218 -0
- package/dist/tasks.d.ts +63 -0
- package/dist/tasks.js +327 -0
- package/dist/timer-utils.d.ts +5 -0
- package/dist/timer-utils.js +20 -0
- package/dist/tools.d.ts +135 -0
- package/dist/tools.js +812 -0
- package/dist/transform-types.d.ts +126 -0
- package/dist/transform-types.js +5 -0
- package/dist/transform.d.ts +36 -0
- package/dist/transform.js +2341 -0
- package/dist/type-guards.d.ts +14 -0
- package/dist/type-guards.js +13 -0
- package/dist/workers/transform-child.d.ts +1 -0
- package/dist/workers/transform-child.js +136 -0
- package/dist/workers/transform-worker.d.ts +1 -0
- package/dist/workers/transform-worker.js +128 -0
- package/package.json +91 -0
package/dist/config.js
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { findPackageJSON } from 'node:module';
|
|
3
|
+
import { isIP } from 'node:net';
|
|
4
|
+
import process from 'node:process';
|
|
5
|
+
import { domainToASCII } from 'node:url';
|
|
6
|
+
/**
 * Read the `version` field of the package.json that `findPackageJSON`
 * resolves for the given module URL.
 *
 * @param moduleUrl URL of the module whose package manifest should be located.
 * @returns The manifest's `version` string.
 * @throws Error when no manifest is found or its `version` is not a string.
 */
function readServerVersion(moduleUrl) {
    const manifestPath = findPackageJSON(moduleUrl);
    if (!manifestPath) {
        throw new Error('package.json not found');
    }
    const manifest = JSON.parse(readFileSync(manifestPath, 'utf-8'));
    if (typeof manifest.version === 'string') {
        return manifest.version;
    }
    throw new Error('package.json version is missing');
}
/** Version of this package, resolved once at module load. */
export const serverVersion = readServerVersion(import.meta.url);
|
|
17
|
+
// Log level names recognised by the LOG_LEVEL parser, plus a Set for O(1) membership checks.
const LOG_LEVELS = ['debug', 'info', 'warn', 'error'];
const ALLOWED_LOG_LEVELS = new Set(LOG_LEVELS);
// Default heading keywords, used when MARKDOWN_HEADING_KEYWORDS is not provided.
const DEFAULT_HEADING_KEYWORDS = [
    'overview', 'introduction', 'summary', 'conclusion',
    'prerequisites', 'requirements', 'installation', 'configuration',
    'usage', 'features', 'limitations', 'troubleshooting',
    'faq', 'resources', 'references', 'changelog',
    'license', 'acknowledgments', 'appendix',
];
|
|
40
|
+
const CONFIG_ERROR_NAME = 'ConfigError';
/** Error raised for invalid configuration values; carries its own `name`. */
class ConfigError extends Error {
    name = CONFIG_ERROR_NAME;
}
|
|
43
|
+
// Error codes treated as "no env file present".
const MISSING_ENV_FILE_CODES = ['ENOENT', 'ERR_ENV_FILE_NOT_FOUND'];
/**
 * Tell whether a thrown value is an object whose `code` is one of the
 * "env file missing" codes.
 *
 * @param error Any caught value.
 * @returns `true` only for non-null objects with a matching `code`.
 */
function isMissingEnvFileError(error) {
    if (typeof error !== 'object' || error === null) {
        return false;
    }
    return MISSING_ENV_FILE_CODES.includes(error.code);
}
|
|
49
|
+
/**
 * Call `process.loadEnvFile()` (no arguments) when the runtime provides it.
 * A missing env file is ignored; every other failure is rethrown.
 */
function loadEnvFileIfAvailable() {
    if (typeof process.loadEnvFile === 'function') {
        try {
            process.loadEnvFile();
        }
        catch (cause) {
            if (!isMissingEnvFileError(cause)) {
                throw cause;
            }
        }
    }
}
// Load the env file before anything below reads from `env`.
loadEnvFileIfAvailable();
const env = process.env;
|
|
63
|
+
/**
 * Join the given parts with dots to form a dotted address string.
 *
 * @param parts Array of address parts (e.g. four octets).
 * @returns The parts joined by `.`.
 */
function buildIpv4(parts) {
    const separator = '.';
    const dotted = parts.join(separator);
    return dotted;
}
|
|
66
|
+
/**
 * Remove every trailing `.` from a string.
 *
 * @param value Input string.
 * @returns `value` without its run of trailing dots (may be empty).
 */
function stripTrailingDots(value) {
    let end = value.length;
    // Walk back over the trailing dots, then cut once.
    while (end > 0 && value[end - 1] === '.') {
        end -= 1;
    }
    return value.slice(0, end);
}
|
|
72
|
+
/**
 * Wrap a hostname in square brackets when it contains a colon and is not
 * already bracketed, so it can be embedded in a URL authority.
 *
 * @param hostname Host string.
 * @returns The bracketed host, or the input unchanged.
 */
function formatHostForUrl(hostname) {
    const needsBrackets = hostname.includes(':') && !hostname.startsWith('[');
    return needsBrackets ? `[${hostname}]` : hostname;
}
|
|
77
|
+
/**
 * Normalize a hostname for comparison: trim, lowercase, and either return
 * the IP literal as-is or convert the domain to its ASCII form with trailing
 * dots removed.
 *
 * A bracketed IPv6 literal (`[::1]`, the form `URL.hostname` produces) is
 * unwrapped to the bare address so it matches the unbracketed form used for
 * bare IPv6 input and for the `'::1'` blocked-host entry.
 *
 * @param value Raw hostname.
 * @returns The normalized hostname, or `null` when the input is empty or
 *          cannot be converted to ASCII.
 */
function normalizeHostname(value) {
    const trimmed = value.trim();
    if (!trimmed)
        return null;
    const lowered = trimmed.toLowerCase();
    // `isIP('[::1]')` is 0, so brackets must be removed before the IP check.
    if (lowered.startsWith('[') && lowered.endsWith(']')) {
        const inner = lowered.slice(1, -1);
        if (isIP(inner) === 6)
            return inner;
    }
    // A string accepted by isIP never ends with a dot, so it is returned unchanged.
    if (isIP(lowered))
        return lowered;
    let ascii = domainToASCII(lowered);
    if (!ascii)
        return null;
    // Drop trailing root dots ("example.com." -> "example.com").
    while (ascii.endsWith('.'))
        ascii = ascii.slice(0, -1);
    return ascii;
}
|
|
88
|
+
/**
 * Extract and normalize the host from one allow-list entry. Accepted shapes:
 * a full URL, `host[:port]`, `[ipv6]:port`, a bare IPv6 address, or a bare
 * hostname.
 *
 * @param value Raw entry text.
 * @returns The normalized host, or `null` when no host can be extracted.
 */
function normalizeHostValue(value) {
    const raw = value.trim();
    if (!raw)
        return null;
    // Entry carries its own scheme: it must parse as a URL.
    if (raw.includes('://')) {
        return URL.canParse(raw) ? normalizeHostname(new URL(raw).hostname) : null;
    }
    // Otherwise try it as the authority of an http URL (covers host[:port]).
    const prefixed = `http://${raw}`;
    if (URL.canParse(prefixed)) {
        return normalizeHostname(new URL(prefixed).hostname);
    }
    const lowered = raw.toLowerCase();
    // Bracketed IPv6, optionally followed by a port.
    if (lowered.startsWith('[')) {
        const closing = lowered.indexOf(']');
        return closing === -1 ? null : normalizeHostname(lowered.slice(1, closing));
    }
    // Bare IPv6 address.
    if (isIP(lowered) === 6) {
        return stripTrailingDots(lowered);
    }
    // host or host:port — more than one colon is rejected.
    const segments = lowered.split(':');
    switch (segments.length) {
        case 1:
            return normalizeHostname(lowered);
        case 2:
            return segments[0] ? normalizeHostname(segments[0]) : null;
        default:
            return null;
    }
}
|
|
123
|
+
// Whole-string check: optional sign followed by decimal digits only.
const INTEGER_TEXT_PATTERN = /^[+-]?\d+$/;
/**
 * Parse an environment value as a base-10 integer within optional bounds.
 *
 * The whole (trimmed) string must be an integer: inputs such as `"10abc"`
 * or `"1.5"`, which `Number.parseInt` would partially accept, are rejected,
 * as are values outside the safe-integer range.
 *
 * @param envValue Raw string (may be undefined or empty).
 * @param min Inclusive lower bound, or undefined for none.
 * @param max Inclusive upper bound, or undefined for none.
 * @returns The parsed integer, or `null` when missing, malformed or out of range.
 */
function parseIntegerValue(envValue, min, max) {
    if (!envValue)
        return null;
    const text = envValue.trim();
    if (!INTEGER_TEXT_PATTERN.test(text))
        return null;
    const parsed = Number.parseInt(text, 10);
    if (!Number.isSafeInteger(parsed))
        return null;
    if (min !== undefined && parsed < min)
        return null;
    if (max !== undefined && parsed > max)
        return null;
    return parsed;
}
/**
 * Like `parseIntegerValue`, but reports an unusable value as `undefined`.
 *
 * @returns The parsed integer or `undefined`.
 */
function parseOptionalInteger(envValue, min, max) {
    const parsed = parseIntegerValue(envValue, min, max);
    return parsed ?? undefined;
}
/**
 * Like `parseIntegerValue`, but substitutes `defaultValue` for an unusable value.
 *
 * @returns The parsed integer or `defaultValue`.
 */
function parseInteger(envValue, defaultValue, min, max) {
    return parseIntegerValue(envValue, min, max) ?? defaultValue;
}
|
|
142
|
+
// Spellings that switch a flag off (compared after trim + lowercase).
const FALSE_ENV_TOKENS = new Set(['false', '0', 'no', 'off']);
/**
 * Parse a boolean environment flag.
 *
 * Missing, empty or whitespace-only values yield `defaultValue`. The tokens
 * `false`, `0`, `no` and `off` (case-insensitive) yield `false`; any other
 * non-empty value yields `true`. Previously only the literal `false`
 * disabled a flag, so e.g. `ALLOW_REMOTE=0` evaluated to `true`.
 *
 * @param envValue Raw string (may be undefined).
 * @param defaultValue Result used when no value is provided.
 * @returns The parsed flag.
 */
function parseBoolean(envValue, defaultValue) {
    const normalized = envValue?.trim().toLowerCase();
    if (!normalized)
        return defaultValue;
    return !FALSE_ENV_TOKENS.has(normalized);
}
|
|
147
|
+
/**
 * Split a whitespace- and/or comma-separated value into its non-empty entries.
 *
 * @param envValue Raw string (may be undefined or empty).
 * @returns Array of entries in input order; empty when nothing is present.
 */
function parseList(envValue) {
    if (!envValue) {
        return [];
    }
    // Each entry is a maximal run of characters that are neither whitespace nor commas.
    const entries = envValue.match(/[^\s,]+/g);
    return entries ?? [];
}
/**
 * Parse a list, substituting a copy of `defaultValue` when the parsed list is empty.
 *
 * @param envValue Raw string (may be undefined or empty).
 * @param defaultValue Fallback entries; never returned by reference.
 * @returns The parsed entries or a fresh copy of the fallback.
 */
function parseListOrDefault(envValue, defaultValue) {
    const entries = parseList(envValue);
    if (entries.length > 0) {
        return entries;
    }
    return [...defaultValue];
}
|
|
159
|
+
// Values (compared case-insensitively) that mean "no explicit locale".
const UNSET_LOCALE_TOKENS = ['system', 'default'];
/**
 * Normalize a locale setting.
 *
 * @param value Raw locale string (may be undefined).
 * @returns The trimmed locale, or `undefined` when the value is missing,
 *          blank, or one of `system` / `default`.
 */
function normalizeLocale(value) {
    const candidate = value ? value.trim() : '';
    if (candidate === '') {
        return undefined;
    }
    return UNSET_LOCALE_TOKENS.includes(candidate.toLowerCase()) ? undefined : candidate;
}
|
|
170
|
+
/**
 * Check membership in the allowed log-level set.
 *
 * @param value Candidate level.
 * @returns `true` when `value` is in `ALLOWED_LOG_LEVELS`.
 */
function isLogLevel(value) {
    const known = ALLOWED_LOG_LEVELS.has(value);
    return known;
}
/**
 * Parse a log level, case-insensitively.
 *
 * @param envValue Raw string (may be undefined).
 * @returns The lowercased level when recognised, otherwise `'info'`.
 */
function parseLogLevel(envValue) {
    const fallback = 'info';
    if (!envValue) {
        return fallback;
    }
    const candidate = envValue.toLowerCase();
    if (isLogLevel(candidate)) {
        return candidate;
    }
    return fallback;
}
|
|
179
|
+
/**
 * Parse the transform worker mode.
 *
 * @param envValue Raw string (may be undefined).
 * @returns `'process'` for `process` or `fork` (trimmed, case-insensitive);
 *          `'threads'` for everything else, including a missing value.
 */
function parseTransformWorkerMode(envValue) {
    const mode = envValue ? envValue.trim().toLowerCase() : '';
    const wantsProcess = mode === 'process' || mode === 'fork';
    return wantsProcess ? 'process' : 'threads';
}
|
|
187
|
+
/**
 * Parse the listening port.
 *
 * @param envValue Raw string (may be undefined).
 * @returns `0` when the trimmed value is exactly `"0"`; otherwise the parsed
 *          integer in 1024..65535, or 3000 when missing or out of range.
 */
function parsePort(envValue) {
    const isZero = envValue?.trim() === '0';
    return isZero ? 0 : parseInteger(envValue, 3000, 1024, 65535);
}
|
|
192
|
+
/**
 * Parse an optional URL setting.
 *
 * @param value Raw string (may be undefined or empty).
 * @param name Setting name, used in the error message.
 * @returns A `URL`, or `undefined` when no value is provided.
 * @throws ConfigError when the value is present but not a parseable URL.
 */
function parseUrlEnv(value, name) {
    if (!value) {
        return undefined;
    }
    if (URL.canParse(value)) {
        return new URL(value);
    }
    throw new ConfigError(`Invalid ${name} value: ${value}`);
}
/**
 * Look up `name` in `env` and parse it as an optional URL.
 *
 * @param name Environment variable name.
 * @returns A `URL` or `undefined`; throws like `parseUrlEnv`.
 */
function readUrlEnv(name) {
    const raw = env[name];
    return parseUrlEnv(raw, name);
}
|
|
203
|
+
/**
 * Build the set of allowed hosts from a list-valued setting.
 * Entries that cannot be normalized are dropped.
 *
 * @param envValue Raw list string (may be undefined).
 * @returns Set of normalized hosts in first-seen order.
 */
function parseAllowedHosts(envValue) {
    const normalizedHosts = parseList(envValue)
        .map((entry) => normalizeHostValue(entry))
        .filter((host) => Boolean(host));
    return new Set(normalizedHosts);
}
|
|
212
|
+
/**
 * Normalize an optional file-path setting.
 *
 * @param value Raw string (may be undefined).
 * @returns The trimmed path, or `undefined` when missing or blank.
 */
function readOptionalFilePath(value) {
    const path = value ? value.trim() : '';
    return path === '' ? undefined : path;
}
|
|
218
|
+
// --- Size limits ---
const MAX_HTML_BYTES = 10 * 1024 ** 2; // 10 MB
// 0..MAX_HTML_BYTES, default 0.
const MAX_INLINE_CONTENT_CHARS = parseInteger(env.MAX_INLINE_CONTENT_CHARS, 0, 0, MAX_HTML_BYTES);
// --- Session defaults ---
const DEFAULT_SESSION_TTL_MS = 30 * 60_000; // 30 minutes
const DEFAULT_SESSION_INIT_TIMEOUT_MS = 10_000;
const DEFAULT_MAX_SESSIONS = 200;
const DEFAULT_USER_AGENT = 'fetch-url-mcp/' + serverVersion;
// --- Timeouts ---
const DEFAULT_TOOL_TIMEOUT_PADDING_MS = 5_000;
const DEFAULT_TRANSFORM_TIMEOUT_MS = 30_000;
// 1000..60000 ms, default 15000.
const DEFAULT_FETCH_TIMEOUT_MS = parseInteger(env.FETCH_TIMEOUT_MS, 15_000, 1_000, 60_000);
// Tool budget = fetch + transform + padding.
const DEFAULT_TOOL_TIMEOUT_MS = DEFAULT_FETCH_TIMEOUT_MS + DEFAULT_TRANSFORM_TIMEOUT_MS + DEFAULT_TOOL_TIMEOUT_PADDING_MS;
// --- Task limits (minimum 1 each) ---
const DEFAULT_TASKS_MAX_TOTAL = parseInteger(env.TASKS_MAX_TOTAL, 5_000, 1);
const DEFAULT_TASKS_MAX_PER_OWNER = parseInteger(env.TASKS_MAX_PER_OWNER, 1_000, 1);
// The per-owner cap can never exceed the global cap.
const RESOLVED_TASKS_MAX_PER_OWNER = Math.min(DEFAULT_TASKS_MAX_PER_OWNER, DEFAULT_TASKS_MAX_TOTAL);
|
|
233
|
+
/**
 * Collect the optional transform-worker resource limits from `env`.
 * Each limit must parse as an integer >= 1 to be included.
 *
 * @returns An object holding only the limits that were set, or `undefined`
 *          when none were.
 */
function resolveWorkerResourceLimits() {
    // [result key, raw env value], in the order the keys appear in the result.
    const sources = [
        ['maxOldGenerationSizeMb', env.TRANSFORM_WORKER_MAX_OLD_GENERATION_MB],
        ['maxYoungGenerationSizeMb', env.TRANSFORM_WORKER_MAX_YOUNG_GENERATION_MB],
        ['codeRangeSizeMb', env.TRANSFORM_WORKER_CODE_RANGE_MB],
        ['stackSizeMb', env.TRANSFORM_WORKER_STACK_MB],
    ];
    const limits = {};
    for (const [key, raw] of sources) {
        const parsed = parseOptionalInteger(raw, 1);
        if (parsed !== undefined) {
            limits[key] = parsed;
        }
    }
    return Object.keys(limits).length > 0 ? limits : undefined;
}
|
|
258
|
+
/**
 * Read the OAuth endpoint URLs from `env` and derive the resource URL.
 *
 * @param baseUrl Server base URL; `/mcp` is resolved against it.
 * @returns Object with the six optional endpoint URLs and `resourceUrl`.
 * @throws ConfigError (via `readUrlEnv`) when any provided URL is invalid.
 */
function readOAuthUrls(baseUrl) {
    // Properties are evaluated top to bottom, so validation order is unchanged.
    return {
        issuerUrl: readUrlEnv('OAUTH_ISSUER_URL'),
        authorizationUrl: readUrlEnv('OAUTH_AUTHORIZATION_URL'),
        tokenUrl: readUrlEnv('OAUTH_TOKEN_URL'),
        revocationUrl: readUrlEnv('OAUTH_REVOCATION_URL'),
        registrationUrl: readUrlEnv('OAUTH_REGISTRATION_URL'),
        introspectionUrl: readUrlEnv('OAUTH_INTROSPECTION_URL'),
        resourceUrl: new URL('/mcp', baseUrl),
    };
}
|
|
276
|
+
/**
 * Decide the auth mode from the configured OAuth URLs.
 *
 * @param urls Object as produced by `readOAuthUrls`.
 * @returns `'oauth'` when any of issuer, authorization, token or
 *          introspection URL is defined; otherwise `'static'`.
 */
function resolveAuthMode(urls) {
    const { issuerUrl, authorizationUrl, tokenUrl, introspectionUrl } = urls;
    for (const endpoint of [issuerUrl, authorizationUrl, tokenUrl, introspectionUrl]) {
        if (endpoint !== undefined) {
            return 'oauth';
        }
    }
    return 'static';
}
|
|
285
|
+
/**
 * Gather the static bearer tokens: every entry of ACCESS_TOKENS plus
 * API_KEY when set, with duplicates removed.
 *
 * @returns Array of unique tokens in first-seen order.
 */
function collectStaticTokens() {
    const tokens = parseList(env.ACCESS_TOKENS);
    if (env.API_KEY) {
        tokens.push(env.API_KEY);
    }
    return [...new Set(tokens)];
}
|
|
291
|
+
/**
 * Assemble the `auth` section of the configuration.
 *
 * @param baseUrl Server base URL, forwarded to `readOAuthUrls`.
 * @returns Auth config: mode, OAuth URLs, scopes, client credentials,
 *          introspection timeout and static tokens.
 */
function buildAuthConfig(baseUrl) {
    const oauthUrls = readOAuthUrls(baseUrl);
    const authConfig = {
        mode: resolveAuthMode(oauthUrls),
        ...oauthUrls,
        requiredScopes: parseList(env.OAUTH_REQUIRED_SCOPES),
        clientId: env.OAUTH_CLIENT_ID,
        clientSecret: env.OAUTH_CLIENT_SECRET,
        introspectionTimeoutMs: 5000,
        staticTokens: collectStaticTokens(),
    };
    return authConfig;
}
|
|
304
|
+
/**
 * Assemble the TLS section of the configuration from the SERVER_TLS_* settings.
 *
 * @returns `{ enabled, keyFile, certFile, caFile }`; `enabled` is true only
 *          when both key and certificate paths are set.
 * @throws ConfigError when exactly one of key / certificate is set.
 */
function buildHttpsConfig() {
    const [keyFile, certFile, caFile] = [
        env.SERVER_TLS_KEY_FILE,
        env.SERVER_TLS_CERT_FILE,
        env.SERVER_TLS_CA_FILE,
    ].map((raw) => readOptionalFilePath(raw));
    const hasKey = Boolean(keyFile);
    const hasCert = Boolean(certFile);
    if (hasKey !== hasCert) {
        throw new ConfigError('Both SERVER_TLS_KEY_FILE and SERVER_TLS_CERT_FILE must be set together');
    }
    return { enabled: hasKey && hasCert, keyFile, certFile, caFile };
}
|
|
318
|
+
// Well-known IPv4 addresses, assembled from octets.
const LOOPBACK_V4 = buildIpv4([127, 0, 0, 1]);
const ANY_V4 = buildIpv4([0, 0, 0, 0]);
const METADATA_V4_AWS = buildIpv4([169, 254, 169, 254]);
// NOTE(review): 100.100.100.200 is commonly documented as the Alibaba Cloud
// metadata address rather than Azure's — confirm the constant's name.
const METADATA_V4_AZURE = buildIpv4([100, 100, 100, 200]);
// Hosts that are always blocked: local names/addresses and cloud metadata endpoints.
const BLOCKED_HOSTS = new Set()
    .add('localhost')
    .add(LOOPBACK_V4)
    .add(ANY_V4)
    .add('::1')
    .add(METADATA_V4_AWS)
    .add('metadata.google.internal')
    .add('metadata.azure.com')
    .add(METADATA_V4_AZURE)
    .add('instance-data');
|
|
333
|
+
// Textual prefixes of private / local address ranges.
//
// The shared-address, unique-local and link-local entries cover their full
// CIDR blocks rather than only the first /16:
//   100.64.0.0/10 -> second octet 64..127
//   fc00::/7      -> first group fc00..fdff
//   fe80::/10     -> first group fe80..febf
//
// NOTE(review): the IPv4-mapped entries still omit 0.0.0.0/8 and
// 100.64.0.0/10, and the hex form (e.g. ::ffff:7f00:1) is not matched here —
// confirm whether another layer covers those.
const BLOCKED_IP_PATTERNS = [
    /^10\./,
    /^172\.(1[6-9]|2\d|3[01])\./,
    /^192\.168\./,
    /^127\./,
    /^0\./,
    /^169\.254\./,
    /^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\./,
    /^fc[0-9a-f]{2}:/i,
    /^fd[0-9a-f]{2}:/i,
    /^fe[89ab][0-9a-f]:/i,
    /^::ffff:127\./,
    /^::ffff:10\./,
    /^::ffff:172\.(1[6-9]|2\d|3[01])\./,
    /^::ffff:192\.168\./,
    /^::ffff:169\.254\./,
];
// Single-regex equivalent of the non-mapped entries above.
const BLOCKED_IP_PATTERN = /^(?:10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|127\.|0\.|169\.254\.|100\.(?:6[4-9]|[7-9]\d|1[01]\d|12[0-7])\.|f[cd][0-9a-f]{2}:|fe[89ab][0-9a-f]:)/i;
// Single-regex equivalent of the IPv4-mapped (::ffff:a.b.c.d) entries above.
const BLOCKED_IPV4_MAPPED_PATTERN = /^::ffff:(?:127\.|10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|169\.254\.)/i;
|
|
352
|
+
// Bind address. Falls back to loopback when HOST is unset, empty or
// whitespace-only: an empty host would make the `new URL(...)` below throw
// "Invalid URL" while this module is being imported.
const host = env.HOST?.trim() || LOOPBACK_V4;
const port = parsePort(env.PORT);
const httpsConfig = buildHttpsConfig();
// HTTP server tunables. The optional ones stay undefined when unset or invalid.
const maxConnections = parseInteger(env.SERVER_MAX_CONNECTIONS, 0, 0);
const headersTimeoutMs = parseOptionalInteger(env.SERVER_HEADERS_TIMEOUT_MS, 1);
const requestTimeoutMs = parseOptionalInteger(env.SERVER_REQUEST_TIMEOUT_MS, 0);
const keepAliveTimeoutMs = parseOptionalInteger(env.SERVER_KEEP_ALIVE_TIMEOUT_MS, 1);
const keepAliveTimeoutBufferMs = parseOptionalInteger(env.SERVER_KEEP_ALIVE_TIMEOUT_BUFFER_MS, 0);
const maxHeadersCount = parseOptionalInteger(env.SERVER_MAX_HEADERS_COUNT, 1);
const blockPrivateConnections = parseBoolean(env.SERVER_BLOCK_PRIVATE_CONNECTIONS, false);
const allowRemote = parseBoolean(env.ALLOW_REMOTE, false);
// Scheme follows the TLS configuration; IPv6 hosts are bracketed by formatHostForUrl.
const baseUrl = new URL(`${httpsConfig.enabled ? 'https' : 'http'}://${formatHostForUrl(host)}:${port}`);
// Mutable runtime flags, exposed through `config.runtime`.
const runtimeState = {
    httpMode: false,
};
|
|
367
|
+
/**
 * Resolved application configuration, built once at module load from the
 * environment and the defaults defined above.
 */
export const config = {
    // Listener identity, TLS, session limits and HTTP server tunables.
    server: {
        name: 'fetch-url-mcp',
        version: serverVersion,
        port,
        host,
        https: httpsConfig,
        sessionTtlMs: DEFAULT_SESSION_TTL_MS,
        sessionInitTimeoutMs: DEFAULT_SESSION_INIT_TIMEOUT_MS,
        maxSessions: DEFAULT_MAX_SESSIONS,
        http: {
            headersTimeoutMs,
            requestTimeoutMs,
            keepAliveTimeoutMs,
            keepAliveTimeoutBufferMs,
            maxHeadersCount,
            maxConnections,
            blockPrivateConnections,
            shutdownCloseIdleConnections: true,
            shutdownCloseAllConnections: false,
        },
    },
    // Outbound fetch settings.
    fetcher: {
        timeout: DEFAULT_FETCH_TIMEOUT_MS,
        maxRedirects: 5,
        // `||` rather than `??`: a blank USER_AGENT (e.g. `USER_AGENT=` in an
        // env file) falls back to the default instead of yielding an empty value.
        userAgent: env.USER_AGENT?.trim() || DEFAULT_USER_AGENT,
        maxContentLength: MAX_HTML_BYTES,
    },
    // Transform stage settings.
    transform: {
        timeoutMs: DEFAULT_TRANSFORM_TIMEOUT_MS,
        stageWarnRatio: 0.5,
        metadataFormat: 'markdown',
        maxWorkerScale: 4,
        workerMode: parseTransformWorkerMode(env.TRANSFORM_WORKER_MODE),
        workerResourceLimits: resolveWorkerResourceLimits(),
    },
    tools: {
        enabled: ['fetch-url'],
        timeoutMs: DEFAULT_TOOL_TIMEOUT_MS,
    },
    tasks: {
        maxTotal: DEFAULT_TASKS_MAX_TOTAL,
        maxPerOwner: RESOLVED_TASKS_MAX_PER_OWNER,
    },
    cache: {
        enabled: parseBoolean(env.CACHE_ENABLED, true),
        ttl: 86400,
        maxKeys: 100,
        maxSizeBytes: 50 * 1024 * 1024, // 50MB
    },
    extraction: {
        maxBlockLength: 5000,
        minParagraphLength: 10,
    },
    noiseRemoval: {
        extraTokens: parseList(env.FETCH_URL_MCP_EXTRA_NOISE_TOKENS),
        extraSelectors: parseList(env.FETCH_URL_MCP_EXTRA_NOISE_SELECTORS),
        enabledCategories: [
            'cookie-banners',
            'newsletters',
            'social-share',
            'nav-footer',
        ],
        debug: false,
        aggressiveMode: false,
        preserveSvgCanvas: false,
        weights: {
            hidden: 50,
            structural: 50,
            promo: 35,
            stickyFixed: 30,
            threshold: 50,
        },
    },
    markdownCleanup: {
        promoteOrphanHeadings: true,
        removeSkipLinks: true,
        removeTocBlocks: true,
        removeTypeDocComments: true,
        headingKeywords: parseListOrDefault(env.MARKDOWN_HEADING_KEYWORDS, DEFAULT_HEADING_KEYWORDS),
    },
    i18n: {
        locale: normalizeLocale(env.FETCH_URL_MCP_LOCALE),
    },
    logging: {
        level: parseLogLevel(env.LOG_LEVEL),
        format: env.LOG_FORMAT?.toLowerCase() === 'json' ? 'json' : 'text',
    },
    constants: {
        maxHtmlSize: MAX_HTML_BYTES,
        maxUrlLength: 2048,
        maxInlineContentChars: MAX_INLINE_CONTENT_CHARS,
    },
    // Host/IP block lists, the optional allow-list, and remote-access switch.
    security: {
        blockedHosts: BLOCKED_HOSTS,
        blockedIpPatterns: BLOCKED_IP_PATTERNS,
        blockedIpPattern: BLOCKED_IP_PATTERN,
        blockedIpv4MappedPattern: BLOCKED_IPV4_MAPPED_PATTERN,
        allowedHosts: parseAllowedHosts(env.ALLOWED_HOSTS),
        apiKey: env.API_KEY,
        allowRemote,
    },
    auth: buildAuthConfig(baseUrl),
    rateLimit: {
        enabled: true,
        maxRequests: 100,
        windowMs: 60000,
        cleanupIntervalMs: 60000,
    },
    // Shared mutable object; see enableHttpMode().
    runtime: runtimeState,
};
|
|
478
|
+
/**
 * Flip the shared runtime state into HTTP mode by setting
 * `runtimeState.httpMode` to `true`.
 */
export function enableHttpMode() {
    Object.assign(runtimeState, { httpMode: true });
}
|
package/dist/crypto.d.ts
ADDED
package/dist/crypto.js
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
2
|
+
import { createHash, createHmac, hash as oneShotHash, timingSafeEqual, } from 'node:crypto';
|
|
3
|
+
// Inputs up to this size (5 MiB) use the one-shot hash path in hashHex.
const MAX_HASH_INPUT_BYTES = 5 * 1024 ** 2;
// Algorithms accepted by assertAllowedAlgorithm.
const ALLOWED_HASH_ALGORITHMS = new Set(['sha256', 'sha512']);
|
|
8
|
+
/**
 * UTF-8 byte length of a string, computed with `Buffer.byteLength`.
 *
 * @param input String to measure.
 * @returns Number of bytes the string occupies in UTF-8.
 */
function byteLengthUtf8(input) {
    const size = Buffer.byteLength(input, 'utf8');
    return size;
}
/**
 * Byte length of a string (as UTF-8) or of a binary value exposing `byteLength`.
 *
 * @param input String or binary value.
 * @returns Size in bytes.
 */
function byteLength(input) {
    if (typeof input !== 'string') {
        return input.byteLength;
    }
    return byteLengthUtf8(input);
}
|
|
15
|
+
/**
 * Guard against hash algorithms outside the allow-list. This is a runtime
 * check, so it also covers values that bypassed static typing.
 *
 * @param algorithm Algorithm name to validate.
 * @throws Error when `algorithm` is not in `ALLOWED_HASH_ALGORITHMS`.
 */
function assertAllowedAlgorithm(algorithm) {
    const permitted = ALLOWED_HASH_ALGORITHMS.has(algorithm);
    if (permitted) {
        return;
    }
    throw new Error(`Hash algorithm not allowed: ${algorithm}`);
}
|
|
21
|
+
/**
 * Compare two strings by their UTF-8 bytes using `crypto.timingSafeEqual`.
 *
 * When the encoded lengths differ, both values are copied into zero-filled
 * buffers of the longer length and still compared, so a length mismatch does
 * not return early; the result is then `false`.
 *
 * @param a First string.
 * @param b Second string.
 * @returns `true` only when both strings encode to identical bytes.
 */
export function timingSafeEqualUtf8(a, b) {
    const left = Buffer.from(a, 'utf8');
    const right = Buffer.from(b, 'utf8');
    const sameLength = left.length === right.length;
    if (sameLength) {
        return timingSafeEqual(left, right);
    }
    // Pad both sides to a common width so timingSafeEqual can run.
    const width = Math.max(left.length, right.length);
    const paddedLeft = Buffer.alloc(width);
    const paddedRight = Buffer.alloc(width);
    left.copy(paddedLeft);
    right.copy(paddedRight);
    const paddedMatch = timingSafeEqual(paddedLeft, paddedRight);
    return paddedMatch && sameLength;
}
|
|
35
|
+
/**
 * Hash `input` with an allow-listed algorithm and return the hex digest.
 * Inputs up to `MAX_HASH_INPUT_BYTES` use the one-shot `crypto.hash`; larger
 * inputs go through `createHash`.
 *
 * @param algorithm Algorithm name; validated by `assertAllowedAlgorithm`.
 * @param input String or binary data.
 * @returns Hex-encoded digest.
 * @throws Error when the algorithm is not allowed.
 */
function hashHex(algorithm, input) {
    assertAllowedAlgorithm(algorithm);
    const fitsOneShot = byteLength(input) <= MAX_HASH_INPUT_BYTES;
    return fitsOneShot
        ? oneShotHash(algorithm, input, 'hex')
        : createHash(algorithm).update(input).digest('hex');
}
/**
 * SHA-256 hex digest of `input`.
 *
 * @param input String or binary data.
 * @returns Hex-encoded SHA-256 digest.
 */
export function sha256Hex(input) {
    const digest = hashHex('sha256', input);
    return digest;
}
|
|
47
|
+
/**
 * HMAC-SHA256 of `input` under `key`, hex-encoded.
 *
 * @param key HMAC key.
 * @param input Message to authenticate.
 * @returns Hex-encoded MAC.
 */
export function hmacSha256Hex(key, input) {
    const mac = createHmac('sha256', key);
    mac.update(input);
    return mac.digest('hex');
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Type declaration only: takes an HTML string, an optional `Document` and an
 * optional base URL string, and returns a string.
 * NOTE(review): the implementation is not visible here; presumably the result
 * is the HTML with noise elements removed — confirm against dom-noise-removal.js.
 */
export declare function removeNoiseFromHtml(html: string, document?: Document, baseUrl?: string): string;
|