@j0hanz/superfetch 1.2.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +116 -152
- package/dist/config/auth-config.d.ts +16 -0
- package/dist/config/auth-config.js +53 -0
- package/dist/config/constants.d.ts +11 -13
- package/dist/config/constants.js +1 -3
- package/dist/config/env-parsers.d.ts +7 -0
- package/dist/config/env-parsers.js +84 -0
- package/dist/config/formatting.d.ts +2 -2
- package/dist/config/index.d.ts +47 -53
- package/dist/config/index.js +25 -59
- package/dist/config/types/content.d.ts +1 -49
- package/dist/config/types/runtime.d.ts +8 -16
- package/dist/config/types/tools.d.ts +2 -28
- package/dist/http/accept-policy.d.ts +3 -0
- package/dist/http/accept-policy.js +45 -0
- package/dist/http/async-handler.d.ts +2 -0
- package/dist/http/async-handler.js +5 -0
- package/dist/http/auth-introspection.d.ts +2 -0
- package/dist/http/auth-introspection.js +141 -0
- package/dist/http/auth-static.d.ts +2 -0
- package/dist/http/auth-static.js +23 -0
- package/dist/http/auth.d.ts +3 -2
- package/dist/http/auth.js +98 -26
- package/dist/http/cors.d.ts +6 -6
- package/dist/http/cors.js +7 -42
- package/dist/http/download-routes.d.ts +0 -12
- package/dist/http/download-routes.js +21 -58
- package/dist/http/jsonrpc-http.d.ts +2 -0
- package/dist/http/jsonrpc-http.js +10 -0
- package/dist/http/mcp-routes.d.ts +0 -1
- package/dist/http/mcp-routes.js +43 -30
- package/dist/http/mcp-session-helpers.d.ts +0 -1
- package/dist/http/mcp-session-helpers.js +1 -1
- package/dist/http/mcp-session-transport.d.ts +7 -0
- package/dist/http/mcp-session-transport.js +57 -0
- package/dist/http/mcp-session.js +60 -73
- package/dist/http/mcp-validation.d.ts +1 -0
- package/dist/http/mcp-validation.js +11 -10
- package/dist/http/protocol-policy.d.ts +2 -0
- package/dist/http/protocol-policy.js +31 -0
- package/dist/http/rate-limit.js +5 -2
- package/dist/http/server-config.d.ts +1 -0
- package/dist/http/server-config.js +40 -0
- package/dist/http/server-middleware.d.ts +2 -9
- package/dist/http/server-middleware.js +96 -43
- package/dist/http/server-shutdown.d.ts +4 -0
- package/dist/http/server-shutdown.js +43 -0
- package/dist/http/server.js +52 -64
- package/dist/http/session-cleanup.js +1 -1
- package/dist/middleware/error-handler.js +1 -3
- package/dist/resources/cached-content.js +50 -108
- package/dist/resources/index.js +0 -82
- package/dist/server.js +51 -30
- package/dist/services/cache-keys.d.ts +7 -0
- package/dist/services/cache-keys.js +57 -0
- package/dist/services/cache.d.ts +1 -7
- package/dist/services/cache.js +53 -119
- package/dist/services/context.d.ts +0 -1
- package/dist/services/context.js +0 -7
- package/dist/services/extractor.js +10 -82
- package/dist/services/fetcher/agents.d.ts +2 -2
- package/dist/services/fetcher/agents.js +34 -95
- package/dist/services/fetcher/dns-selection.d.ts +2 -0
- package/dist/services/fetcher/dns-selection.js +72 -0
- package/dist/services/fetcher/interceptors.d.ts +0 -22
- package/dist/services/fetcher/interceptors.js +30 -13
- package/dist/services/fetcher/redirects.js +4 -3
- package/dist/services/fetcher/response.js +66 -31
- package/dist/services/fetcher.d.ts +1 -3
- package/dist/services/fetcher.js +14 -33
- package/dist/services/fifo-queue.d.ts +8 -0
- package/dist/services/fifo-queue.js +25 -0
- package/dist/services/logger.js +2 -2
- package/dist/services/metadata-collector.d.ts +1 -9
- package/dist/services/metadata-collector.js +71 -2
- package/dist/services/transform-worker-pool.d.ts +4 -14
- package/dist/services/transform-worker-pool.js +177 -129
- package/dist/services/transform-worker-types.d.ts +32 -0
- package/dist/services/transform-worker-types.js +14 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
- package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
- package/dist/tools/handlers/fetch-single.shared.d.ts +1 -20
- package/dist/tools/handlers/fetch-single.shared.js +44 -87
- package/dist/tools/handlers/fetch-url.tool.d.ts +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +46 -123
- package/dist/tools/index.js +21 -40
- package/dist/tools/schemas.d.ts +1 -51
- package/dist/tools/schemas.js +2 -108
- package/dist/tools/utils/cached-markdown.d.ts +5 -0
- package/dist/tools/utils/cached-markdown.js +46 -0
- package/dist/tools/utils/content-shaping.d.ts +4 -0
- package/dist/tools/utils/content-shaping.js +52 -0
- package/dist/tools/utils/content-transform.d.ts +2 -17
- package/dist/tools/utils/content-transform.js +120 -114
- package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
- package/dist/tools/utils/fetch-pipeline.js +65 -62
- package/dist/tools/utils/inline-content.d.ts +1 -2
- package/dist/tools/utils/inline-content.js +4 -7
- package/dist/transformers/markdown.transformer.js +109 -34
- package/dist/utils/cached-payload.d.ts +7 -0
- package/dist/utils/cached-payload.js +36 -0
- package/dist/utils/error-utils.js +1 -1
- package/dist/utils/filename-generator.js +21 -10
- package/dist/utils/guards.d.ts +1 -0
- package/dist/utils/guards.js +3 -0
- package/dist/utils/header-normalizer.d.ts +0 -3
- package/dist/utils/header-normalizer.js +3 -3
- package/dist/utils/tool-error-handler.d.ts +2 -2
- package/dist/utils/tool-error-handler.js +11 -38
- package/dist/utils/url-transformer.d.ts +7 -0
- package/dist/utils/url-transformer.js +147 -0
- package/dist/utils/url-validator.d.ts +1 -2
- package/dist/utils/url-validator.js +20 -93
- package/dist/workers/content-transform.worker.d.ts +1 -0
- package/dist/workers/content-transform.worker.js +40 -0
- package/package.json +13 -16
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { lookup } from 'node:dns/promises';
|
|
2
1
|
import { BlockList, isIP } from 'node:net';
|
|
3
2
|
import { config } from '../config/index.js';
|
|
4
3
|
import { createErrorWithCode } from './error-utils.js';
|
|
@@ -31,36 +30,6 @@ for (const entry of BLOCKED_IPV4_SUBNETS) {
|
|
|
31
30
|
for (const entry of BLOCKED_IPV6_SUBNETS) {
|
|
32
31
|
BLOCK_LIST.addSubnet(entry.subnet, entry.prefix, 'ipv6');
|
|
33
32
|
}
|
|
34
|
-
const DNS_LOOKUP_TIMEOUT_MS = 5000;
|
|
35
|
-
const DNS_DECISION_TTL_MS = 60000;
|
|
36
|
-
const DNS_DECISION_MAX = 1000;
|
|
37
|
-
const dnsDecisionCache = new Map();
|
|
38
|
-
function getCachedDnsDecision(hostname) {
|
|
39
|
-
const cached = dnsDecisionCache.get(hostname);
|
|
40
|
-
if (!cached)
|
|
41
|
-
return null;
|
|
42
|
-
if (cached.expiresAt <= Date.now()) {
|
|
43
|
-
dnsDecisionCache.delete(hostname);
|
|
44
|
-
return null;
|
|
45
|
-
}
|
|
46
|
-
return cached;
|
|
47
|
-
}
|
|
48
|
-
function setCachedDnsDecision(hostname, ok) {
|
|
49
|
-
dnsDecisionCache.set(hostname, {
|
|
50
|
-
ok,
|
|
51
|
-
expiresAt: Date.now() + DNS_DECISION_TTL_MS,
|
|
52
|
-
});
|
|
53
|
-
if (dnsDecisionCache.size <= DNS_DECISION_MAX)
|
|
54
|
-
return;
|
|
55
|
-
const evictCount = Math.ceil(DNS_DECISION_MAX * 0.05);
|
|
56
|
-
const iterator = dnsDecisionCache.keys();
|
|
57
|
-
for (let i = 0; i < evictCount; i++) {
|
|
58
|
-
const { value, done } = iterator.next();
|
|
59
|
-
if (done)
|
|
60
|
-
break;
|
|
61
|
-
dnsDecisionCache.delete(value);
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
33
|
function matchesBlockedIpPatterns(resolvedIp) {
|
|
65
34
|
for (const pattern of config.security.blockedIpPatterns) {
|
|
66
35
|
if (pattern.test(resolvedIp)) {
|
|
@@ -91,55 +60,6 @@ function isBlockedByList(ip, ipType) {
|
|
|
91
60
|
}
|
|
92
61
|
return BLOCK_LIST.check(ip, 'ipv6');
|
|
93
62
|
}
|
|
94
|
-
function lookupWithTimeout(hostname) {
|
|
95
|
-
return new Promise((resolve, reject) => {
|
|
96
|
-
const timer = setTimeout(() => {
|
|
97
|
-
reject(createValidationError(`DNS lookup timed out for ${hostname}`));
|
|
98
|
-
}, DNS_LOOKUP_TIMEOUT_MS);
|
|
99
|
-
lookup(hostname, { all: true })
|
|
100
|
-
.then((result) => {
|
|
101
|
-
clearTimeout(timer);
|
|
102
|
-
resolve(result);
|
|
103
|
-
})
|
|
104
|
-
.catch((error) => {
|
|
105
|
-
clearTimeout(timer);
|
|
106
|
-
reject(error instanceof Error ? error : createValidationError(String(error)));
|
|
107
|
-
});
|
|
108
|
-
});
|
|
109
|
-
}
|
|
110
|
-
export async function assertResolvedAddressesAllowed(hostname) {
|
|
111
|
-
const cached = getCachedDnsDecision(hostname);
|
|
112
|
-
if (cached) {
|
|
113
|
-
if (!cached.ok) {
|
|
114
|
-
throw createValidationError(`Blocked IP range resolved from hostname: ${hostname}`);
|
|
115
|
-
}
|
|
116
|
-
return;
|
|
117
|
-
}
|
|
118
|
-
try {
|
|
119
|
-
const result = await lookupWithTimeout(hostname);
|
|
120
|
-
const addresses = Array.isArray(result) ? result : [result];
|
|
121
|
-
if (addresses.length === 0) {
|
|
122
|
-
throw createValidationError(`Unable to resolve hostname: ${hostname}`);
|
|
123
|
-
}
|
|
124
|
-
for (const { address } of addresses) {
|
|
125
|
-
if (isBlockedIp(address.toLowerCase())) {
|
|
126
|
-
setCachedDnsDecision(hostname, false);
|
|
127
|
-
throw createValidationError(`Blocked IP range resolved from hostname: ${hostname}`);
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
setCachedDnsDecision(hostname, true);
|
|
131
|
-
}
|
|
132
|
-
catch (error) {
|
|
133
|
-
const code = error?.code;
|
|
134
|
-
if (code === 'ENOTFOUND' || code === 'EAI_AGAIN') {
|
|
135
|
-
throw createValidationError(`Unable to resolve hostname: ${hostname}`);
|
|
136
|
-
}
|
|
137
|
-
if (error instanceof Error) {
|
|
138
|
-
throw error;
|
|
139
|
-
}
|
|
140
|
-
throw createValidationError(String(error));
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
63
|
export function normalizeUrl(urlString) {
|
|
144
64
|
const trimmedUrl = requireTrimmedUrl(urlString);
|
|
145
65
|
assertUrlLength(trimmedUrl);
|
|
@@ -150,10 +70,8 @@ export function normalizeUrl(urlString) {
|
|
|
150
70
|
assertHostnameAllowed(hostname);
|
|
151
71
|
return { normalizedUrl: url.href, hostname };
|
|
152
72
|
}
|
|
153
|
-
export
|
|
154
|
-
|
|
155
|
-
await assertResolvedAddressesAllowed(hostname);
|
|
156
|
-
return normalizedUrl;
|
|
73
|
+
export function validateAndNormalizeUrl(urlString) {
|
|
74
|
+
return normalizeUrl(urlString).normalizedUrl;
|
|
157
75
|
}
|
|
158
76
|
const VALIDATION_ERROR_CODE = 'VALIDATION_ERROR';
|
|
159
77
|
function createValidationError(message) {
|
|
@@ -199,15 +117,24 @@ function normalizeHostname(url) {
|
|
|
199
117
|
}
|
|
200
118
|
const BLOCKED_HOST_SUFFIXES = ['.local', '.internal'];
|
|
201
119
|
function assertHostnameAllowed(hostname) {
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
120
|
+
assertNotBlockedHost(hostname);
|
|
121
|
+
assertNotBlockedIp(hostname);
|
|
122
|
+
assertNotBlockedHostnameSuffix(hostname);
|
|
123
|
+
}
|
|
124
|
+
function assertNotBlockedHost(hostname) {
|
|
125
|
+
if (!config.security.blockedHosts.has(hostname))
|
|
126
|
+
return;
|
|
127
|
+
throw createValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`);
|
|
128
|
+
}
|
|
129
|
+
function assertNotBlockedIp(hostname) {
|
|
130
|
+
if (!isBlockedIp(hostname))
|
|
131
|
+
return;
|
|
132
|
+
throw createValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
|
|
133
|
+
}
|
|
134
|
+
function assertNotBlockedHostnameSuffix(hostname) {
|
|
135
|
+
if (!matchesBlockedSuffix(hostname))
|
|
136
|
+
return;
|
|
137
|
+
throw createValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
|
|
211
138
|
}
|
|
212
139
|
function matchesBlockedSuffix(hostname) {
|
|
213
140
|
return BLOCKED_HOST_SUFFIXES.some((suffix) => hostname.endsWith(suffix));
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { parentPort } from 'node:worker_threads';
|
|
2
|
+
import { isRecord } from '../utils/guards.js';
|
|
3
|
+
import { transformHtmlToMarkdownSync } from '../tools/utils/content-transform.js';
|
|
4
|
+
const port = parentPort;
|
|
5
|
+
function isWorkerTransformRequest(value) {
|
|
6
|
+
if (!isRecord(value))
|
|
7
|
+
return false;
|
|
8
|
+
return (typeof value.id === 'number' &&
|
|
9
|
+
typeof value.html === 'string' &&
|
|
10
|
+
typeof value.url === 'string' &&
|
|
11
|
+
typeof value.options === 'object');
|
|
12
|
+
}
|
|
13
|
+
function handleMessage(value) {
|
|
14
|
+
if (!port)
|
|
15
|
+
return;
|
|
16
|
+
if (!isWorkerTransformRequest(value))
|
|
17
|
+
return;
|
|
18
|
+
const { id, html, url, options } = value;
|
|
19
|
+
try {
|
|
20
|
+
const result = transformHtmlToMarkdownSync(html, url, options);
|
|
21
|
+
const response = {
|
|
22
|
+
id,
|
|
23
|
+
ok: true,
|
|
24
|
+
result,
|
|
25
|
+
};
|
|
26
|
+
port.postMessage(response);
|
|
27
|
+
}
|
|
28
|
+
catch (error) {
|
|
29
|
+
const response = {
|
|
30
|
+
id,
|
|
31
|
+
ok: false,
|
|
32
|
+
error: error instanceof Error ? error.message : String(error),
|
|
33
|
+
};
|
|
34
|
+
port.postMessage(response);
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
if (!port) {
|
|
38
|
+
process.exit(1);
|
|
39
|
+
}
|
|
40
|
+
port.on('message', handleMessage);
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@j0hanz/superfetch",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"mcpName": "io.github.j0hanz/superfetch",
|
|
5
|
-
"description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable
|
|
5
|
+
"description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"main": "dist/index.js",
|
|
8
8
|
"bin": {
|
|
@@ -28,54 +28,51 @@
|
|
|
28
28
|
"web-fetching",
|
|
29
29
|
"content-extraction",
|
|
30
30
|
"readability",
|
|
31
|
-
"
|
|
31
|
+
"markdown",
|
|
32
32
|
"ai-tools",
|
|
33
33
|
"model-context-protocol",
|
|
34
34
|
"superfetch"
|
|
35
35
|
],
|
|
36
36
|
"scripts": {
|
|
37
|
-
"
|
|
38
|
-
"build": "tsc -p tsconfig.build.json && shx chmod +x dist/*.js",
|
|
37
|
+
"build": "tsc -p tsconfig.build.json && node -e \"require('fs').chmodSync('dist/index.js', '755')\"",
|
|
39
38
|
"prepare": "npm run build",
|
|
40
|
-
"
|
|
39
|
+
"dev": "tsx watch src/index.ts",
|
|
41
40
|
"start": "node dist/index.js",
|
|
42
|
-
"release": "node scripts/release.js",
|
|
43
41
|
"format": "prettier --write .",
|
|
44
42
|
"type-check": "tsc --noEmit",
|
|
45
43
|
"lint": "eslint .",
|
|
46
44
|
"lint:fix": "eslint . --fix",
|
|
47
45
|
"test": "npm run build --silent && node --test --experimental-transform-types",
|
|
48
46
|
"test:coverage": "npm run build --silent && node --test --experimental-transform-types --experimental-test-coverage",
|
|
49
|
-
"bench": "npm run build && node scripts/bench.mjs",
|
|
50
47
|
"knip": "knip",
|
|
51
|
-
"knip:fix": "knip --fix"
|
|
48
|
+
"knip:fix": "knip --fix",
|
|
49
|
+
"inspector": "npx @modelcontextprotocol/inspector",
|
|
50
|
+
"prepublishOnly": "npm run lint && npm run type-check && npm run build"
|
|
52
51
|
},
|
|
53
52
|
"dependencies": {
|
|
54
53
|
"@modelcontextprotocol/sdk": "^1.25.1",
|
|
55
54
|
"@mozilla/readability": "^0.6.0",
|
|
56
|
-
"cheerio": "^1.1.2",
|
|
57
|
-
"domhandler": "^5.0.3",
|
|
58
55
|
"express": "^5.2.1",
|
|
59
56
|
"linkedom": "^0.18.12",
|
|
60
57
|
"turndown": "^7.2.2",
|
|
61
|
-
"undici": "^6.
|
|
62
|
-
"zod": "^4.3.
|
|
58
|
+
"undici": "^6.23.0",
|
|
59
|
+
"zod": "^4.3.5"
|
|
63
60
|
},
|
|
64
61
|
"devDependencies": {
|
|
65
62
|
"@eslint/js": "^9.39.2",
|
|
66
|
-
"@trivago/prettier-plugin-sort-imports": "^6.0.
|
|
63
|
+
"@trivago/prettier-plugin-sort-imports": "^6.0.1",
|
|
67
64
|
"@types/express": "^5.0.6",
|
|
68
65
|
"@types/node": "^22.19.3",
|
|
69
66
|
"@types/turndown": "^5.0.6",
|
|
70
67
|
"eslint": "^9.23.2",
|
|
71
68
|
"eslint-config-prettier": "^10.1.8",
|
|
72
69
|
"eslint-plugin-unused-imports": "^4.3.0",
|
|
73
|
-
"knip": "^5.
|
|
70
|
+
"knip": "^5.80.0",
|
|
74
71
|
"prettier": "^3.7.4",
|
|
75
72
|
"shx": "^0.4.0",
|
|
76
73
|
"tsx": "^4.21.0",
|
|
77
74
|
"typescript": "^5.9.3",
|
|
78
|
-
"typescript-eslint": "^8.
|
|
75
|
+
"typescript-eslint": "^8.52.0"
|
|
79
76
|
},
|
|
80
77
|
"engines": {
|
|
81
78
|
"node": ">=20.12.0"
|