@j0hanz/superfetch 1.0.6 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +228 -36
- package/dist/config/index.d.ts +10 -5
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +73 -19
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +98 -57
- package/dist/config/types.d.ts.map +1 -1
- package/dist/errors/app-error.d.ts +4 -28
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +10 -51
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +10 -55
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +2 -2
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +12 -14
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +0 -8
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/parsers/base-html-element-parser.d.ts +43 -0
- package/dist/parsers/base-html-element-parser.d.ts.map +1 -0
- package/dist/parsers/base-html-element-parser.js +59 -0
- package/dist/parsers/base-html-element-parser.js.map +1 -0
- package/dist/parsers/heading-element-parser.d.ts +14 -0
- package/dist/parsers/heading-element-parser.d.ts.map +1 -0
- package/dist/parsers/heading-element-parser.js +26 -0
- package/dist/parsers/heading-element-parser.js.map +1 -0
- package/dist/parsers/image-element-parser.d.ts +16 -0
- package/dist/parsers/image-element-parser.d.ts.map +1 -0
- package/dist/parsers/image-element-parser.js +33 -0
- package/dist/parsers/image-element-parser.js.map +1 -0
- package/dist/parsers/link-element-parser.d.ts +15 -0
- package/dist/parsers/link-element-parser.d.ts.map +1 -0
- package/dist/parsers/link-element-parser.js +28 -0
- package/dist/parsers/link-element-parser.js.map +1 -0
- package/dist/parsers/open-graph-parser.d.ts +17 -0
- package/dist/parsers/open-graph-parser.d.ts.map +1 -0
- package/dist/parsers/open-graph-parser.js +41 -0
- package/dist/parsers/open-graph-parser.js.map +1 -0
- package/dist/parsers/schema-org-parser.d.ts +17 -0
- package/dist/parsers/schema-org-parser.d.ts.map +1 -0
- package/dist/parsers/schema-org-parser.js +32 -0
- package/dist/parsers/schema-org-parser.js.map +1 -0
- package/dist/parsers/standard-meta-parser.d.ts +18 -0
- package/dist/parsers/standard-meta-parser.d.ts.map +1 -0
- package/dist/parsers/standard-meta-parser.js +32 -0
- package/dist/parsers/standard-meta-parser.js.map +1 -0
- package/dist/parsers/twitter-card-parser.d.ts +17 -0
- package/dist/parsers/twitter-card-parser.d.ts.map +1 -0
- package/dist/parsers/twitter-card-parser.js +41 -0
- package/dist/parsers/twitter-card-parser.js.map +1 -0
- package/dist/resources/cached-content.d.ts +0 -1
- package/dist/resources/cached-content.d.ts.map +1 -1
- package/dist/resources/cached-content.js +3 -9
- package/dist/resources/cached-content.js.map +1 -1
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +8 -8
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +10 -10
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +0 -28
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +10 -173
- package/dist/services/cache.js.map +1 -1
- package/dist/services/extractor.d.ts +1 -11
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +86 -84
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +2 -13
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +195 -211
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts +5 -4
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +27 -42
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +35 -26
- package/dist/services/parser.js.map +1 -1
- package/dist/services/session-manager.d.ts +18 -0
- package/dist/services/session-manager.d.ts.map +1 -0
- package/dist/services/session-manager.js +73 -0
- package/dist/services/session-manager.js.map +1 -0
- package/dist/strategies/exponential-backoff-strategy.d.ts +13 -0
- package/dist/strategies/exponential-backoff-strategy.d.ts.map +1 -0
- package/dist/strategies/exponential-backoff-strategy.js +32 -0
- package/dist/strategies/exponential-backoff-strategy.js.map +1 -0
- package/dist/tools/handlers/fetch-links.tool.d.ts +2 -9
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +0 -1
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +5 -2
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +23 -33
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +2 -9
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +15 -20
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +2 -9
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.js +124 -105
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +0 -4
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +6 -7
- package/dist/tools/utils/common.d.ts.map +1 -1
- package/dist/tools/utils/common.js +8 -8
- package/dist/tools/utils/common.js.map +1 -1
- package/dist/tools/utils/fetch-pipeline.d.ts +8 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
- package/dist/tools/utils/fetch-pipeline.js +47 -79
- package/dist/tools/utils/fetch-pipeline.js.map +1 -1
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +15 -10
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +58 -62
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/utils/concurrency.d.ts +2 -5
- package/dist/utils/concurrency.d.ts.map +1 -1
- package/dist/utils/concurrency.js +19 -19
- package/dist/utils/concurrency.js.map +1 -1
- package/dist/utils/content-cleaner.d.ts +0 -25
- package/dist/utils/content-cleaner.d.ts.map +1 -1
- package/dist/utils/content-cleaner.js +12 -187
- package/dist/utils/content-cleaner.js.map +1 -1
- package/dist/utils/html-truncator.d.ts +2 -0
- package/dist/utils/html-truncator.d.ts.map +1 -0
- package/dist/utils/html-truncator.js +14 -0
- package/dist/utils/html-truncator.js.map +1 -0
- package/dist/utils/language-detector.d.ts +0 -3
- package/dist/utils/language-detector.d.ts.map +1 -1
- package/dist/utils/language-detector.js +0 -11
- package/dist/utils/language-detector.js.map +1 -1
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +7 -5
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +15 -42
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -6
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +12 -81
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +5 -6
|
@@ -1,115 +1,46 @@
|
|
|
1
|
-
import dns from 'dns/promises';
|
|
2
1
|
import { config } from '../config/index.js';
|
|
3
|
-
import { UrlValidationError, ValidationError } from '../errors/app-error.js';
|
|
4
|
-
const BLOCKED_HOSTS = new Set([
|
|
5
|
-
'localhost',
|
|
6
|
-
'127.0.0.1',
|
|
7
|
-
'0.0.0.0',
|
|
8
|
-
'::1',
|
|
9
|
-
'169.254.169.254',
|
|
10
|
-
'metadata.google.internal',
|
|
11
|
-
'metadata.azure.com',
|
|
12
|
-
'100.100.100.200',
|
|
13
|
-
'instance-data',
|
|
14
|
-
]);
|
|
15
|
-
const BLOCKED_IP_PATTERNS = [
|
|
16
|
-
/^10\./,
|
|
17
|
-
/^172\.(1[6-9]|2\d|3[01])\./,
|
|
18
|
-
/^192\.168\./,
|
|
19
|
-
/^127\./,
|
|
20
|
-
/^0\./,
|
|
21
|
-
/^169\.254\./,
|
|
22
|
-
/^fc00:/i,
|
|
23
|
-
/^fe80:/i,
|
|
24
|
-
/^::ffff:127\./,
|
|
25
|
-
/^::ffff:10\./,
|
|
26
|
-
/^::ffff:172\.(1[6-9]|2\d|3[01])\./,
|
|
27
|
-
/^::ffff:192\.168\./,
|
|
28
|
-
];
|
|
29
2
|
/**
|
|
30
3
|
* Check if an IP address is in a blocked private range
|
|
31
4
|
*/
|
|
32
5
|
export function isBlockedIp(ip) {
|
|
33
|
-
return
|
|
34
|
-
}
|
|
35
|
-
/**
|
|
36
|
-
* Validate resolved IP addresses to prevent DNS rebinding attacks.
|
|
37
|
-
* This should be called after DNS resolution to ensure the resolved
|
|
38
|
-
* IPs are not in blocked private ranges.
|
|
39
|
-
*/
|
|
40
|
-
export async function validateResolvedIps(hostname) {
|
|
41
|
-
// Skip validation for direct IP addresses (already validated in validateAndNormalizeUrl)
|
|
42
|
-
if (/^[\d.]+$/.test(hostname) || hostname.includes(':')) {
|
|
43
|
-
return;
|
|
44
|
-
}
|
|
45
|
-
try {
|
|
46
|
-
// Resolve IPv4 addresses
|
|
47
|
-
const ipv4Addresses = await dns.resolve4(hostname).catch(() => []);
|
|
48
|
-
for (const ip of ipv4Addresses) {
|
|
49
|
-
if (isBlockedIp(ip) || BLOCKED_HOSTS.has(ip)) {
|
|
50
|
-
throw new UrlValidationError(`DNS rebinding detected: ${hostname} resolves to blocked IP ${ip}`, hostname);
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
// Resolve IPv6 addresses
|
|
54
|
-
const ipv6Addresses = await dns.resolve6(hostname).catch(() => []);
|
|
55
|
-
for (const ip of ipv6Addresses) {
|
|
56
|
-
if (isBlockedIp(ip) || BLOCKED_HOSTS.has(ip)) {
|
|
57
|
-
throw new UrlValidationError(`DNS rebinding detected: ${hostname} resolves to blocked IP ${ip}`, hostname);
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
catch (error) {
|
|
62
|
-
// Re-throw UrlValidationError, ignore DNS resolution errors
|
|
63
|
-
if (error instanceof UrlValidationError) {
|
|
64
|
-
throw error;
|
|
65
|
-
}
|
|
66
|
-
// DNS resolution failed - let the actual request handle the error
|
|
67
|
-
}
|
|
6
|
+
return config.security.blockedIpPatterns.some((pattern) => pattern.test(ip));
|
|
68
7
|
}
|
|
69
8
|
export function validateAndNormalizeUrl(urlString) {
|
|
70
|
-
// Check for empty or whitespace-only input
|
|
71
9
|
if (!urlString || typeof urlString !== 'string') {
|
|
72
|
-
throw new
|
|
10
|
+
throw new Error('URL is required');
|
|
73
11
|
}
|
|
74
12
|
const trimmedUrl = urlString.trim();
|
|
75
13
|
if (!trimmedUrl) {
|
|
76
|
-
throw new
|
|
14
|
+
throw new Error('URL cannot be empty');
|
|
77
15
|
}
|
|
78
|
-
// Check URL length to prevent DoS
|
|
79
16
|
if (trimmedUrl.length > config.constants.maxUrlLength) {
|
|
80
|
-
throw new
|
|
17
|
+
throw new Error(`URL exceeds maximum length of ${config.constants.maxUrlLength} characters`);
|
|
81
18
|
}
|
|
82
19
|
let url;
|
|
83
20
|
try {
|
|
84
21
|
url = new URL(trimmedUrl);
|
|
85
22
|
}
|
|
86
23
|
catch {
|
|
87
|
-
throw new
|
|
24
|
+
throw new Error('Invalid URL format');
|
|
88
25
|
}
|
|
89
|
-
// Only allow HTTP(S) protocols
|
|
90
26
|
if (url.protocol !== 'http:' && url.protocol !== 'https:') {
|
|
91
|
-
throw new
|
|
27
|
+
throw new Error(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
|
|
92
28
|
}
|
|
93
|
-
// Block URLs with credentials (user:pass@host)
|
|
94
29
|
if (url.username || url.password) {
|
|
95
|
-
throw new
|
|
30
|
+
throw new Error('URLs with embedded credentials are not allowed');
|
|
96
31
|
}
|
|
97
32
|
const hostname = url.hostname.toLowerCase();
|
|
98
|
-
// Block empty hostname
|
|
99
33
|
if (!hostname) {
|
|
100
|
-
throw new
|
|
34
|
+
throw new Error('URL must have a valid hostname');
|
|
101
35
|
}
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
throw new UrlValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`, trimmedUrl);
|
|
36
|
+
if (config.security.blockedHosts.has(hostname)) {
|
|
37
|
+
throw new Error(`Blocked host: ${hostname}. Internal hosts are not allowed`);
|
|
105
38
|
}
|
|
106
|
-
// Block private IP ranges
|
|
107
39
|
if (isBlockedIp(hostname)) {
|
|
108
|
-
throw new
|
|
40
|
+
throw new Error(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
|
|
109
41
|
}
|
|
110
|
-
// Block hostnames that look like they might resolve to internal addresses
|
|
111
42
|
if (hostname.endsWith('.local') || hostname.endsWith('.internal')) {
|
|
112
|
-
throw new
|
|
43
|
+
throw new Error(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
|
|
113
44
|
}
|
|
114
45
|
return url.href;
|
|
115
46
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"url-validator.js","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"url-validator.js","sourceRoot":"","sources":["../../src/utils/url-validator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAE5C;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,EAAU;IACpC,OAAO,MAAM,CAAC,QAAQ,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;AAC/E,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,SAAiB;IACvD,IAAI,CAAC,SAAS,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;IACrC,CAAC;IAED,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IACpC,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,UAAU,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC,YAAY,EAAE,CAAC;QACtD,MAAM,IAAI,KAAK,CACb,iCAAiC,MAAM,CAAC,SAAS,CAAC,YAAY,aAAa,CAC5E,CAAC;IACJ,CAAC;IAED,IAAI,GAAQ,CAAC;IAEb,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;IACxC,CAAC;IAED,IAAI,GAAG,CAAC,QAAQ,KAAK,OAAO,IAAI,GAAG,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CACb,qBAAqB,GAAG,CAAC,QAAQ,qCAAqC,CACvE,CAAC;IACJ,CAAC;IAED,IAAI,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;IACpE,CAAC;IAED,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAE5C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;IACpD,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,CAAC,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC/C,MAAM,IAAI,KAAK,CACb,iBAAiB,QAAQ,kCAAkC,CAC5D,CAAC;IACJ,CAAC;IAED,IAAI,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CACb,qBAAqB,QAAQ,+BAA+B,CAC7D,CAAC;IACJ,CAAC;IAED,IAAI,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QAClE,MAAM,IAAI,KAAK,CACb,6BAA6B,QAAQ,4CAA4C,CAClF,CAAC;IACJ,CAAC;IAED,OAAO,GAAG,CAAC,IAAI,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,GAAW,EAAE,OAAe;IACxD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QACrC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;QACpC,OAAO,MAAM,CAAC,QAAQ,KAAK,UAAU,CAAC,QAAQ,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC;QACP
,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@j0hanz/superfetch",
|
|
3
|
-
"version": "1.0.6",
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"mcpName": "io.github.j0hanz/superfetch",
|
|
5
5
|
"description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable JSONL format",
|
|
6
6
|
"type": "module",
|
|
@@ -50,27 +50,26 @@
|
|
|
50
50
|
"dependencies": {
|
|
51
51
|
"@modelcontextprotocol/sdk": "^1.25.1",
|
|
52
52
|
"@mozilla/readability": "^0.6.0",
|
|
53
|
-
"axios": "^1.
|
|
53
|
+
"axios": "^1.7.9",
|
|
54
54
|
"cheerio": "^1.1.2",
|
|
55
55
|
"domhandler": "^5.0.3",
|
|
56
56
|
"express": "^5.2.1",
|
|
57
57
|
"jsdom": "^27.3.0",
|
|
58
58
|
"node-cache": "^5.1.2",
|
|
59
59
|
"turndown": "^7.2.2",
|
|
60
|
-
"
|
|
61
|
-
"zod": "^3.25.76"
|
|
60
|
+
"zod": "^3.24.1"
|
|
62
61
|
},
|
|
63
62
|
"devDependencies": {
|
|
64
63
|
"@eslint/js": "^9.39.2",
|
|
65
64
|
"@trivago/prettier-plugin-sort-imports": "^6.0.0",
|
|
66
65
|
"@types/express": "^5.0.6",
|
|
67
66
|
"@types/jsdom": "^27.0.0",
|
|
68
|
-
"@types/node": "^22.
|
|
67
|
+
"@types/node": "^22.19.3",
|
|
69
68
|
"@types/turndown": "^5.0.6",
|
|
70
69
|
"eslint": "^9.23.2",
|
|
71
70
|
"eslint-config-prettier": "^10.1.8",
|
|
72
71
|
"eslint-plugin-unused-imports": "^4.3.0",
|
|
73
|
-
"knip": "^5.
|
|
72
|
+
"knip": "^5.75.1",
|
|
74
73
|
"prettier": "^3.7.4",
|
|
75
74
|
"shx": "^0.4.0",
|
|
76
75
|
"tsx": "^4.21.0",
|