@j0hanz/superfetch 1.2.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +116 -152
- package/dist/config/auth-config.d.ts +16 -0
- package/dist/config/auth-config.js +53 -0
- package/dist/config/constants.d.ts +11 -13
- package/dist/config/constants.js +1 -3
- package/dist/config/env-parsers.d.ts +7 -0
- package/dist/config/env-parsers.js +84 -0
- package/dist/config/formatting.d.ts +2 -2
- package/dist/config/index.d.ts +47 -53
- package/dist/config/index.js +25 -59
- package/dist/config/types/content.d.ts +1 -49
- package/dist/config/types/runtime.d.ts +8 -16
- package/dist/config/types/tools.d.ts +2 -28
- package/dist/http/accept-policy.d.ts +3 -0
- package/dist/http/accept-policy.js +45 -0
- package/dist/http/async-handler.d.ts +2 -0
- package/dist/http/async-handler.js +5 -0
- package/dist/http/auth-introspection.d.ts +2 -0
- package/dist/http/auth-introspection.js +141 -0
- package/dist/http/auth-static.d.ts +2 -0
- package/dist/http/auth-static.js +23 -0
- package/dist/http/auth.d.ts +3 -2
- package/dist/http/auth.js +98 -26
- package/dist/http/cors.d.ts +6 -6
- package/dist/http/cors.js +7 -42
- package/dist/http/download-routes.d.ts +0 -12
- package/dist/http/download-routes.js +21 -58
- package/dist/http/jsonrpc-http.d.ts +2 -0
- package/dist/http/jsonrpc-http.js +10 -0
- package/dist/http/mcp-routes.d.ts +0 -1
- package/dist/http/mcp-routes.js +43 -30
- package/dist/http/mcp-session-helpers.d.ts +0 -1
- package/dist/http/mcp-session-helpers.js +1 -1
- package/dist/http/mcp-session-transport.d.ts +7 -0
- package/dist/http/mcp-session-transport.js +57 -0
- package/dist/http/mcp-session.js +60 -73
- package/dist/http/mcp-validation.d.ts +1 -0
- package/dist/http/mcp-validation.js +11 -10
- package/dist/http/protocol-policy.d.ts +2 -0
- package/dist/http/protocol-policy.js +31 -0
- package/dist/http/rate-limit.js +5 -2
- package/dist/http/server-config.d.ts +1 -0
- package/dist/http/server-config.js +40 -0
- package/dist/http/server-middleware.d.ts +2 -9
- package/dist/http/server-middleware.js +96 -43
- package/dist/http/server-shutdown.d.ts +4 -0
- package/dist/http/server-shutdown.js +43 -0
- package/dist/http/server.js +52 -64
- package/dist/http/session-cleanup.js +1 -1
- package/dist/middleware/error-handler.js +1 -3
- package/dist/resources/cached-content.js +50 -108
- package/dist/resources/index.js +0 -82
- package/dist/server.js +51 -30
- package/dist/services/cache-keys.d.ts +7 -0
- package/dist/services/cache-keys.js +57 -0
- package/dist/services/cache.d.ts +1 -7
- package/dist/services/cache.js +53 -119
- package/dist/services/context.d.ts +0 -1
- package/dist/services/context.js +0 -7
- package/dist/services/extractor.js +10 -82
- package/dist/services/fetcher/agents.d.ts +2 -2
- package/dist/services/fetcher/agents.js +34 -95
- package/dist/services/fetcher/dns-selection.d.ts +2 -0
- package/dist/services/fetcher/dns-selection.js +72 -0
- package/dist/services/fetcher/interceptors.d.ts +0 -22
- package/dist/services/fetcher/interceptors.js +30 -13
- package/dist/services/fetcher/redirects.js +4 -3
- package/dist/services/fetcher/response.js +66 -31
- package/dist/services/fetcher.d.ts +1 -3
- package/dist/services/fetcher.js +14 -33
- package/dist/services/fifo-queue.d.ts +8 -0
- package/dist/services/fifo-queue.js +25 -0
- package/dist/services/logger.js +2 -2
- package/dist/services/metadata-collector.d.ts +1 -9
- package/dist/services/metadata-collector.js +71 -2
- package/dist/services/transform-worker-pool.d.ts +4 -14
- package/dist/services/transform-worker-pool.js +177 -129
- package/dist/services/transform-worker-types.d.ts +32 -0
- package/dist/services/transform-worker-types.js +14 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
- package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
- package/dist/tools/handlers/fetch-single.shared.d.ts +1 -20
- package/dist/tools/handlers/fetch-single.shared.js +44 -87
- package/dist/tools/handlers/fetch-url.tool.d.ts +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +46 -123
- package/dist/tools/index.js +21 -40
- package/dist/tools/schemas.d.ts +1 -51
- package/dist/tools/schemas.js +1 -107
- package/dist/tools/utils/cached-markdown.d.ts +5 -0
- package/dist/tools/utils/cached-markdown.js +46 -0
- package/dist/tools/utils/content-shaping.d.ts +4 -0
- package/dist/tools/utils/content-shaping.js +52 -0
- package/dist/tools/utils/content-transform.d.ts +2 -17
- package/dist/tools/utils/content-transform.js +120 -114
- package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
- package/dist/tools/utils/fetch-pipeline.js +65 -62
- package/dist/tools/utils/inline-content.d.ts +1 -2
- package/dist/tools/utils/inline-content.js +4 -7
- package/dist/transformers/markdown.transformer.js +109 -34
- package/dist/utils/cached-payload.d.ts +7 -0
- package/dist/utils/cached-payload.js +36 -0
- package/dist/utils/error-utils.js +1 -1
- package/dist/utils/filename-generator.js +21 -10
- package/dist/utils/guards.d.ts +1 -0
- package/dist/utils/guards.js +3 -0
- package/dist/utils/header-normalizer.d.ts +0 -3
- package/dist/utils/header-normalizer.js +3 -3
- package/dist/utils/tool-error-handler.d.ts +2 -2
- package/dist/utils/tool-error-handler.js +11 -38
- package/dist/utils/url-transformer.d.ts +7 -0
- package/dist/utils/url-transformer.js +147 -0
- package/dist/utils/url-validator.d.ts +1 -2
- package/dist/utils/url-validator.js +20 -93
- package/dist/workers/content-transform.worker.d.ts +1 -0
- package/dist/workers/content-transform.worker.js +40 -0
- package/package.json +13 -16
|
@@ -1,13 +1,8 @@
|
|
|
1
1
|
import TurndownService from 'turndown';
|
|
2
2
|
import { CODE_BLOCK, FRONTMATTER_DELIMITER, joinLines, } from '../config/formatting.js';
|
|
3
3
|
import { detectLanguageFromCode, resolveLanguageFromAttributes, } from '../utils/code-language.js';
|
|
4
|
+
import { isRecord } from '../utils/guards.js';
|
|
4
5
|
let turndownInstance = null;
|
|
5
|
-
function getTurndown() {
|
|
6
|
-
if (turndownInstance)
|
|
7
|
-
return turndownInstance;
|
|
8
|
-
turndownInstance = createTurndownInstance();
|
|
9
|
-
return turndownInstance;
|
|
10
|
-
}
|
|
11
6
|
function createTurndownInstance() {
|
|
12
7
|
const instance = new TurndownService({
|
|
13
8
|
headingStyle: 'atx',
|
|
@@ -19,12 +14,97 @@ function createTurndownInstance() {
|
|
|
19
14
|
addFencedCodeRule(instance);
|
|
20
15
|
return instance;
|
|
21
16
|
}
|
|
17
|
+
function getTurndown() {
|
|
18
|
+
turndownInstance ??= createTurndownInstance();
|
|
19
|
+
return turndownInstance;
|
|
20
|
+
}
|
|
21
|
+
function isElement(node) {
|
|
22
|
+
if (!isRecord(node))
|
|
23
|
+
return false;
|
|
24
|
+
return 'getAttribute' in node && typeof node.getAttribute === 'function';
|
|
25
|
+
}
|
|
26
|
+
const STRUCTURAL_TAGS = new Set([
|
|
27
|
+
'script',
|
|
28
|
+
'style',
|
|
29
|
+
'noscript',
|
|
30
|
+
'iframe',
|
|
31
|
+
'nav',
|
|
32
|
+
'footer',
|
|
33
|
+
'aside',
|
|
34
|
+
'header',
|
|
35
|
+
'form',
|
|
36
|
+
'button',
|
|
37
|
+
'input',
|
|
38
|
+
'select',
|
|
39
|
+
'textarea',
|
|
40
|
+
]);
|
|
41
|
+
const NAVIGATION_ROLES = new Set([
|
|
42
|
+
'navigation',
|
|
43
|
+
'banner',
|
|
44
|
+
'complementary',
|
|
45
|
+
'contentinfo',
|
|
46
|
+
'tree',
|
|
47
|
+
'menubar',
|
|
48
|
+
'menu',
|
|
49
|
+
]);
|
|
50
|
+
const PROMO_PATTERN = /banner|promo|announcement|cta|callout|advert|newsletter|subscribe|cookie|consent|popup|modal|overlay|toast/;
|
|
51
|
+
const FIXED_PATTERN = /\b(fixed|sticky)\b/;
|
|
52
|
+
const HIGH_Z_PATTERN = /\bz-(?:4[0-9]|50)\b/;
|
|
53
|
+
const ISOLATE_PATTERN = /\bisolate\b/;
|
|
54
|
+
function isStructuralNoiseTag(tagName) {
|
|
55
|
+
return (STRUCTURAL_TAGS.has(tagName) || tagName === 'svg' || tagName === 'canvas');
|
|
56
|
+
}
|
|
57
|
+
function isElementHidden(element) {
|
|
58
|
+
return (element.getAttribute('hidden') !== null ||
|
|
59
|
+
element.getAttribute('aria-hidden') === 'true');
|
|
60
|
+
}
|
|
61
|
+
function hasNoiseRole(role) {
|
|
62
|
+
return role ? NAVIGATION_ROLES.has(role) : false;
|
|
63
|
+
}
|
|
64
|
+
function matchesPromoIdOrClass(className, id) {
|
|
65
|
+
const combined = `${className} ${id}`.toLowerCase();
|
|
66
|
+
return PROMO_PATTERN.test(combined);
|
|
67
|
+
}
|
|
68
|
+
function matchesHighZIsolate(className) {
|
|
69
|
+
return HIGH_Z_PATTERN.test(className) && ISOLATE_PATTERN.test(className);
|
|
70
|
+
}
|
|
71
|
+
function matchesFixedOrHighZIsolate(className) {
|
|
72
|
+
if (FIXED_PATTERN.test(className))
|
|
73
|
+
return true;
|
|
74
|
+
return matchesHighZIsolate(className);
|
|
75
|
+
}
|
|
22
76
|
function addNoiseRule(instance) {
|
|
23
77
|
instance.addRule('removeNoise', {
|
|
24
|
-
filter:
|
|
78
|
+
filter: (node) => isNoiseNode(node),
|
|
25
79
|
replacement: () => '',
|
|
26
80
|
});
|
|
27
81
|
}
|
|
82
|
+
function isNoiseNode(node) {
|
|
83
|
+
if (!isElement(node))
|
|
84
|
+
return false;
|
|
85
|
+
return isNoiseElement(node);
|
|
86
|
+
}
|
|
87
|
+
function readElementMetadata(element) {
|
|
88
|
+
return {
|
|
89
|
+
tagName: element.tagName.toLowerCase(),
|
|
90
|
+
className: element.getAttribute('class') ?? '',
|
|
91
|
+
id: element.getAttribute('id') ?? '',
|
|
92
|
+
role: element.getAttribute('role'),
|
|
93
|
+
isHidden: isElementHidden(element),
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
function isNoiseElement(node) {
|
|
97
|
+
const metadata = readElementMetadata(node);
|
|
98
|
+
if (isStructuralNoiseTag(metadata.tagName))
|
|
99
|
+
return true;
|
|
100
|
+
if (metadata.isHidden)
|
|
101
|
+
return true;
|
|
102
|
+
if (hasNoiseRole(metadata.role))
|
|
103
|
+
return true;
|
|
104
|
+
if (matchesFixedOrHighZIsolate(metadata.className))
|
|
105
|
+
return true;
|
|
106
|
+
return matchesPromoIdOrClass(metadata.className, metadata.id);
|
|
107
|
+
}
|
|
28
108
|
function addFencedCodeRule(instance) {
|
|
29
109
|
instance.addRule('fencedCodeBlockWithLanguage', {
|
|
30
110
|
filter: (node, options) => isFencedCodeBlock(node, options),
|
|
@@ -41,12 +121,6 @@ function isFencedCodeBlock(node, options) {
|
|
|
41
121
|
return false;
|
|
42
122
|
return firstChild.nodeName === 'CODE';
|
|
43
123
|
}
|
|
44
|
-
function isElement(node) {
|
|
45
|
-
return (node !== null &&
|
|
46
|
-
typeof node === 'object' &&
|
|
47
|
-
'getAttribute' in node &&
|
|
48
|
-
typeof node.getAttribute === 'function');
|
|
49
|
-
}
|
|
50
124
|
function formatFencedCodeBlock(node) {
|
|
51
125
|
const codeNode = node.firstChild;
|
|
52
126
|
if (!isElement(codeNode))
|
|
@@ -56,11 +130,16 @@ function formatFencedCodeBlock(node) {
|
|
|
56
130
|
return CODE_BLOCK.format(code, language);
|
|
57
131
|
}
|
|
58
132
|
function resolveCodeLanguage(codeNode, code) {
|
|
59
|
-
const className = codeNode
|
|
60
|
-
const
|
|
61
|
-
const attributeLanguage = resolveLanguageFromAttributes(className, dataLang);
|
|
133
|
+
const { className, dataLanguage } = readCodeAttributes(codeNode);
|
|
134
|
+
const attributeLanguage = resolveLanguageFromAttributes(className, dataLanguage);
|
|
62
135
|
return attributeLanguage ?? detectLanguageFromCode(code) ?? '';
|
|
63
136
|
}
|
|
137
|
+
function readCodeAttributes(codeNode) {
|
|
138
|
+
return {
|
|
139
|
+
className: codeNode.getAttribute('class') ?? '',
|
|
140
|
+
dataLanguage: codeNode.getAttribute('data-language') ?? '',
|
|
141
|
+
};
|
|
142
|
+
}
|
|
64
143
|
const YAML_SPECIAL_CHARS = /[:[\]{}"\r\t'|>&*!?,#]|\n/;
|
|
65
144
|
const YAML_NUMERIC = /^[\d.]+$/;
|
|
66
145
|
const YAML_RESERVED_WORDS = /^(true|false|null|yes|no|on|off)$/i;
|
|
@@ -91,36 +170,32 @@ function escapeYamlValue(value) {
|
|
|
91
170
|
.replace(ESCAPE_PATTERNS.tab, '\\t');
|
|
92
171
|
return `"${escaped}"`;
|
|
93
172
|
}
|
|
173
|
+
function appendFrontmatterField(lines, key, value) {
|
|
174
|
+
if (!value)
|
|
175
|
+
return;
|
|
176
|
+
lines.push(`${key}: ${escapeYamlValue(value)}`);
|
|
177
|
+
}
|
|
94
178
|
function createFrontmatter(metadata) {
|
|
95
179
|
const lines = [FRONTMATTER_DELIMITER];
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
}
|
|
99
|
-
if (metadata.url) {
|
|
100
|
-
lines.push(`source: ${escapeYamlValue(metadata.url)}`);
|
|
101
|
-
}
|
|
180
|
+
appendFrontmatterField(lines, 'title', metadata.title);
|
|
181
|
+
appendFrontmatterField(lines, 'source', metadata.url);
|
|
102
182
|
lines.push(FRONTMATTER_DELIMITER);
|
|
103
183
|
return joinLines(lines);
|
|
104
184
|
}
|
|
105
|
-
function convertHtmlToMarkdown(html) {
|
|
106
|
-
return getTurndown().turndown(html).trim();
|
|
107
|
-
}
|
|
108
|
-
function buildFrontmatterBlock(metadata) {
|
|
109
|
-
return metadata ? createFrontmatter(metadata) : '';
|
|
110
|
-
}
|
|
111
185
|
export function htmlToMarkdown(html, metadata) {
|
|
112
|
-
const frontmatter =
|
|
113
|
-
if (!
|
|
186
|
+
const frontmatter = buildFrontmatter(metadata);
|
|
187
|
+
if (!html)
|
|
114
188
|
return frontmatter;
|
|
115
|
-
}
|
|
116
189
|
try {
|
|
117
|
-
const content =
|
|
190
|
+
const content = getTurndown().turndown(html).trim();
|
|
118
191
|
return frontmatter ? `${frontmatter}\n${content}` : content;
|
|
119
192
|
}
|
|
120
193
|
catch {
|
|
121
194
|
return frontmatter;
|
|
122
195
|
}
|
|
123
196
|
}
|
|
124
|
-
function
|
|
125
|
-
|
|
197
|
+
function buildFrontmatter(metadata) {
|
|
198
|
+
if (!metadata)
|
|
199
|
+
return '';
|
|
200
|
+
return createFrontmatter(metadata);
|
|
126
201
|
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export interface CachedPayload {
|
|
2
|
+
content?: string;
|
|
3
|
+
markdown?: string;
|
|
4
|
+
title?: string;
|
|
5
|
+
}
|
|
6
|
+
export declare function parseCachedPayload(raw: string): CachedPayload | null;
|
|
7
|
+
export declare function resolveCachedPayloadContent(payload: CachedPayload): string | null;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { isRecord } from './guards.js';
|
|
2
|
+
export function parseCachedPayload(raw) {
|
|
3
|
+
try {
|
|
4
|
+
const parsed = JSON.parse(raw);
|
|
5
|
+
return isCachedPayload(parsed) ? parsed : null;
|
|
6
|
+
}
|
|
7
|
+
catch {
|
|
8
|
+
return null;
|
|
9
|
+
}
|
|
10
|
+
}
|
|
11
|
+
export function resolveCachedPayloadContent(payload) {
|
|
12
|
+
if (typeof payload.markdown === 'string') {
|
|
13
|
+
return payload.markdown;
|
|
14
|
+
}
|
|
15
|
+
if (typeof payload.content === 'string') {
|
|
16
|
+
return payload.content;
|
|
17
|
+
}
|
|
18
|
+
return null;
|
|
19
|
+
}
|
|
20
|
+
function hasOptionalStringProperty(value, key) {
|
|
21
|
+
const prop = value[key];
|
|
22
|
+
if (prop === undefined)
|
|
23
|
+
return true;
|
|
24
|
+
return typeof prop === 'string';
|
|
25
|
+
}
|
|
26
|
+
function isCachedPayload(value) {
|
|
27
|
+
if (!isRecord(value))
|
|
28
|
+
return false;
|
|
29
|
+
if (!hasOptionalStringProperty(value, 'content'))
|
|
30
|
+
return false;
|
|
31
|
+
if (!hasOptionalStringProperty(value, 'markdown'))
|
|
32
|
+
return false;
|
|
33
|
+
if (!hasOptionalStringProperty(value, 'title'))
|
|
34
|
+
return false;
|
|
35
|
+
return true;
|
|
36
|
+
}
|
|
@@ -16,20 +16,31 @@ export function generateSafeFilename(url, title, hashFallback, extension = DEFAU
|
|
|
16
16
|
}
|
|
17
17
|
return `download-${Date.now()}${extension}`;
|
|
18
18
|
}
|
|
19
|
+
function getLastPathSegment(url) {
|
|
20
|
+
const segments = url.pathname.split('/').filter(Boolean);
|
|
21
|
+
if (segments.length === 0)
|
|
22
|
+
return null;
|
|
23
|
+
const lastSegment = segments[segments.length - 1];
|
|
24
|
+
return lastSegment ?? null;
|
|
25
|
+
}
|
|
26
|
+
function stripCommonPageExtension(segment) {
|
|
27
|
+
return segment.replace(/\.(html?|php|aspx?|jsp)$/i, '');
|
|
28
|
+
}
|
|
29
|
+
function normalizeUrlFilenameSegment(segment) {
|
|
30
|
+
const cleaned = stripCommonPageExtension(segment);
|
|
31
|
+
if (!cleaned)
|
|
32
|
+
return null;
|
|
33
|
+
if (cleaned === 'index')
|
|
34
|
+
return null;
|
|
35
|
+
return cleaned;
|
|
36
|
+
}
|
|
19
37
|
function extractFilenameFromUrl(url) {
|
|
20
38
|
try {
|
|
21
39
|
const urlObj = new URL(url);
|
|
22
|
-
const
|
|
23
|
-
const segments = pathname.split('/').filter(Boolean);
|
|
24
|
-
if (segments.length === 0)
|
|
25
|
-
return null;
|
|
26
|
-
const lastSegment = segments[segments.length - 1];
|
|
40
|
+
const lastSegment = getLastPathSegment(urlObj);
|
|
27
41
|
if (!lastSegment)
|
|
28
42
|
return null;
|
|
29
|
-
|
|
30
|
-
if (!cleaned || cleaned === 'index')
|
|
31
|
-
return null;
|
|
32
|
-
return cleaned;
|
|
43
|
+
return normalizeUrlFilenameSegment(lastSegment);
|
|
33
44
|
}
|
|
34
45
|
catch {
|
|
35
46
|
return null;
|
|
@@ -43,7 +54,7 @@ function slugifyTitle(title) {
|
|
|
43
54
|
.replace(WHITESPACE_REGEX, '-')
|
|
44
55
|
.replace(/-+/g, '-')
|
|
45
56
|
.replace(/^-|-$/g, '');
|
|
46
|
-
return slug
|
|
57
|
+
return slug || null;
|
|
47
58
|
}
|
|
48
59
|
function sanitizeFilename(name, extension) {
|
|
49
60
|
let sanitized = name
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function isRecord(value: unknown): value is Record<string, unknown>;
|
|
@@ -2,7 +2,4 @@ interface NormalizeOptions {
|
|
|
2
2
|
readonly trimValues?: boolean;
|
|
3
3
|
}
|
|
4
4
|
export declare function normalizeHeaderRecord(headers: Record<string, string> | undefined, blockedHeaders: Set<string>, options?: NormalizeOptions): Record<string, string> | undefined;
|
|
5
|
-
export declare function normalizeHeaderEntries(headers: Record<string, string>, blockedHeaders: Set<string>, options?: NormalizeOptions): Headers;
|
|
6
|
-
export declare function hasHeaderEntries(headers: Headers): boolean;
|
|
7
|
-
export declare function headersToRecord(headers: Headers): Record<string, string>;
|
|
8
5
|
export {};
|
|
@@ -6,7 +6,7 @@ export function normalizeHeaderRecord(headers, blockedHeaders, options = {}) {
|
|
|
6
6
|
return undefined;
|
|
7
7
|
return headersToRecord(normalized);
|
|
8
8
|
}
|
|
9
|
-
|
|
9
|
+
function normalizeHeaderEntries(headers, blockedHeaders, options = {}) {
|
|
10
10
|
const normalized = new Headers();
|
|
11
11
|
for (const [key, value] of Object.entries(headers)) {
|
|
12
12
|
if (blockedHeaders.has(key.toLowerCase()))
|
|
@@ -15,10 +15,10 @@ export function normalizeHeaderEntries(headers, blockedHeaders, options = {}) {
|
|
|
15
15
|
}
|
|
16
16
|
return normalized;
|
|
17
17
|
}
|
|
18
|
-
|
|
18
|
+
function hasHeaderEntries(headers) {
|
|
19
19
|
return !headers.keys().next().done;
|
|
20
20
|
}
|
|
21
|
-
|
|
21
|
+
function headersToRecord(headers) {
|
|
22
22
|
return Object.fromEntries(headers.entries());
|
|
23
23
|
}
|
|
24
24
|
function setHeaderValue(headers, key, value, trimValue) {
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import type { ToolErrorResponse } from '../config/types/tools.js';
|
|
2
|
-
export declare function createToolErrorResponse(message: string, url: string
|
|
3
|
-
export declare function handleToolError(error: unknown, url: string, fallbackMessage?: string
|
|
2
|
+
export declare function createToolErrorResponse(message: string, url: string): ToolErrorResponse;
|
|
3
|
+
export declare function handleToolError(error: unknown, url: string, fallbackMessage?: string): ToolErrorResponse;
|
|
@@ -1,33 +1,15 @@
|
|
|
1
|
-
import { ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
|
2
1
|
import { FetchError } from '../errors/app-error.js';
|
|
3
2
|
import { isSystemError } from './error-utils.js';
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
const MCP_ERROR_CODE_MAP = {
|
|
7
|
-
VALIDATION_ERROR: String(ErrorCode.InvalidParams),
|
|
8
|
-
INVALID_PARAMS: String(ErrorCode.InvalidParams),
|
|
9
|
-
INTERNAL_ERROR: String(ErrorCode.InternalError),
|
|
10
|
-
FETCH_ERROR: String(ErrorCode.InternalError),
|
|
11
|
-
BATCH_ERROR: String(ErrorCode.InternalError),
|
|
12
|
-
PROMISE_REJECTED: String(ErrorCode.InternalError),
|
|
13
|
-
UNKNOWN_ERROR: String(ErrorCode.InternalError),
|
|
14
|
-
};
|
|
15
|
-
const NUMERIC_ERROR_CODE = /^-?\d+$/;
|
|
16
|
-
function normalizeToolErrorCode(code) {
|
|
17
|
-
if (!code)
|
|
18
|
-
return String(ErrorCode.InternalError);
|
|
19
|
-
if (NUMERIC_ERROR_CODE.test(code))
|
|
20
|
-
return code;
|
|
21
|
-
if (code.startsWith('HTTP_'))
|
|
22
|
-
return String(ErrorCode.InternalError);
|
|
23
|
-
return MCP_ERROR_CODE_MAP[code] ?? code;
|
|
3
|
+
function createFallbackErrorResponse(fallbackMessage, url, error) {
|
|
4
|
+
return createToolErrorResponse(`${fallbackMessage}: ${error.message}`, url);
|
|
24
5
|
}
|
|
25
|
-
|
|
6
|
+
function createUnknownErrorResponse(fallbackMessage, url) {
|
|
7
|
+
return createToolErrorResponse(`${fallbackMessage}: Unknown error`, url);
|
|
8
|
+
}
|
|
9
|
+
export function createToolErrorResponse(message, url) {
|
|
26
10
|
const structuredContent = {
|
|
27
|
-
...details,
|
|
28
11
|
error: message,
|
|
29
12
|
url,
|
|
30
|
-
errorCode: normalizeToolErrorCode(code),
|
|
31
13
|
};
|
|
32
14
|
return {
|
|
33
15
|
content: [{ type: 'text', text: JSON.stringify(structuredContent) }],
|
|
@@ -35,26 +17,17 @@ export function createToolErrorResponse(message, url, code, details = {}) {
|
|
|
35
17
|
isError: true,
|
|
36
18
|
};
|
|
37
19
|
}
|
|
38
|
-
function
|
|
39
|
-
const message = fallback ? `${fallback}: ${error.message}` : error.message;
|
|
40
|
-
if (IS_DEVELOPMENT_WITH_STACK_TRACES && error.stack) {
|
|
41
|
-
return `${message}\n${error.stack}`;
|
|
42
|
-
}
|
|
43
|
-
return message;
|
|
44
|
-
}
|
|
45
|
-
export function handleToolError(error, url, fallbackMessage = 'Operation failed', details = {}) {
|
|
20
|
+
export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
|
|
46
21
|
if (isValidationError(error)) {
|
|
47
|
-
return createToolErrorResponse(error.message, url
|
|
22
|
+
return createToolErrorResponse(error.message, url);
|
|
48
23
|
}
|
|
49
24
|
if (error instanceof FetchError) {
|
|
50
|
-
|
|
51
|
-
return createToolErrorResponse(message, url, error.code, details);
|
|
25
|
+
return createToolErrorResponse(error.message, url);
|
|
52
26
|
}
|
|
53
27
|
if (error instanceof Error) {
|
|
54
|
-
|
|
55
|
-
return createToolErrorResponse(message, url, 'UNKNOWN_ERROR', details);
|
|
28
|
+
return createFallbackErrorResponse(fallbackMessage, url, error);
|
|
56
29
|
}
|
|
57
|
-
return
|
|
30
|
+
return createUnknownErrorResponse(fallbackMessage, url);
|
|
58
31
|
}
|
|
59
32
|
function isValidationError(error) {
|
|
60
33
|
return (error instanceof Error &&
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export interface TransformResult {
|
|
2
|
+
readonly url: string;
|
|
3
|
+
readonly transformed: boolean;
|
|
4
|
+
readonly platform?: string;
|
|
5
|
+
}
|
|
6
|
+
export declare function transformToRawUrl(url: string): TransformResult;
|
|
7
|
+
export declare function isRawTextContentUrl(url: string): boolean;
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import { logDebug } from '../services/logger.js';
|
|
2
|
+
const GITHUB_BLOB_RULE = {
|
|
3
|
+
name: 'github',
|
|
4
|
+
pattern: /^https?:\/\/(?:www\.)?github\.com\/([^/]+)\/([^/]+)\/blob\/([^/]+)\/(.+)$/i,
|
|
5
|
+
transform: (match) => {
|
|
6
|
+
const owner = match[1] ?? '';
|
|
7
|
+
const repo = match[2] ?? '';
|
|
8
|
+
const branch = match[3] ?? '';
|
|
9
|
+
const path = match[4] ?? '';
|
|
10
|
+
return `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${path}`;
|
|
11
|
+
},
|
|
12
|
+
};
|
|
13
|
+
const GITHUB_GIST_RULE = {
|
|
14
|
+
name: 'github-gist',
|
|
15
|
+
pattern: /^https?:\/\/gist\.github\.com\/([^/]+)\/([a-f0-9]+)(?:#file-(.+)|\/raw\/([^/]+))?$/i,
|
|
16
|
+
transform: (match) => {
|
|
17
|
+
const user = match[1] ?? '';
|
|
18
|
+
const gistId = match[2] ?? '';
|
|
19
|
+
const hashFile = match[3];
|
|
20
|
+
const rawFile = match[4];
|
|
21
|
+
const filename = rawFile ?? hashFile?.replace(/-/g, '.');
|
|
22
|
+
const filePath = filename ? `/${filename}` : '';
|
|
23
|
+
return `https://gist.githubusercontent.com/${user}/${gistId}/raw${filePath}`;
|
|
24
|
+
},
|
|
25
|
+
};
|
|
26
|
+
const GITLAB_BLOB_RULE = {
|
|
27
|
+
name: 'gitlab',
|
|
28
|
+
pattern: /^(https?:\/\/(?:[^/]+\.)?gitlab\.com\/[^/]+\/[^/]+)\/-\/blob\/([^/]+)\/(.+)$/i,
|
|
29
|
+
transform: (match) => {
|
|
30
|
+
const baseUrl = match[1] ?? '';
|
|
31
|
+
const branch = match[2] ?? '';
|
|
32
|
+
const path = match[3] ?? '';
|
|
33
|
+
return `${baseUrl}/-/raw/${branch}/${path}`;
|
|
34
|
+
},
|
|
35
|
+
};
|
|
36
|
+
const BITBUCKET_SRC_RULE = {
|
|
37
|
+
name: 'bitbucket',
|
|
38
|
+
pattern: /^(https?:\/\/(?:www\.)?bitbucket\.org\/[^/]+\/[^/]+)\/src\/([^/]+)\/(.+)$/i,
|
|
39
|
+
transform: (match) => {
|
|
40
|
+
const baseUrl = match[1] ?? '';
|
|
41
|
+
const branch = match[2] ?? '';
|
|
42
|
+
const path = match[3] ?? '';
|
|
43
|
+
return `${baseUrl}/raw/${branch}/${path}`;
|
|
44
|
+
},
|
|
45
|
+
};
|
|
46
|
+
const TRANSFORM_RULES = [
|
|
47
|
+
GITHUB_BLOB_RULE,
|
|
48
|
+
GITHUB_GIST_RULE,
|
|
49
|
+
GITLAB_BLOB_RULE,
|
|
50
|
+
BITBUCKET_SRC_RULE,
|
|
51
|
+
];
|
|
52
|
+
function isRawUrl(url) {
|
|
53
|
+
const lowerUrl = url.toLowerCase();
|
|
54
|
+
return (lowerUrl.includes('raw.githubusercontent.com') ||
|
|
55
|
+
lowerUrl.includes('gist.githubusercontent.com') ||
|
|
56
|
+
lowerUrl.includes('/-/raw/') ||
|
|
57
|
+
/bitbucket\.org\/[^/]+\/[^/]+\/raw\//.test(lowerUrl));
|
|
58
|
+
}
|
|
59
|
+
function getUrlWithoutParams(url) {
|
|
60
|
+
const hashIndex = url.indexOf('#');
|
|
61
|
+
const queryIndex = url.indexOf('?');
|
|
62
|
+
let endIndex = url.length;
|
|
63
|
+
if (queryIndex !== -1) {
|
|
64
|
+
if (hashIndex !== -1) {
|
|
65
|
+
endIndex = Math.min(queryIndex, hashIndex);
|
|
66
|
+
}
|
|
67
|
+
else {
|
|
68
|
+
endIndex = queryIndex;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
else if (hashIndex !== -1) {
|
|
72
|
+
endIndex = hashIndex;
|
|
73
|
+
}
|
|
74
|
+
const hash = hashIndex !== -1 ? url.slice(hashIndex) : '';
|
|
75
|
+
return {
|
|
76
|
+
base: url.slice(0, endIndex),
|
|
77
|
+
hash,
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
function resolveUrlToMatch(rule, base, hash) {
|
|
81
|
+
if (rule.name !== 'github-gist')
|
|
82
|
+
return base;
|
|
83
|
+
if (!hash.startsWith('#file-'))
|
|
84
|
+
return base;
|
|
85
|
+
return base + hash;
|
|
86
|
+
}
|
|
87
|
+
function applyTransformRules(base, hash) {
|
|
88
|
+
for (const rule of TRANSFORM_RULES) {
|
|
89
|
+
const urlToMatch = resolveUrlToMatch(rule, base, hash);
|
|
90
|
+
const match = rule.pattern.exec(urlToMatch);
|
|
91
|
+
if (match) {
|
|
92
|
+
return { url: rule.transform(match), platform: rule.name };
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
return null;
|
|
96
|
+
}
|
|
97
|
+
export function transformToRawUrl(url) {
|
|
98
|
+
if (!url)
|
|
99
|
+
return { url, transformed: false };
|
|
100
|
+
if (isRawUrl(url)) {
|
|
101
|
+
return { url, transformed: false };
|
|
102
|
+
}
|
|
103
|
+
const { base, hash } = getUrlWithoutParams(url);
|
|
104
|
+
const result = applyTransformRules(base, hash);
|
|
105
|
+
if (!result)
|
|
106
|
+
return { url, transformed: false };
|
|
107
|
+
logDebug('URL transformed to raw content URL', {
|
|
108
|
+
platform: result.platform,
|
|
109
|
+
original: url.substring(0, 100),
|
|
110
|
+
transformed: result.url.substring(0, 100),
|
|
111
|
+
});
|
|
112
|
+
return {
|
|
113
|
+
url: result.url,
|
|
114
|
+
transformed: true,
|
|
115
|
+
platform: result.platform,
|
|
116
|
+
};
|
|
117
|
+
}
|
|
118
|
+
const RAW_TEXT_EXTENSIONS = new Set([
|
|
119
|
+
'.md',
|
|
120
|
+
'.markdown',
|
|
121
|
+
'.txt',
|
|
122
|
+
'.json',
|
|
123
|
+
'.yaml',
|
|
124
|
+
'.yml',
|
|
125
|
+
'.toml',
|
|
126
|
+
'.xml',
|
|
127
|
+
'.csv',
|
|
128
|
+
'.rst',
|
|
129
|
+
'.adoc',
|
|
130
|
+
'.org',
|
|
131
|
+
]);
|
|
132
|
+
export function isRawTextContentUrl(url) {
|
|
133
|
+
if (!url)
|
|
134
|
+
return false;
|
|
135
|
+
if (isRawUrl(url))
|
|
136
|
+
return true;
|
|
137
|
+
const { base } = getUrlWithoutParams(url);
|
|
138
|
+
const lowerBase = base.toLowerCase();
|
|
139
|
+
return hasKnownRawTextExtension(lowerBase);
|
|
140
|
+
}
|
|
141
|
+
function hasKnownRawTextExtension(urlBaseLower) {
|
|
142
|
+
for (const ext of RAW_TEXT_EXTENSIONS) {
|
|
143
|
+
if (urlBaseLower.endsWith(ext))
|
|
144
|
+
return true;
|
|
145
|
+
}
|
|
146
|
+
return false;
|
|
147
|
+
}
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
export declare function isBlockedIp(ip: string): boolean;
|
|
2
|
-
export declare function assertResolvedAddressesAllowed(hostname: string): Promise<void>;
|
|
3
2
|
export declare function normalizeUrl(urlString: string): {
|
|
4
3
|
normalizedUrl: string;
|
|
5
4
|
hostname: string;
|
|
6
5
|
};
|
|
7
|
-
export declare function validateAndNormalizeUrl(urlString: string):
|
|
6
|
+
export declare function validateAndNormalizeUrl(urlString: string): string;
|