@j0hanz/superfetch 1.2.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +116 -152
- package/dist/config/auth-config.d.ts +16 -0
- package/dist/config/auth-config.js +53 -0
- package/dist/config/constants.d.ts +11 -13
- package/dist/config/constants.js +1 -3
- package/dist/config/env-parsers.d.ts +7 -0
- package/dist/config/env-parsers.js +84 -0
- package/dist/config/formatting.d.ts +2 -2
- package/dist/config/index.d.ts +47 -53
- package/dist/config/index.js +25 -59
- package/dist/config/types/content.d.ts +1 -49
- package/dist/config/types/runtime.d.ts +8 -16
- package/dist/config/types/tools.d.ts +2 -28
- package/dist/http/accept-policy.d.ts +3 -0
- package/dist/http/accept-policy.js +45 -0
- package/dist/http/async-handler.d.ts +2 -0
- package/dist/http/async-handler.js +5 -0
- package/dist/http/auth-introspection.d.ts +2 -0
- package/dist/http/auth-introspection.js +141 -0
- package/dist/http/auth-static.d.ts +2 -0
- package/dist/http/auth-static.js +23 -0
- package/dist/http/auth.d.ts +3 -2
- package/dist/http/auth.js +98 -26
- package/dist/http/cors.d.ts +6 -6
- package/dist/http/cors.js +7 -42
- package/dist/http/download-routes.d.ts +0 -12
- package/dist/http/download-routes.js +21 -58
- package/dist/http/jsonrpc-http.d.ts +2 -0
- package/dist/http/jsonrpc-http.js +10 -0
- package/dist/http/mcp-routes.d.ts +0 -1
- package/dist/http/mcp-routes.js +43 -30
- package/dist/http/mcp-session-helpers.d.ts +0 -1
- package/dist/http/mcp-session-helpers.js +1 -1
- package/dist/http/mcp-session-transport.d.ts +7 -0
- package/dist/http/mcp-session-transport.js +57 -0
- package/dist/http/mcp-session.js +60 -73
- package/dist/http/mcp-validation.d.ts +1 -0
- package/dist/http/mcp-validation.js +11 -10
- package/dist/http/protocol-policy.d.ts +2 -0
- package/dist/http/protocol-policy.js +31 -0
- package/dist/http/rate-limit.js +5 -2
- package/dist/http/server-config.d.ts +1 -0
- package/dist/http/server-config.js +40 -0
- package/dist/http/server-middleware.d.ts +2 -9
- package/dist/http/server-middleware.js +96 -43
- package/dist/http/server-shutdown.d.ts +4 -0
- package/dist/http/server-shutdown.js +43 -0
- package/dist/http/server.js +52 -64
- package/dist/http/session-cleanup.js +1 -1
- package/dist/middleware/error-handler.js +1 -3
- package/dist/resources/cached-content.js +50 -108
- package/dist/resources/index.js +0 -82
- package/dist/server.js +51 -30
- package/dist/services/cache-keys.d.ts +7 -0
- package/dist/services/cache-keys.js +57 -0
- package/dist/services/cache.d.ts +1 -7
- package/dist/services/cache.js +53 -119
- package/dist/services/context.d.ts +0 -1
- package/dist/services/context.js +0 -7
- package/dist/services/extractor.js +10 -82
- package/dist/services/fetcher/agents.d.ts +2 -2
- package/dist/services/fetcher/agents.js +34 -95
- package/dist/services/fetcher/dns-selection.d.ts +2 -0
- package/dist/services/fetcher/dns-selection.js +72 -0
- package/dist/services/fetcher/interceptors.d.ts +0 -22
- package/dist/services/fetcher/interceptors.js +30 -13
- package/dist/services/fetcher/redirects.js +4 -3
- package/dist/services/fetcher/response.js +66 -31
- package/dist/services/fetcher.d.ts +1 -3
- package/dist/services/fetcher.js +14 -33
- package/dist/services/fifo-queue.d.ts +8 -0
- package/dist/services/fifo-queue.js +25 -0
- package/dist/services/logger.js +2 -2
- package/dist/services/metadata-collector.d.ts +1 -9
- package/dist/services/metadata-collector.js +71 -2
- package/dist/services/transform-worker-pool.d.ts +4 -14
- package/dist/services/transform-worker-pool.js +177 -129
- package/dist/services/transform-worker-types.d.ts +32 -0
- package/dist/services/transform-worker-types.js +14 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
- package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
- package/dist/tools/handlers/fetch-single.shared.d.ts +1 -20
- package/dist/tools/handlers/fetch-single.shared.js +44 -87
- package/dist/tools/handlers/fetch-url.tool.d.ts +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +46 -123
- package/dist/tools/index.js +21 -40
- package/dist/tools/schemas.d.ts +1 -51
- package/dist/tools/schemas.js +2 -108
- package/dist/tools/utils/cached-markdown.d.ts +5 -0
- package/dist/tools/utils/cached-markdown.js +46 -0
- package/dist/tools/utils/content-shaping.d.ts +4 -0
- package/dist/tools/utils/content-shaping.js +52 -0
- package/dist/tools/utils/content-transform.d.ts +2 -17
- package/dist/tools/utils/content-transform.js +120 -114
- package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
- package/dist/tools/utils/fetch-pipeline.js +65 -62
- package/dist/tools/utils/inline-content.d.ts +1 -2
- package/dist/tools/utils/inline-content.js +4 -7
- package/dist/transformers/markdown.transformer.js +109 -34
- package/dist/utils/cached-payload.d.ts +7 -0
- package/dist/utils/cached-payload.js +36 -0
- package/dist/utils/error-utils.js +1 -1
- package/dist/utils/filename-generator.js +21 -10
- package/dist/utils/guards.d.ts +1 -0
- package/dist/utils/guards.js +3 -0
- package/dist/utils/header-normalizer.d.ts +0 -3
- package/dist/utils/header-normalizer.js +3 -3
- package/dist/utils/tool-error-handler.d.ts +2 -2
- package/dist/utils/tool-error-handler.js +11 -38
- package/dist/utils/url-transformer.d.ts +7 -0
- package/dist/utils/url-transformer.js +147 -0
- package/dist/utils/url-validator.d.ts +1 -2
- package/dist/utils/url-validator.js +20 -93
- package/dist/workers/content-transform.worker.d.ts +1 -0
- package/dist/workers/content-transform.worker.js +40 -0
- package/package.json +13 -16
|
@@ -1,79 +1,26 @@
|
|
|
1
1
|
import { config } from '../../config/index.js';
|
|
2
|
-
import { buildFileDownloadInfo } from '../../utils/download-url.js';
|
|
3
2
|
import { generateSafeFilename } from '../../utils/filename-generator.js';
|
|
4
|
-
import { createToolErrorResponse } from '../../utils/tool-error-handler.js';
|
|
5
|
-
import { appendHeaderVary } from '../utils/cache-vary.js';
|
|
6
3
|
import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
|
|
7
4
|
import { applyInlineContentLimit } from '../utils/inline-content.js';
|
|
8
|
-
|
|
9
|
-
const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
|
|
10
|
-
const cacheNamespace = options.format === 'markdown' ? 'markdown' : 'url';
|
|
11
|
-
const cacheVary = appendHeaderVary({
|
|
12
|
-
format: options.format,
|
|
13
|
-
extractMainContent: options.extractMainContent,
|
|
14
|
-
includeMetadata: options.includeMetadata,
|
|
15
|
-
maxContentLength: options.maxContentLength,
|
|
16
|
-
...(options.cacheVariant ? { variant: options.cacheVariant } : {}),
|
|
17
|
-
...(options.format === 'markdown'
|
|
18
|
-
? { includeContentBlocks: options.includeContentBlocks }
|
|
19
|
-
: { contentBlocks: true }),
|
|
20
|
-
}, options.customHeaders);
|
|
21
|
-
const pipelineOptions = {
|
|
22
|
-
url: options.url,
|
|
23
|
-
cacheNamespace,
|
|
24
|
-
transform: options.transform,
|
|
25
|
-
};
|
|
26
|
-
if (options.customHeaders !== undefined) {
|
|
27
|
-
pipelineOptions.customHeaders = options.customHeaders;
|
|
28
|
-
}
|
|
29
|
-
if (options.retries !== undefined) {
|
|
30
|
-
pipelineOptions.retries = options.retries;
|
|
31
|
-
}
|
|
32
|
-
if (options.timeout !== undefined) {
|
|
33
|
-
pipelineOptions.timeout = options.timeout;
|
|
34
|
-
}
|
|
35
|
-
if (cacheVary !== undefined) {
|
|
36
|
-
pipelineOptions.cacheVary = cacheVary;
|
|
37
|
-
}
|
|
5
|
+
function applyOptionalPipelineSerialization(pipelineOptions, options) {
|
|
38
6
|
if (options.serialize !== undefined) {
|
|
39
7
|
pipelineOptions.serialize = options.serialize;
|
|
40
8
|
}
|
|
41
9
|
if (options.deserialize !== undefined) {
|
|
42
10
|
pipelineOptions.deserialize = options.deserialize;
|
|
43
11
|
}
|
|
44
|
-
const pipeline = await executePipeline(pipelineOptions);
|
|
45
|
-
const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null, options.format);
|
|
46
|
-
return { pipeline, inlineResult };
|
|
47
12
|
}
|
|
48
|
-
export function
|
|
49
|
-
const
|
|
50
|
-
|
|
51
|
-
url:
|
|
13
|
+
export async function performSharedFetch(options, deps = {}) {
|
|
14
|
+
const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
|
|
15
|
+
const pipelineOptions = {
|
|
16
|
+
url: options.url,
|
|
17
|
+
cacheNamespace: 'markdown',
|
|
18
|
+
transform: options.transform,
|
|
52
19
|
};
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
});
|
|
58
|
-
}
|
|
59
|
-
return buildFileDownloadInfo(infoOptions);
|
|
60
|
-
}
|
|
61
|
-
export function getInlineErrorResponse(inlineResult, url, details) {
|
|
62
|
-
if (!inlineResult.error)
|
|
63
|
-
return null;
|
|
64
|
-
return createToolErrorResponse(inlineResult.error, url, 'INTERNAL_ERROR', details);
|
|
65
|
-
}
|
|
66
|
-
export function applyInlineResultToStructuredContent(structuredContent, inlineResult, contentKey) {
|
|
67
|
-
if (inlineResult.truncated) {
|
|
68
|
-
structuredContent.truncated = true;
|
|
69
|
-
}
|
|
70
|
-
if (typeof inlineResult.content === 'string') {
|
|
71
|
-
structuredContent[contentKey] = inlineResult.content;
|
|
72
|
-
}
|
|
73
|
-
if (inlineResult.resourceUri) {
|
|
74
|
-
structuredContent.resourceUri = inlineResult.resourceUri;
|
|
75
|
-
structuredContent.resourceMimeType = inlineResult.resourceMimeType;
|
|
76
|
-
}
|
|
20
|
+
applyOptionalPipelineSerialization(pipelineOptions, options);
|
|
21
|
+
const pipeline = await executePipeline(pipelineOptions);
|
|
22
|
+
const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null);
|
|
23
|
+
return { pipeline, inlineResult };
|
|
77
24
|
}
|
|
78
25
|
function serializeStructuredContent(structuredContent, fromCache) {
|
|
79
26
|
return JSON.stringify(structuredContent, fromCache ? undefined : null, fromCache ? undefined : 2);
|
|
@@ -93,45 +40,55 @@ function buildResourceLink(inlineResult, name) {
|
|
|
93
40
|
}
|
|
94
41
|
return block;
|
|
95
42
|
}
|
|
96
|
-
function buildEmbeddedResource(content,
|
|
43
|
+
function buildEmbeddedResource(content, url, title) {
|
|
97
44
|
if (!content) {
|
|
98
45
|
return null;
|
|
99
46
|
}
|
|
100
|
-
|
|
101
|
-
const extension = mimeType === 'text/markdown' ? '.md' : '.jsonl';
|
|
102
|
-
const filename = generateSafeFilename(url, title, undefined, extension);
|
|
103
|
-
// Use file: URI scheme with filename for better VS Code integration
|
|
47
|
+
const filename = generateSafeFilename(url, title, undefined, '.md');
|
|
104
48
|
const uri = `file:///${filename}`;
|
|
105
49
|
return {
|
|
106
50
|
type: 'resource',
|
|
107
51
|
resource: {
|
|
108
52
|
uri,
|
|
109
|
-
mimeType,
|
|
53
|
+
mimeType: 'text/markdown',
|
|
110
54
|
text: content,
|
|
111
55
|
},
|
|
112
56
|
};
|
|
113
57
|
}
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
text: serializeStructuredContent(structuredContent, fromCache),
|
|
118
|
-
};
|
|
119
|
-
const blocks = [textBlock];
|
|
120
|
-
// Embed full content in stdio mode; HTTP mode relies on inline content or links.
|
|
121
|
-
const mimeType = format === 'markdown' ? 'text/markdown' : 'application/jsonl';
|
|
122
|
-
const contentToEmbed = config.runtime.httpMode
|
|
123
|
-
? inlineResult.content
|
|
124
|
-
: (fullContent ?? inlineResult.content);
|
|
125
|
-
if (typeof contentToEmbed === 'string' && url) {
|
|
126
|
-
const embeddedResource = buildEmbeddedResource(contentToEmbed, mimeType, url, title);
|
|
127
|
-
if (embeddedResource) {
|
|
128
|
-
blocks.push(embeddedResource);
|
|
129
|
-
}
|
|
58
|
+
function resolveContentToEmbed(inlineResult, fullContent, useInlineInHttpMode) {
|
|
59
|
+
if (useInlineInHttpMode) {
|
|
60
|
+
return inlineResult.content;
|
|
130
61
|
}
|
|
131
|
-
|
|
62
|
+
return fullContent ?? inlineResult.content;
|
|
63
|
+
}
|
|
64
|
+
function maybeAppendEmbeddedResource(blocks, contentToEmbed, url, title) {
|
|
65
|
+
if (typeof contentToEmbed !== 'string')
|
|
66
|
+
return;
|
|
67
|
+
if (!url)
|
|
68
|
+
return;
|
|
69
|
+
const embeddedResource = buildEmbeddedResource(contentToEmbed, url, title);
|
|
70
|
+
if (embeddedResource) {
|
|
71
|
+
blocks.push(embeddedResource);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
function maybeAppendResourceLink(blocks, inlineResult, resourceName) {
|
|
132
75
|
const resourceLink = buildResourceLink(inlineResult, resourceName);
|
|
133
76
|
if (resourceLink) {
|
|
134
77
|
blocks.push(resourceLink);
|
|
135
78
|
}
|
|
79
|
+
}
|
|
80
|
+
function buildTextBlock(structuredContent, fromCache) {
|
|
81
|
+
return {
|
|
82
|
+
type: 'text',
|
|
83
|
+
text: serializeStructuredContent(structuredContent, fromCache),
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
export function buildToolContentBlocks(structuredContent, fromCache, inlineResult, resourceName, cacheKey, fullContent, url, title) {
|
|
87
|
+
const blocks = [
|
|
88
|
+
buildTextBlock(structuredContent, fromCache),
|
|
89
|
+
];
|
|
90
|
+
const contentToEmbed = resolveContentToEmbed(inlineResult, fullContent, config.runtime.httpMode);
|
|
91
|
+
maybeAppendEmbeddedResource(blocks, contentToEmbed, url, title);
|
|
92
|
+
maybeAppendResourceLink(blocks, inlineResult, resourceName);
|
|
136
93
|
return blocks;
|
|
137
94
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import type { FetchUrlInput, ToolResponseBase } from '../../config/types/tools.js';
|
|
2
2
|
export declare const FETCH_URL_TOOL_NAME = "fetch-url";
|
|
3
|
-
export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to
|
|
3
|
+
export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format";
|
|
4
4
|
export declare function fetchUrlToolHandler(input: FetchUrlInput): Promise<ToolResponseBase>;
|
|
@@ -1,152 +1,75 @@
|
|
|
1
|
-
import { config } from '../../config/index.js';
|
|
2
1
|
import { logDebug, logError } from '../../services/logger.js';
|
|
3
2
|
import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
3
|
+
import { parseCachedMarkdownResult } from '../utils/cached-markdown.js';
|
|
4
|
+
import { transformHtmlToMarkdown } from '../utils/content-transform.js';
|
|
5
|
+
import { buildToolContentBlocks, performSharedFetch, } from './fetch-single.shared.js';
|
|
6
6
|
export const FETCH_URL_TOOL_NAME = 'fetch-url';
|
|
7
|
-
export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to
|
|
8
|
-
function
|
|
9
|
-
return
|
|
7
|
+
export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format';
|
|
8
|
+
function deserializeMarkdownResult(cached) {
|
|
9
|
+
return parseCachedMarkdownResult(cached);
|
|
10
10
|
}
|
|
11
|
-
function
|
|
12
|
-
|
|
13
|
-
const
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
if (typeof content !== 'string')
|
|
18
|
-
return undefined;
|
|
19
|
-
if (typeof contentBlocks !== 'number' || !Number.isFinite(contentBlocks)) {
|
|
20
|
-
return undefined;
|
|
21
|
-
}
|
|
22
|
-
if (title !== undefined && typeof title !== 'string')
|
|
23
|
-
return undefined;
|
|
24
|
-
if (truncated !== undefined && typeof truncated !== 'boolean') {
|
|
25
|
-
return undefined;
|
|
26
|
-
}
|
|
27
|
-
const resolvedTitle = typeof title === 'string' ? title : undefined;
|
|
28
|
-
return {
|
|
29
|
-
content,
|
|
30
|
-
contentBlocks,
|
|
31
|
-
title: resolvedTitle,
|
|
32
|
-
...(truncated !== undefined ? { truncated } : {}),
|
|
33
|
-
};
|
|
34
|
-
}
|
|
35
|
-
catch {
|
|
36
|
-
return undefined;
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
function resolveFetchUrlOptions(input) {
|
|
40
|
-
const format = input.format ?? 'jsonl';
|
|
41
|
-
return {
|
|
42
|
-
extractMainContent: input.extractMainContent ?? config.extraction.extractMainContent,
|
|
43
|
-
includeMetadata: input.includeMetadata ?? config.extraction.includeMetadata,
|
|
44
|
-
format,
|
|
45
|
-
includeContentBlocks: input.includeContentBlocks ?? (format === 'markdown' ? false : true),
|
|
46
|
-
...(input.maxContentLength !== undefined && {
|
|
47
|
-
maxContentLength: input.maxContentLength,
|
|
48
|
-
}),
|
|
11
|
+
function buildMarkdownTransform() {
|
|
12
|
+
return (html, url) => {
|
|
13
|
+
const result = transformHtmlToMarkdown(html, url, {
|
|
14
|
+
includeMetadata: true,
|
|
15
|
+
});
|
|
16
|
+
return { ...result, content: result.markdown };
|
|
49
17
|
};
|
|
50
18
|
}
|
|
51
|
-
function
|
|
52
|
-
return {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
};
|
|
58
|
-
}
|
|
59
|
-
function buildFetchUrlTransform(options) {
|
|
60
|
-
return async (html, url) => options.format === 'markdown'
|
|
61
|
-
? transformHtmlToMarkdownWithBlocksAsync(html, url, {
|
|
62
|
-
extractMainContent: options.extractMainContent,
|
|
63
|
-
includeMetadata: options.includeMetadata,
|
|
64
|
-
...(options.maxContentLength !== undefined && {
|
|
65
|
-
maxContentLength: options.maxContentLength,
|
|
66
|
-
}),
|
|
67
|
-
includeContentBlocks: options.includeContentBlocks,
|
|
68
|
-
})
|
|
69
|
-
: transformHtmlToJsonlAsync(html, url, options);
|
|
19
|
+
function serializeMarkdownResult(result) {
|
|
20
|
+
return JSON.stringify({
|
|
21
|
+
markdown: result.markdown,
|
|
22
|
+
title: result.title,
|
|
23
|
+
truncated: result.truncated,
|
|
24
|
+
});
|
|
70
25
|
}
|
|
71
|
-
function
|
|
72
|
-
|
|
26
|
+
function buildStructuredContent(pipeline, inlineResult) {
|
|
27
|
+
return {
|
|
73
28
|
url: pipeline.url,
|
|
74
29
|
title: pipeline.data.title,
|
|
75
|
-
|
|
76
|
-
fetchedAt: pipeline.fetchedAt,
|
|
77
|
-
format,
|
|
78
|
-
contentSize: inlineResult.contentSize,
|
|
79
|
-
cached: pipeline.fromCache,
|
|
30
|
+
markdown: inlineResult.content,
|
|
80
31
|
};
|
|
81
|
-
if (pipeline.data.truncated) {
|
|
82
|
-
structuredContent.truncated = true;
|
|
83
|
-
}
|
|
84
|
-
if (inlineResult.truncated) {
|
|
85
|
-
structuredContent.truncated = true;
|
|
86
|
-
}
|
|
87
|
-
applyInlineResultToStructuredContent(structuredContent, inlineResult, 'content');
|
|
88
|
-
return structuredContent;
|
|
89
32
|
}
|
|
90
|
-
function
|
|
91
|
-
|
|
92
|
-
url,
|
|
93
|
-
extractMainContent: options.extractMainContent,
|
|
94
|
-
includeMetadata: options.includeMetadata,
|
|
95
|
-
format: options.format,
|
|
96
|
-
includeContentBlocks: options.includeContentBlocks,
|
|
97
|
-
});
|
|
33
|
+
function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
|
|
34
|
+
return buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched markdown', pipeline.cacheKey, pipeline.data.content, pipeline.url, pipeline.data.title);
|
|
98
35
|
}
|
|
99
|
-
|
|
100
|
-
|
|
36
|
+
function logFetchStart(url) {
|
|
37
|
+
logDebug('Fetching URL', { url });
|
|
38
|
+
}
|
|
39
|
+
async function fetchPipeline(url) {
|
|
40
|
+
return performSharedFetch({
|
|
101
41
|
url,
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
...(options.maxContentLength !== undefined && {
|
|
107
|
-
maxContentLength: options.maxContentLength,
|
|
108
|
-
}),
|
|
109
|
-
...(input.customHeaders !== undefined && {
|
|
110
|
-
customHeaders: input.customHeaders,
|
|
111
|
-
}),
|
|
112
|
-
...(input.retries !== undefined && { retries: input.retries }),
|
|
113
|
-
...(input.timeout !== undefined && { timeout: input.timeout }),
|
|
114
|
-
...(options.format === 'markdown' && {
|
|
115
|
-
cacheVariant: 'markdown-with-blocks',
|
|
116
|
-
}),
|
|
117
|
-
transform: buildFetchUrlTransform(options),
|
|
118
|
-
deserialize: deserializeJsonlTransformResult,
|
|
119
|
-
};
|
|
120
|
-
return performSharedFetch(sharedOptions);
|
|
42
|
+
transform: buildMarkdownTransform(),
|
|
43
|
+
serialize: serializeMarkdownResult,
|
|
44
|
+
deserialize: deserializeMarkdownResult,
|
|
45
|
+
});
|
|
121
46
|
}
|
|
122
|
-
function
|
|
123
|
-
const structuredContent =
|
|
47
|
+
function buildResponse(pipeline, inlineResult) {
|
|
48
|
+
const structuredContent = buildStructuredContent(pipeline, inlineResult);
|
|
49
|
+
const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
|
|
124
50
|
return {
|
|
125
|
-
content
|
|
51
|
+
content,
|
|
126
52
|
structuredContent,
|
|
127
53
|
};
|
|
128
54
|
}
|
|
129
55
|
export async function fetchUrlToolHandler(input) {
|
|
130
56
|
try {
|
|
131
|
-
return await
|
|
57
|
+
return await executeFetch(input);
|
|
132
58
|
}
|
|
133
59
|
catch (error) {
|
|
134
60
|
logError('fetch-url tool error', error instanceof Error ? error : undefined);
|
|
135
|
-
|
|
136
|
-
return handleToolError(error, input.url, 'Failed to fetch URL', errorDetails);
|
|
61
|
+
return handleToolError(error, input.url, 'Failed to fetch URL');
|
|
137
62
|
}
|
|
138
63
|
}
|
|
139
|
-
async function
|
|
64
|
+
async function executeFetch(input) {
|
|
140
65
|
const { url } = input;
|
|
141
|
-
const format = input.format ?? 'jsonl';
|
|
142
66
|
if (!url) {
|
|
143
|
-
return createToolErrorResponse('URL is required', ''
|
|
67
|
+
return createToolErrorResponse('URL is required', '');
|
|
68
|
+
}
|
|
69
|
+
logFetchStart(url);
|
|
70
|
+
const { pipeline, inlineResult } = await fetchPipeline(url);
|
|
71
|
+
if (inlineResult.error) {
|
|
72
|
+
return createToolErrorResponse(inlineResult.error, url);
|
|
144
73
|
}
|
|
145
|
-
|
|
146
|
-
logFetchUrlStart(url, options);
|
|
147
|
-
const { pipeline, inlineResult } = await fetchUrlPipeline(url, input, options);
|
|
148
|
-
const inlineError = getInlineErrorResponse(inlineResult, url, buildFetchUrlErrorDetails(options.format));
|
|
149
|
-
if (inlineError)
|
|
150
|
-
return inlineError;
|
|
151
|
-
return buildFetchUrlResponse(pipeline, inlineResult, options.format);
|
|
74
|
+
return buildResponse(pipeline, inlineResult);
|
|
152
75
|
}
|
package/dist/tools/index.js
CHANGED
|
@@ -1,44 +1,25 @@
|
|
|
1
|
-
import { FETCH_MARKDOWN_TOOL_DESCRIPTION, FETCH_MARKDOWN_TOOL_NAME, fetchMarkdownToolHandler, } from './handlers/fetch-markdown.tool.js';
|
|
2
1
|
import { FETCH_URL_TOOL_DESCRIPTION, FETCH_URL_TOOL_NAME, fetchUrlToolHandler, } from './handlers/fetch-url.tool.js';
|
|
3
|
-
import {
|
|
4
|
-
const
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
openWorldHint: true,
|
|
17
|
-
},
|
|
2
|
+
import { fetchUrlInputSchema, fetchUrlOutputSchema } from './schemas.js';
|
|
3
|
+
const TOOL_DEFINITION = {
|
|
4
|
+
name: FETCH_URL_TOOL_NAME,
|
|
5
|
+
title: 'Fetch URL',
|
|
6
|
+
description: FETCH_URL_TOOL_DESCRIPTION,
|
|
7
|
+
inputSchema: fetchUrlInputSchema,
|
|
8
|
+
outputSchema: fetchUrlOutputSchema,
|
|
9
|
+
handler: fetchUrlToolHandler,
|
|
10
|
+
annotations: {
|
|
11
|
+
readOnlyHint: true,
|
|
12
|
+
destructiveHint: false,
|
|
13
|
+
idempotentHint: true,
|
|
14
|
+
openWorldHint: true,
|
|
18
15
|
},
|
|
19
|
-
|
|
20
|
-
name: FETCH_MARKDOWN_TOOL_NAME,
|
|
21
|
-
title: 'Fetch Markdown',
|
|
22
|
-
description: FETCH_MARKDOWN_TOOL_DESCRIPTION,
|
|
23
|
-
inputSchema: fetchMarkdownInputSchema,
|
|
24
|
-
outputSchema: fetchMarkdownOutputSchema,
|
|
25
|
-
handler: fetchMarkdownToolHandler,
|
|
26
|
-
annotations: {
|
|
27
|
-
readOnlyHint: true,
|
|
28
|
-
destructiveHint: false,
|
|
29
|
-
idempotentHint: true,
|
|
30
|
-
openWorldHint: true,
|
|
31
|
-
},
|
|
32
|
-
},
|
|
33
|
-
];
|
|
16
|
+
};
|
|
34
17
|
export function registerTools(server) {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
}, tool.handler);
|
|
43
|
-
}
|
|
18
|
+
server.registerTool(TOOL_DEFINITION.name, {
|
|
19
|
+
title: TOOL_DEFINITION.title,
|
|
20
|
+
description: TOOL_DEFINITION.description,
|
|
21
|
+
inputSchema: TOOL_DEFINITION.inputSchema,
|
|
22
|
+
outputSchema: TOOL_DEFINITION.outputSchema,
|
|
23
|
+
annotations: TOOL_DEFINITION.annotations,
|
|
24
|
+
}, TOOL_DEFINITION.handler);
|
|
44
25
|
}
|
package/dist/tools/schemas.d.ts
CHANGED
|
@@ -1,60 +1,10 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
export declare const fetchUrlInputSchema: z.ZodObject<{
|
|
3
|
-
format: z.ZodDefault<z.ZodEnum<{
|
|
4
|
-
jsonl: "jsonl";
|
|
5
|
-
markdown: "markdown";
|
|
6
|
-
}>>;
|
|
7
|
-
includeContentBlocks: z.ZodOptional<z.ZodBoolean>;
|
|
8
|
-
extractMainContent: z.ZodDefault<z.ZodBoolean>;
|
|
9
|
-
includeMetadata: z.ZodDefault<z.ZodBoolean>;
|
|
10
|
-
maxContentLength: z.ZodOptional<z.ZodNumber>;
|
|
11
3
|
url: z.ZodURL;
|
|
12
|
-
customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
13
|
-
timeout: z.ZodDefault<z.ZodNumber>;
|
|
14
|
-
retries: z.ZodDefault<z.ZodNumber>;
|
|
15
|
-
}, z.core.$strict>;
|
|
16
|
-
export declare const fetchMarkdownInputSchema: z.ZodObject<{
|
|
17
|
-
extractMainContent: z.ZodDefault<z.ZodBoolean>;
|
|
18
|
-
includeMetadata: z.ZodDefault<z.ZodBoolean>;
|
|
19
|
-
maxContentLength: z.ZodOptional<z.ZodNumber>;
|
|
20
|
-
url: z.ZodURL;
|
|
21
|
-
customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
22
|
-
timeout: z.ZodDefault<z.ZodNumber>;
|
|
23
|
-
retries: z.ZodDefault<z.ZodNumber>;
|
|
24
4
|
}, z.core.$strict>;
|
|
25
5
|
export declare const fetchUrlOutputSchema: z.ZodObject<{
|
|
26
|
-
contentSize: z.ZodOptional<z.ZodNumber>;
|
|
27
|
-
resourceUri: z.ZodOptional<z.ZodString>;
|
|
28
|
-
resourceMimeType: z.ZodOptional<z.ZodString>;
|
|
29
|
-
cached: z.ZodBoolean;
|
|
30
|
-
truncated: z.ZodOptional<z.ZodBoolean>;
|
|
31
|
-
error: z.ZodOptional<z.ZodString>;
|
|
32
|
-
errorCode: z.ZodOptional<z.ZodString>;
|
|
33
|
-
url: z.ZodString;
|
|
34
|
-
title: z.ZodOptional<z.ZodString>;
|
|
35
|
-
contentBlocks: z.ZodNumber;
|
|
36
|
-
fetchedAt: z.ZodString;
|
|
37
|
-
format: z.ZodEnum<{
|
|
38
|
-
jsonl: "jsonl";
|
|
39
|
-
markdown: "markdown";
|
|
40
|
-
}>;
|
|
41
|
-
content: z.ZodOptional<z.ZodString>;
|
|
42
|
-
}, z.core.$strict>;
|
|
43
|
-
export declare const fetchMarkdownOutputSchema: z.ZodObject<{
|
|
44
|
-
contentSize: z.ZodOptional<z.ZodNumber>;
|
|
45
|
-
resourceUri: z.ZodOptional<z.ZodString>;
|
|
46
|
-
resourceMimeType: z.ZodOptional<z.ZodString>;
|
|
47
|
-
cached: z.ZodBoolean;
|
|
48
|
-
truncated: z.ZodOptional<z.ZodBoolean>;
|
|
49
|
-
error: z.ZodOptional<z.ZodString>;
|
|
50
|
-
errorCode: z.ZodOptional<z.ZodString>;
|
|
51
6
|
url: z.ZodString;
|
|
52
7
|
title: z.ZodOptional<z.ZodString>;
|
|
53
|
-
fetchedAt: z.ZodString;
|
|
54
8
|
markdown: z.ZodOptional<z.ZodString>;
|
|
55
|
-
|
|
56
|
-
downloadUrl: z.ZodString;
|
|
57
|
-
fileName: z.ZodString;
|
|
58
|
-
expiresAt: z.ZodString;
|
|
59
|
-
}, z.core.$strip>>;
|
|
9
|
+
error: z.ZodOptional<z.ZodString>;
|
|
60
10
|
}, z.core.$strict>;
|
package/dist/tools/schemas.js
CHANGED
|
@@ -1,119 +1,13 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { config } from '../config/index.js';
|
|
3
|
-
const MAX_HEADER_NAME_LENGTH = 128;
|
|
4
|
-
const MAX_HEADER_VALUE_LENGTH = 2048;
|
|
5
|
-
const MAX_HEADER_COUNT = 50;
|
|
6
|
-
const MAX_CONTENT_LENGTH = config.constants.maxContentSize;
|
|
7
|
-
const customHeadersSchema = z
|
|
8
|
-
.record(z.string().max(MAX_HEADER_NAME_LENGTH), z.string().max(MAX_HEADER_VALUE_LENGTH))
|
|
9
|
-
.refine((headers) => Object.keys(headers).length <= MAX_HEADER_COUNT, {
|
|
10
|
-
error: `customHeaders must have at most ${MAX_HEADER_COUNT} entries`,
|
|
11
|
-
});
|
|
12
|
-
const requestOptionsSchema = z.object({
|
|
13
|
-
customHeaders: customHeadersSchema
|
|
14
|
-
.optional()
|
|
15
|
-
.describe('Custom HTTP headers for the request'),
|
|
16
|
-
timeout: z
|
|
17
|
-
.number()
|
|
18
|
-
.min(1000)
|
|
19
|
-
.max(120000)
|
|
20
|
-
.default(config.fetcher.timeout)
|
|
21
|
-
.describe('Request timeout in milliseconds (1000-120000)'),
|
|
22
|
-
retries: z
|
|
23
|
-
.number()
|
|
24
|
-
.min(1)
|
|
25
|
-
.max(10)
|
|
26
|
-
.default(3)
|
|
27
|
-
.describe('Number of retry attempts (1-10)'),
|
|
28
|
-
});
|
|
29
|
-
const extractionOptionsSchema = z.object({
|
|
30
|
-
extractMainContent: z
|
|
31
|
-
.boolean()
|
|
32
|
-
.default(true)
|
|
33
|
-
.describe('Use Readability to extract main article content'),
|
|
34
|
-
includeMetadata: z
|
|
35
|
-
.boolean()
|
|
36
|
-
.default(true)
|
|
37
|
-
.describe('Include page metadata (title, description, etc.)'),
|
|
38
|
-
maxContentLength: z
|
|
39
|
-
.number()
|
|
40
|
-
.positive()
|
|
41
|
-
.max(MAX_CONTENT_LENGTH)
|
|
42
|
-
.optional()
|
|
43
|
-
.describe('Maximum content length in characters'),
|
|
44
|
-
});
|
|
45
|
-
const formatOptionsSchema = z.object({
|
|
46
|
-
format: z
|
|
47
|
-
.enum(['jsonl', 'markdown'])
|
|
48
|
-
.default('jsonl')
|
|
49
|
-
.describe('Output format'),
|
|
50
|
-
includeContentBlocks: z
|
|
51
|
-
.boolean()
|
|
52
|
-
.optional()
|
|
53
|
-
.describe('Include content block counts when format=markdown'),
|
|
54
|
-
});
|
|
55
|
-
const resourceFieldsSchema = z.object({
|
|
56
|
-
contentSize: z.number().optional().describe('Content length in characters'),
|
|
57
|
-
resourceUri: z
|
|
58
|
-
.string()
|
|
59
|
-
.optional()
|
|
60
|
-
.describe('Resource URI when content is too large to inline'),
|
|
61
|
-
resourceMimeType: z
|
|
62
|
-
.string()
|
|
63
|
-
.optional()
|
|
64
|
-
.describe('MIME type for the resource URI'),
|
|
65
|
-
cached: z.boolean().describe('Whether the result was served from cache'),
|
|
66
|
-
truncated: z
|
|
67
|
-
.boolean()
|
|
68
|
-
.optional()
|
|
69
|
-
.describe('Whether content was truncated by maxContentLength'),
|
|
70
|
-
error: z.string().optional().describe('Error message if the request failed'),
|
|
71
|
-
errorCode: z.string().optional().describe('Error code if the request failed'),
|
|
72
|
-
});
|
|
73
|
-
const fileDownloadSchema = z.object({
|
|
74
|
-
downloadUrl: z.string().describe('Relative URL to download the .md file'),
|
|
75
|
-
fileName: z.string().describe('Suggested filename for download'),
|
|
76
|
-
expiresAt: z.string().describe('ISO timestamp when download expires'),
|
|
77
|
-
});
|
|
78
2
|
export const fetchUrlInputSchema = z.strictObject({
|
|
79
|
-
|
|
80
|
-
url: z.url({ protocol: /^https?:$/i }).describe('The URL to fetch'),
|
|
81
|
-
...extractionOptionsSchema.shape,
|
|
82
|
-
...formatOptionsSchema.shape,
|
|
83
|
-
});
|
|
84
|
-
export const fetchMarkdownInputSchema = z.strictObject({
|
|
85
|
-
...requestOptionsSchema.shape,
|
|
86
|
-
url: z.url({ protocol: /^https?:$/i }).describe('The URL to fetch'),
|
|
87
|
-
...extractionOptionsSchema.shape,
|
|
3
|
+
url: z.url({ protocol: /^https?$/i }).describe('The URL to fetch'),
|
|
88
4
|
});
|
|
89
5
|
export const fetchUrlOutputSchema = z.strictObject({
|
|
90
6
|
url: z.string().describe('The fetched URL'),
|
|
91
7
|
title: z.string().optional().describe('Page title'),
|
|
92
|
-
contentBlocks: z
|
|
93
|
-
.number()
|
|
94
|
-
.describe('Number of content blocks extracted (JSONL only)'),
|
|
95
|
-
fetchedAt: z
|
|
96
|
-
.string()
|
|
97
|
-
.describe('ISO timestamp of when the content was fetched'),
|
|
98
|
-
format: z.enum(['jsonl', 'markdown']).describe('Output format used'),
|
|
99
|
-
content: z
|
|
100
|
-
.string()
|
|
101
|
-
.optional()
|
|
102
|
-
.describe('The extracted content in JSONL or Markdown format'),
|
|
103
|
-
...resourceFieldsSchema.shape,
|
|
104
|
-
});
|
|
105
|
-
export const fetchMarkdownOutputSchema = z.strictObject({
|
|
106
|
-
url: z.string().describe('The fetched URL'),
|
|
107
|
-
title: z.string().optional().describe('Page title'),
|
|
108
|
-
fetchedAt: z
|
|
109
|
-
.string()
|
|
110
|
-
.describe('ISO timestamp of when the content was fetched'),
|
|
111
8
|
markdown: z
|
|
112
9
|
.string()
|
|
113
10
|
.optional()
|
|
114
11
|
.describe('The extracted content in Markdown format'),
|
|
115
|
-
|
|
116
|
-
.optional()
|
|
117
|
-
.describe('Download information when content is cached'),
|
|
118
|
-
...resourceFieldsSchema.shape,
|
|
12
|
+
error: z.string().optional().describe('Error message if the request failed'),
|
|
119
13
|
});
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { MarkdownTransformResult } from '../../config/types/content.js';
|
|
2
|
+
export type CachedMarkdownResult = MarkdownTransformResult & {
|
|
3
|
+
readonly content: string;
|
|
4
|
+
};
|
|
5
|
+
export declare function parseCachedMarkdownResult(cached: string): CachedMarkdownResult | undefined;
|