@j0hanz/superfetch 1.2.5 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -156
- package/dist/config/auth-config.d.ts +16 -0
- package/dist/config/auth-config.js +53 -0
- package/dist/config/constants.d.ts +11 -13
- package/dist/config/constants.js +1 -3
- package/dist/config/env-parsers.d.ts +7 -0
- package/dist/config/env-parsers.js +84 -0
- package/dist/config/formatting.d.ts +2 -2
- package/dist/config/index.d.ts +47 -53
- package/dist/config/index.js +35 -64
- package/dist/config/types/content.d.ts +1 -49
- package/dist/config/types/runtime.d.ts +8 -16
- package/dist/config/types/tools.d.ts +2 -28
- package/dist/http/accept-policy.d.ts +3 -0
- package/dist/http/accept-policy.js +45 -0
- package/dist/http/async-handler.d.ts +2 -0
- package/dist/http/async-handler.js +5 -0
- package/dist/http/auth-introspection.d.ts +2 -0
- package/dist/http/auth-introspection.js +141 -0
- package/dist/http/auth-static.d.ts +2 -0
- package/dist/http/auth-static.js +23 -0
- package/dist/http/auth.d.ts +3 -2
- package/dist/http/auth.js +254 -23
- package/dist/http/cors.d.ts +6 -6
- package/dist/http/cors.js +7 -42
- package/dist/http/download-routes.d.ts +0 -12
- package/dist/http/download-routes.js +21 -58
- package/dist/http/host-allowlist.d.ts +3 -0
- package/dist/http/host-allowlist.js +117 -0
- package/dist/http/jsonrpc-http.d.ts +2 -0
- package/dist/http/jsonrpc-http.js +10 -0
- package/dist/http/mcp-routes.d.ts +8 -3
- package/dist/http/mcp-routes.js +137 -31
- package/dist/http/mcp-session-eviction.d.ts +3 -0
- package/dist/http/mcp-session-eviction.js +24 -0
- package/dist/http/mcp-session-helpers.d.ts +0 -1
- package/dist/http/mcp-session-helpers.js +1 -1
- package/dist/http/mcp-session-init.d.ts +7 -0
- package/dist/http/mcp-session-init.js +94 -0
- package/dist/http/mcp-session-slots.d.ts +17 -0
- package/dist/http/mcp-session-slots.js +55 -0
- package/dist/http/mcp-session-transport-init.d.ts +7 -0
- package/dist/http/mcp-session-transport-init.js +41 -0
- package/dist/http/mcp-session-transport.d.ts +7 -0
- package/dist/http/mcp-session-transport.js +57 -0
- package/dist/http/mcp-session-types.d.ts +5 -0
- package/dist/http/mcp-session-types.js +1 -0
- package/dist/http/mcp-session.d.ts +9 -9
- package/dist/http/mcp-session.js +15 -137
- package/dist/http/mcp-sessions.d.ts +43 -0
- package/dist/http/mcp-sessions.js +392 -0
- package/dist/http/mcp-validation.d.ts +1 -0
- package/dist/http/mcp-validation.js +11 -10
- package/dist/http/protocol-policy.d.ts +2 -0
- package/dist/http/protocol-policy.js +31 -0
- package/dist/http/rate-limit.js +7 -4
- package/dist/http/server-config.d.ts +1 -0
- package/dist/http/server-config.js +40 -0
- package/dist/http/server-middleware.d.ts +7 -9
- package/dist/http/server-middleware.js +9 -70
- package/dist/http/server-shutdown.d.ts +4 -0
- package/dist/http/server-shutdown.js +43 -0
- package/dist/http/server.d.ts +10 -0
- package/dist/http/server.js +546 -61
- package/dist/http/session-cleanup.js +8 -5
- package/dist/middleware/error-handler.d.ts +1 -1
- package/dist/middleware/error-handler.js +32 -33
- package/dist/resources/cached-content-params.d.ts +5 -0
- package/dist/resources/cached-content-params.js +36 -0
- package/dist/resources/cached-content.js +67 -125
- package/dist/resources/index.js +0 -82
- package/dist/server.js +50 -29
- package/dist/services/cache-events.d.ts +8 -0
- package/dist/services/cache-events.js +19 -0
- package/dist/services/cache-keys.d.ts +7 -0
- package/dist/services/cache-keys.js +57 -0
- package/dist/services/cache.d.ts +4 -9
- package/dist/services/cache.js +77 -139
- package/dist/services/context.d.ts +0 -1
- package/dist/services/context.js +0 -7
- package/dist/services/extractor.js +55 -116
- package/dist/services/fetcher/agents.d.ts +2 -2
- package/dist/services/fetcher/agents.js +35 -96
- package/dist/services/fetcher/dns-selection.d.ts +2 -0
- package/dist/services/fetcher/dns-selection.js +72 -0
- package/dist/services/fetcher/interceptors.d.ts +0 -22
- package/dist/services/fetcher/interceptors.js +18 -32
- package/dist/services/fetcher/redirects.js +16 -7
- package/dist/services/fetcher/response.js +79 -34
- package/dist/services/fetcher.d.ts +22 -3
- package/dist/services/fetcher.js +544 -44
- package/dist/services/fifo-queue.d.ts +8 -0
- package/dist/services/fifo-queue.js +25 -0
- package/dist/services/logger.js +2 -2
- package/dist/services/metadata-collector.d.ts +1 -9
- package/dist/services/metadata-collector.js +71 -2
- package/dist/services/transform-worker-pool.d.ts +4 -14
- package/dist/services/transform-worker-pool.js +177 -129
- package/dist/services/transform-worker-types.d.ts +32 -0
- package/dist/services/transform-worker-types.js +14 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
- package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
- package/dist/tools/handlers/fetch-single.shared.d.ts +11 -22
- package/dist/tools/handlers/fetch-single.shared.js +175 -89
- package/dist/tools/handlers/fetch-url.tool.d.ts +7 -1
- package/dist/tools/handlers/fetch-url.tool.js +84 -119
- package/dist/tools/index.js +21 -40
- package/dist/tools/schemas.d.ts +1 -51
- package/dist/tools/schemas.js +1 -107
- package/dist/tools/utils/cached-markdown.d.ts +5 -0
- package/dist/tools/utils/cached-markdown.js +46 -0
- package/dist/tools/utils/content-shaping.d.ts +4 -0
- package/dist/tools/utils/content-shaping.js +67 -0
- package/dist/tools/utils/content-transform.d.ts +5 -17
- package/dist/tools/utils/content-transform.js +134 -114
- package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
- package/dist/tools/utils/fetch-pipeline.js +57 -63
- package/dist/tools/utils/frontmatter.d.ts +3 -0
- package/dist/tools/utils/frontmatter.js +73 -0
- package/dist/tools/utils/inline-content.d.ts +1 -2
- package/dist/tools/utils/inline-content.js +4 -7
- package/dist/tools/utils/markdown-heuristics.d.ts +1 -0
- package/dist/tools/utils/markdown-heuristics.js +19 -0
- package/dist/tools/utils/markdown-signals.d.ts +1 -0
- package/dist/tools/utils/markdown-signals.js +19 -0
- package/dist/tools/utils/raw-markdown-frontmatter.d.ts +3 -0
- package/dist/tools/utils/raw-markdown-frontmatter.js +73 -0
- package/dist/tools/utils/raw-markdown.d.ts +6 -0
- package/dist/tools/utils/raw-markdown.js +135 -0
- package/dist/transformers/markdown/fenced-code-rule.d.ts +2 -0
- package/dist/transformers/markdown/fenced-code-rule.js +38 -0
- package/dist/transformers/markdown/frontmatter.d.ts +2 -0
- package/dist/transformers/markdown/frontmatter.js +45 -0
- package/dist/transformers/markdown/noise-rule.d.ts +2 -0
- package/dist/transformers/markdown/noise-rule.js +80 -0
- package/dist/transformers/markdown/turndown-instance.d.ts +2 -0
- package/dist/transformers/markdown/turndown-instance.js +19 -0
- package/dist/transformers/markdown.d.ts +2 -0
- package/dist/transformers/markdown.js +185 -0
- package/dist/transformers/markdown.transformer.js +5 -117
- package/dist/utils/cached-payload.d.ts +7 -0
- package/dist/utils/cached-payload.js +36 -0
- package/dist/utils/code-language-bash.d.ts +1 -0
- package/dist/utils/code-language-bash.js +48 -0
- package/dist/utils/code-language-core.d.ts +2 -0
- package/dist/utils/code-language-core.js +13 -0
- package/dist/utils/code-language-detectors.d.ts +5 -0
- package/dist/utils/code-language-detectors.js +142 -0
- package/dist/utils/code-language-helpers.d.ts +5 -0
- package/dist/utils/code-language-helpers.js +62 -0
- package/dist/utils/code-language-parsing.d.ts +5 -0
- package/dist/utils/code-language-parsing.js +62 -0
- package/dist/utils/code-language.d.ts +9 -0
- package/dist/utils/code-language.js +250 -46
- package/dist/utils/error-details.d.ts +3 -0
- package/dist/utils/error-details.js +12 -0
- package/dist/utils/error-utils.js +1 -1
- package/dist/utils/filename-generator.js +34 -12
- package/dist/utils/guards.d.ts +1 -0
- package/dist/utils/guards.js +3 -0
- package/dist/utils/header-normalizer.d.ts +0 -3
- package/dist/utils/header-normalizer.js +3 -3
- package/dist/utils/ip-address.d.ts +4 -0
- package/dist/utils/ip-address.js +6 -0
- package/dist/utils/tool-error-handler.d.ts +2 -2
- package/dist/utils/tool-error-handler.js +14 -46
- package/dist/utils/url-transformer.d.ts +7 -0
- package/dist/utils/url-transformer.js +147 -0
- package/dist/utils/url-validator.d.ts +1 -2
- package/dist/utils/url-validator.js +53 -114
- package/dist/workers/content-transform.worker.d.ts +1 -0
- package/dist/workers/content-transform.worker.js +40 -0
- package/package.json +17 -18
|
@@ -1,21 +1,8 @@
|
|
|
1
|
-
import type { PipelineResult, ToolContentBlock } from '../../config/types/runtime.js';
|
|
2
|
-
import type { FileDownloadInfo, ToolResponseBase } from '../../config/types/tools.js';
|
|
3
|
-
import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
|
|
4
|
-
import { applyInlineContentLimit } from '../utils/inline-content.js';
|
|
5
|
-
type SharedFetchFormat = 'jsonl' | 'markdown';
|
|
1
|
+
import type { FetchPipelineOptions, PipelineResult, ToolContentBlock } from '../../config/types/runtime.js';
|
|
6
2
|
interface SharedFetchOptions<T extends {
|
|
7
3
|
content: string;
|
|
8
4
|
}> {
|
|
9
5
|
readonly url: string;
|
|
10
|
-
readonly format: SharedFetchFormat;
|
|
11
|
-
readonly extractMainContent: boolean;
|
|
12
|
-
readonly includeMetadata: boolean;
|
|
13
|
-
readonly maxContentLength?: number;
|
|
14
|
-
readonly includeContentBlocks?: boolean;
|
|
15
|
-
readonly cacheVariant?: string;
|
|
16
|
-
readonly customHeaders?: Record<string, string>;
|
|
17
|
-
readonly retries?: number;
|
|
18
|
-
readonly timeout?: number;
|
|
19
6
|
readonly transform: (html: string, normalizedUrl: string) => T | Promise<T>;
|
|
20
7
|
readonly serialize?: (result: T) => string;
|
|
21
8
|
readonly deserialize?: (cached: string) => T | undefined;
|
|
@@ -30,13 +17,15 @@ export declare function performSharedFetch<T extends {
|
|
|
30
17
|
inlineResult: ReturnType<typeof applyInlineContentLimit>;
|
|
31
18
|
}>;
|
|
32
19
|
export type InlineResult = ReturnType<typeof applyInlineContentLimit>;
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
20
|
+
export declare function buildToolContentBlocks(structuredContent: Record<string, unknown>, fromCache: boolean, inlineResult: InlineResult, resourceName: string, cacheKey?: string | null, fullContent?: string, url?: string, title?: string): ToolContentBlock[];
|
|
21
|
+
interface InlineContentResult {
|
|
22
|
+
content?: string;
|
|
23
|
+
contentSize: number;
|
|
24
|
+
resourceUri?: string;
|
|
25
|
+
resourceMimeType?: string;
|
|
26
|
+
error?: string;
|
|
27
|
+
truncated?: boolean;
|
|
37
28
|
}
|
|
38
|
-
|
|
39
|
-
export declare function
|
|
40
|
-
export declare function applyInlineResultToStructuredContent(structuredContent: Record<string, unknown>, inlineResult: InlineResult, contentKey: string): void;
|
|
41
|
-
export declare function buildToolContentBlocks(structuredContent: Record<string, unknown>, fromCache: boolean, inlineResult: InlineResult, resourceName: string, cacheKey?: string | null, fullContent?: string, format?: SharedFetchFormat, url?: string, title?: string): ToolContentBlock[];
|
|
29
|
+
declare function applyInlineContentLimit(content: string, cacheKey: string | null): InlineContentResult;
|
|
30
|
+
export declare function executeFetchPipeline<T>(options: FetchPipelineOptions<T>): Promise<PipelineResult<T>>;
|
|
42
31
|
export {};
|
|
@@ -1,79 +1,32 @@
|
|
|
1
|
+
import { TRUNCATION_MARKER } from '../../config/formatting.js';
|
|
1
2
|
import { config } from '../../config/index.js';
|
|
2
|
-
import
|
|
3
|
+
import * as cache from '../../services/cache.js';
|
|
4
|
+
import { createCacheKey, toResourceUri } from '../../services/cache-keys.js';
|
|
5
|
+
import { fetchNormalizedUrl } from '../../services/fetcher.js';
|
|
6
|
+
import { logDebug } from '../../services/logger.js';
|
|
3
7
|
import { generateSafeFilename } from '../../utils/filename-generator.js';
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
|
|
8
|
-
export async function performSharedFetch(options, deps = {}) {
|
|
9
|
-
const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
|
|
10
|
-
const cacheNamespace = options.format === 'markdown' ? 'markdown' : 'url';
|
|
11
|
-
const cacheVary = appendHeaderVary({
|
|
12
|
-
format: options.format,
|
|
13
|
-
extractMainContent: options.extractMainContent,
|
|
14
|
-
includeMetadata: options.includeMetadata,
|
|
15
|
-
maxContentLength: options.maxContentLength,
|
|
16
|
-
...(options.cacheVariant ? { variant: options.cacheVariant } : {}),
|
|
17
|
-
...(options.format === 'markdown'
|
|
18
|
-
? { includeContentBlocks: options.includeContentBlocks }
|
|
19
|
-
: { contentBlocks: true }),
|
|
20
|
-
}, options.customHeaders);
|
|
21
|
-
const pipelineOptions = {
|
|
22
|
-
url: options.url,
|
|
23
|
-
cacheNamespace,
|
|
24
|
-
transform: options.transform,
|
|
25
|
-
};
|
|
26
|
-
if (options.customHeaders !== undefined) {
|
|
27
|
-
pipelineOptions.customHeaders = options.customHeaders;
|
|
28
|
-
}
|
|
29
|
-
if (options.retries !== undefined) {
|
|
30
|
-
pipelineOptions.retries = options.retries;
|
|
31
|
-
}
|
|
32
|
-
if (options.timeout !== undefined) {
|
|
33
|
-
pipelineOptions.timeout = options.timeout;
|
|
34
|
-
}
|
|
35
|
-
if (cacheVary !== undefined) {
|
|
36
|
-
pipelineOptions.cacheVary = cacheVary;
|
|
37
|
-
}
|
|
8
|
+
import { isRecord } from '../../utils/guards.js';
|
|
9
|
+
import { transformToRawUrl } from '../../utils/url-transformer.js';
|
|
10
|
+
import { normalizeUrl } from '../../utils/url-validator.js';
|
|
11
|
+
function applyOptionalPipelineSerialization(pipelineOptions, options) {
|
|
38
12
|
if (options.serialize !== undefined) {
|
|
39
13
|
pipelineOptions.serialize = options.serialize;
|
|
40
14
|
}
|
|
41
15
|
if (options.deserialize !== undefined) {
|
|
42
16
|
pipelineOptions.deserialize = options.deserialize;
|
|
43
17
|
}
|
|
44
|
-
const pipeline = await executePipeline(pipelineOptions);
|
|
45
|
-
const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null, options.format);
|
|
46
|
-
return { pipeline, inlineResult };
|
|
47
18
|
}
|
|
48
|
-
export function
|
|
49
|
-
const
|
|
50
|
-
|
|
51
|
-
url:
|
|
19
|
+
export async function performSharedFetch(options, deps = {}) {
|
|
20
|
+
const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
|
|
21
|
+
const pipelineOptions = {
|
|
22
|
+
url: options.url,
|
|
23
|
+
cacheNamespace: 'markdown',
|
|
24
|
+
transform: options.transform,
|
|
52
25
|
};
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
});
|
|
58
|
-
}
|
|
59
|
-
return buildFileDownloadInfo(infoOptions);
|
|
60
|
-
}
|
|
61
|
-
export function getInlineErrorResponse(inlineResult, url, details) {
|
|
62
|
-
if (!inlineResult.error)
|
|
63
|
-
return null;
|
|
64
|
-
return createToolErrorResponse(inlineResult.error, url, 'INTERNAL_ERROR', details);
|
|
65
|
-
}
|
|
66
|
-
export function applyInlineResultToStructuredContent(structuredContent, inlineResult, contentKey) {
|
|
67
|
-
if (inlineResult.truncated) {
|
|
68
|
-
structuredContent.truncated = true;
|
|
69
|
-
}
|
|
70
|
-
if (typeof inlineResult.content === 'string') {
|
|
71
|
-
structuredContent[contentKey] = inlineResult.content;
|
|
72
|
-
}
|
|
73
|
-
if (inlineResult.resourceUri) {
|
|
74
|
-
structuredContent.resourceUri = inlineResult.resourceUri;
|
|
75
|
-
structuredContent.resourceMimeType = inlineResult.resourceMimeType;
|
|
76
|
-
}
|
|
26
|
+
applyOptionalPipelineSerialization(pipelineOptions, options);
|
|
27
|
+
const pipeline = await executePipeline(pipelineOptions);
|
|
28
|
+
const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null);
|
|
29
|
+
return { pipeline, inlineResult };
|
|
77
30
|
}
|
|
78
31
|
function serializeStructuredContent(structuredContent, fromCache) {
|
|
79
32
|
return JSON.stringify(structuredContent, fromCache ? undefined : null, fromCache ? undefined : 2);
|
|
@@ -93,45 +46,178 @@ function buildResourceLink(inlineResult, name) {
|
|
|
93
46
|
}
|
|
94
47
|
return block;
|
|
95
48
|
}
|
|
96
|
-
function buildEmbeddedResource(content,
|
|
49
|
+
function buildEmbeddedResource(content, url, title) {
|
|
97
50
|
if (!content) {
|
|
98
51
|
return null;
|
|
99
52
|
}
|
|
100
|
-
|
|
101
|
-
const extension = mimeType === 'text/markdown' ? '.md' : '.jsonl';
|
|
102
|
-
const filename = generateSafeFilename(url, title, undefined, extension);
|
|
103
|
-
// Use file: URI scheme with filename for better VS Code integration
|
|
53
|
+
const filename = generateSafeFilename(url, title, undefined, '.md');
|
|
104
54
|
const uri = `file:///${filename}`;
|
|
105
55
|
return {
|
|
106
56
|
type: 'resource',
|
|
107
57
|
resource: {
|
|
108
58
|
uri,
|
|
109
|
-
mimeType,
|
|
59
|
+
mimeType: 'text/markdown',
|
|
110
60
|
text: content,
|
|
111
61
|
},
|
|
112
62
|
};
|
|
113
63
|
}
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
blocks.push(embeddedResource);
|
|
129
|
-
}
|
|
64
|
+
function resolveContentToEmbed(inlineResult, fullContent, useInlineInHttpMode) {
|
|
65
|
+
if (useInlineInHttpMode) {
|
|
66
|
+
return inlineResult.content;
|
|
67
|
+
}
|
|
68
|
+
return fullContent ?? inlineResult.content;
|
|
69
|
+
}
|
|
70
|
+
function maybeAppendEmbeddedResource(blocks, contentToEmbed, url, title) {
|
|
71
|
+
if (typeof contentToEmbed !== 'string')
|
|
72
|
+
return;
|
|
73
|
+
if (!url)
|
|
74
|
+
return;
|
|
75
|
+
const embeddedResource = buildEmbeddedResource(contentToEmbed, url, title);
|
|
76
|
+
if (embeddedResource) {
|
|
77
|
+
blocks.push(embeddedResource);
|
|
130
78
|
}
|
|
131
|
-
|
|
79
|
+
}
|
|
80
|
+
function maybeAppendResourceLink(blocks, inlineResult, resourceName) {
|
|
132
81
|
const resourceLink = buildResourceLink(inlineResult, resourceName);
|
|
133
82
|
if (resourceLink) {
|
|
134
83
|
blocks.push(resourceLink);
|
|
135
84
|
}
|
|
85
|
+
}
|
|
86
|
+
function buildTextBlock(structuredContent, fromCache) {
|
|
87
|
+
return {
|
|
88
|
+
type: 'text',
|
|
89
|
+
text: serializeStructuredContent(structuredContent, fromCache),
|
|
90
|
+
};
|
|
91
|
+
}
|
|
92
|
+
export function buildToolContentBlocks(structuredContent, fromCache, inlineResult, resourceName, cacheKey, fullContent, url, title) {
|
|
93
|
+
const blocks = [
|
|
94
|
+
buildTextBlock(structuredContent, fromCache),
|
|
95
|
+
];
|
|
96
|
+
const contentToEmbed = resolveContentToEmbed(inlineResult, fullContent, config.runtime.httpMode);
|
|
97
|
+
maybeAppendEmbeddedResource(blocks, contentToEmbed, url, title);
|
|
98
|
+
maybeAppendResourceLink(blocks, inlineResult, resourceName);
|
|
136
99
|
return blocks;
|
|
137
100
|
}
|
|
101
|
+
function applyInlineContentLimit(content, cacheKey) {
|
|
102
|
+
const contentSize = content.length;
|
|
103
|
+
const inlineLimit = config.constants.maxInlineContentChars;
|
|
104
|
+
if (contentSize <= inlineLimit) {
|
|
105
|
+
return { content, contentSize };
|
|
106
|
+
}
|
|
107
|
+
const resourceUri = resolveResourceUri(cacheKey);
|
|
108
|
+
if (!resourceUri) {
|
|
109
|
+
return buildTruncatedFallback(content, contentSize, inlineLimit);
|
|
110
|
+
}
|
|
111
|
+
return {
|
|
112
|
+
contentSize,
|
|
113
|
+
resourceUri,
|
|
114
|
+
resourceMimeType: 'text/markdown',
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
function resolveResourceUri(cacheKey) {
|
|
118
|
+
if (!config.cache.enabled || !cacheKey)
|
|
119
|
+
return null;
|
|
120
|
+
return toResourceUri(cacheKey);
|
|
121
|
+
}
|
|
122
|
+
function buildTruncatedFallback(content, contentSize, inlineLimit) {
|
|
123
|
+
const maxContentLength = Math.max(0, inlineLimit - TRUNCATION_MARKER.length);
|
|
124
|
+
const truncatedContent = content.length > inlineLimit
|
|
125
|
+
? `${content.substring(0, maxContentLength)}${TRUNCATION_MARKER}`
|
|
126
|
+
: content;
|
|
127
|
+
return {
|
|
128
|
+
content: truncatedContent,
|
|
129
|
+
contentSize,
|
|
130
|
+
truncated: true,
|
|
131
|
+
};
|
|
132
|
+
}
|
|
133
|
+
function attemptCacheRetrieval({ cacheKey, deserialize, cacheNamespace, normalizedUrl, }) {
|
|
134
|
+
if (!cacheKey)
|
|
135
|
+
return null;
|
|
136
|
+
const cached = cache.get(cacheKey);
|
|
137
|
+
if (!cached)
|
|
138
|
+
return null;
|
|
139
|
+
if (!deserialize) {
|
|
140
|
+
logCacheMiss('missing deserializer', cacheNamespace, normalizedUrl);
|
|
141
|
+
return null;
|
|
142
|
+
}
|
|
143
|
+
const data = deserialize(cached.content);
|
|
144
|
+
if (data === undefined) {
|
|
145
|
+
logCacheMiss('deserialize failure', cacheNamespace, normalizedUrl);
|
|
146
|
+
return null;
|
|
147
|
+
}
|
|
148
|
+
logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
|
|
149
|
+
return {
|
|
150
|
+
data,
|
|
151
|
+
fromCache: true,
|
|
152
|
+
url: normalizedUrl,
|
|
153
|
+
fetchedAt: cached.fetchedAt,
|
|
154
|
+
cacheKey,
|
|
155
|
+
};
|
|
156
|
+
}
|
|
157
|
+
function resolveNormalizedUrl(url) {
|
|
158
|
+
const { normalizedUrl: validatedUrl } = normalizeUrl(url);
|
|
159
|
+
const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
|
|
160
|
+
return { normalizedUrl, originalUrl: validatedUrl, transformed };
|
|
161
|
+
}
|
|
162
|
+
export async function executeFetchPipeline(options) {
|
|
163
|
+
const resolvedUrl = resolveNormalizedUrl(options.url);
|
|
164
|
+
logRawUrlTransformation(resolvedUrl);
|
|
165
|
+
const cacheKey = createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
|
|
166
|
+
const cachedResult = attemptCacheRetrieval({
|
|
167
|
+
cacheKey,
|
|
168
|
+
deserialize: options.deserialize,
|
|
169
|
+
cacheNamespace: options.cacheNamespace,
|
|
170
|
+
normalizedUrl: resolvedUrl.normalizedUrl,
|
|
171
|
+
});
|
|
172
|
+
if (cachedResult)
|
|
173
|
+
return cachedResult;
|
|
174
|
+
logDebug('Fetching URL', { url: resolvedUrl.normalizedUrl });
|
|
175
|
+
const fetchOptions = options.signal === undefined ? {} : { signal: options.signal };
|
|
176
|
+
const html = await fetchNormalizedUrl(resolvedUrl.normalizedUrl, fetchOptions);
|
|
177
|
+
const data = await options.transform(html, resolvedUrl.normalizedUrl);
|
|
178
|
+
if (cache.isEnabled()) {
|
|
179
|
+
persistCache({
|
|
180
|
+
cacheKey,
|
|
181
|
+
data,
|
|
182
|
+
serialize: options.serialize,
|
|
183
|
+
normalizedUrl: resolvedUrl.normalizedUrl,
|
|
184
|
+
});
|
|
185
|
+
}
|
|
186
|
+
return {
|
|
187
|
+
data,
|
|
188
|
+
fromCache: false,
|
|
189
|
+
url: resolvedUrl.normalizedUrl,
|
|
190
|
+
fetchedAt: new Date().toISOString(),
|
|
191
|
+
cacheKey,
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
function persistCache({ cacheKey, data, serialize, normalizedUrl, }) {
|
|
195
|
+
if (!cacheKey)
|
|
196
|
+
return;
|
|
197
|
+
const serializer = serialize ?? JSON.stringify;
|
|
198
|
+
const title = extractTitle(data);
|
|
199
|
+
const metadata = {
|
|
200
|
+
url: normalizedUrl,
|
|
201
|
+
...(title === undefined ? {} : { title }),
|
|
202
|
+
};
|
|
203
|
+
cache.set(cacheKey, serializer(data), metadata);
|
|
204
|
+
}
|
|
205
|
+
function extractTitle(value) {
|
|
206
|
+
if (!isRecord(value))
|
|
207
|
+
return undefined;
|
|
208
|
+
const { title } = value;
|
|
209
|
+
return typeof title === 'string' ? title : undefined;
|
|
210
|
+
}
|
|
211
|
+
function logCacheMiss(reason, cacheNamespace, normalizedUrl) {
|
|
212
|
+
logDebug(`Cache miss due to ${reason}`, {
|
|
213
|
+
namespace: cacheNamespace,
|
|
214
|
+
url: normalizedUrl,
|
|
215
|
+
});
|
|
216
|
+
}
|
|
217
|
+
function logRawUrlTransformation(resolvedUrl) {
|
|
218
|
+
if (!resolvedUrl.transformed)
|
|
219
|
+
return;
|
|
220
|
+
logDebug('Using transformed raw content URL', {
|
|
221
|
+
original: resolvedUrl.originalUrl,
|
|
222
|
+
});
|
|
223
|
+
}
|
|
@@ -1,4 +1,10 @@
|
|
|
1
|
+
import type { MarkdownTransformResult } from '../../config/types/content.js';
|
|
1
2
|
import type { FetchUrlInput, ToolResponseBase } from '../../config/types/tools.js';
|
|
2
3
|
export declare const FETCH_URL_TOOL_NAME = "fetch-url";
|
|
3
|
-
export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to
|
|
4
|
+
export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format";
|
|
5
|
+
type MarkdownPipelineResult = MarkdownTransformResult & {
|
|
6
|
+
readonly content: string;
|
|
7
|
+
};
|
|
8
|
+
export declare function parseCachedMarkdownResult(cached: string): MarkdownPipelineResult | undefined;
|
|
4
9
|
export declare function fetchUrlToolHandler(input: FetchUrlInput): Promise<ToolResponseBase>;
|
|
10
|
+
export {};
|
|
@@ -1,152 +1,117 @@
|
|
|
1
|
-
import { config } from '../../config/index.js';
|
|
2
1
|
import { logDebug, logError } from '../../services/logger.js';
|
|
2
|
+
import { isRecord } from '../../utils/guards.js';
|
|
3
3
|
import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
4
|
+
import { transformHtmlToMarkdown } from '../utils/content-transform.js';
|
|
5
|
+
import { buildToolContentBlocks, performSharedFetch, } from './fetch-single.shared.js';
|
|
6
6
|
export const FETCH_URL_TOOL_NAME = 'fetch-url';
|
|
7
|
-
export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to
|
|
8
|
-
function
|
|
9
|
-
return value !== null && typeof value === 'object';
|
|
10
|
-
}
|
|
11
|
-
function deserializeJsonlTransformResult(cached) {
|
|
7
|
+
export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format';
|
|
8
|
+
function parseJsonRecord(input) {
|
|
12
9
|
try {
|
|
13
|
-
const parsed = JSON.parse(
|
|
14
|
-
|
|
15
|
-
return undefined;
|
|
16
|
-
const { content, contentBlocks, title, truncated } = parsed;
|
|
17
|
-
if (typeof content !== 'string')
|
|
18
|
-
return undefined;
|
|
19
|
-
if (typeof contentBlocks !== 'number' || !Number.isFinite(contentBlocks)) {
|
|
20
|
-
return undefined;
|
|
21
|
-
}
|
|
22
|
-
if (title !== undefined && typeof title !== 'string')
|
|
23
|
-
return undefined;
|
|
24
|
-
if (truncated !== undefined && typeof truncated !== 'boolean') {
|
|
25
|
-
return undefined;
|
|
26
|
-
}
|
|
27
|
-
const resolvedTitle = typeof title === 'string' ? title : undefined;
|
|
28
|
-
return {
|
|
29
|
-
content,
|
|
30
|
-
contentBlocks,
|
|
31
|
-
title: resolvedTitle,
|
|
32
|
-
...(truncated !== undefined ? { truncated } : {}),
|
|
33
|
-
};
|
|
10
|
+
const parsed = JSON.parse(input);
|
|
11
|
+
return isRecord(parsed) ? parsed : undefined;
|
|
34
12
|
}
|
|
35
13
|
catch {
|
|
36
14
|
return undefined;
|
|
37
15
|
}
|
|
38
16
|
}
|
|
39
|
-
function
|
|
40
|
-
const
|
|
17
|
+
function resolveMarkdownContent(parsed) {
|
|
18
|
+
const { markdown } = parsed;
|
|
19
|
+
if (typeof markdown === 'string')
|
|
20
|
+
return markdown;
|
|
21
|
+
const { content } = parsed;
|
|
22
|
+
if (typeof content === 'string')
|
|
23
|
+
return content;
|
|
24
|
+
return undefined;
|
|
25
|
+
}
|
|
26
|
+
function resolveOptionalTitle(parsed) {
|
|
27
|
+
const { title } = parsed;
|
|
28
|
+
if (title === undefined)
|
|
29
|
+
return undefined;
|
|
30
|
+
return typeof title === 'string' ? title : undefined;
|
|
31
|
+
}
|
|
32
|
+
function resolveTruncatedFlag(parsed) {
|
|
33
|
+
const { truncated } = parsed;
|
|
34
|
+
return typeof truncated === 'boolean' ? truncated : false;
|
|
35
|
+
}
|
|
36
|
+
export function parseCachedMarkdownResult(cached) {
|
|
37
|
+
const parsed = parseJsonRecord(cached);
|
|
38
|
+
if (!parsed)
|
|
39
|
+
return undefined;
|
|
40
|
+
const resolvedContent = resolveMarkdownContent(parsed);
|
|
41
|
+
if (resolvedContent === undefined)
|
|
42
|
+
return undefined;
|
|
43
|
+
const title = resolveOptionalTitle(parsed);
|
|
44
|
+
if (parsed.title !== undefined && title === undefined)
|
|
45
|
+
return undefined;
|
|
41
46
|
return {
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
...(input.maxContentLength !== undefined && {
|
|
47
|
-
maxContentLength: input.maxContentLength,
|
|
48
|
-
}),
|
|
47
|
+
content: resolvedContent,
|
|
48
|
+
markdown: resolvedContent,
|
|
49
|
+
title,
|
|
50
|
+
truncated: resolveTruncatedFlag(parsed),
|
|
49
51
|
};
|
|
50
52
|
}
|
|
51
|
-
function
|
|
52
|
-
return
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
function deserializeMarkdownResult(cached) {
|
|
54
|
+
return parseCachedMarkdownResult(cached);
|
|
55
|
+
}
|
|
56
|
+
function buildMarkdownTransform() {
|
|
57
|
+
return (html, url) => {
|
|
58
|
+
const result = transformHtmlToMarkdown(html, url, {
|
|
59
|
+
includeMetadata: true,
|
|
60
|
+
});
|
|
61
|
+
return { ...result, content: result.markdown };
|
|
57
62
|
};
|
|
58
63
|
}
|
|
59
|
-
function
|
|
60
|
-
return
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
maxContentLength: options.maxContentLength,
|
|
66
|
-
}),
|
|
67
|
-
includeContentBlocks: options.includeContentBlocks,
|
|
68
|
-
})
|
|
69
|
-
: transformHtmlToJsonlAsync(html, url, options);
|
|
64
|
+
function serializeMarkdownResult(result) {
|
|
65
|
+
return JSON.stringify({
|
|
66
|
+
markdown: result.markdown,
|
|
67
|
+
title: result.title,
|
|
68
|
+
truncated: result.truncated,
|
|
69
|
+
});
|
|
70
70
|
}
|
|
71
|
-
function
|
|
72
|
-
|
|
71
|
+
function buildStructuredContent(pipeline, inlineResult) {
|
|
72
|
+
return {
|
|
73
73
|
url: pipeline.url,
|
|
74
74
|
title: pipeline.data.title,
|
|
75
|
-
|
|
76
|
-
fetchedAt: pipeline.fetchedAt,
|
|
77
|
-
format,
|
|
78
|
-
contentSize: inlineResult.contentSize,
|
|
79
|
-
cached: pipeline.fromCache,
|
|
75
|
+
markdown: inlineResult.content,
|
|
80
76
|
};
|
|
81
|
-
if (pipeline.data.truncated) {
|
|
82
|
-
structuredContent.truncated = true;
|
|
83
|
-
}
|
|
84
|
-
if (inlineResult.truncated) {
|
|
85
|
-
structuredContent.truncated = true;
|
|
86
|
-
}
|
|
87
|
-
applyInlineResultToStructuredContent(structuredContent, inlineResult, 'content');
|
|
88
|
-
return structuredContent;
|
|
89
77
|
}
|
|
90
|
-
function
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
format: options.format,
|
|
96
|
-
includeContentBlocks: options.includeContentBlocks,
|
|
97
|
-
});
|
|
78
|
+
function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
|
|
79
|
+
return buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched markdown', pipeline.cacheKey, pipeline.data.content, pipeline.url, pipeline.data.title);
|
|
80
|
+
}
|
|
81
|
+
function logFetchStart(url) {
|
|
82
|
+
logDebug('Fetching URL', { url });
|
|
98
83
|
}
|
|
99
|
-
async function
|
|
100
|
-
|
|
84
|
+
async function fetchPipeline(url) {
|
|
85
|
+
return performSharedFetch({
|
|
101
86
|
url,
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
...(options.maxContentLength !== undefined && {
|
|
107
|
-
maxContentLength: options.maxContentLength,
|
|
108
|
-
}),
|
|
109
|
-
...(input.customHeaders !== undefined && {
|
|
110
|
-
customHeaders: input.customHeaders,
|
|
111
|
-
}),
|
|
112
|
-
...(input.retries !== undefined && { retries: input.retries }),
|
|
113
|
-
...(input.timeout !== undefined && { timeout: input.timeout }),
|
|
114
|
-
...(options.format === 'markdown' && {
|
|
115
|
-
cacheVariant: 'markdown-with-blocks',
|
|
116
|
-
}),
|
|
117
|
-
transform: buildFetchUrlTransform(options),
|
|
118
|
-
deserialize: deserializeJsonlTransformResult,
|
|
119
|
-
};
|
|
120
|
-
return performSharedFetch(sharedOptions);
|
|
87
|
+
transform: buildMarkdownTransform(),
|
|
88
|
+
serialize: serializeMarkdownResult,
|
|
89
|
+
deserialize: deserializeMarkdownResult,
|
|
90
|
+
});
|
|
121
91
|
}
|
|
122
|
-
function
|
|
123
|
-
const structuredContent =
|
|
92
|
+
function buildResponse(pipeline, inlineResult) {
|
|
93
|
+
const structuredContent = buildStructuredContent(pipeline, inlineResult);
|
|
94
|
+
const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
|
|
124
95
|
return {
|
|
125
|
-
content
|
|
96
|
+
content,
|
|
126
97
|
structuredContent,
|
|
127
98
|
};
|
|
128
99
|
}
|
|
129
100
|
export async function fetchUrlToolHandler(input) {
|
|
130
|
-
|
|
131
|
-
return await executeFetchUrl(input);
|
|
132
|
-
}
|
|
133
|
-
catch (error) {
|
|
101
|
+
return executeFetch(input).catch((error) => {
|
|
134
102
|
logError('fetch-url tool error', error instanceof Error ? error : undefined);
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
}
|
|
103
|
+
return handleToolError(error, input.url, 'Failed to fetch URL');
|
|
104
|
+
});
|
|
138
105
|
}
|
|
139
|
-
async function
|
|
106
|
+
async function executeFetch(input) {
|
|
140
107
|
const { url } = input;
|
|
141
|
-
const format = input.format ?? 'jsonl';
|
|
142
108
|
if (!url) {
|
|
143
|
-
return createToolErrorResponse('URL is required', ''
|
|
109
|
+
return createToolErrorResponse('URL is required', '');
|
|
110
|
+
}
|
|
111
|
+
logFetchStart(url);
|
|
112
|
+
const { pipeline, inlineResult } = await fetchPipeline(url);
|
|
113
|
+
if (inlineResult.error) {
|
|
114
|
+
return createToolErrorResponse(inlineResult.error, url);
|
|
144
115
|
}
|
|
145
|
-
|
|
146
|
-
logFetchUrlStart(url, options);
|
|
147
|
-
const { pipeline, inlineResult } = await fetchUrlPipeline(url, input, options);
|
|
148
|
-
const inlineError = getInlineErrorResponse(inlineResult, url, buildFetchUrlErrorDetails(options.format));
|
|
149
|
-
if (inlineError)
|
|
150
|
-
return inlineError;
|
|
151
|
-
return buildFetchUrlResponse(pipeline, inlineResult, options.format);
|
|
116
|
+
return buildResponse(pipeline, inlineResult);
|
|
152
117
|
}
|
package/dist/tools/index.js
CHANGED
|
@@ -1,44 +1,25 @@
|
|
|
1
|
-
import { FETCH_MARKDOWN_TOOL_DESCRIPTION, FETCH_MARKDOWN_TOOL_NAME, fetchMarkdownToolHandler, } from './handlers/fetch-markdown.tool.js';
|
|
2
1
|
import { FETCH_URL_TOOL_DESCRIPTION, FETCH_URL_TOOL_NAME, fetchUrlToolHandler, } from './handlers/fetch-url.tool.js';
|
|
3
|
-
import {
|
|
4
|
-
const
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
openWorldHint: true,
|
|
17
|
-
},
|
|
2
|
+
import { fetchUrlInputSchema, fetchUrlOutputSchema } from './schemas.js';
|
|
3
|
+
const TOOL_DEFINITION = {
|
|
4
|
+
name: FETCH_URL_TOOL_NAME,
|
|
5
|
+
title: 'Fetch URL',
|
|
6
|
+
description: FETCH_URL_TOOL_DESCRIPTION,
|
|
7
|
+
inputSchema: fetchUrlInputSchema,
|
|
8
|
+
outputSchema: fetchUrlOutputSchema,
|
|
9
|
+
handler: fetchUrlToolHandler,
|
|
10
|
+
annotations: {
|
|
11
|
+
readOnlyHint: true,
|
|
12
|
+
destructiveHint: false,
|
|
13
|
+
idempotentHint: true,
|
|
14
|
+
openWorldHint: true,
|
|
18
15
|
},
|
|
19
|
-
|
|
20
|
-
name: FETCH_MARKDOWN_TOOL_NAME,
|
|
21
|
-
title: 'Fetch Markdown',
|
|
22
|
-
description: FETCH_MARKDOWN_TOOL_DESCRIPTION,
|
|
23
|
-
inputSchema: fetchMarkdownInputSchema,
|
|
24
|
-
outputSchema: fetchMarkdownOutputSchema,
|
|
25
|
-
handler: fetchMarkdownToolHandler,
|
|
26
|
-
annotations: {
|
|
27
|
-
readOnlyHint: true,
|
|
28
|
-
destructiveHint: false,
|
|
29
|
-
idempotentHint: true,
|
|
30
|
-
openWorldHint: true,
|
|
31
|
-
},
|
|
32
|
-
},
|
|
33
|
-
];
|
|
16
|
+
};
|
|
34
17
|
export function registerTools(server) {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
}, tool.handler);
|
|
43
|
-
}
|
|
18
|
+
server.registerTool(TOOL_DEFINITION.name, {
|
|
19
|
+
title: TOOL_DEFINITION.title,
|
|
20
|
+
description: TOOL_DEFINITION.description,
|
|
21
|
+
inputSchema: TOOL_DEFINITION.inputSchema,
|
|
22
|
+
outputSchema: TOOL_DEFINITION.outputSchema,
|
|
23
|
+
annotations: TOOL_DEFINITION.annotations,
|
|
24
|
+
}, TOOL_DEFINITION.handler);
|
|
44
25
|
}
|