@j0hanz/superfetch 1.2.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/README.md +116 -152
  2. package/dist/config/auth-config.d.ts +16 -0
  3. package/dist/config/auth-config.js +53 -0
  4. package/dist/config/constants.d.ts +11 -13
  5. package/dist/config/constants.js +1 -3
  6. package/dist/config/env-parsers.d.ts +7 -0
  7. package/dist/config/env-parsers.js +84 -0
  8. package/dist/config/formatting.d.ts +2 -2
  9. package/dist/config/index.d.ts +47 -53
  10. package/dist/config/index.js +25 -59
  11. package/dist/config/types/content.d.ts +1 -49
  12. package/dist/config/types/runtime.d.ts +8 -16
  13. package/dist/config/types/tools.d.ts +2 -28
  14. package/dist/http/accept-policy.d.ts +3 -0
  15. package/dist/http/accept-policy.js +45 -0
  16. package/dist/http/async-handler.d.ts +2 -0
  17. package/dist/http/async-handler.js +5 -0
  18. package/dist/http/auth-introspection.d.ts +2 -0
  19. package/dist/http/auth-introspection.js +141 -0
  20. package/dist/http/auth-static.d.ts +2 -0
  21. package/dist/http/auth-static.js +23 -0
  22. package/dist/http/auth.d.ts +3 -2
  23. package/dist/http/auth.js +98 -26
  24. package/dist/http/cors.d.ts +6 -6
  25. package/dist/http/cors.js +7 -42
  26. package/dist/http/download-routes.d.ts +0 -12
  27. package/dist/http/download-routes.js +21 -58
  28. package/dist/http/jsonrpc-http.d.ts +2 -0
  29. package/dist/http/jsonrpc-http.js +10 -0
  30. package/dist/http/mcp-routes.d.ts +0 -1
  31. package/dist/http/mcp-routes.js +43 -30
  32. package/dist/http/mcp-session-helpers.d.ts +0 -1
  33. package/dist/http/mcp-session-helpers.js +1 -1
  34. package/dist/http/mcp-session-transport.d.ts +7 -0
  35. package/dist/http/mcp-session-transport.js +57 -0
  36. package/dist/http/mcp-session.js +60 -73
  37. package/dist/http/mcp-validation.d.ts +1 -0
  38. package/dist/http/mcp-validation.js +11 -10
  39. package/dist/http/protocol-policy.d.ts +2 -0
  40. package/dist/http/protocol-policy.js +31 -0
  41. package/dist/http/rate-limit.js +5 -2
  42. package/dist/http/server-config.d.ts +1 -0
  43. package/dist/http/server-config.js +40 -0
  44. package/dist/http/server-middleware.d.ts +2 -9
  45. package/dist/http/server-middleware.js +96 -43
  46. package/dist/http/server-shutdown.d.ts +4 -0
  47. package/dist/http/server-shutdown.js +43 -0
  48. package/dist/http/server.js +52 -64
  49. package/dist/http/session-cleanup.js +1 -1
  50. package/dist/middleware/error-handler.js +1 -3
  51. package/dist/resources/cached-content.js +50 -108
  52. package/dist/resources/index.js +0 -82
  53. package/dist/server.js +51 -30
  54. package/dist/services/cache-keys.d.ts +7 -0
  55. package/dist/services/cache-keys.js +57 -0
  56. package/dist/services/cache.d.ts +1 -7
  57. package/dist/services/cache.js +53 -119
  58. package/dist/services/context.d.ts +0 -1
  59. package/dist/services/context.js +0 -7
  60. package/dist/services/extractor.js +10 -82
  61. package/dist/services/fetcher/agents.d.ts +2 -2
  62. package/dist/services/fetcher/agents.js +34 -95
  63. package/dist/services/fetcher/dns-selection.d.ts +2 -0
  64. package/dist/services/fetcher/dns-selection.js +72 -0
  65. package/dist/services/fetcher/interceptors.d.ts +0 -22
  66. package/dist/services/fetcher/interceptors.js +30 -13
  67. package/dist/services/fetcher/redirects.js +4 -3
  68. package/dist/services/fetcher/response.js +66 -31
  69. package/dist/services/fetcher.d.ts +1 -3
  70. package/dist/services/fetcher.js +14 -33
  71. package/dist/services/fifo-queue.d.ts +8 -0
  72. package/dist/services/fifo-queue.js +25 -0
  73. package/dist/services/logger.js +2 -2
  74. package/dist/services/metadata-collector.d.ts +1 -9
  75. package/dist/services/metadata-collector.js +71 -2
  76. package/dist/services/transform-worker-pool.d.ts +4 -14
  77. package/dist/services/transform-worker-pool.js +177 -129
  78. package/dist/services/transform-worker-types.d.ts +32 -0
  79. package/dist/services/transform-worker-types.js +14 -0
  80. package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
  81. package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
  82. package/dist/tools/handlers/fetch-single.shared.d.ts +1 -20
  83. package/dist/tools/handlers/fetch-single.shared.js +44 -87
  84. package/dist/tools/handlers/fetch-url.tool.d.ts +1 -1
  85. package/dist/tools/handlers/fetch-url.tool.js +46 -123
  86. package/dist/tools/index.js +21 -40
  87. package/dist/tools/schemas.d.ts +1 -51
  88. package/dist/tools/schemas.js +1 -107
  89. package/dist/tools/utils/cached-markdown.d.ts +5 -0
  90. package/dist/tools/utils/cached-markdown.js +46 -0
  91. package/dist/tools/utils/content-shaping.d.ts +4 -0
  92. package/dist/tools/utils/content-shaping.js +52 -0
  93. package/dist/tools/utils/content-transform.d.ts +2 -17
  94. package/dist/tools/utils/content-transform.js +120 -114
  95. package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
  96. package/dist/tools/utils/fetch-pipeline.js +65 -62
  97. package/dist/tools/utils/inline-content.d.ts +1 -2
  98. package/dist/tools/utils/inline-content.js +4 -7
  99. package/dist/transformers/markdown.transformer.js +109 -34
  100. package/dist/utils/cached-payload.d.ts +7 -0
  101. package/dist/utils/cached-payload.js +36 -0
  102. package/dist/utils/error-utils.js +1 -1
  103. package/dist/utils/filename-generator.js +21 -10
  104. package/dist/utils/guards.d.ts +1 -0
  105. package/dist/utils/guards.js +3 -0
  106. package/dist/utils/header-normalizer.d.ts +0 -3
  107. package/dist/utils/header-normalizer.js +3 -3
  108. package/dist/utils/tool-error-handler.d.ts +2 -2
  109. package/dist/utils/tool-error-handler.js +11 -38
  110. package/dist/utils/url-transformer.d.ts +7 -0
  111. package/dist/utils/url-transformer.js +147 -0
  112. package/dist/utils/url-validator.d.ts +1 -2
  113. package/dist/utils/url-validator.js +20 -93
  114. package/dist/workers/content-transform.worker.d.ts +1 -0
  115. package/dist/workers/content-transform.worker.js +40 -0
  116. package/package.json +13 -16
@@ -1,79 +1,26 @@
1
1
  import { config } from '../../config/index.js';
2
- import { buildFileDownloadInfo } from '../../utils/download-url.js';
3
2
  import { generateSafeFilename } from '../../utils/filename-generator.js';
4
- import { createToolErrorResponse } from '../../utils/tool-error-handler.js';
5
- import { appendHeaderVary } from '../utils/cache-vary.js';
6
3
  import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
7
4
  import { applyInlineContentLimit } from '../utils/inline-content.js';
8
- export async function performSharedFetch(options, deps = {}) {
9
- const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
10
- const cacheNamespace = options.format === 'markdown' ? 'markdown' : 'url';
11
- const cacheVary = appendHeaderVary({
12
- format: options.format,
13
- extractMainContent: options.extractMainContent,
14
- includeMetadata: options.includeMetadata,
15
- maxContentLength: options.maxContentLength,
16
- ...(options.cacheVariant ? { variant: options.cacheVariant } : {}),
17
- ...(options.format === 'markdown'
18
- ? { includeContentBlocks: options.includeContentBlocks }
19
- : { contentBlocks: true }),
20
- }, options.customHeaders);
21
- const pipelineOptions = {
22
- url: options.url,
23
- cacheNamespace,
24
- transform: options.transform,
25
- };
26
- if (options.customHeaders !== undefined) {
27
- pipelineOptions.customHeaders = options.customHeaders;
28
- }
29
- if (options.retries !== undefined) {
30
- pipelineOptions.retries = options.retries;
31
- }
32
- if (options.timeout !== undefined) {
33
- pipelineOptions.timeout = options.timeout;
34
- }
35
- if (cacheVary !== undefined) {
36
- pipelineOptions.cacheVary = cacheVary;
37
- }
5
+ function applyOptionalPipelineSerialization(pipelineOptions, options) {
38
6
  if (options.serialize !== undefined) {
39
7
  pipelineOptions.serialize = options.serialize;
40
8
  }
41
9
  if (options.deserialize !== undefined) {
42
10
  pipelineOptions.deserialize = options.deserialize;
43
11
  }
44
- const pipeline = await executePipeline(pipelineOptions);
45
- const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null, options.format);
46
- return { pipeline, inlineResult };
47
12
  }
48
- export function getFileDownloadInfo(context) {
49
- const infoOptions = {
50
- cacheKey: context.cacheKey,
51
- url: context.url,
13
+ export async function performSharedFetch(options, deps = {}) {
14
+ const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
15
+ const pipelineOptions = {
16
+ url: options.url,
17
+ cacheNamespace: 'markdown',
18
+ transform: options.transform,
52
19
  };
53
- if (context.title !== undefined) {
54
- return buildFileDownloadInfo({
55
- ...infoOptions,
56
- title: context.title,
57
- });
58
- }
59
- return buildFileDownloadInfo(infoOptions);
60
- }
61
- export function getInlineErrorResponse(inlineResult, url, details) {
62
- if (!inlineResult.error)
63
- return null;
64
- return createToolErrorResponse(inlineResult.error, url, 'INTERNAL_ERROR', details);
65
- }
66
- export function applyInlineResultToStructuredContent(structuredContent, inlineResult, contentKey) {
67
- if (inlineResult.truncated) {
68
- structuredContent.truncated = true;
69
- }
70
- if (typeof inlineResult.content === 'string') {
71
- structuredContent[contentKey] = inlineResult.content;
72
- }
73
- if (inlineResult.resourceUri) {
74
- structuredContent.resourceUri = inlineResult.resourceUri;
75
- structuredContent.resourceMimeType = inlineResult.resourceMimeType;
76
- }
20
+ applyOptionalPipelineSerialization(pipelineOptions, options);
21
+ const pipeline = await executePipeline(pipelineOptions);
22
+ const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null);
23
+ return { pipeline, inlineResult };
77
24
  }
78
25
  function serializeStructuredContent(structuredContent, fromCache) {
79
26
  return JSON.stringify(structuredContent, fromCache ? undefined : null, fromCache ? undefined : 2);
@@ -93,45 +40,55 @@ function buildResourceLink(inlineResult, name) {
93
40
  }
94
41
  return block;
95
42
  }
96
- function buildEmbeddedResource(content, mimeType, url, title) {
43
+ function buildEmbeddedResource(content, url, title) {
97
44
  if (!content) {
98
45
  return null;
99
46
  }
100
- // Generate a proper filename with extension
101
- const extension = mimeType === 'text/markdown' ? '.md' : '.jsonl';
102
- const filename = generateSafeFilename(url, title, undefined, extension);
103
- // Use file: URI scheme with filename for better VS Code integration
47
+ const filename = generateSafeFilename(url, title, undefined, '.md');
104
48
  const uri = `file:///${filename}`;
105
49
  return {
106
50
  type: 'resource',
107
51
  resource: {
108
52
  uri,
109
- mimeType,
53
+ mimeType: 'text/markdown',
110
54
  text: content,
111
55
  },
112
56
  };
113
57
  }
114
- export function buildToolContentBlocks(structuredContent, fromCache, inlineResult, resourceName, cacheKey, fullContent, format, url, title) {
115
- const textBlock = {
116
- type: 'text',
117
- text: serializeStructuredContent(structuredContent, fromCache),
118
- };
119
- const blocks = [textBlock];
120
- // Embed full content in stdio mode; HTTP mode relies on inline content or links.
121
- const mimeType = format === 'markdown' ? 'text/markdown' : 'application/jsonl';
122
- const contentToEmbed = config.runtime.httpMode
123
- ? inlineResult.content
124
- : (fullContent ?? inlineResult.content);
125
- if (typeof contentToEmbed === 'string' && url) {
126
- const embeddedResource = buildEmbeddedResource(contentToEmbed, mimeType, url, title);
127
- if (embeddedResource) {
128
- blocks.push(embeddedResource);
129
- }
58
+ function resolveContentToEmbed(inlineResult, fullContent, useInlineInHttpMode) {
59
+ if (useInlineInHttpMode) {
60
+ return inlineResult.content;
130
61
  }
131
- // Add resource link for HTTP mode downloads (only when truncated)
62
+ return fullContent ?? inlineResult.content;
63
+ }
64
+ function maybeAppendEmbeddedResource(blocks, contentToEmbed, url, title) {
65
+ if (typeof contentToEmbed !== 'string')
66
+ return;
67
+ if (!url)
68
+ return;
69
+ const embeddedResource = buildEmbeddedResource(contentToEmbed, url, title);
70
+ if (embeddedResource) {
71
+ blocks.push(embeddedResource);
72
+ }
73
+ }
74
+ function maybeAppendResourceLink(blocks, inlineResult, resourceName) {
132
75
  const resourceLink = buildResourceLink(inlineResult, resourceName);
133
76
  if (resourceLink) {
134
77
  blocks.push(resourceLink);
135
78
  }
79
+ }
80
+ function buildTextBlock(structuredContent, fromCache) {
81
+ return {
82
+ type: 'text',
83
+ text: serializeStructuredContent(structuredContent, fromCache),
84
+ };
85
+ }
86
+ export function buildToolContentBlocks(structuredContent, fromCache, inlineResult, resourceName, cacheKey, fullContent, url, title) {
87
+ const blocks = [
88
+ buildTextBlock(structuredContent, fromCache),
89
+ ];
90
+ const contentToEmbed = resolveContentToEmbed(inlineResult, fullContent, config.runtime.httpMode);
91
+ maybeAppendEmbeddedResource(blocks, contentToEmbed, url, title);
92
+ maybeAppendResourceLink(blocks, inlineResult, resourceName);
136
93
  return blocks;
137
94
  }
@@ -1,4 +1,4 @@
1
1
  import type { FetchUrlInput, ToolResponseBase } from '../../config/types/tools.js';
2
2
  export declare const FETCH_URL_TOOL_NAME = "fetch-url";
3
- export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. Supports custom headers, retries, and content length limits.";
3
+ export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format";
4
4
  export declare function fetchUrlToolHandler(input: FetchUrlInput): Promise<ToolResponseBase>;
@@ -1,152 +1,75 @@
1
- import { config } from '../../config/index.js';
2
1
  import { logDebug, logError } from '../../services/logger.js';
3
2
  import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
4
- import { transformHtmlToJsonlAsync, transformHtmlToMarkdownWithBlocksAsync, } from '../utils/content-transform-async.js';
5
- import { applyInlineResultToStructuredContent, buildToolContentBlocks, getInlineErrorResponse, performSharedFetch, } from './fetch-single.shared.js';
3
+ import { parseCachedMarkdownResult } from '../utils/cached-markdown.js';
4
+ import { transformHtmlToMarkdown } from '../utils/content-transform.js';
5
+ import { buildToolContentBlocks, performSharedFetch, } from './fetch-single.shared.js';
6
6
  export const FETCH_URL_TOOL_NAME = 'fetch-url';
7
- export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. Supports custom headers, retries, and content length limits.';
8
- function isRecord(value) {
9
- return value !== null && typeof value === 'object';
7
+ export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format';
8
+ function deserializeMarkdownResult(cached) {
9
+ return parseCachedMarkdownResult(cached);
10
10
  }
11
- function deserializeJsonlTransformResult(cached) {
12
- try {
13
- const parsed = JSON.parse(cached);
14
- if (!isRecord(parsed))
15
- return undefined;
16
- const { content, contentBlocks, title, truncated } = parsed;
17
- if (typeof content !== 'string')
18
- return undefined;
19
- if (typeof contentBlocks !== 'number' || !Number.isFinite(contentBlocks)) {
20
- return undefined;
21
- }
22
- if (title !== undefined && typeof title !== 'string')
23
- return undefined;
24
- if (truncated !== undefined && typeof truncated !== 'boolean') {
25
- return undefined;
26
- }
27
- const resolvedTitle = typeof title === 'string' ? title : undefined;
28
- return {
29
- content,
30
- contentBlocks,
31
- title: resolvedTitle,
32
- ...(truncated !== undefined ? { truncated } : {}),
33
- };
34
- }
35
- catch {
36
- return undefined;
37
- }
38
- }
39
- function resolveFetchUrlOptions(input) {
40
- const format = input.format ?? 'jsonl';
41
- return {
42
- extractMainContent: input.extractMainContent ?? config.extraction.extractMainContent,
43
- includeMetadata: input.includeMetadata ?? config.extraction.includeMetadata,
44
- format,
45
- includeContentBlocks: input.includeContentBlocks ?? (format === 'markdown' ? false : true),
46
- ...(input.maxContentLength !== undefined && {
47
- maxContentLength: input.maxContentLength,
48
- }),
11
+ function buildMarkdownTransform() {
12
+ return (html, url) => {
13
+ const result = transformHtmlToMarkdown(html, url, {
14
+ includeMetadata: true,
15
+ });
16
+ return { ...result, content: result.markdown };
49
17
  };
50
18
  }
51
- function buildFetchUrlErrorDetails(format) {
52
- return {
53
- contentBlocks: 0,
54
- fetchedAt: new Date().toISOString(),
55
- format,
56
- cached: false,
57
- };
58
- }
59
- function buildFetchUrlTransform(options) {
60
- return async (html, url) => options.format === 'markdown'
61
- ? transformHtmlToMarkdownWithBlocksAsync(html, url, {
62
- extractMainContent: options.extractMainContent,
63
- includeMetadata: options.includeMetadata,
64
- ...(options.maxContentLength !== undefined && {
65
- maxContentLength: options.maxContentLength,
66
- }),
67
- includeContentBlocks: options.includeContentBlocks,
68
- })
69
- : transformHtmlToJsonlAsync(html, url, options);
19
+ function serializeMarkdownResult(result) {
20
+ return JSON.stringify({
21
+ markdown: result.markdown,
22
+ title: result.title,
23
+ truncated: result.truncated,
24
+ });
70
25
  }
71
- function buildFetchUrlStructuredContent(format, pipeline, inlineResult) {
72
- const structuredContent = {
26
+ function buildStructuredContent(pipeline, inlineResult) {
27
+ return {
73
28
  url: pipeline.url,
74
29
  title: pipeline.data.title,
75
- contentBlocks: pipeline.data.contentBlocks,
76
- fetchedAt: pipeline.fetchedAt,
77
- format,
78
- contentSize: inlineResult.contentSize,
79
- cached: pipeline.fromCache,
30
+ markdown: inlineResult.content,
80
31
  };
81
- if (pipeline.data.truncated) {
82
- structuredContent.truncated = true;
83
- }
84
- if (inlineResult.truncated) {
85
- structuredContent.truncated = true;
86
- }
87
- applyInlineResultToStructuredContent(structuredContent, inlineResult, 'content');
88
- return structuredContent;
89
32
  }
90
- function logFetchUrlStart(url, options) {
91
- logDebug('Fetching URL', {
92
- url,
93
- extractMainContent: options.extractMainContent,
94
- includeMetadata: options.includeMetadata,
95
- format: options.format,
96
- includeContentBlocks: options.includeContentBlocks,
97
- });
33
+ function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
34
+ return buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched markdown', pipeline.cacheKey, pipeline.data.content, pipeline.url, pipeline.data.title);
98
35
  }
99
- async function fetchUrlPipeline(url, input, options) {
100
- const sharedOptions = {
36
+ function logFetchStart(url) {
37
+ logDebug('Fetching URL', { url });
38
+ }
39
+ async function fetchPipeline(url) {
40
+ return performSharedFetch({
101
41
  url,
102
- format: options.format,
103
- extractMainContent: options.extractMainContent,
104
- includeMetadata: options.includeMetadata,
105
- includeContentBlocks: options.includeContentBlocks,
106
- ...(options.maxContentLength !== undefined && {
107
- maxContentLength: options.maxContentLength,
108
- }),
109
- ...(input.customHeaders !== undefined && {
110
- customHeaders: input.customHeaders,
111
- }),
112
- ...(input.retries !== undefined && { retries: input.retries }),
113
- ...(input.timeout !== undefined && { timeout: input.timeout }),
114
- ...(options.format === 'markdown' && {
115
- cacheVariant: 'markdown-with-blocks',
116
- }),
117
- transform: buildFetchUrlTransform(options),
118
- deserialize: deserializeJsonlTransformResult,
119
- };
120
- return performSharedFetch(sharedOptions);
42
+ transform: buildMarkdownTransform(),
43
+ serialize: serializeMarkdownResult,
44
+ deserialize: deserializeMarkdownResult,
45
+ });
121
46
  }
122
- function buildFetchUrlResponse(pipeline, inlineResult, format) {
123
- const structuredContent = buildFetchUrlStructuredContent(format, pipeline, inlineResult);
47
+ function buildResponse(pipeline, inlineResult) {
48
+ const structuredContent = buildStructuredContent(pipeline, inlineResult);
49
+ const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
124
50
  return {
125
- content: buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched content', pipeline.cacheKey, pipeline.data.content, format, pipeline.url, pipeline.data.title),
51
+ content,
126
52
  structuredContent,
127
53
  };
128
54
  }
129
55
  export async function fetchUrlToolHandler(input) {
130
56
  try {
131
- return await executeFetchUrl(input);
57
+ return await executeFetch(input);
132
58
  }
133
59
  catch (error) {
134
60
  logError('fetch-url tool error', error instanceof Error ? error : undefined);
135
- const errorDetails = buildFetchUrlErrorDetails(input.format ?? 'jsonl');
136
- return handleToolError(error, input.url, 'Failed to fetch URL', errorDetails);
61
+ return handleToolError(error, input.url, 'Failed to fetch URL');
137
62
  }
138
63
  }
139
- async function executeFetchUrl(input) {
64
+ async function executeFetch(input) {
140
65
  const { url } = input;
141
- const format = input.format ?? 'jsonl';
142
66
  if (!url) {
143
- return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR', buildFetchUrlErrorDetails(format));
67
+ return createToolErrorResponse('URL is required', '');
68
+ }
69
+ logFetchStart(url);
70
+ const { pipeline, inlineResult } = await fetchPipeline(url);
71
+ if (inlineResult.error) {
72
+ return createToolErrorResponse(inlineResult.error, url);
144
73
  }
145
- const options = resolveFetchUrlOptions(input);
146
- logFetchUrlStart(url, options);
147
- const { pipeline, inlineResult } = await fetchUrlPipeline(url, input, options);
148
- const inlineError = getInlineErrorResponse(inlineResult, url, buildFetchUrlErrorDetails(options.format));
149
- if (inlineError)
150
- return inlineError;
151
- return buildFetchUrlResponse(pipeline, inlineResult, options.format);
74
+ return buildResponse(pipeline, inlineResult);
152
75
  }
@@ -1,44 +1,25 @@
1
- import { FETCH_MARKDOWN_TOOL_DESCRIPTION, FETCH_MARKDOWN_TOOL_NAME, fetchMarkdownToolHandler, } from './handlers/fetch-markdown.tool.js';
2
1
  import { FETCH_URL_TOOL_DESCRIPTION, FETCH_URL_TOOL_NAME, fetchUrlToolHandler, } from './handlers/fetch-url.tool.js';
3
- import { fetchMarkdownInputSchema, fetchMarkdownOutputSchema, fetchUrlInputSchema, fetchUrlOutputSchema, } from './schemas.js';
4
- const TOOL_DEFINITIONS = [
5
- {
6
- name: FETCH_URL_TOOL_NAME,
7
- title: 'Fetch URL',
8
- description: FETCH_URL_TOOL_DESCRIPTION,
9
- inputSchema: fetchUrlInputSchema,
10
- outputSchema: fetchUrlOutputSchema,
11
- handler: fetchUrlToolHandler,
12
- annotations: {
13
- readOnlyHint: true,
14
- destructiveHint: false,
15
- idempotentHint: true,
16
- openWorldHint: true,
17
- },
2
+ import { fetchUrlInputSchema, fetchUrlOutputSchema } from './schemas.js';
3
+ const TOOL_DEFINITION = {
4
+ name: FETCH_URL_TOOL_NAME,
5
+ title: 'Fetch URL',
6
+ description: FETCH_URL_TOOL_DESCRIPTION,
7
+ inputSchema: fetchUrlInputSchema,
8
+ outputSchema: fetchUrlOutputSchema,
9
+ handler: fetchUrlToolHandler,
10
+ annotations: {
11
+ readOnlyHint: true,
12
+ destructiveHint: false,
13
+ idempotentHint: true,
14
+ openWorldHint: true,
18
15
  },
19
- {
20
- name: FETCH_MARKDOWN_TOOL_NAME,
21
- title: 'Fetch Markdown',
22
- description: FETCH_MARKDOWN_TOOL_DESCRIPTION,
23
- inputSchema: fetchMarkdownInputSchema,
24
- outputSchema: fetchMarkdownOutputSchema,
25
- handler: fetchMarkdownToolHandler,
26
- annotations: {
27
- readOnlyHint: true,
28
- destructiveHint: false,
29
- idempotentHint: true,
30
- openWorldHint: true,
31
- },
32
- },
33
- ];
16
+ };
34
17
  export function registerTools(server) {
35
- for (const tool of TOOL_DEFINITIONS) {
36
- server.registerTool(tool.name, {
37
- title: tool.title,
38
- description: tool.description,
39
- inputSchema: tool.inputSchema,
40
- outputSchema: tool.outputSchema,
41
- annotations: tool.annotations,
42
- }, tool.handler);
43
- }
18
+ server.registerTool(TOOL_DEFINITION.name, {
19
+ title: TOOL_DEFINITION.title,
20
+ description: TOOL_DEFINITION.description,
21
+ inputSchema: TOOL_DEFINITION.inputSchema,
22
+ outputSchema: TOOL_DEFINITION.outputSchema,
23
+ annotations: TOOL_DEFINITION.annotations,
24
+ }, TOOL_DEFINITION.handler);
44
25
  }
@@ -1,60 +1,10 @@
1
1
  import { z } from 'zod';
2
2
  export declare const fetchUrlInputSchema: z.ZodObject<{
3
- format: z.ZodDefault<z.ZodEnum<{
4
- jsonl: "jsonl";
5
- markdown: "markdown";
6
- }>>;
7
- includeContentBlocks: z.ZodOptional<z.ZodBoolean>;
8
- extractMainContent: z.ZodDefault<z.ZodBoolean>;
9
- includeMetadata: z.ZodDefault<z.ZodBoolean>;
10
- maxContentLength: z.ZodOptional<z.ZodNumber>;
11
3
  url: z.ZodURL;
12
- customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
13
- timeout: z.ZodDefault<z.ZodNumber>;
14
- retries: z.ZodDefault<z.ZodNumber>;
15
- }, z.core.$strict>;
16
- export declare const fetchMarkdownInputSchema: z.ZodObject<{
17
- extractMainContent: z.ZodDefault<z.ZodBoolean>;
18
- includeMetadata: z.ZodDefault<z.ZodBoolean>;
19
- maxContentLength: z.ZodOptional<z.ZodNumber>;
20
- url: z.ZodURL;
21
- customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
22
- timeout: z.ZodDefault<z.ZodNumber>;
23
- retries: z.ZodDefault<z.ZodNumber>;
24
4
  }, z.core.$strict>;
25
5
  export declare const fetchUrlOutputSchema: z.ZodObject<{
26
- contentSize: z.ZodOptional<z.ZodNumber>;
27
- resourceUri: z.ZodOptional<z.ZodString>;
28
- resourceMimeType: z.ZodOptional<z.ZodString>;
29
- cached: z.ZodBoolean;
30
- truncated: z.ZodOptional<z.ZodBoolean>;
31
- error: z.ZodOptional<z.ZodString>;
32
- errorCode: z.ZodOptional<z.ZodString>;
33
- url: z.ZodString;
34
- title: z.ZodOptional<z.ZodString>;
35
- contentBlocks: z.ZodNumber;
36
- fetchedAt: z.ZodString;
37
- format: z.ZodEnum<{
38
- jsonl: "jsonl";
39
- markdown: "markdown";
40
- }>;
41
- content: z.ZodOptional<z.ZodString>;
42
- }, z.core.$strict>;
43
- export declare const fetchMarkdownOutputSchema: z.ZodObject<{
44
- contentSize: z.ZodOptional<z.ZodNumber>;
45
- resourceUri: z.ZodOptional<z.ZodString>;
46
- resourceMimeType: z.ZodOptional<z.ZodString>;
47
- cached: z.ZodBoolean;
48
- truncated: z.ZodOptional<z.ZodBoolean>;
49
- error: z.ZodOptional<z.ZodString>;
50
- errorCode: z.ZodOptional<z.ZodString>;
51
6
  url: z.ZodString;
52
7
  title: z.ZodOptional<z.ZodString>;
53
- fetchedAt: z.ZodString;
54
8
  markdown: z.ZodOptional<z.ZodString>;
55
- file: z.ZodOptional<z.ZodObject<{
56
- downloadUrl: z.ZodString;
57
- fileName: z.ZodString;
58
- expiresAt: z.ZodString;
59
- }, z.core.$strip>>;
9
+ error: z.ZodOptional<z.ZodString>;
60
10
  }, z.core.$strict>;
@@ -1,119 +1,13 @@
1
1
  import { z } from 'zod';
2
- import { config } from '../config/index.js';
3
- const MAX_HEADER_NAME_LENGTH = 128;
4
- const MAX_HEADER_VALUE_LENGTH = 2048;
5
- const MAX_HEADER_COUNT = 50;
6
- const MAX_CONTENT_LENGTH = config.constants.maxContentSize;
7
- const customHeadersSchema = z
8
- .record(z.string().max(MAX_HEADER_NAME_LENGTH), z.string().max(MAX_HEADER_VALUE_LENGTH))
9
- .refine((headers) => Object.keys(headers).length <= MAX_HEADER_COUNT, {
10
- error: `customHeaders must have at most ${MAX_HEADER_COUNT} entries`,
11
- });
12
- const requestOptionsSchema = z.object({
13
- customHeaders: customHeadersSchema
14
- .optional()
15
- .describe('Custom HTTP headers for the request'),
16
- timeout: z
17
- .number()
18
- .min(1000)
19
- .max(120000)
20
- .default(config.fetcher.timeout)
21
- .describe('Request timeout in milliseconds (1000-120000)'),
22
- retries: z
23
- .number()
24
- .min(1)
25
- .max(10)
26
- .default(3)
27
- .describe('Number of retry attempts (1-10)'),
28
- });
29
- const extractionOptionsSchema = z.object({
30
- extractMainContent: z
31
- .boolean()
32
- .default(true)
33
- .describe('Use Readability to extract main article content'),
34
- includeMetadata: z
35
- .boolean()
36
- .default(true)
37
- .describe('Include page metadata (title, description, etc.)'),
38
- maxContentLength: z
39
- .number()
40
- .positive()
41
- .max(MAX_CONTENT_LENGTH)
42
- .optional()
43
- .describe('Maximum content length in characters'),
44
- });
45
- const formatOptionsSchema = z.object({
46
- format: z
47
- .enum(['jsonl', 'markdown'])
48
- .default('jsonl')
49
- .describe('Output format'),
50
- includeContentBlocks: z
51
- .boolean()
52
- .optional()
53
- .describe('Include content block counts when format=markdown'),
54
- });
55
- const resourceFieldsSchema = z.object({
56
- contentSize: z.number().optional().describe('Content length in characters'),
57
- resourceUri: z
58
- .string()
59
- .optional()
60
- .describe('Resource URI when content is too large to inline'),
61
- resourceMimeType: z
62
- .string()
63
- .optional()
64
- .describe('MIME type for the resource URI'),
65
- cached: z.boolean().describe('Whether the result was served from cache'),
66
- truncated: z
67
- .boolean()
68
- .optional()
69
- .describe('Whether content was truncated by maxContentLength'),
70
- error: z.string().optional().describe('Error message if the request failed'),
71
- errorCode: z.string().optional().describe('Error code if the request failed'),
72
- });
73
- const fileDownloadSchema = z.object({
74
- downloadUrl: z.string().describe('Relative URL to download the .md file'),
75
- fileName: z.string().describe('Suggested filename for download'),
76
- expiresAt: z.string().describe('ISO timestamp when download expires'),
77
- });
78
2
  export const fetchUrlInputSchema = z.strictObject({
79
- ...requestOptionsSchema.shape,
80
- url: z.url({ protocol: /^https?$/i }).describe('The URL to fetch'),
81
- ...extractionOptionsSchema.shape,
82
- ...formatOptionsSchema.shape,
83
- });
84
- export const fetchMarkdownInputSchema = z.strictObject({
85
- ...requestOptionsSchema.shape,
86
3
  url: z.url({ protocol: /^https?$/i }).describe('The URL to fetch'),
87
- ...extractionOptionsSchema.shape,
88
4
  });
89
5
  export const fetchUrlOutputSchema = z.strictObject({
90
6
  url: z.string().describe('The fetched URL'),
91
7
  title: z.string().optional().describe('Page title'),
92
- contentBlocks: z
93
- .number()
94
- .describe('Number of content blocks extracted (JSONL only)'),
95
- fetchedAt: z
96
- .string()
97
- .describe('ISO timestamp of when the content was fetched'),
98
- format: z.enum(['jsonl', 'markdown']).describe('Output format used'),
99
- content: z
100
- .string()
101
- .optional()
102
- .describe('The extracted content in JSONL or Markdown format'),
103
- ...resourceFieldsSchema.shape,
104
- });
105
- export const fetchMarkdownOutputSchema = z.strictObject({
106
- url: z.string().describe('The fetched URL'),
107
- title: z.string().optional().describe('Page title'),
108
- fetchedAt: z
109
- .string()
110
- .describe('ISO timestamp of when the content was fetched'),
111
8
  markdown: z
112
9
  .string()
113
10
  .optional()
114
11
  .describe('The extracted content in Markdown format'),
115
- file: fileDownloadSchema
116
- .optional()
117
- .describe('Download information when content is cached'),
118
- ...resourceFieldsSchema.shape,
12
+ error: z.string().optional().describe('Error message if the request failed'),
119
13
  });
@@ -0,0 +1,5 @@
1
+ import type { MarkdownTransformResult } from '../../config/types/content.js';
2
+ export type CachedMarkdownResult = MarkdownTransformResult & {
3
+ readonly content: string;
4
+ };
5
+ export declare function parseCachedMarkdownResult(cached: string): CachedMarkdownResult | undefined;