@j0hanz/superfetch 1.2.5 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/README.md +131 -156
  2. package/dist/config/auth-config.d.ts +16 -0
  3. package/dist/config/auth-config.js +53 -0
  4. package/dist/config/constants.d.ts +11 -13
  5. package/dist/config/constants.js +1 -3
  6. package/dist/config/env-parsers.d.ts +7 -0
  7. package/dist/config/env-parsers.js +84 -0
  8. package/dist/config/formatting.d.ts +2 -2
  9. package/dist/config/index.d.ts +47 -53
  10. package/dist/config/index.js +35 -64
  11. package/dist/config/types/content.d.ts +1 -49
  12. package/dist/config/types/runtime.d.ts +8 -16
  13. package/dist/config/types/tools.d.ts +2 -28
  14. package/dist/http/accept-policy.d.ts +3 -0
  15. package/dist/http/accept-policy.js +45 -0
  16. package/dist/http/async-handler.d.ts +2 -0
  17. package/dist/http/async-handler.js +5 -0
  18. package/dist/http/auth-introspection.d.ts +2 -0
  19. package/dist/http/auth-introspection.js +141 -0
  20. package/dist/http/auth-static.d.ts +2 -0
  21. package/dist/http/auth-static.js +23 -0
  22. package/dist/http/auth.d.ts +3 -2
  23. package/dist/http/auth.js +254 -23
  24. package/dist/http/cors.d.ts +6 -6
  25. package/dist/http/cors.js +7 -42
  26. package/dist/http/download-routes.d.ts +0 -12
  27. package/dist/http/download-routes.js +21 -58
  28. package/dist/http/host-allowlist.d.ts +3 -0
  29. package/dist/http/host-allowlist.js +117 -0
  30. package/dist/http/jsonrpc-http.d.ts +2 -0
  31. package/dist/http/jsonrpc-http.js +10 -0
  32. package/dist/http/mcp-routes.d.ts +8 -3
  33. package/dist/http/mcp-routes.js +137 -31
  34. package/dist/http/mcp-session-eviction.d.ts +3 -0
  35. package/dist/http/mcp-session-eviction.js +24 -0
  36. package/dist/http/mcp-session-helpers.d.ts +0 -1
  37. package/dist/http/mcp-session-helpers.js +1 -1
  38. package/dist/http/mcp-session-init.d.ts +7 -0
  39. package/dist/http/mcp-session-init.js +94 -0
  40. package/dist/http/mcp-session-slots.d.ts +17 -0
  41. package/dist/http/mcp-session-slots.js +55 -0
  42. package/dist/http/mcp-session-transport-init.d.ts +7 -0
  43. package/dist/http/mcp-session-transport-init.js +41 -0
  44. package/dist/http/mcp-session-transport.d.ts +7 -0
  45. package/dist/http/mcp-session-transport.js +57 -0
  46. package/dist/http/mcp-session-types.d.ts +5 -0
  47. package/dist/http/mcp-session-types.js +1 -0
  48. package/dist/http/mcp-session.d.ts +9 -9
  49. package/dist/http/mcp-session.js +15 -137
  50. package/dist/http/mcp-sessions.d.ts +43 -0
  51. package/dist/http/mcp-sessions.js +392 -0
  52. package/dist/http/mcp-validation.d.ts +1 -0
  53. package/dist/http/mcp-validation.js +11 -10
  54. package/dist/http/protocol-policy.d.ts +2 -0
  55. package/dist/http/protocol-policy.js +31 -0
  56. package/dist/http/rate-limit.js +7 -4
  57. package/dist/http/server-config.d.ts +1 -0
  58. package/dist/http/server-config.js +40 -0
  59. package/dist/http/server-middleware.d.ts +7 -9
  60. package/dist/http/server-middleware.js +9 -70
  61. package/dist/http/server-shutdown.d.ts +4 -0
  62. package/dist/http/server-shutdown.js +43 -0
  63. package/dist/http/server.d.ts +10 -0
  64. package/dist/http/server.js +546 -61
  65. package/dist/http/session-cleanup.js +8 -5
  66. package/dist/middleware/error-handler.d.ts +1 -1
  67. package/dist/middleware/error-handler.js +32 -33
  68. package/dist/resources/cached-content-params.d.ts +5 -0
  69. package/dist/resources/cached-content-params.js +36 -0
  70. package/dist/resources/cached-content.js +67 -125
  71. package/dist/resources/index.js +0 -82
  72. package/dist/server.js +50 -29
  73. package/dist/services/cache-events.d.ts +8 -0
  74. package/dist/services/cache-events.js +19 -0
  75. package/dist/services/cache-keys.d.ts +7 -0
  76. package/dist/services/cache-keys.js +57 -0
  77. package/dist/services/cache.d.ts +4 -9
  78. package/dist/services/cache.js +77 -139
  79. package/dist/services/context.d.ts +0 -1
  80. package/dist/services/context.js +0 -7
  81. package/dist/services/extractor.js +55 -116
  82. package/dist/services/fetcher/agents.d.ts +2 -2
  83. package/dist/services/fetcher/agents.js +35 -96
  84. package/dist/services/fetcher/dns-selection.d.ts +2 -0
  85. package/dist/services/fetcher/dns-selection.js +72 -0
  86. package/dist/services/fetcher/interceptors.d.ts +0 -22
  87. package/dist/services/fetcher/interceptors.js +18 -32
  88. package/dist/services/fetcher/redirects.js +16 -7
  89. package/dist/services/fetcher/response.js +79 -34
  90. package/dist/services/fetcher.d.ts +22 -3
  91. package/dist/services/fetcher.js +544 -44
  92. package/dist/services/fifo-queue.d.ts +8 -0
  93. package/dist/services/fifo-queue.js +25 -0
  94. package/dist/services/logger.js +2 -2
  95. package/dist/services/metadata-collector.d.ts +1 -9
  96. package/dist/services/metadata-collector.js +71 -2
  97. package/dist/services/transform-worker-pool.d.ts +4 -14
  98. package/dist/services/transform-worker-pool.js +177 -129
  99. package/dist/services/transform-worker-types.d.ts +32 -0
  100. package/dist/services/transform-worker-types.js +14 -0
  101. package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
  102. package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
  103. package/dist/tools/handlers/fetch-single.shared.d.ts +11 -22
  104. package/dist/tools/handlers/fetch-single.shared.js +175 -89
  105. package/dist/tools/handlers/fetch-url.tool.d.ts +7 -1
  106. package/dist/tools/handlers/fetch-url.tool.js +84 -119
  107. package/dist/tools/index.js +21 -40
  108. package/dist/tools/schemas.d.ts +1 -51
  109. package/dist/tools/schemas.js +1 -107
  110. package/dist/tools/utils/cached-markdown.d.ts +5 -0
  111. package/dist/tools/utils/cached-markdown.js +46 -0
  112. package/dist/tools/utils/content-shaping.d.ts +4 -0
  113. package/dist/tools/utils/content-shaping.js +67 -0
  114. package/dist/tools/utils/content-transform.d.ts +5 -17
  115. package/dist/tools/utils/content-transform.js +134 -114
  116. package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
  117. package/dist/tools/utils/fetch-pipeline.js +57 -63
  118. package/dist/tools/utils/frontmatter.d.ts +3 -0
  119. package/dist/tools/utils/frontmatter.js +73 -0
  120. package/dist/tools/utils/inline-content.d.ts +1 -2
  121. package/dist/tools/utils/inline-content.js +4 -7
  122. package/dist/tools/utils/markdown-heuristics.d.ts +1 -0
  123. package/dist/tools/utils/markdown-heuristics.js +19 -0
  124. package/dist/tools/utils/markdown-signals.d.ts +1 -0
  125. package/dist/tools/utils/markdown-signals.js +19 -0
  126. package/dist/tools/utils/raw-markdown-frontmatter.d.ts +3 -0
  127. package/dist/tools/utils/raw-markdown-frontmatter.js +73 -0
  128. package/dist/tools/utils/raw-markdown.d.ts +6 -0
  129. package/dist/tools/utils/raw-markdown.js +135 -0
  130. package/dist/transformers/markdown/fenced-code-rule.d.ts +2 -0
  131. package/dist/transformers/markdown/fenced-code-rule.js +38 -0
  132. package/dist/transformers/markdown/frontmatter.d.ts +2 -0
  133. package/dist/transformers/markdown/frontmatter.js +45 -0
  134. package/dist/transformers/markdown/noise-rule.d.ts +2 -0
  135. package/dist/transformers/markdown/noise-rule.js +80 -0
  136. package/dist/transformers/markdown/turndown-instance.d.ts +2 -0
  137. package/dist/transformers/markdown/turndown-instance.js +19 -0
  138. package/dist/transformers/markdown.d.ts +2 -0
  139. package/dist/transformers/markdown.js +185 -0
  140. package/dist/transformers/markdown.transformer.js +5 -117
  141. package/dist/utils/cached-payload.d.ts +7 -0
  142. package/dist/utils/cached-payload.js +36 -0
  143. package/dist/utils/code-language-bash.d.ts +1 -0
  144. package/dist/utils/code-language-bash.js +48 -0
  145. package/dist/utils/code-language-core.d.ts +2 -0
  146. package/dist/utils/code-language-core.js +13 -0
  147. package/dist/utils/code-language-detectors.d.ts +5 -0
  148. package/dist/utils/code-language-detectors.js +142 -0
  149. package/dist/utils/code-language-helpers.d.ts +5 -0
  150. package/dist/utils/code-language-helpers.js +62 -0
  151. package/dist/utils/code-language-parsing.d.ts +5 -0
  152. package/dist/utils/code-language-parsing.js +62 -0
  153. package/dist/utils/code-language.d.ts +9 -0
  154. package/dist/utils/code-language.js +250 -46
  155. package/dist/utils/error-details.d.ts +3 -0
  156. package/dist/utils/error-details.js +12 -0
  157. package/dist/utils/error-utils.js +1 -1
  158. package/dist/utils/filename-generator.js +34 -12
  159. package/dist/utils/guards.d.ts +1 -0
  160. package/dist/utils/guards.js +3 -0
  161. package/dist/utils/header-normalizer.d.ts +0 -3
  162. package/dist/utils/header-normalizer.js +3 -3
  163. package/dist/utils/ip-address.d.ts +4 -0
  164. package/dist/utils/ip-address.js +6 -0
  165. package/dist/utils/tool-error-handler.d.ts +2 -2
  166. package/dist/utils/tool-error-handler.js +14 -46
  167. package/dist/utils/url-transformer.d.ts +7 -0
  168. package/dist/utils/url-transformer.js +147 -0
  169. package/dist/utils/url-validator.d.ts +1 -2
  170. package/dist/utils/url-validator.js +53 -114
  171. package/dist/workers/content-transform.worker.d.ts +1 -0
  172. package/dist/workers/content-transform.worker.js +40 -0
  173. package/package.json +17 -18
@@ -1,21 +1,8 @@
1
- import type { PipelineResult, ToolContentBlock } from '../../config/types/runtime.js';
2
- import type { FileDownloadInfo, ToolResponseBase } from '../../config/types/tools.js';
3
- import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
4
- import { applyInlineContentLimit } from '../utils/inline-content.js';
5
- type SharedFetchFormat = 'jsonl' | 'markdown';
1
+ import type { FetchPipelineOptions, PipelineResult, ToolContentBlock } from '../../config/types/runtime.js';
6
2
  interface SharedFetchOptions<T extends {
7
3
  content: string;
8
4
  }> {
9
5
  readonly url: string;
10
- readonly format: SharedFetchFormat;
11
- readonly extractMainContent: boolean;
12
- readonly includeMetadata: boolean;
13
- readonly maxContentLength?: number;
14
- readonly includeContentBlocks?: boolean;
15
- readonly cacheVariant?: string;
16
- readonly customHeaders?: Record<string, string>;
17
- readonly retries?: number;
18
- readonly timeout?: number;
19
6
  readonly transform: (html: string, normalizedUrl: string) => T | Promise<T>;
20
7
  readonly serialize?: (result: T) => string;
21
8
  readonly deserialize?: (cached: string) => T | undefined;
@@ -30,13 +17,15 @@ export declare function performSharedFetch<T extends {
30
17
  inlineResult: ReturnType<typeof applyInlineContentLimit>;
31
18
  }>;
32
19
  export type InlineResult = ReturnType<typeof applyInlineContentLimit>;
33
- interface DownloadContext {
34
- cacheKey: string | null;
35
- url: string;
36
- title?: string;
20
+ export declare function buildToolContentBlocks(structuredContent: Record<string, unknown>, fromCache: boolean, inlineResult: InlineResult, resourceName: string, cacheKey?: string | null, fullContent?: string, url?: string, title?: string): ToolContentBlock[];
21
+ interface InlineContentResult {
22
+ content?: string;
23
+ contentSize: number;
24
+ resourceUri?: string;
25
+ resourceMimeType?: string;
26
+ error?: string;
27
+ truncated?: boolean;
37
28
  }
38
- export declare function getFileDownloadInfo(context: DownloadContext): FileDownloadInfo | null;
39
- export declare function getInlineErrorResponse(inlineResult: InlineResult, url: string, details?: Record<string, unknown>): ToolResponseBase | null;
40
- export declare function applyInlineResultToStructuredContent(structuredContent: Record<string, unknown>, inlineResult: InlineResult, contentKey: string): void;
41
- export declare function buildToolContentBlocks(structuredContent: Record<string, unknown>, fromCache: boolean, inlineResult: InlineResult, resourceName: string, cacheKey?: string | null, fullContent?: string, format?: SharedFetchFormat, url?: string, title?: string): ToolContentBlock[];
29
+ declare function applyInlineContentLimit(content: string, cacheKey: string | null): InlineContentResult;
30
+ export declare function executeFetchPipeline<T>(options: FetchPipelineOptions<T>): Promise<PipelineResult<T>>;
42
31
  export {};
@@ -1,79 +1,32 @@
1
+ import { TRUNCATION_MARKER } from '../../config/formatting.js';
1
2
  import { config } from '../../config/index.js';
2
- import { buildFileDownloadInfo } from '../../utils/download-url.js';
3
+ import * as cache from '../../services/cache.js';
4
+ import { createCacheKey, toResourceUri } from '../../services/cache-keys.js';
5
+ import { fetchNormalizedUrl } from '../../services/fetcher.js';
6
+ import { logDebug } from '../../services/logger.js';
3
7
  import { generateSafeFilename } from '../../utils/filename-generator.js';
4
- import { createToolErrorResponse } from '../../utils/tool-error-handler.js';
5
- import { appendHeaderVary } from '../utils/cache-vary.js';
6
- import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
7
- import { applyInlineContentLimit } from '../utils/inline-content.js';
8
- export async function performSharedFetch(options, deps = {}) {
9
- const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
10
- const cacheNamespace = options.format === 'markdown' ? 'markdown' : 'url';
11
- const cacheVary = appendHeaderVary({
12
- format: options.format,
13
- extractMainContent: options.extractMainContent,
14
- includeMetadata: options.includeMetadata,
15
- maxContentLength: options.maxContentLength,
16
- ...(options.cacheVariant ? { variant: options.cacheVariant } : {}),
17
- ...(options.format === 'markdown'
18
- ? { includeContentBlocks: options.includeContentBlocks }
19
- : { contentBlocks: true }),
20
- }, options.customHeaders);
21
- const pipelineOptions = {
22
- url: options.url,
23
- cacheNamespace,
24
- transform: options.transform,
25
- };
26
- if (options.customHeaders !== undefined) {
27
- pipelineOptions.customHeaders = options.customHeaders;
28
- }
29
- if (options.retries !== undefined) {
30
- pipelineOptions.retries = options.retries;
31
- }
32
- if (options.timeout !== undefined) {
33
- pipelineOptions.timeout = options.timeout;
34
- }
35
- if (cacheVary !== undefined) {
36
- pipelineOptions.cacheVary = cacheVary;
37
- }
8
+ import { isRecord } from '../../utils/guards.js';
9
+ import { transformToRawUrl } from '../../utils/url-transformer.js';
10
+ import { normalizeUrl } from '../../utils/url-validator.js';
11
+ function applyOptionalPipelineSerialization(pipelineOptions, options) {
38
12
  if (options.serialize !== undefined) {
39
13
  pipelineOptions.serialize = options.serialize;
40
14
  }
41
15
  if (options.deserialize !== undefined) {
42
16
  pipelineOptions.deserialize = options.deserialize;
43
17
  }
44
- const pipeline = await executePipeline(pipelineOptions);
45
- const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null, options.format);
46
- return { pipeline, inlineResult };
47
18
  }
48
- export function getFileDownloadInfo(context) {
49
- const infoOptions = {
50
- cacheKey: context.cacheKey,
51
- url: context.url,
19
+ export async function performSharedFetch(options, deps = {}) {
20
+ const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
21
+ const pipelineOptions = {
22
+ url: options.url,
23
+ cacheNamespace: 'markdown',
24
+ transform: options.transform,
52
25
  };
53
- if (context.title !== undefined) {
54
- return buildFileDownloadInfo({
55
- ...infoOptions,
56
- title: context.title,
57
- });
58
- }
59
- return buildFileDownloadInfo(infoOptions);
60
- }
61
- export function getInlineErrorResponse(inlineResult, url, details) {
62
- if (!inlineResult.error)
63
- return null;
64
- return createToolErrorResponse(inlineResult.error, url, 'INTERNAL_ERROR', details);
65
- }
66
- export function applyInlineResultToStructuredContent(structuredContent, inlineResult, contentKey) {
67
- if (inlineResult.truncated) {
68
- structuredContent.truncated = true;
69
- }
70
- if (typeof inlineResult.content === 'string') {
71
- structuredContent[contentKey] = inlineResult.content;
72
- }
73
- if (inlineResult.resourceUri) {
74
- structuredContent.resourceUri = inlineResult.resourceUri;
75
- structuredContent.resourceMimeType = inlineResult.resourceMimeType;
76
- }
26
+ applyOptionalPipelineSerialization(pipelineOptions, options);
27
+ const pipeline = await executePipeline(pipelineOptions);
28
+ const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null);
29
+ return { pipeline, inlineResult };
77
30
  }
78
31
  function serializeStructuredContent(structuredContent, fromCache) {
79
32
  return JSON.stringify(structuredContent, fromCache ? undefined : null, fromCache ? undefined : 2);
@@ -93,45 +46,178 @@ function buildResourceLink(inlineResult, name) {
93
46
  }
94
47
  return block;
95
48
  }
96
- function buildEmbeddedResource(content, mimeType, url, title) {
49
+ function buildEmbeddedResource(content, url, title) {
97
50
  if (!content) {
98
51
  return null;
99
52
  }
100
- // Generate a proper filename with extension
101
- const extension = mimeType === 'text/markdown' ? '.md' : '.jsonl';
102
- const filename = generateSafeFilename(url, title, undefined, extension);
103
- // Use file: URI scheme with filename for better VS Code integration
53
+ const filename = generateSafeFilename(url, title, undefined, '.md');
104
54
  const uri = `file:///${filename}`;
105
55
  return {
106
56
  type: 'resource',
107
57
  resource: {
108
58
  uri,
109
- mimeType,
59
+ mimeType: 'text/markdown',
110
60
  text: content,
111
61
  },
112
62
  };
113
63
  }
114
- export function buildToolContentBlocks(structuredContent, fromCache, inlineResult, resourceName, cacheKey, fullContent, format, url, title) {
115
- const textBlock = {
116
- type: 'text',
117
- text: serializeStructuredContent(structuredContent, fromCache),
118
- };
119
- const blocks = [textBlock];
120
- // Embed full content in stdio mode; HTTP mode relies on inline content or links.
121
- const mimeType = format === 'markdown' ? 'text/markdown' : 'application/jsonl';
122
- const contentToEmbed = config.runtime.httpMode
123
- ? inlineResult.content
124
- : (fullContent ?? inlineResult.content);
125
- if (typeof contentToEmbed === 'string' && url) {
126
- const embeddedResource = buildEmbeddedResource(contentToEmbed, mimeType, url, title);
127
- if (embeddedResource) {
128
- blocks.push(embeddedResource);
129
- }
64
+ function resolveContentToEmbed(inlineResult, fullContent, useInlineInHttpMode) {
65
+ if (useInlineInHttpMode) {
66
+ return inlineResult.content;
67
+ }
68
+ return fullContent ?? inlineResult.content;
69
+ }
70
+ function maybeAppendEmbeddedResource(blocks, contentToEmbed, url, title) {
71
+ if (typeof contentToEmbed !== 'string')
72
+ return;
73
+ if (!url)
74
+ return;
75
+ const embeddedResource = buildEmbeddedResource(contentToEmbed, url, title);
76
+ if (embeddedResource) {
77
+ blocks.push(embeddedResource);
130
78
  }
131
- // Add resource link for HTTP mode downloads (only when truncated)
79
+ }
80
+ function maybeAppendResourceLink(blocks, inlineResult, resourceName) {
132
81
  const resourceLink = buildResourceLink(inlineResult, resourceName);
133
82
  if (resourceLink) {
134
83
  blocks.push(resourceLink);
135
84
  }
85
+ }
86
+ function buildTextBlock(structuredContent, fromCache) {
87
+ return {
88
+ type: 'text',
89
+ text: serializeStructuredContent(structuredContent, fromCache),
90
+ };
91
+ }
92
+ export function buildToolContentBlocks(structuredContent, fromCache, inlineResult, resourceName, cacheKey, fullContent, url, title) {
93
+ const blocks = [
94
+ buildTextBlock(structuredContent, fromCache),
95
+ ];
96
+ const contentToEmbed = resolveContentToEmbed(inlineResult, fullContent, config.runtime.httpMode);
97
+ maybeAppendEmbeddedResource(blocks, contentToEmbed, url, title);
98
+ maybeAppendResourceLink(blocks, inlineResult, resourceName);
136
99
  return blocks;
137
100
  }
101
+ function applyInlineContentLimit(content, cacheKey) {
102
+ const contentSize = content.length;
103
+ const inlineLimit = config.constants.maxInlineContentChars;
104
+ if (contentSize <= inlineLimit) {
105
+ return { content, contentSize };
106
+ }
107
+ const resourceUri = resolveResourceUri(cacheKey);
108
+ if (!resourceUri) {
109
+ return buildTruncatedFallback(content, contentSize, inlineLimit);
110
+ }
111
+ return {
112
+ contentSize,
113
+ resourceUri,
114
+ resourceMimeType: 'text/markdown',
115
+ };
116
+ }
117
+ function resolveResourceUri(cacheKey) {
118
+ if (!config.cache.enabled || !cacheKey)
119
+ return null;
120
+ return toResourceUri(cacheKey);
121
+ }
122
+ function buildTruncatedFallback(content, contentSize, inlineLimit) {
123
+ const maxContentLength = Math.max(0, inlineLimit - TRUNCATION_MARKER.length);
124
+ const truncatedContent = content.length > inlineLimit
125
+ ? `${content.substring(0, maxContentLength)}${TRUNCATION_MARKER}`
126
+ : content;
127
+ return {
128
+ content: truncatedContent,
129
+ contentSize,
130
+ truncated: true,
131
+ };
132
+ }
133
+ function attemptCacheRetrieval({ cacheKey, deserialize, cacheNamespace, normalizedUrl, }) {
134
+ if (!cacheKey)
135
+ return null;
136
+ const cached = cache.get(cacheKey);
137
+ if (!cached)
138
+ return null;
139
+ if (!deserialize) {
140
+ logCacheMiss('missing deserializer', cacheNamespace, normalizedUrl);
141
+ return null;
142
+ }
143
+ const data = deserialize(cached.content);
144
+ if (data === undefined) {
145
+ logCacheMiss('deserialize failure', cacheNamespace, normalizedUrl);
146
+ return null;
147
+ }
148
+ logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
149
+ return {
150
+ data,
151
+ fromCache: true,
152
+ url: normalizedUrl,
153
+ fetchedAt: cached.fetchedAt,
154
+ cacheKey,
155
+ };
156
+ }
157
+ function resolveNormalizedUrl(url) {
158
+ const { normalizedUrl: validatedUrl } = normalizeUrl(url);
159
+ const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
160
+ return { normalizedUrl, originalUrl: validatedUrl, transformed };
161
+ }
162
+ export async function executeFetchPipeline(options) {
163
+ const resolvedUrl = resolveNormalizedUrl(options.url);
164
+ logRawUrlTransformation(resolvedUrl);
165
+ const cacheKey = createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
166
+ const cachedResult = attemptCacheRetrieval({
167
+ cacheKey,
168
+ deserialize: options.deserialize,
169
+ cacheNamespace: options.cacheNamespace,
170
+ normalizedUrl: resolvedUrl.normalizedUrl,
171
+ });
172
+ if (cachedResult)
173
+ return cachedResult;
174
+ logDebug('Fetching URL', { url: resolvedUrl.normalizedUrl });
175
+ const fetchOptions = options.signal === undefined ? {} : { signal: options.signal };
176
+ const html = await fetchNormalizedUrl(resolvedUrl.normalizedUrl, fetchOptions);
177
+ const data = await options.transform(html, resolvedUrl.normalizedUrl);
178
+ if (cache.isEnabled()) {
179
+ persistCache({
180
+ cacheKey,
181
+ data,
182
+ serialize: options.serialize,
183
+ normalizedUrl: resolvedUrl.normalizedUrl,
184
+ });
185
+ }
186
+ return {
187
+ data,
188
+ fromCache: false,
189
+ url: resolvedUrl.normalizedUrl,
190
+ fetchedAt: new Date().toISOString(),
191
+ cacheKey,
192
+ };
193
+ }
194
+ function persistCache({ cacheKey, data, serialize, normalizedUrl, }) {
195
+ if (!cacheKey)
196
+ return;
197
+ const serializer = serialize ?? JSON.stringify;
198
+ const title = extractTitle(data);
199
+ const metadata = {
200
+ url: normalizedUrl,
201
+ ...(title === undefined ? {} : { title }),
202
+ };
203
+ cache.set(cacheKey, serializer(data), metadata);
204
+ }
205
+ function extractTitle(value) {
206
+ if (!isRecord(value))
207
+ return undefined;
208
+ const { title } = value;
209
+ return typeof title === 'string' ? title : undefined;
210
+ }
211
+ function logCacheMiss(reason, cacheNamespace, normalizedUrl) {
212
+ logDebug(`Cache miss due to ${reason}`, {
213
+ namespace: cacheNamespace,
214
+ url: normalizedUrl,
215
+ });
216
+ }
217
+ function logRawUrlTransformation(resolvedUrl) {
218
+ if (!resolvedUrl.transformed)
219
+ return;
220
+ logDebug('Using transformed raw content URL', {
221
+ original: resolvedUrl.originalUrl,
222
+ });
223
+ }
@@ -1,4 +1,10 @@
1
+ import type { MarkdownTransformResult } from '../../config/types/content.js';
1
2
  import type { FetchUrlInput, ToolResponseBase } from '../../config/types/tools.js';
2
3
  export declare const FETCH_URL_TOOL_NAME = "fetch-url";
3
- export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. Supports custom headers, retries, and content length limits.";
4
+ export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format";
5
+ type MarkdownPipelineResult = MarkdownTransformResult & {
6
+ readonly content: string;
7
+ };
8
+ export declare function parseCachedMarkdownResult(cached: string): MarkdownPipelineResult | undefined;
4
9
  export declare function fetchUrlToolHandler(input: FetchUrlInput): Promise<ToolResponseBase>;
10
+ export {};
@@ -1,152 +1,117 @@
1
- import { config } from '../../config/index.js';
2
1
  import { logDebug, logError } from '../../services/logger.js';
2
+ import { isRecord } from '../../utils/guards.js';
3
3
  import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
4
- import { transformHtmlToJsonlAsync, transformHtmlToMarkdownWithBlocksAsync, } from '../utils/content-transform-async.js';
5
- import { applyInlineResultToStructuredContent, buildToolContentBlocks, getInlineErrorResponse, performSharedFetch, } from './fetch-single.shared.js';
4
+ import { transformHtmlToMarkdown } from '../utils/content-transform.js';
5
+ import { buildToolContentBlocks, performSharedFetch, } from './fetch-single.shared.js';
6
6
  export const FETCH_URL_TOOL_NAME = 'fetch-url';
7
- export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. Supports custom headers, retries, and content length limits.';
8
- function isRecord(value) {
9
- return value !== null && typeof value === 'object';
10
- }
11
- function deserializeJsonlTransformResult(cached) {
7
+ export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format';
8
+ function parseJsonRecord(input) {
12
9
  try {
13
- const parsed = JSON.parse(cached);
14
- if (!isRecord(parsed))
15
- return undefined;
16
- const { content, contentBlocks, title, truncated } = parsed;
17
- if (typeof content !== 'string')
18
- return undefined;
19
- if (typeof contentBlocks !== 'number' || !Number.isFinite(contentBlocks)) {
20
- return undefined;
21
- }
22
- if (title !== undefined && typeof title !== 'string')
23
- return undefined;
24
- if (truncated !== undefined && typeof truncated !== 'boolean') {
25
- return undefined;
26
- }
27
- const resolvedTitle = typeof title === 'string' ? title : undefined;
28
- return {
29
- content,
30
- contentBlocks,
31
- title: resolvedTitle,
32
- ...(truncated !== undefined ? { truncated } : {}),
33
- };
10
+ const parsed = JSON.parse(input);
11
+ return isRecord(parsed) ? parsed : undefined;
34
12
  }
35
13
  catch {
36
14
  return undefined;
37
15
  }
38
16
  }
39
- function resolveFetchUrlOptions(input) {
40
- const format = input.format ?? 'jsonl';
17
+ function resolveMarkdownContent(parsed) {
18
+ const { markdown } = parsed;
19
+ if (typeof markdown === 'string')
20
+ return markdown;
21
+ const { content } = parsed;
22
+ if (typeof content === 'string')
23
+ return content;
24
+ return undefined;
25
+ }
26
+ function resolveOptionalTitle(parsed) {
27
+ const { title } = parsed;
28
+ if (title === undefined)
29
+ return undefined;
30
+ return typeof title === 'string' ? title : undefined;
31
+ }
32
+ function resolveTruncatedFlag(parsed) {
33
+ const { truncated } = parsed;
34
+ return typeof truncated === 'boolean' ? truncated : false;
35
+ }
36
+ export function parseCachedMarkdownResult(cached) {
37
+ const parsed = parseJsonRecord(cached);
38
+ if (!parsed)
39
+ return undefined;
40
+ const resolvedContent = resolveMarkdownContent(parsed);
41
+ if (resolvedContent === undefined)
42
+ return undefined;
43
+ const title = resolveOptionalTitle(parsed);
44
+ if (parsed.title !== undefined && title === undefined)
45
+ return undefined;
41
46
  return {
42
- extractMainContent: input.extractMainContent ?? config.extraction.extractMainContent,
43
- includeMetadata: input.includeMetadata ?? config.extraction.includeMetadata,
44
- format,
45
- includeContentBlocks: input.includeContentBlocks ?? (format === 'markdown' ? false : true),
46
- ...(input.maxContentLength !== undefined && {
47
- maxContentLength: input.maxContentLength,
48
- }),
47
+ content: resolvedContent,
48
+ markdown: resolvedContent,
49
+ title,
50
+ truncated: resolveTruncatedFlag(parsed),
49
51
  };
50
52
  }
51
- function buildFetchUrlErrorDetails(format) {
52
- return {
53
- contentBlocks: 0,
54
- fetchedAt: new Date().toISOString(),
55
- format,
56
- cached: false,
53
+ function deserializeMarkdownResult(cached) {
54
+ return parseCachedMarkdownResult(cached);
55
+ }
56
+ function buildMarkdownTransform() {
57
+ return (html, url) => {
58
+ const result = transformHtmlToMarkdown(html, url, {
59
+ includeMetadata: true,
60
+ });
61
+ return { ...result, content: result.markdown };
57
62
  };
58
63
  }
59
- function buildFetchUrlTransform(options) {
60
- return async (html, url) => options.format === 'markdown'
61
- ? transformHtmlToMarkdownWithBlocksAsync(html, url, {
62
- extractMainContent: options.extractMainContent,
63
- includeMetadata: options.includeMetadata,
64
- ...(options.maxContentLength !== undefined && {
65
- maxContentLength: options.maxContentLength,
66
- }),
67
- includeContentBlocks: options.includeContentBlocks,
68
- })
69
- : transformHtmlToJsonlAsync(html, url, options);
64
+ function serializeMarkdownResult(result) {
65
+ return JSON.stringify({
66
+ markdown: result.markdown,
67
+ title: result.title,
68
+ truncated: result.truncated,
69
+ });
70
70
  }
71
- function buildFetchUrlStructuredContent(format, pipeline, inlineResult) {
72
- const structuredContent = {
71
+ function buildStructuredContent(pipeline, inlineResult) {
72
+ return {
73
73
  url: pipeline.url,
74
74
  title: pipeline.data.title,
75
- contentBlocks: pipeline.data.contentBlocks,
76
- fetchedAt: pipeline.fetchedAt,
77
- format,
78
- contentSize: inlineResult.contentSize,
79
- cached: pipeline.fromCache,
75
+ markdown: inlineResult.content,
80
76
  };
81
- if (pipeline.data.truncated) {
82
- structuredContent.truncated = true;
83
- }
84
- if (inlineResult.truncated) {
85
- structuredContent.truncated = true;
86
- }
87
- applyInlineResultToStructuredContent(structuredContent, inlineResult, 'content');
88
- return structuredContent;
89
77
  }
90
- function logFetchUrlStart(url, options) {
91
- logDebug('Fetching URL', {
92
- url,
93
- extractMainContent: options.extractMainContent,
94
- includeMetadata: options.includeMetadata,
95
- format: options.format,
96
- includeContentBlocks: options.includeContentBlocks,
97
- });
78
+ function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
79
+ return buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched markdown', pipeline.cacheKey, pipeline.data.content, pipeline.url, pipeline.data.title);
80
+ }
81
+ function logFetchStart(url) {
82
+ logDebug('Fetching URL', { url });
98
83
  }
99
- async function fetchUrlPipeline(url, input, options) {
100
- const sharedOptions = {
84
+ async function fetchPipeline(url) {
85
+ return performSharedFetch({
101
86
  url,
102
- format: options.format,
103
- extractMainContent: options.extractMainContent,
104
- includeMetadata: options.includeMetadata,
105
- includeContentBlocks: options.includeContentBlocks,
106
- ...(options.maxContentLength !== undefined && {
107
- maxContentLength: options.maxContentLength,
108
- }),
109
- ...(input.customHeaders !== undefined && {
110
- customHeaders: input.customHeaders,
111
- }),
112
- ...(input.retries !== undefined && { retries: input.retries }),
113
- ...(input.timeout !== undefined && { timeout: input.timeout }),
114
- ...(options.format === 'markdown' && {
115
- cacheVariant: 'markdown-with-blocks',
116
- }),
117
- transform: buildFetchUrlTransform(options),
118
- deserialize: deserializeJsonlTransformResult,
119
- };
120
- return performSharedFetch(sharedOptions);
87
+ transform: buildMarkdownTransform(),
88
+ serialize: serializeMarkdownResult,
89
+ deserialize: deserializeMarkdownResult,
90
+ });
121
91
  }
122
- function buildFetchUrlResponse(pipeline, inlineResult, format) {
123
- const structuredContent = buildFetchUrlStructuredContent(format, pipeline, inlineResult);
92
+ function buildResponse(pipeline, inlineResult) {
93
+ const structuredContent = buildStructuredContent(pipeline, inlineResult);
94
+ const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
124
95
  return {
125
- content: buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched content', pipeline.cacheKey, pipeline.data.content, format, pipeline.url, pipeline.data.title),
96
+ content,
126
97
  structuredContent,
127
98
  };
128
99
  }
129
100
  export async function fetchUrlToolHandler(input) {
130
- try {
131
- return await executeFetchUrl(input);
132
- }
133
- catch (error) {
101
+ return executeFetch(input).catch((error) => {
134
102
  logError('fetch-url tool error', error instanceof Error ? error : undefined);
135
- const errorDetails = buildFetchUrlErrorDetails(input.format ?? 'jsonl');
136
- return handleToolError(error, input.url, 'Failed to fetch URL', errorDetails);
137
- }
103
+ return handleToolError(error, input.url, 'Failed to fetch URL');
104
+ });
138
105
  }
139
- async function executeFetchUrl(input) {
106
+ async function executeFetch(input) {
140
107
  const { url } = input;
141
- const format = input.format ?? 'jsonl';
142
108
  if (!url) {
143
- return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR', buildFetchUrlErrorDetails(format));
109
+ return createToolErrorResponse('URL is required', '');
110
+ }
111
+ logFetchStart(url);
112
+ const { pipeline, inlineResult } = await fetchPipeline(url);
113
+ if (inlineResult.error) {
114
+ return createToolErrorResponse(inlineResult.error, url);
144
115
  }
145
- const options = resolveFetchUrlOptions(input);
146
- logFetchUrlStart(url, options);
147
- const { pipeline, inlineResult } = await fetchUrlPipeline(url, input, options);
148
- const inlineError = getInlineErrorResponse(inlineResult, url, buildFetchUrlErrorDetails(options.format));
149
- if (inlineError)
150
- return inlineError;
151
- return buildFetchUrlResponse(pipeline, inlineResult, options.format);
116
+ return buildResponse(pipeline, inlineResult);
152
117
  }
@@ -1,44 +1,25 @@
1
- import { FETCH_MARKDOWN_TOOL_DESCRIPTION, FETCH_MARKDOWN_TOOL_NAME, fetchMarkdownToolHandler, } from './handlers/fetch-markdown.tool.js';
2
1
  import { FETCH_URL_TOOL_DESCRIPTION, FETCH_URL_TOOL_NAME, fetchUrlToolHandler, } from './handlers/fetch-url.tool.js';
3
- import { fetchMarkdownInputSchema, fetchMarkdownOutputSchema, fetchUrlInputSchema, fetchUrlOutputSchema, } from './schemas.js';
4
- const TOOL_DEFINITIONS = [
5
- {
6
- name: FETCH_URL_TOOL_NAME,
7
- title: 'Fetch URL',
8
- description: FETCH_URL_TOOL_DESCRIPTION,
9
- inputSchema: fetchUrlInputSchema,
10
- outputSchema: fetchUrlOutputSchema,
11
- handler: fetchUrlToolHandler,
12
- annotations: {
13
- readOnlyHint: true,
14
- destructiveHint: false,
15
- idempotentHint: true,
16
- openWorldHint: true,
17
- },
2
+ import { fetchUrlInputSchema, fetchUrlOutputSchema } from './schemas.js';
3
+ const TOOL_DEFINITION = {
4
+ name: FETCH_URL_TOOL_NAME,
5
+ title: 'Fetch URL',
6
+ description: FETCH_URL_TOOL_DESCRIPTION,
7
+ inputSchema: fetchUrlInputSchema,
8
+ outputSchema: fetchUrlOutputSchema,
9
+ handler: fetchUrlToolHandler,
10
+ annotations: {
11
+ readOnlyHint: true,
12
+ destructiveHint: false,
13
+ idempotentHint: true,
14
+ openWorldHint: true,
18
15
  },
19
- {
20
- name: FETCH_MARKDOWN_TOOL_NAME,
21
- title: 'Fetch Markdown',
22
- description: FETCH_MARKDOWN_TOOL_DESCRIPTION,
23
- inputSchema: fetchMarkdownInputSchema,
24
- outputSchema: fetchMarkdownOutputSchema,
25
- handler: fetchMarkdownToolHandler,
26
- annotations: {
27
- readOnlyHint: true,
28
- destructiveHint: false,
29
- idempotentHint: true,
30
- openWorldHint: true,
31
- },
32
- },
33
- ];
16
+ };
34
17
  export function registerTools(server) {
35
- for (const tool of TOOL_DEFINITIONS) {
36
- server.registerTool(tool.name, {
37
- title: tool.title,
38
- description: tool.description,
39
- inputSchema: tool.inputSchema,
40
- outputSchema: tool.outputSchema,
41
- annotations: tool.annotations,
42
- }, tool.handler);
43
- }
18
+ server.registerTool(TOOL_DEFINITION.name, {
19
+ title: TOOL_DEFINITION.title,
20
+ description: TOOL_DEFINITION.description,
21
+ inputSchema: TOOL_DEFINITION.inputSchema,
22
+ outputSchema: TOOL_DEFINITION.outputSchema,
23
+ annotations: TOOL_DEFINITION.annotations,
24
+ }, TOOL_DEFINITION.handler);
44
25
  }