@j0hanz/superfetch 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/README.md +60 -45
  2. package/dist/config/formatting.d.ts +1 -1
  3. package/dist/config/types/content.d.ts +3 -3
  4. package/dist/config/types/runtime.d.ts +1 -1
  5. package/dist/config/types/tools.d.ts +12 -12
  6. package/dist/http/cors.js +23 -23
  7. package/dist/http/download-routes.js +11 -5
  8. package/dist/http/mcp-routes.js +2 -13
  9. package/dist/http/mcp-validation.js +1 -1
  10. package/dist/http/server-middleware.js +5 -3
  11. package/dist/http/server.js +2 -0
  12. package/dist/index.js +5 -0
  13. package/dist/middleware/error-handler.js +1 -1
  14. package/dist/resources/cached-content.js +8 -4
  15. package/dist/server.js +2 -0
  16. package/dist/services/cache.d.ts +2 -1
  17. package/dist/services/cache.js +23 -7
  18. package/dist/services/context.d.ts +4 -4
  19. package/dist/services/context.js +11 -1
  20. package/dist/services/extractor.js +26 -21
  21. package/dist/services/fetcher/agents.js +55 -1
  22. package/dist/services/fetcher/interceptors.d.ts +22 -0
  23. package/dist/services/fetcher/interceptors.js +57 -26
  24. package/dist/services/fetcher/response.d.ts +1 -1
  25. package/dist/services/fetcher/response.js +37 -16
  26. package/dist/services/fetcher.d.ts +1 -1
  27. package/dist/services/fetcher.js +9 -8
  28. package/dist/services/metadata-collector.d.ts +10 -0
  29. package/dist/services/metadata-collector.js +11 -0
  30. package/dist/services/parser.d.ts +5 -1
  31. package/dist/services/parser.js +82 -11
  32. package/dist/services/transform-worker-pool.d.ts +14 -0
  33. package/dist/services/transform-worker-pool.js +167 -0
  34. package/dist/tools/handlers/fetch-markdown.tool.d.ts +9 -1
  35. package/dist/tools/handlers/fetch-markdown.tool.js +58 -30
  36. package/dist/tools/handlers/fetch-single.shared.d.ts +8 -3
  37. package/dist/tools/handlers/fetch-single.shared.js +42 -17
  38. package/dist/tools/handlers/fetch-url.tool.js +46 -16
  39. package/dist/tools/index.js +13 -0
  40. package/dist/tools/schemas.d.ts +19 -16
  41. package/dist/tools/schemas.js +25 -4
  42. package/dist/tools/utils/common.js +20 -16
  43. package/dist/tools/utils/content-transform-async.d.ts +6 -0
  44. package/dist/tools/utils/content-transform-async.js +33 -0
  45. package/dist/tools/utils/content-transform.d.ts +4 -1
  46. package/dist/tools/utils/content-transform.js +37 -3
  47. package/dist/tools/utils/fetch-pipeline.js +26 -15
  48. package/dist/utils/content-cleaner.d.ts +1 -1
  49. package/dist/utils/download-url.d.ts +9 -1
  50. package/dist/utils/download-url.js +9 -6
  51. package/dist/utils/tool-error-handler.d.ts +2 -2
  52. package/dist/utils/tool-error-handler.js +7 -7
  53. package/dist/utils/url-validator.d.ts +5 -0
  54. package/dist/utils/url-validator.js +45 -3
  55. package/dist/workers/transform-worker.d.ts +1 -0
  56. package/dist/workers/transform-worker.js +50 -0
  57. package/package.json +4 -6
@@ -1,6 +1,6 @@
1
1
  import { z } from 'zod';
2
2
  export declare const fetchUrlInputSchema: z.ZodObject<{
3
- customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
3
+ customHeaders: z.ZodOptional<z.ZodEffects<z.ZodRecord<z.ZodString, z.ZodString>, Record<string, string>, Record<string, string>>>;
4
4
  timeout: z.ZodDefault<z.ZodNumber>;
5
5
  retries: z.ZodDefault<z.ZodNumber>;
6
6
  } & {
@@ -11,6 +11,7 @@ export declare const fetchUrlInputSchema: z.ZodObject<{
11
11
  maxContentLength: z.ZodOptional<z.ZodNumber>;
12
12
  } & {
13
13
  format: z.ZodDefault<z.ZodEnum<["jsonl", "markdown"]>>;
14
+ includeContentBlocks: z.ZodOptional<z.ZodBoolean>;
14
15
  }, "strict", z.ZodTypeAny, {
15
16
  url: string;
16
17
  extractMainContent: boolean;
@@ -18,20 +19,22 @@ export declare const fetchUrlInputSchema: z.ZodObject<{
18
19
  retries: number;
19
20
  format: "jsonl" | "markdown";
20
21
  timeout: number;
21
- customHeaders?: Record<string, string> | undefined;
22
22
  maxContentLength?: number | undefined;
23
+ includeContentBlocks?: boolean | undefined;
24
+ customHeaders?: Record<string, string> | undefined;
23
25
  }, {
24
26
  url: string;
25
- customHeaders?: Record<string, string> | undefined;
26
27
  extractMainContent?: boolean | undefined;
27
28
  includeMetadata?: boolean | undefined;
29
+ maxContentLength?: number | undefined;
28
30
  retries?: number | undefined;
29
31
  format?: "jsonl" | "markdown" | undefined;
30
- maxContentLength?: number | undefined;
32
+ includeContentBlocks?: boolean | undefined;
31
33
  timeout?: number | undefined;
34
+ customHeaders?: Record<string, string> | undefined;
32
35
  }>;
33
36
  export declare const fetchMarkdownInputSchema: z.ZodObject<{
34
- customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
37
+ customHeaders: z.ZodOptional<z.ZodEffects<z.ZodRecord<z.ZodString, z.ZodString>, Record<string, string>, Record<string, string>>>;
35
38
  timeout: z.ZodDefault<z.ZodNumber>;
36
39
  retries: z.ZodDefault<z.ZodNumber>;
37
40
  } & {
@@ -46,16 +49,16 @@ export declare const fetchMarkdownInputSchema: z.ZodObject<{
46
49
  includeMetadata: boolean;
47
50
  retries: number;
48
51
  timeout: number;
49
- customHeaders?: Record<string, string> | undefined;
50
52
  maxContentLength?: number | undefined;
53
+ customHeaders?: Record<string, string> | undefined;
51
54
  }, {
52
55
  url: string;
53
- customHeaders?: Record<string, string> | undefined;
54
56
  extractMainContent?: boolean | undefined;
55
57
  includeMetadata?: boolean | undefined;
56
- retries?: number | undefined;
57
58
  maxContentLength?: number | undefined;
59
+ retries?: number | undefined;
58
60
  timeout?: number | undefined;
61
+ customHeaders?: Record<string, string> | undefined;
59
62
  }>;
60
63
  export declare const fetchUrlOutputSchema: z.ZodObject<{
61
64
  url: z.ZodString;
@@ -74,30 +77,30 @@ export declare const fetchUrlOutputSchema: z.ZodObject<{
74
77
  errorCode: z.ZodOptional<z.ZodString>;
75
78
  }, "strict", z.ZodTypeAny, {
76
79
  url: string;
77
- contentBlocks: number;
78
80
  fetchedAt: string;
79
81
  format: "jsonl" | "markdown";
82
+ contentBlocks: number;
80
83
  cached: boolean;
81
84
  error?: string | undefined;
82
- content?: string | undefined;
83
85
  title?: string | undefined;
84
86
  truncated?: boolean | undefined;
85
87
  resourceUri?: string | undefined;
86
88
  resourceMimeType?: string | undefined;
89
+ content?: string | undefined;
87
90
  contentSize?: number | undefined;
88
91
  errorCode?: string | undefined;
89
92
  }, {
90
93
  url: string;
91
- contentBlocks: number;
92
94
  fetchedAt: string;
93
95
  format: "jsonl" | "markdown";
96
+ contentBlocks: number;
94
97
  cached: boolean;
95
98
  error?: string | undefined;
96
- content?: string | undefined;
97
99
  title?: string | undefined;
98
100
  truncated?: boolean | undefined;
99
101
  resourceUri?: string | undefined;
100
102
  resourceMimeType?: string | undefined;
103
+ content?: string | undefined;
101
104
  contentSize?: number | undefined;
102
105
  errorCode?: string | undefined;
103
106
  }>;
@@ -111,13 +114,13 @@ export declare const fetchMarkdownOutputSchema: z.ZodObject<{
111
114
  fileName: z.ZodString;
112
115
  expiresAt: z.ZodString;
113
116
  }, "strip", z.ZodTypeAny, {
114
- fileName: string;
115
117
  expiresAt: string;
116
118
  downloadUrl: string;
117
- }, {
118
119
  fileName: string;
120
+ }, {
119
121
  expiresAt: string;
120
122
  downloadUrl: string;
123
+ fileName: string;
121
124
  }>>;
122
125
  } & {
123
126
  contentSize: z.ZodOptional<z.ZodNumber>;
@@ -139,9 +142,9 @@ export declare const fetchMarkdownOutputSchema: z.ZodObject<{
139
142
  resourceMimeType?: string | undefined;
140
143
  contentSize?: number | undefined;
141
144
  file?: {
142
- fileName: string;
143
145
  expiresAt: string;
144
146
  downloadUrl: string;
147
+ fileName: string;
145
148
  } | undefined;
146
149
  errorCode?: string | undefined;
147
150
  }, {
@@ -156,9 +159,9 @@ export declare const fetchMarkdownOutputSchema: z.ZodObject<{
156
159
  resourceMimeType?: string | undefined;
157
160
  contentSize?: number | undefined;
158
161
  file?: {
159
- fileName: string;
160
162
  expiresAt: string;
161
163
  downloadUrl: string;
164
+ fileName: string;
162
165
  } | undefined;
163
166
  errorCode?: string | undefined;
164
167
  }>;
@@ -1,8 +1,16 @@
1
1
  import { z } from 'zod';
2
2
  import { config } from '../config/index.js';
3
+ const MAX_HEADER_NAME_LENGTH = 128;
4
+ const MAX_HEADER_VALUE_LENGTH = 2048;
5
+ const MAX_HEADER_COUNT = 50;
6
+ const MAX_CONTENT_LENGTH = config.constants.maxContentSize;
7
+ const customHeadersSchema = z
8
+ .record(z.string().max(MAX_HEADER_NAME_LENGTH), z.string().max(MAX_HEADER_VALUE_LENGTH))
9
+ .refine((headers) => Object.keys(headers).length <= MAX_HEADER_COUNT, {
10
+ message: `customHeaders must have at most ${MAX_HEADER_COUNT} entries`,
11
+ });
3
12
  const requestOptionsSchema = z.object({
4
- customHeaders: z
5
- .record(z.string())
13
+ customHeaders: customHeadersSchema
6
14
  .optional()
7
15
  .describe('Custom HTTP headers for the request'),
8
16
  timeout: z
@@ -30,6 +38,7 @@ const extractionOptionsSchema = z.object({
30
38
  maxContentLength: z
31
39
  .number()
32
40
  .positive()
41
+ .max(MAX_CONTENT_LENGTH)
33
42
  .optional()
34
43
  .describe('Maximum content length in characters'),
35
44
  });
@@ -38,6 +47,10 @@ const formatOptionsSchema = z.object({
38
47
  .enum(['jsonl', 'markdown'])
39
48
  .default('jsonl')
40
49
  .describe('Output format'),
50
+ includeContentBlocks: z
51
+ .boolean()
52
+ .optional()
53
+ .describe('Include content block counts when format=markdown'),
41
54
  });
42
55
  const resourceFieldsSchema = z.object({
43
56
  contentSize: z.number().optional().describe('Content length in characters'),
@@ -64,14 +77,22 @@ const fileDownloadSchema = z.object({
64
77
  });
65
78
  export const fetchUrlInputSchema = requestOptionsSchema
66
79
  .extend({
67
- url: z.string().min(1).describe('The URL to fetch'),
80
+ url: z
81
+ .string()
82
+ .min(1)
83
+ .max(config.constants.maxUrlLength)
84
+ .describe('The URL to fetch'),
68
85
  })
69
86
  .merge(extractionOptionsSchema)
70
87
  .merge(formatOptionsSchema)
71
88
  .strict();
72
89
  export const fetchMarkdownInputSchema = requestOptionsSchema
73
90
  .extend({
74
- url: z.string().min(1).describe('The URL to fetch'),
91
+ url: z
92
+ .string()
93
+ .min(1)
94
+ .max(config.constants.maxUrlLength)
95
+ .describe('The URL to fetch'),
75
96
  })
76
97
  .merge(extractionOptionsSchema)
77
98
  .strict();
@@ -6,22 +6,26 @@ export function createContentMetadataBlock(url, article, extractedMeta, shouldEx
6
6
  if (!includeMetadata)
7
7
  return undefined;
8
8
  const now = new Date().toISOString();
9
- return shouldExtractFromArticle && article
10
- ? {
11
- type: 'metadata',
12
- title: article.title,
13
- author: article.byline,
14
- url,
15
- fetchedAt: now,
16
- }
17
- : {
18
- type: 'metadata',
19
- title: extractedMeta.title,
20
- description: extractedMeta.description,
21
- author: extractedMeta.author,
22
- url,
23
- fetchedAt: now,
24
- };
9
+ const metadata = {
10
+ type: 'metadata',
11
+ url,
12
+ fetchedAt: now,
13
+ };
14
+ if (shouldExtractFromArticle && article) {
15
+ if (article.title !== undefined)
16
+ metadata.title = article.title;
17
+ if (article.byline !== undefined)
18
+ metadata.author = article.byline;
19
+ return metadata;
20
+ }
21
+ if (extractedMeta.title !== undefined)
22
+ metadata.title = extractedMeta.title;
23
+ if (extractedMeta.description !== undefined) {
24
+ metadata.description = extractedMeta.description;
25
+ }
26
+ if (extractedMeta.author !== undefined)
27
+ metadata.author = extractedMeta.author;
28
+ return metadata;
25
29
  }
26
30
  export function truncateContent(content, maxLength, suffix = TRUNCATION_MARKER) {
27
31
  if (maxLength === undefined ||
@@ -0,0 +1,6 @@
1
+ import type { JsonlTransformResult, MarkdownTransformResult, TransformOptions } from '../../config/types/content.js';
2
+ export declare function transformHtmlToJsonlAsync(html: string, url: string, options: TransformOptions): Promise<JsonlTransformResult>;
3
+ export declare function transformHtmlToMarkdownAsync(html: string, url: string, options: TransformOptions): Promise<MarkdownTransformResult>;
4
+ export declare function transformHtmlToMarkdownWithBlocksAsync(html: string, url: string, options: TransformOptions & {
5
+ includeContentBlocks?: boolean;
6
+ }): Promise<JsonlTransformResult>;
@@ -0,0 +1,33 @@
1
+ import { logWarn } from '../../services/logger.js';
2
+ import { runTransformInWorker, } from '../../services/transform-worker-pool.js';
3
+ import { transformHtmlToJsonl, transformHtmlToMarkdown, transformHtmlToMarkdownWithBlocks, } from './content-transform.js';
4
+ async function runOrFallback(job, fallback) {
5
+ try {
6
+ const result = await runTransformInWorker(job);
7
+ if (result)
8
+ return result;
9
+ }
10
+ catch (error) {
11
+ logWarn('Transform worker unavailable; using main thread', {
12
+ error: error instanceof Error ? error.message : String(error),
13
+ });
14
+ }
15
+ return fallback();
16
+ }
17
+ export async function transformHtmlToJsonlAsync(html, url, options) {
18
+ const result = await runOrFallback({ mode: 'jsonl', html, url, options }, () => transformHtmlToJsonl(html, url, options));
19
+ return result;
20
+ }
21
+ export async function transformHtmlToMarkdownAsync(html, url, options) {
22
+ const result = await runOrFallback({ mode: 'markdown', html, url, options }, () => transformHtmlToMarkdown(html, url, options));
23
+ return result;
24
+ }
25
+ export async function transformHtmlToMarkdownWithBlocksAsync(html, url, options) {
26
+ const result = await runOrFallback({
27
+ mode: 'markdown-blocks',
28
+ html,
29
+ url,
30
+ options,
31
+ }, () => transformHtmlToMarkdownWithBlocks(html, url, options));
32
+ return result;
33
+ }
@@ -8,7 +8,10 @@ interface ContentLengthOptions {
8
8
  }
9
9
  interface MarkdownOptions extends ExtractionOptions, ContentLengthOptions {
10
10
  }
11
+ interface MarkdownWithBlocksOptions extends ExtractionOptions, ContentLengthOptions {
12
+ readonly includeContentBlocks?: boolean;
13
+ }
11
14
  export declare function transformHtmlToJsonl(html: string, url: string, options: ExtractionOptions & ContentLengthOptions): JsonlTransformResult;
12
15
  export declare function transformHtmlToMarkdown(html: string, url: string, options: MarkdownOptions): MarkdownTransformResult;
13
- export declare function transformHtmlToMarkdownWithBlocks(html: string, url: string, options: ExtractionOptions & ContentLengthOptions): JsonlTransformResult;
16
+ export declare function transformHtmlToMarkdownWithBlocks(html: string, url: string, options: MarkdownWithBlocksOptions): JsonlTransformResult;
14
17
  export {};
@@ -1,6 +1,6 @@
1
1
  import { TRUNCATION_MARKER } from '../../config/formatting.js';
2
2
  import { extractContent } from '../../services/extractor.js';
3
- import { parseHtml } from '../../services/parser.js';
3
+ import { parseHtml, parseHtmlWithMetadata } from '../../services/parser.js';
4
4
  import { sanitizeText } from '../../utils/sanitizer.js';
5
5
  import { toJsonl } from '../../transformers/jsonl.transformer.js';
6
6
  import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
@@ -56,7 +56,10 @@ function decodeHtmlEntities(value) {
56
56
  }
57
57
  function buildJsonlPayload(context, maxContentLength) {
58
58
  const contentBlocks = parseHtml(context.sourceHtml);
59
- const { content, truncated } = truncateContent(toJsonl(contentBlocks, context.metadata), maxContentLength);
59
+ return buildJsonlPayloadFromBlocks(contentBlocks, context.metadata, maxContentLength);
60
+ }
61
+ function buildJsonlPayloadFromBlocks(contentBlocks, metadata, maxContentLength) {
62
+ const { content, truncated } = truncateContent(toJsonl(contentBlocks, metadata), maxContentLength);
60
63
  return {
61
64
  content,
62
65
  contentBlocks: contentBlocks.length,
@@ -69,6 +72,17 @@ function buildMarkdownPayload(context, maxContentLength) {
69
72
  return { content, truncated };
70
73
  }
71
74
  export function transformHtmlToJsonl(html, url, options) {
75
+ if (!options.extractMainContent && options.includeMetadata) {
76
+ const parsed = parseHtmlWithMetadata(html);
77
+ const metadataBlock = createContentMetadataBlock(url, null, parsed.metadata, false, true);
78
+ const { content, contentBlocks, truncated } = buildJsonlPayloadFromBlocks(parsed.blocks, metadataBlock, options.maxContentLength);
79
+ return {
80
+ content,
81
+ contentBlocks,
82
+ title: parsed.metadata.title,
83
+ ...(truncated && { truncated }),
84
+ };
85
+ }
72
86
  const context = resolveContentSource(html, url, options);
73
87
  const { content, contentBlocks, truncated } = buildJsonlPayload(context, options.maxContentLength);
74
88
  return {
@@ -88,8 +102,28 @@ export function transformHtmlToMarkdown(html, url, options) {
88
102
  };
89
103
  }
90
104
  export function transformHtmlToMarkdownWithBlocks(html, url, options) {
105
+ const includeContentBlocks = options.includeContentBlocks ?? true;
106
+ if (includeContentBlocks &&
107
+ !options.extractMainContent &&
108
+ options.includeMetadata) {
109
+ const parsed = parseHtmlWithMetadata(html);
110
+ const context = {
111
+ sourceHtml: html,
112
+ title: parsed.metadata.title,
113
+ metadata: createContentMetadataBlock(url, null, parsed.metadata, false, true),
114
+ };
115
+ const { content, truncated } = buildMarkdownPayload(context, options.maxContentLength);
116
+ return {
117
+ content,
118
+ contentBlocks: parsed.blocks.length,
119
+ title: context.title,
120
+ ...(truncated && { truncated }),
121
+ };
122
+ }
91
123
  const context = resolveContentSource(html, url, options);
92
- const contentBlocks = parseHtml(context.sourceHtml);
124
+ const contentBlocks = includeContentBlocks
125
+ ? parseHtml(context.sourceHtml)
126
+ : [];
93
127
  const { content, truncated } = buildMarkdownPayload(context, options.maxContentLength);
94
128
  return {
95
129
  content,
@@ -1,7 +1,7 @@
1
1
  import * as cache from '../../services/cache.js';
2
- import { fetchUrlWithRetry } from '../../services/fetcher.js';
2
+ import { fetchNormalizedUrlWithRetry } from '../../services/fetcher.js';
3
3
  import { logDebug } from '../../services/logger.js';
4
- import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
4
+ import { assertResolvedAddressesAllowed, normalizeUrl, } from '../../utils/url-validator.js';
5
5
  import { appendHeaderVary } from './cache-vary.js';
6
6
  function attemptCacheRetrieval(cacheKey, deserialize, cacheNamespace, normalizedUrl) {
7
7
  if (!cacheKey)
@@ -42,16 +42,19 @@ function attemptCacheRetrieval(cacheKey, deserialize, cacheNamespace, normalized
42
42
  * @returns Promise resolving to the pipeline result
43
43
  */
44
44
  export async function executeFetchPipeline(options) {
45
- const normalizedUrl = await validateAndNormalizeUrl(options.url);
45
+ const { normalizedUrl, hostname } = normalizeUrl(options.url);
46
46
  const cacheKey = resolveCacheKey(options, normalizedUrl);
47
47
  const cachedResult = attemptCacheRetrieval(cacheKey, options.deserialize, options.cacheNamespace, normalizedUrl);
48
48
  if (cachedResult)
49
49
  return cachedResult;
50
+ await assertResolvedAddressesAllowed(hostname);
50
51
  const fetchOptions = buildFetchOptions(options);
51
52
  logDebug('Fetching URL', { url: normalizedUrl, retries: options.retries });
52
- const html = await fetchUrlWithRetry(normalizedUrl, fetchOptions, options.retries);
53
- const data = options.transform(html, normalizedUrl);
54
- persistCache(cacheKey, data, options.serialize, normalizedUrl);
53
+ const html = await fetchNormalizedUrlWithRetry(normalizedUrl, fetchOptions, options.retries);
54
+ const data = await options.transform(html, normalizedUrl);
55
+ if (cache.isEnabled()) {
56
+ persistCache(cacheKey, data, options.serialize, normalizedUrl);
57
+ }
55
58
  return buildPipelineResult(normalizedUrl, data, cacheKey);
56
59
  }
57
60
  function resolveCacheKey(options, normalizedUrl) {
@@ -59,20 +62,28 @@ function resolveCacheKey(options, normalizedUrl) {
59
62
  return cache.createCacheKey(options.cacheNamespace, normalizedUrl, cacheVary);
60
63
  }
61
64
  function buildFetchOptions(options) {
62
- return {
63
- customHeaders: options.customHeaders,
64
- signal: options.signal,
65
- timeout: options.timeout,
66
- };
65
+ const fetchOptions = {};
66
+ if (options.customHeaders !== undefined) {
67
+ fetchOptions.customHeaders = options.customHeaders;
68
+ }
69
+ if (options.signal !== undefined) {
70
+ fetchOptions.signal = options.signal;
71
+ }
72
+ if (options.timeout !== undefined) {
73
+ fetchOptions.timeout = options.timeout;
74
+ }
75
+ return fetchOptions;
67
76
  }
68
77
  function persistCache(cacheKey, data, serialize, normalizedUrl) {
69
78
  if (!cacheKey)
70
79
  return;
71
80
  const serializer = serialize ?? JSON.stringify;
72
- cache.set(cacheKey, serializer(data), {
73
- url: normalizedUrl,
74
- title: extractTitle(data),
75
- });
81
+ const metadata = { url: normalizedUrl };
82
+ const title = extractTitle(data);
83
+ if (title !== undefined) {
84
+ metadata.title = title;
85
+ }
86
+ cache.set(cacheKey, serializer(data), metadata);
76
87
  }
77
88
  function extractTitle(value) {
78
89
  if (!value || typeof value !== 'object')
@@ -1,5 +1,5 @@
1
1
  export declare function cleanParagraph(text: string): string | null;
2
2
  export declare function cleanHeading(text: string): string | null;
3
- export declare function cleanListItems(items: string[]): string[];
3
+ export declare function cleanListItems(items: readonly string[]): readonly string[];
4
4
  export declare function cleanCodeBlock(code: string): string | null;
5
5
  export declare function removeInlineTimestamps(text: string): string;
@@ -1,8 +1,16 @@
1
+ import { config } from '../config/index.js';
1
2
  import type { FileDownloadInfo } from '../config/types/tools.js';
3
+ import * as cache from '../services/cache.js';
4
+ import { generateSafeFilename } from './filename-generator.js';
2
5
  interface DownloadInfoOptions {
3
6
  cacheKey: string | null;
4
7
  url: string;
5
8
  title?: string;
6
9
  }
7
- export declare function buildFileDownloadInfo(options: DownloadInfoOptions): FileDownloadInfo | null;
10
+ interface DownloadInfoDeps {
11
+ readonly config?: typeof config;
12
+ readonly cache?: Pick<typeof cache, 'get' | 'parseCacheKey'>;
13
+ readonly generateSafeFilename?: typeof generateSafeFilename;
14
+ }
15
+ export declare function buildFileDownloadInfo(options: DownloadInfoOptions, deps?: DownloadInfoDeps): FileDownloadInfo | null;
8
16
  export {};
@@ -1,22 +1,25 @@
1
1
  import { config } from '../config/index.js';
2
2
  import * as cache from '../services/cache.js';
3
3
  import { generateSafeFilename } from './filename-generator.js';
4
- export function buildFileDownloadInfo(options) {
5
- if (!config.runtime.httpMode) {
4
+ export function buildFileDownloadInfo(options, deps = {}) {
5
+ const resolvedConfig = deps.config ?? config;
6
+ const resolvedCache = deps.cache ?? cache;
7
+ const resolveFilename = deps.generateSafeFilename ?? generateSafeFilename;
8
+ if (!resolvedConfig.runtime.httpMode) {
6
9
  return null;
7
10
  }
8
- if (!config.cache.enabled || !options.cacheKey) {
11
+ if (!resolvedConfig.cache.enabled || !options.cacheKey) {
9
12
  return null;
10
13
  }
11
- const parts = cache.parseCacheKey(options.cacheKey);
14
+ const parts = resolvedCache.parseCacheKey(options.cacheKey);
12
15
  if (!parts)
13
16
  return null;
14
- const cacheEntry = cache.get(options.cacheKey);
17
+ const cacheEntry = resolvedCache.get(options.cacheKey);
15
18
  if (!cacheEntry)
16
19
  return null;
17
20
  const { expiresAt, title, url } = cacheEntry;
18
21
  const downloadUrl = buildDownloadUrl(parts.namespace, parts.urlHash);
19
- const fileName = generateSafeFilename(url, title ?? options.title, parts.urlHash, resolveExtension(parts.namespace));
22
+ const fileName = resolveFilename(url, title ?? options.title, parts.urlHash, resolveExtension(parts.namespace));
20
23
  return { downloadUrl, fileName, expiresAt };
21
24
  }
22
25
  function buildDownloadUrl(namespace, hash) {
@@ -1,3 +1,3 @@
1
1
  import type { ToolErrorResponse } from '../config/types/tools.js';
2
- export declare function createToolErrorResponse(message: string, url: string, code: string): ToolErrorResponse;
3
- export declare function handleToolError(error: unknown, url: string, fallbackMessage?: string): ToolErrorResponse;
2
+ export declare function createToolErrorResponse(message: string, url: string, code: string, details?: Record<string, unknown>): ToolErrorResponse;
3
+ export declare function handleToolError(error: unknown, url: string, fallbackMessage?: string, details?: Record<string, unknown>): ToolErrorResponse;
@@ -22,12 +22,12 @@ function normalizeToolErrorCode(code) {
22
22
  return String(ErrorCode.InternalError);
23
23
  return MCP_ERROR_CODE_MAP[code] ?? code;
24
24
  }
25
- export function createToolErrorResponse(message, url, code) {
25
+ export function createToolErrorResponse(message, url, code, details = {}) {
26
26
  const structuredContent = {
27
+ ...details,
27
28
  error: message,
28
29
  url,
29
30
  errorCode: normalizeToolErrorCode(code),
30
- errorType: code,
31
31
  };
32
32
  return {
33
33
  content: [{ type: 'text', text: JSON.stringify(structuredContent) }],
@@ -42,19 +42,19 @@ function formatErrorMessage(baseMessage, error, fallback) {
42
42
  }
43
43
  return message;
44
44
  }
45
- export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
45
+ export function handleToolError(error, url, fallbackMessage = 'Operation failed', details = {}) {
46
46
  if (isValidationError(error)) {
47
- return createToolErrorResponse(error.message, url, 'VALIDATION_ERROR');
47
+ return createToolErrorResponse(error.message, url, 'VALIDATION_ERROR', details);
48
48
  }
49
49
  if (error instanceof FetchError) {
50
50
  const message = formatErrorMessage(error.message, error);
51
- return createToolErrorResponse(message, url, error.code);
51
+ return createToolErrorResponse(message, url, error.code, details);
52
52
  }
53
53
  if (error instanceof Error) {
54
54
  const message = formatErrorMessage(error.message, error, fallbackMessage);
55
- return createToolErrorResponse(message, url, 'UNKNOWN_ERROR');
55
+ return createToolErrorResponse(message, url, 'UNKNOWN_ERROR', details);
56
56
  }
57
- return createToolErrorResponse(`${fallbackMessage}: Unknown error`, url, 'UNKNOWN_ERROR');
57
+ return createToolErrorResponse(`${fallbackMessage}: Unknown error`, url, 'UNKNOWN_ERROR', details);
58
58
  }
59
59
  function isValidationError(error) {
60
60
  return (error instanceof Error &&
@@ -1,2 +1,7 @@
1
1
  export declare function isBlockedIp(ip: string): boolean;
2
+ export declare function assertResolvedAddressesAllowed(hostname: string): Promise<void>;
3
+ export declare function normalizeUrl(urlString: string): {
4
+ normalizedUrl: string;
5
+ hostname: string;
6
+ };
2
7
  export declare function validateAndNormalizeUrl(urlString: string): Promise<string>;
@@ -32,6 +32,35 @@ for (const entry of BLOCKED_IPV6_SUBNETS) {
32
32
  BLOCK_LIST.addSubnet(entry.subnet, entry.prefix, 'ipv6');
33
33
  }
34
34
  const DNS_LOOKUP_TIMEOUT_MS = 5000;
35
+ const DNS_DECISION_TTL_MS = 60000;
36
+ const DNS_DECISION_MAX = 1000;
37
+ const dnsDecisionCache = new Map();
38
+ function getCachedDnsDecision(hostname) {
39
+ const cached = dnsDecisionCache.get(hostname);
40
+ if (!cached)
41
+ return null;
42
+ if (cached.expiresAt <= Date.now()) {
43
+ dnsDecisionCache.delete(hostname);
44
+ return null;
45
+ }
46
+ return cached;
47
+ }
48
+ function setCachedDnsDecision(hostname, ok) {
49
+ dnsDecisionCache.set(hostname, {
50
+ ok,
51
+ expiresAt: Date.now() + DNS_DECISION_TTL_MS,
52
+ });
53
+ if (dnsDecisionCache.size <= DNS_DECISION_MAX)
54
+ return;
55
+ const evictCount = Math.ceil(DNS_DECISION_MAX * 0.05);
56
+ const iterator = dnsDecisionCache.keys();
57
+ for (let i = 0; i < evictCount; i++) {
58
+ const { value, done } = iterator.next();
59
+ if (done)
60
+ break;
61
+ dnsDecisionCache.delete(value);
62
+ }
63
+ }
35
64
  function matchesBlockedIpPatterns(resolvedIp) {
36
65
  for (const pattern of config.security.blockedIpPatterns) {
37
66
  if (pattern.test(resolvedIp)) {
@@ -78,7 +107,14 @@ function lookupWithTimeout(hostname) {
78
107
  });
79
108
  });
80
109
  }
81
- async function assertResolvedAddressesAllowed(hostname) {
110
+ export async function assertResolvedAddressesAllowed(hostname) {
111
+ const cached = getCachedDnsDecision(hostname);
112
+ if (cached) {
113
+ if (!cached.ok) {
114
+ throw createValidationError(`Blocked IP range resolved from hostname: ${hostname}`);
115
+ }
116
+ return;
117
+ }
82
118
  try {
83
119
  const result = await lookupWithTimeout(hostname);
84
120
  const addresses = Array.isArray(result) ? result : [result];
@@ -87,9 +123,11 @@ async function assertResolvedAddressesAllowed(hostname) {
87
123
  }
88
124
  for (const { address } of addresses) {
89
125
  if (isBlockedIp(address.toLowerCase())) {
126
+ setCachedDnsDecision(hostname, false);
90
127
  throw createValidationError(`Blocked IP range resolved from hostname: ${hostname}`);
91
128
  }
92
129
  }
130
+ setCachedDnsDecision(hostname, true);
93
131
  }
94
132
  catch (error) {
95
133
  const code = error?.code;
@@ -102,7 +140,7 @@ async function assertResolvedAddressesAllowed(hostname) {
102
140
  throw createValidationError(String(error));
103
141
  }
104
142
  }
105
- export async function validateAndNormalizeUrl(urlString) {
143
+ export function normalizeUrl(urlString) {
106
144
  const trimmedUrl = requireTrimmedUrl(urlString);
107
145
  assertUrlLength(trimmedUrl);
108
146
  const url = parseUrl(trimmedUrl);
@@ -110,8 +148,12 @@ export async function validateAndNormalizeUrl(urlString) {
110
148
  assertNoCredentials(url);
111
149
  const hostname = normalizeHostname(url);
112
150
  assertHostnameAllowed(hostname);
151
+ return { normalizedUrl: url.href, hostname };
152
+ }
153
+ export async function validateAndNormalizeUrl(urlString) {
154
+ const { normalizedUrl, hostname } = normalizeUrl(urlString);
113
155
  await assertResolvedAddressesAllowed(hostname);
114
- return url.href;
156
+ return normalizedUrl;
115
157
  }
116
158
  const VALIDATION_ERROR_CODE = 'VALIDATION_ERROR';
117
159
  function createValidationError(message) {
@@ -0,0 +1 @@
1
+ export {};