@j0hanz/superfetch 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +61 -46
  2. package/dist/config/formatting.d.ts +1 -1
  3. package/dist/config/types/content.d.ts +3 -3
  4. package/dist/config/types/runtime.d.ts +1 -1
  5. package/dist/config/types/tools.d.ts +12 -12
  6. package/dist/http/cors.js +23 -23
  7. package/dist/http/download-routes.js +9 -4
  8. package/dist/http/mcp-routes.js +2 -13
  9. package/dist/http/mcp-validation.js +1 -1
  10. package/dist/http/server-middleware.js +2 -1
  11. package/dist/http/server.js +2 -0
  12. package/dist/index.js +5 -0
  13. package/dist/middleware/error-handler.js +1 -1
  14. package/dist/resources/cached-content.js +8 -4
  15. package/dist/server.js +2 -0
  16. package/dist/services/cache.d.ts +1 -1
  17. package/dist/services/cache.js +20 -7
  18. package/dist/services/context.d.ts +2 -4
  19. package/dist/services/context.js +1 -1
  20. package/dist/services/extractor.js +26 -21
  21. package/dist/services/fetcher/interceptors.d.ts +22 -0
  22. package/dist/services/fetcher/interceptors.js +18 -8
  23. package/dist/services/fetcher/response.js +32 -24
  24. package/dist/services/fetcher.d.ts +0 -1
  25. package/dist/services/fetcher.js +5 -7
  26. package/dist/services/metadata-collector.d.ts +10 -0
  27. package/dist/services/metadata-collector.js +11 -0
  28. package/dist/services/parser.js +26 -25
  29. package/dist/services/transform-worker-pool.d.ts +14 -0
  30. package/dist/services/transform-worker-pool.js +167 -0
  31. package/dist/tools/handlers/fetch-markdown.tool.d.ts +9 -1
  32. package/dist/tools/handlers/fetch-markdown.tool.js +58 -30
  33. package/dist/tools/handlers/fetch-single.shared.d.ts +8 -3
  34. package/dist/tools/handlers/fetch-single.shared.js +42 -17
  35. package/dist/tools/handlers/fetch-url.tool.js +46 -16
  36. package/dist/tools/index.js +13 -0
  37. package/dist/tools/schemas.d.ts +29 -133
  38. package/dist/tools/schemas.js +22 -32
  39. package/dist/tools/utils/common.js +20 -16
  40. package/dist/tools/utils/content-transform-async.d.ts +6 -0
  41. package/dist/tools/utils/content-transform-async.js +33 -0
  42. package/dist/tools/utils/content-transform.d.ts +4 -1
  43. package/dist/tools/utils/content-transform.js +7 -2
  44. package/dist/tools/utils/fetch-pipeline.js +18 -10
  45. package/dist/utils/content-cleaner.d.ts +1 -1
  46. package/dist/utils/download-url.d.ts +9 -1
  47. package/dist/utils/download-url.js +9 -6
  48. package/dist/utils/tool-error-handler.d.ts +2 -2
  49. package/dist/utils/tool-error-handler.js +7 -7
  50. package/dist/utils/url-validator.js +38 -0
  51. package/dist/workers/transform-worker.d.ts +1 -0
  52. package/dist/workers/transform-worker.js +50 -0
  53. package/package.json +5 -7
@@ -1,3 +1,25 @@
+ export type FetchChannelEvent = {
+     v: 1;
+     type: 'start';
+     requestId: string;
+     method: string;
+     url: string;
+ } | {
+     v: 1;
+     type: 'end';
+     requestId: string;
+     status: number;
+     duration: number;
+ } | {
+     v: 1;
+     type: 'error';
+     requestId: string;
+     url: string;
+     error: string;
+     code?: string;
+     status?: number;
+     duration: number;
+ };
  interface FetchTelemetryContext {
      requestId: string;
      startTime: number;
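
The new FetchChannelEvent union is discriminated on "type" and versioned with v: 1, so subscribers can narrow events exhaustively. A minimal consumer sketch in TypeScript; the import specifier is illustrative, and this diff does not show how the events are published:

import type { FetchChannelEvent } from './services/fetcher/interceptors.js'; // hypothetical specifier

// Narrow on the 'type' discriminant; the compiler flags any unhandled variant.
function describeFetchEvent(event: FetchChannelEvent): string {
    switch (event.type) {
        case 'start':
            return `${event.requestId} -> ${event.method} ${event.url}`;
        case 'end':
            return `${event.requestId} <- ${event.status} after ${event.duration}ms`;
        case 'error':
            return `${event.requestId} failed: ${event.error}${event.code ? ` (${event.code})` : ''}`;
    }
}
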
@@ -70,13 +70,18 @@ function publishFetchEnd(context, status, duration) {
      });
  }
  function buildResponseMeta(response, contentSize, duration) {
-     const contentType = response.headers.get('content-type') ?? undefined;
      const contentLength = response.headers.get('content-length') ?? contentSize?.toString();
-     return {
-         contentType,
+     const meta = {
          duration: `${Math.round(duration)}ms`,
-         size: contentLength,
      };
+     const contentType = response.headers.get('content-type');
+     if (contentType !== null) {
+         meta.contentType = contentType;
+     }
+     if (contentLength !== undefined) {
+         meta.size = contentLength;
+     }
+     return meta;
  }
  function logSlowRequestIfNeeded(context, duration) {
      if (duration <= 5000)
@@ -91,16 +96,21 @@ export function recordFetchError(context, error, status) {
      const duration = performance.now() - context.startTime;
      const err = error instanceof Error ? error : new Error(String(error));
      const code = isSystemError(err) ? err.code : undefined;
-     publishFetchEvent({
+     const event = {
          v: 1,
          type: 'error',
          requestId: context.requestId,
          url: context.url,
          error: err.message,
-         code,
-         status,
          duration,
-     });
+     };
+     if (code !== undefined) {
+         event.code = code;
+     }
+     if (status !== undefined) {
+         event.status = status;
+     }
+     publishFetchEvent(event);
      const log = status === 429 ? logWarn : logError;
      log('HTTP Request Error', {
          requestId: context.requestId,
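
Several hunks in this release (buildResponseMeta and recordFetchError above, buildRequestContext, the parser block builders, resolveMarkdownOptions) make the same change: optional properties are added only when a value exists instead of being written as undefined, the behavior TypeScript's exactOptionalPropertyTypes expects. A generic sketch of both spellings used in the diff; the ResponseMeta shape here is illustrative:

interface ResponseMeta {
    duration: string;
    contentType?: string;
    size?: string;
}

// Imperative form: start from the required fields, assign optional ones only
// when present.
function buildMeta(duration: number, contentType: string | null, size?: string): ResponseMeta {
    const meta: ResponseMeta = { duration: `${Math.round(duration)}ms` };
    if (contentType !== null) {
        meta.contentType = contentType;
    }
    if (size !== undefined) {
        meta.size = size;
    }
    return meta;
}

// Spread form, as used later in resolveMarkdownOptions and fetchMarkdownPipeline:
// the conditional spread contributes the key only when the value is defined.
function buildMetaWithSpread(duration: number, size?: string): ResponseMeta {
    return {
        duration: `${Math.round(duration)}ms`,
        ...(size !== undefined && { size }),
    };
}
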
@@ -1,3 +1,5 @@
+ import { Readable, Writable } from 'node:stream';
+ import { pipeline } from 'node:stream/promises';
  import { FetchError } from '../../errors/app-error.js';
  function assertContentLengthWithinLimit(response, url, maxBytes) {
      const contentLengthHeader = response.headers.get('content-length');
@@ -9,37 +11,43 @@ function assertContentLengthWithinLimit(response, url, maxBytes) {
      }
      throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
  }
- function throwIfReadAborted(url, signal) {
-     if (!signal?.aborted)
-         return;
-     throw new FetchError('Request was aborted during response read', url, 499, {
-         reason: 'aborted',
-     });
- }
  async function readStreamWithLimit(stream, url, maxBytes, signal) {
-     const reader = stream.getReader();
      const decoder = new TextDecoder();
      let total = 0;
-     const chunks = [];
-     try {
-         for (;;) {
-             throwIfReadAborted(url, signal);
-             const { value, done } = await reader.read();
-             if (done)
-                 break;
-             total += value.byteLength;
+     let text = '';
+     const toBuffer = (chunk) => {
+         if (typeof chunk === 'string') {
+             return Buffer.from(chunk);
+         }
+         return Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
+     };
+     const sink = new Writable({
+         write(chunk, _encoding, callback) {
+             const buffer = toBuffer(chunk);
+             total += buffer.length;
              if (total > maxBytes) {
-                 await reader.cancel();
-                 throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
+                 callback(new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url));
+                 return;
              }
-             chunks.push(decoder.decode(value, { stream: true }));
-         }
-         chunks.push(decoder.decode());
-         return { text: chunks.join(''), size: total };
+             text += decoder.decode(buffer, { stream: true });
+             callback();
+         },
+         final(callback) {
+             text += decoder.decode();
+             callback();
+         },
+     });
+     try {
+         const readable = Readable.fromWeb(stream, { signal });
+         await pipeline(readable, sink, { signal });
      }
-     finally {
-         reader.releaseLock();
+     catch (error) {
+         if (signal?.aborted) {
+             throw new FetchError('Request was aborted during response read', url, 499, { reason: 'aborted' });
+         }
+         throw error;
      }
+     return { text, size: total };
  }
  export async function readResponseText(response, url, maxBytes, signal) {
      assertContentLengthWithinLimit(response, url, maxBytes);
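
The rewritten readStreamWithLimit replaces a manual reader loop with Readable.fromWeb piped into a size-capping Writable, letting stream.pipeline handle abort signals, backpressure, and cleanup. A standalone sketch of the same technique; the function name and error text below are illustrative, not the package's API:

import { Readable, Writable } from 'node:stream';
import { pipeline } from 'node:stream/promises';
import type { ReadableStream } from 'node:stream/web';

// Decode a web ReadableStream into text, failing the pipeline as soon as the
// byte budget is exceeded.
async function readTextWithCap(stream: ReadableStream<Uint8Array>, maxBytes: number, signal?: AbortSignal): Promise<string> {
    const decoder = new TextDecoder();
    let total = 0;
    let text = '';
    const sink = new Writable({
        write(chunk: Buffer, _encoding, callback) {
            total += chunk.length;
            if (total > maxBytes) {
                callback(new Error(`response exceeds ${maxBytes} bytes`));
                return;
            }
            text += decoder.decode(chunk, { stream: true });
            callback();
        },
        final(callback) {
            text += decoder.decode();
            callback();
        },
    });
    await pipeline(Readable.fromWeb(stream, { signal }), sink, { signal });
    return text;
}
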
@@ -1,5 +1,4 @@
  import type { FetchOptions } from '../config/types/runtime.js';
  import { destroyAgents } from './fetcher/agents.js';
  export { destroyAgents };
- export declare function fetchUrlWithRetry(url: string, options?: FetchOptions, maxRetries?: number): Promise<string>;
  export declare function fetchNormalizedUrlWithRetry(normalizedUrl: string, options?: FetchOptions, maxRetries?: number): Promise<string>;
@@ -1,6 +1,5 @@
  import { config } from '../config/index.js';
  import { normalizeHeaderRecord } from '../utils/header-normalizer.js';
- import { validateAndNormalizeUrl } from '../utils/url-validator.js';
  import { destroyAgents, dispatcher } from './fetcher/agents.js';
  import { createHttpError, createRateLimitError, mapFetchError, } from './fetcher/errors.js';
  import { recordFetchError, recordFetchResponse, startFetchTelemetry, } from './fetcher/interceptors.js';
@@ -66,20 +65,19 @@ async function fetchWithTelemetry(normalizedUrl, requestInit, timeoutMs) {
          throw mapped;
      }
  }
- export async function fetchUrlWithRetry(url, options, maxRetries = 3) {
-     const normalizedUrl = await validateAndNormalizeUrl(url);
-     return fetchNormalizedUrlWithRetry(normalizedUrl, options, maxRetries);
- }
  export async function fetchNormalizedUrlWithRetry(normalizedUrl, options, maxRetries = 3) {
      const context = buildRequestContext(options);
      return executeWithRetry(normalizedUrl, maxRetries, async () => runFetch(normalizedUrl, context), context.signal);
  }
  function buildRequestContext(options) {
-     return {
+     const context = {
          timeoutMs: options?.timeout ?? config.fetcher.timeout,
          headers: buildHeaders(options?.customHeaders),
-         signal: options?.signal,
      };
+     if (options?.signal) {
+         context.signal = options.signal;
+     }
+     return context;
  }
  async function runFetch(normalizedUrl, context) {
      const signal = buildRequestSignal(context.timeoutMs, context.signal);
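
With this change fetchUrlWithRetry is gone and only fetchNormalizedUrlWithRetry is exported, so URL validation has to happen before the fetcher is called (url-validator.js also grows by 38 lines in this release). A migration sketch for callers of the removed wrapper, assuming validateAndNormalizeUrl keeps the signature the old code used; the import specifiers are illustrative:

import { validateAndNormalizeUrl } from '../utils/url-validator.js';
import { fetchNormalizedUrlWithRetry } from './fetcher.js';

// Equivalent of the removed fetchUrlWithRetry: normalize and validate first,
// then fetch with retries.
async function fetchUrl(url: string, timeout?: number, maxRetries = 3): Promise<string> {
    const normalizedUrl = await validateAndNormalizeUrl(url);
    return fetchNormalizedUrlWithRetry(normalizedUrl, timeout !== undefined ? { timeout } : undefined, maxRetries);
}
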
@@ -0,0 +1,10 @@
+ import type { ExtractedMetadata } from '../config/types/content.js';
+ export type MetaSource = 'og' | 'twitter' | 'standard';
+ export type MetaField = keyof ExtractedMetadata;
+ export interface MetaCollectorState {
+     title: Partial<Record<MetaSource, string>>;
+     description: Partial<Record<MetaSource, string>>;
+     author: Partial<Record<MetaSource, string>>;
+ }
+ export declare function createMetaCollectorState(): MetaCollectorState;
+ export declare function resolveMetaField(state: MetaCollectorState, field: MetaField): string | undefined;
@@ -0,0 +1,11 @@
+ export function createMetaCollectorState() {
+     return {
+         title: {},
+         description: {},
+         author: {},
+     };
+ }
+ export function resolveMetaField(state, field) {
+     const sources = state[field];
+     return sources.og ?? sources.twitter ?? sources.standard;
+ }
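
The metadata collector extracted from the extractor keeps one slot per source and resolves each field with an og, then twitter, then standard priority. A small usage sketch; the import specifier is illustrative:

import { createMetaCollectorState, resolveMetaField } from './metadata-collector.js';

const state = createMetaCollectorState();
state.title.standard = 'Page <title> text';
state.title.og = 'og:title value';
state.description.twitter = 'twitter:description value';

resolveMetaField(state, 'title');       // 'og:title value' (og wins over standard)
resolveMetaField(state, 'description'); // 'twitter:description value'
resolveMetaField(state, 'author');      // undefined (nothing collected)
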
@@ -6,18 +6,8 @@ import { getErrorMessage } from '../utils/error-utils.js';
  import { truncateHtml } from '../utils/html-truncator.js';
  import { sanitizeText } from '../utils/sanitizer.js';
  import { logWarn } from './logger.js';
+ import { createMetaCollectorState, resolveMetaField, } from './metadata-collector.js';
  const CONTENT_SELECTOR = 'h1, h2, h3, h4, h5, h6, p, ul, ol, pre, code:not(pre code), table, img, blockquote';
- function createMetaCollectorState() {
-     return {
-         title: {},
-         description: {},
-         author: {},
-     };
- }
- function resolveMetaField(state, field) {
-     const sources = state[field];
-     return sources.og ?? sources.twitter ?? sources.standard;
- }
  function extractMetadata($) {
      const state = createMetaCollectorState();
      $('meta').each((_, element) => {
@@ -55,11 +45,17 @@ function extractMetadata($) {
              state.title.standard = titleText;
          }
      }
-     return {
-         title: resolveMetaField(state, 'title'),
-         description: resolveMetaField(state, 'description'),
-         author: resolveMetaField(state, 'author'),
-     };
+     const metadata = {};
+     const title = resolveMetaField(state, 'title');
+     const description = resolveMetaField(state, 'description');
+     const author = resolveMetaField(state, 'author');
+     if (title !== undefined)
+         metadata.title = title;
+     if (description !== undefined)
+         metadata.description = description;
+     if (author !== undefined)
+         metadata.author = author;
+     return metadata;
  }
  function parseHeading($, element) {
      const rawText = sanitizeText($(element).text());
@@ -109,11 +105,14 @@ function parseCode($, element) {
      const dataLang = $(element).attr('data-language') ?? '';
      const language = resolveLanguageFromAttributes(className, dataLang) ??
          detectLanguageFromCode(text);
-     return {
+     const block = {
          type: 'code',
-         language,
          text,
      };
+     if (language !== undefined) {
+         block.language = language;
+     }
+     return block;
  }
  function parseTable($, element) {
      const headers = [];
@@ -144,21 +143,23 @@ function parseTable($, element) {
      });
      if (rows.length === 0)
          return null;
-     return {
-         type: 'table',
-         headers: headers.length > 0 ? headers : undefined,
-         rows,
-     };
+     return headers.length > 0
+         ? { type: 'table', headers, rows }
+         : { type: 'table', rows };
  }
  function parseImage($, element) {
      const src = $(element).attr('src');
      if (!src)
          return null;
-     return {
+     const alt = $(element).attr('alt');
+     const image = {
          type: 'image',
          src,
-         alt: $(element).attr('alt') ?? undefined,
      };
+     if (alt !== undefined) {
+         image.alt = alt;
+     }
+     return image;
  }
  function parseBlockquote($, element) {
      const rawText = sanitizeText($(element).text());
@@ -0,0 +1,14 @@
+ import type { JsonlTransformResult, MarkdownTransformResult, TransformOptions } from '../config/types/content.js';
+ type TransformMode = 'jsonl' | 'markdown' | 'markdown-blocks';
+ export interface TransformJob {
+     mode: TransformMode;
+     html: string;
+     url: string;
+     options: TransformOptions & {
+         includeContentBlocks?: boolean;
+     };
+ }
+ type TransformResult = JsonlTransformResult | MarkdownTransformResult;
+ export declare function runTransformInWorker(job: TransformJob): Promise<TransformResult | null>;
+ export declare function destroyTransformWorkers(): void;
+ export {};
@@ -0,0 +1,167 @@
+ import os from 'node:os';
+ import { isMainThread, Worker } from 'node:worker_threads';
+ import { config } from '../config/index.js';
+ import { getErrorMessage } from '../utils/error-utils.js';
+ import { logWarn } from './logger.js';
+ const MAX_POOL_SIZE = 4;
+ function resolvePoolSize() {
+     const available = os.availableParallelism();
+     return Math.max(1, Math.min(available - 1, MAX_POOL_SIZE));
+ }
+ let pool = null;
+ let poolDisabled = false;
+ function shouldUseWorkers() {
+     return isMainThread && config.runtime.httpMode && !poolDisabled;
+ }
+ function getWorkerUrl() {
+     return new URL('../workers/transform-worker.js', import.meta.url);
+ }
+ export async function runTransformInWorker(job) {
+     if (!shouldUseWorkers())
+         return null;
+     if (!pool) {
+         try {
+             pool = new TransformWorkerPool(getWorkerUrl(), resolvePoolSize());
+         }
+         catch (error) {
+             poolDisabled = true;
+             logWarn('Failed to initialize transform worker pool', {
+                 error: getErrorMessage(error),
+             });
+             return null;
+         }
+     }
+     try {
+         return await pool.run(job);
+     }
+     catch (error) {
+         poolDisabled = true;
+         pool.destroy();
+         pool = null;
+         logWarn('Transform worker failed; falling back to main thread', {
+             error: getErrorMessage(error),
+         });
+         return null;
+     }
+ }
+ export function destroyTransformWorkers() {
+     pool?.destroy();
+     pool = null;
+ }
+ class TransformWorkerPool {
+     workerUrl;
+     size;
+     workers = [];
+     queue = [];
+     pending = new Map();
+     nextId = 1;
+     destroyed = false;
+     constructor(workerUrl, size) {
+         this.workerUrl = workerUrl;
+         this.size = size;
+         for (let i = 0; i < size; i += 1) {
+             this.workers.push(this.createWorker());
+         }
+     }
+     run(job) {
+         if (this.destroyed) {
+             return Promise.reject(new Error('Transform worker pool is closed'));
+         }
+         const id = this.nextId++;
+         const queuedJob = { ...job, id };
+         return new Promise((resolve, reject) => {
+             this.pending.set(id, { resolve, reject });
+             this.queue.push(queuedJob);
+             this.schedule();
+         });
+     }
+     destroy() {
+         if (this.destroyed)
+             return;
+         this.destroyed = true;
+         for (const workerState of this.workers) {
+             void workerState.worker.terminate();
+         }
+         for (const [id, pending] of this.pending.entries()) {
+             pending.reject(new Error('Transform worker pool shut down'));
+             this.pending.delete(id);
+         }
+         this.queue.length = 0;
+     }
+     createWorker() {
+         const worker = new Worker(this.workerUrl);
+         worker.unref();
+         const state = { worker, busy: false, currentJobId: undefined };
+         worker.on('message', (message) => {
+             this.handleMessage(state, message);
+         });
+         worker.on('error', (error) => {
+             this.handleWorkerError(state, error);
+         });
+         worker.on('exit', (code) => {
+             this.handleWorkerExit(state, code);
+         });
+         return state;
+     }
+     handleMessage(state, message) {
+         const pending = this.pending.get(message.id);
+         if (pending) {
+             this.pending.delete(message.id);
+             if (message.ok) {
+                 pending.resolve(message.result);
+             }
+             else {
+                 pending.reject(new Error(message.error));
+             }
+         }
+         state.busy = false;
+         state.currentJobId = undefined;
+         this.schedule();
+     }
+     handleWorkerError(state, error) {
+         this.failCurrentJob(state, error);
+         this.replaceWorker(state);
+     }
+     handleWorkerExit(state, code) {
+         if (code !== 0) {
+             this.failCurrentJob(state, new Error(`Transform worker exited with code ${code}`));
+         }
+         this.replaceWorker(state);
+     }
+     failCurrentJob(state, error) {
+         if (!state.currentJobId)
+             return;
+         const pending = this.pending.get(state.currentJobId);
+         if (pending) {
+             pending.reject(error);
+             this.pending.delete(state.currentJobId);
+         }
+         state.currentJobId = undefined;
+         state.busy = false;
+     }
+     replaceWorker(state) {
+         if (this.destroyed)
+             return;
+         const index = this.workers.indexOf(state);
+         if (index === -1)
+             return;
+         this.workers[index] = this.createWorker();
+         this.schedule();
+     }
+     schedule() {
+         if (this.destroyed)
+             return;
+         for (const workerState of this.workers) {
+             if (this.queue.length === 0)
+                 return;
+             if (workerState.busy)
+                 continue;
+             const job = this.queue.shift();
+             if (!job)
+                 return;
+             workerState.busy = true;
+             workerState.currentJobId = job.id;
+             workerState.worker.postMessage(job);
+         }
+     }
+ }
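
The pool only activates on the main thread in HTTP mode, caps itself at four workers, and returns null whenever workers are unavailable or a previous job failed, which tells the caller to transform in-process instead. A usage sketch; the import specifiers and option values are illustrative, and the fallback uses the pre-existing synchronous transform with the signature the 1.2.2 tool handler called:

import { runTransformInWorker, destroyTransformWorkers } from './transform-worker-pool.js';
import { transformHtmlToMarkdown } from '../tools/utils/content-transform.js';

async function toMarkdown(html: string, url: string) {
    const options = { extractMainContent: true, includeMetadata: true };
    // Offload the HTML-to-Markdown transform when the pool is usable.
    const result = await runTransformInWorker({ mode: 'markdown', html, url, options });
    // null: stdio mode, not the main thread, or the pool was disabled after a
    // failure; run the transform on the main thread instead.
    return result ?? transformHtmlToMarkdown(html, url, options);
}

// On shutdown, terminate any workers that were spawned.
destroyTransformWorkers();
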
@@ -1,4 +1,12 @@
  import type { FetchMarkdownInput, ToolResponseBase } from '../../config/types/tools.js';
+ import { transformHtmlToMarkdownAsync } from '../utils/content-transform-async.js';
+ import { performSharedFetch } from './fetch-single.shared.js';
  export declare const FETCH_MARKDOWN_TOOL_NAME = "fetch-markdown";
  export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter and content length limits";
- export declare function fetchMarkdownToolHandler(input: FetchMarkdownInput): Promise<ToolResponseBase>;
+ interface FetchMarkdownDeps {
+     readonly performSharedFetch?: typeof performSharedFetch;
+     readonly transformHtmlToMarkdown?: typeof transformHtmlToMarkdownAsync;
+ }
+ export declare function createFetchMarkdownToolHandler(deps?: FetchMarkdownDeps): (input: FetchMarkdownInput) => Promise<ToolResponseBase>;
+ export declare const fetchMarkdownToolHandler: (input: FetchMarkdownInput) => Promise<ToolResponseBase>;
+ export {};
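
The markdown tool is now built by a factory that accepts optional overrides for the shared fetch and the markdown transform, so it can be unit-tested without touching the network; the exported fetchMarkdownToolHandler is simply createFetchMarkdownToolHandler() with the defaults. A test-wiring sketch; the import specifier is illustrative and the stub bodies are placeholders cast to the expected types, not real implementations:

import { createFetchMarkdownToolHandler } from './fetch-markdown.tool.js';

// The real shapes of these dependencies live in fetch-single.shared.js and
// content-transform-async.js; the stubs below only mark the injection points.
const handler = createFetchMarkdownToolHandler({
    performSharedFetch: (async () => {
        throw new Error('stubbed shared fetch');
    }) as never,
    transformHtmlToMarkdown: (async () => {
        throw new Error('stubbed markdown transform');
    }) as never,
});

await handler({ url: 'https://example.com/docs' });
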
@@ -1,7 +1,7 @@
  import { config } from '../../config/index.js';
  import { logDebug, logError } from '../../services/logger.js';
  import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
- import { transformHtmlToMarkdown } from '../utils/content-transform.js';
+ import { transformHtmlToMarkdownAsync } from '../utils/content-transform-async.js';
  import { applyInlineResultToStructuredContent, buildToolContentBlocks, getFileDownloadInfo, getInlineErrorResponse, performSharedFetch, } from './fetch-single.shared.js';
  export const FETCH_MARKDOWN_TOOL_NAME = 'fetch-markdown';
  export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter and content length limits';
@@ -39,7 +39,15 @@ function resolveMarkdownOptions(input) {
      return {
          extractMainContent: input.extractMainContent ?? config.extraction.extractMainContent,
          includeMetadata: input.includeMetadata ?? config.extraction.includeMetadata,
-         maxContentLength: input.maxContentLength,
+         ...(input.maxContentLength !== undefined && {
+             maxContentLength: input.maxContentLength,
+         }),
+     };
+ }
+ function buildFetchMarkdownErrorDetails() {
+     return {
+         fetchedAt: new Date().toISOString(),
+         cached: false,
      };
  }
  function buildMarkdownStructuredContent(pipeline, inlineResult, fileDownload) {
@@ -62,25 +70,30 @@ function buildMarkdownStructuredContent(pipeline, inlineResult, fileDownload) {
  function logFetchMarkdownStart(url, options) {
      logDebug('Fetching markdown', { url, ...options });
  }
- function buildMarkdownTransform(options) {
-     return (html, url) => {
-         const markdownResult = transformHtmlToMarkdown(html, url, options);
+ function buildMarkdownTransform(options, transform) {
+     return async (html, url) => {
+         const markdownResult = await transform(html, url, options);
          return { ...markdownResult, content: markdownResult.markdown };
      };
  }
- async function fetchMarkdownPipeline(url, input, options, transformOptions) {
-     return performSharedFetch({
+ async function fetchMarkdownPipeline(url, input, options, transformOptions, performSharedFetchImpl, transformImpl) {
+     const sharedOptions = {
          url,
          format: 'markdown',
          extractMainContent: options.extractMainContent,
          includeMetadata: options.includeMetadata,
-         maxContentLength: options.maxContentLength,
-         customHeaders: input.customHeaders,
-         retries: input.retries,
-         timeout: input.timeout,
-         transform: buildMarkdownTransform(transformOptions),
+         ...(options.maxContentLength !== undefined && {
+             maxContentLength: options.maxContentLength,
+         }),
+         ...(input.customHeaders !== undefined && {
+             customHeaders: input.customHeaders,
+         }),
+         ...(input.retries !== undefined && { retries: input.retries }),
+         ...(input.timeout !== undefined && { timeout: input.timeout }),
+         transform: buildMarkdownTransform(transformOptions, transformImpl),
          deserialize: deserializeMarkdownPipelineResult,
-     });
+     };
+     return performSharedFetchImpl(sharedOptions);
  }
  function buildMarkdownResponse(pipeline, inlineResult, fileDownload) {
      const structuredContent = buildMarkdownStructuredContent(pipeline, inlineResult, fileDownload);
@@ -89,33 +102,48 @@ function buildMarkdownResponse(pipeline, inlineResult, fileDownload) {
      return {
          structuredContent,
      };
- export async function fetchMarkdownToolHandler(input) {
-     try {
-         return await executeFetchMarkdown(input);
-     }
-     catch (error) {
-         logError('fetch-markdown tool error', error instanceof Error ? error : undefined);
-         return handleToolError(error, input.url, 'Failed to fetch markdown');
-     }
+ export function createFetchMarkdownToolHandler(deps = {}) {
+     const performSharedFetchImpl = deps.performSharedFetch ?? performSharedFetch;
+     const transformImpl = deps.transformHtmlToMarkdown ?? transformHtmlToMarkdownAsync;
+     return async (input) => {
+         try {
+             return await executeFetchMarkdown(input, performSharedFetchImpl, transformImpl);
+         }
+         catch (error) {
+             logError('fetch-markdown tool error', error instanceof Error ? error : undefined);
+             const errorDetails = buildFetchMarkdownErrorDetails();
+             return handleToolError(error, input.url, 'Failed to fetch markdown', errorDetails);
+         }
+     };
  }
- async function executeFetchMarkdown(input) {
+ export const fetchMarkdownToolHandler = createFetchMarkdownToolHandler();
+ async function executeFetchMarkdown(input, performSharedFetchImpl, transformImpl) {
      const { url } = input;
      if (!url) {
-         return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR');
+         return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR', buildFetchMarkdownErrorDetails());
      }
      const options = resolveMarkdownOptions(input);
      const transformOptions = { ...options };
      logFetchMarkdownStart(url, transformOptions);
-     const { pipeline, inlineResult } = await fetchMarkdownPipeline(url, input, options, transformOptions);
-     const inlineError = getInlineErrorResponse(inlineResult, url);
+     const { pipeline, inlineResult } = await fetchMarkdownPipeline(url, input, options, transformOptions, performSharedFetchImpl, transformImpl);
+     const inlineError = getInlineErrorResponse(inlineResult, url, buildFetchMarkdownErrorDetails());
      if (inlineError)
          return inlineError;
-     const fileDownload = inlineResult.resourceUri
-         ? getFileDownloadInfo({
+     let fileDownload = null;
+     if (inlineResult.resourceUri) {
+         const downloadContext = {
              cacheKey: pipeline.cacheKey ?? null,
              url: pipeline.url,
-             title: pipeline.data.title,
-         })
-         : null;
+         };
+         if (pipeline.data.title !== undefined) {
+             fileDownload = getFileDownloadInfo({
+                 ...downloadContext,
+                 title: pipeline.data.title,
+             });
+         }
+         else {
+             fileDownload = getFileDownloadInfo(downloadContext);
+         }
+     }
      return buildMarkdownResponse(pipeline, inlineResult, fileDownload);
  }