@j0hanz/superfetch 1.2.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/README.md +116 -152
  2. package/dist/config/auth-config.d.ts +16 -0
  3. package/dist/config/auth-config.js +53 -0
  4. package/dist/config/constants.d.ts +11 -13
  5. package/dist/config/constants.js +1 -3
  6. package/dist/config/env-parsers.d.ts +7 -0
  7. package/dist/config/env-parsers.js +84 -0
  8. package/dist/config/formatting.d.ts +2 -2
  9. package/dist/config/index.d.ts +47 -53
  10. package/dist/config/index.js +25 -59
  11. package/dist/config/types/content.d.ts +1 -49
  12. package/dist/config/types/runtime.d.ts +8 -16
  13. package/dist/config/types/tools.d.ts +2 -28
  14. package/dist/http/accept-policy.d.ts +3 -0
  15. package/dist/http/accept-policy.js +45 -0
  16. package/dist/http/async-handler.d.ts +2 -0
  17. package/dist/http/async-handler.js +5 -0
  18. package/dist/http/auth-introspection.d.ts +2 -0
  19. package/dist/http/auth-introspection.js +141 -0
  20. package/dist/http/auth-static.d.ts +2 -0
  21. package/dist/http/auth-static.js +23 -0
  22. package/dist/http/auth.d.ts +3 -2
  23. package/dist/http/auth.js +98 -26
  24. package/dist/http/cors.d.ts +6 -6
  25. package/dist/http/cors.js +7 -42
  26. package/dist/http/download-routes.d.ts +0 -12
  27. package/dist/http/download-routes.js +21 -58
  28. package/dist/http/jsonrpc-http.d.ts +2 -0
  29. package/dist/http/jsonrpc-http.js +10 -0
  30. package/dist/http/mcp-routes.d.ts +0 -1
  31. package/dist/http/mcp-routes.js +43 -30
  32. package/dist/http/mcp-session-helpers.d.ts +0 -1
  33. package/dist/http/mcp-session-helpers.js +1 -1
  34. package/dist/http/mcp-session-transport.d.ts +7 -0
  35. package/dist/http/mcp-session-transport.js +57 -0
  36. package/dist/http/mcp-session.js +60 -73
  37. package/dist/http/mcp-validation.d.ts +1 -0
  38. package/dist/http/mcp-validation.js +11 -10
  39. package/dist/http/protocol-policy.d.ts +2 -0
  40. package/dist/http/protocol-policy.js +31 -0
  41. package/dist/http/rate-limit.js +5 -2
  42. package/dist/http/server-config.d.ts +1 -0
  43. package/dist/http/server-config.js +40 -0
  44. package/dist/http/server-middleware.d.ts +2 -9
  45. package/dist/http/server-middleware.js +96 -43
  46. package/dist/http/server-shutdown.d.ts +4 -0
  47. package/dist/http/server-shutdown.js +43 -0
  48. package/dist/http/server.js +52 -64
  49. package/dist/http/session-cleanup.js +1 -1
  50. package/dist/middleware/error-handler.js +1 -3
  51. package/dist/resources/cached-content.js +50 -108
  52. package/dist/resources/index.js +0 -82
  53. package/dist/server.js +51 -30
  54. package/dist/services/cache-keys.d.ts +7 -0
  55. package/dist/services/cache-keys.js +57 -0
  56. package/dist/services/cache.d.ts +1 -7
  57. package/dist/services/cache.js +53 -119
  58. package/dist/services/context.d.ts +0 -1
  59. package/dist/services/context.js +0 -7
  60. package/dist/services/extractor.js +10 -82
  61. package/dist/services/fetcher/agents.d.ts +2 -2
  62. package/dist/services/fetcher/agents.js +34 -95
  63. package/dist/services/fetcher/dns-selection.d.ts +2 -0
  64. package/dist/services/fetcher/dns-selection.js +72 -0
  65. package/dist/services/fetcher/interceptors.d.ts +0 -22
  66. package/dist/services/fetcher/interceptors.js +30 -13
  67. package/dist/services/fetcher/redirects.js +4 -3
  68. package/dist/services/fetcher/response.js +66 -31
  69. package/dist/services/fetcher.d.ts +1 -3
  70. package/dist/services/fetcher.js +14 -33
  71. package/dist/services/fifo-queue.d.ts +8 -0
  72. package/dist/services/fifo-queue.js +25 -0
  73. package/dist/services/logger.js +2 -2
  74. package/dist/services/metadata-collector.d.ts +1 -9
  75. package/dist/services/metadata-collector.js +71 -2
  76. package/dist/services/transform-worker-pool.d.ts +4 -14
  77. package/dist/services/transform-worker-pool.js +177 -129
  78. package/dist/services/transform-worker-types.d.ts +32 -0
  79. package/dist/services/transform-worker-types.js +14 -0
  80. package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
  81. package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
  82. package/dist/tools/handlers/fetch-single.shared.d.ts +1 -20
  83. package/dist/tools/handlers/fetch-single.shared.js +44 -87
  84. package/dist/tools/handlers/fetch-url.tool.d.ts +1 -1
  85. package/dist/tools/handlers/fetch-url.tool.js +46 -123
  86. package/dist/tools/index.js +21 -40
  87. package/dist/tools/schemas.d.ts +1 -51
  88. package/dist/tools/schemas.js +2 -108
  89. package/dist/tools/utils/cached-markdown.d.ts +5 -0
  90. package/dist/tools/utils/cached-markdown.js +46 -0
  91. package/dist/tools/utils/content-shaping.d.ts +4 -0
  92. package/dist/tools/utils/content-shaping.js +52 -0
  93. package/dist/tools/utils/content-transform.d.ts +2 -17
  94. package/dist/tools/utils/content-transform.js +120 -114
  95. package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
  96. package/dist/tools/utils/fetch-pipeline.js +65 -62
  97. package/dist/tools/utils/inline-content.d.ts +1 -2
  98. package/dist/tools/utils/inline-content.js +4 -7
  99. package/dist/transformers/markdown.transformer.js +109 -34
  100. package/dist/utils/cached-payload.d.ts +7 -0
  101. package/dist/utils/cached-payload.js +36 -0
  102. package/dist/utils/error-utils.js +1 -1
  103. package/dist/utils/filename-generator.js +21 -10
  104. package/dist/utils/guards.d.ts +1 -0
  105. package/dist/utils/guards.js +3 -0
  106. package/dist/utils/header-normalizer.d.ts +0 -3
  107. package/dist/utils/header-normalizer.js +3 -3
  108. package/dist/utils/tool-error-handler.d.ts +2 -2
  109. package/dist/utils/tool-error-handler.js +11 -38
  110. package/dist/utils/url-transformer.d.ts +7 -0
  111. package/dist/utils/url-transformer.js +147 -0
  112. package/dist/utils/url-validator.d.ts +1 -2
  113. package/dist/utils/url-validator.js +20 -93
  114. package/dist/workers/content-transform.worker.d.ts +1 -0
  115. package/dist/workers/content-transform.worker.js +40 -0
  116. package/package.json +13 -16
@@ -1,167 +1,215 @@
1
- import os from 'node:os';
2
- import { isMainThread, Worker } from 'node:worker_threads';
1
+ import { Worker } from 'node:worker_threads';
3
2
  import { config } from '../config/index.js';
4
3
  import { getErrorMessage } from '../utils/error-utils.js';
5
4
  import { logWarn } from './logger.js';
6
- const MAX_POOL_SIZE = 4;
7
- function resolvePoolSize() {
8
- const available = os.availableParallelism();
9
- return Math.max(1, Math.min(available - 1, MAX_POOL_SIZE));
10
- }
11
- let pool = null;
12
- let poolDisabled = false;
13
- function shouldUseWorkers() {
14
- return isMainThread && config.runtime.httpMode && !poolDisabled;
15
- }
16
- function getWorkerUrl() {
17
- return new URL('../workers/transform-worker.js', import.meta.url);
18
- }
19
- export async function runTransformInWorker(job) {
20
- if (!shouldUseWorkers())
21
- return null;
22
- if (!pool) {
23
- try {
24
- pool = new TransformWorkerPool(getWorkerUrl(), resolvePoolSize());
25
- }
26
- catch (error) {
27
- poolDisabled = true;
28
- logWarn('Failed to initialize transform worker pool', {
29
- error: getErrorMessage(error),
30
- });
31
- return null;
32
- }
33
- }
34
- try {
35
- return await pool.run(job);
36
- }
37
- catch (error) {
38
- poolDisabled = true;
39
- pool.destroy();
40
- pool = null;
41
- logWarn('Transform worker failed; falling back to main thread', {
42
- error: getErrorMessage(error),
43
- });
44
- return null;
45
- }
46
- }
47
- export function destroyTransformWorkers() {
48
- pool?.destroy();
49
- pool = null;
50
- }
5
+ import { isWorkerResponse } from './transform-worker-types.js';
51
6
  class TransformWorkerPool {
52
7
  workerUrl;
53
- size;
54
- workers = [];
8
+ slots = [];
55
9
  queue = [];
56
- pending = new Map();
57
10
  nextId = 1;
58
11
  destroyed = false;
59
12
  constructor(workerUrl, size) {
60
13
  this.workerUrl = workerUrl;
61
- this.size = size;
62
14
  for (let i = 0; i < size; i += 1) {
63
- this.workers.push(this.createWorker());
15
+ this.slots.push(this.spawnWorker());
64
16
  }
65
17
  }
66
- run(job) {
18
+ run(request, signal) {
67
19
  if (this.destroyed) {
68
- return Promise.reject(new Error('Transform worker pool is closed'));
20
+ return Promise.reject(new Error('Worker pool is shut down'));
69
21
  }
70
- const id = this.nextId++;
71
- const queuedJob = { ...job, id };
72
22
  return new Promise((resolve, reject) => {
73
- this.pending.set(id, { resolve, reject });
74
- this.queue.push(queuedJob);
75
- this.schedule();
23
+ if (signal?.aborted) {
24
+ reject(new Error('Aborted'));
25
+ return;
26
+ }
27
+ const task = this.createTask(request, resolve, reject, signal);
28
+ this.attachAbortHandler(task, signal);
29
+ this.enqueueTask(task);
76
30
  });
77
31
  }
78
- destroy() {
32
+ async destroy() {
79
33
  if (this.destroyed)
80
34
  return;
81
35
  this.destroyed = true;
82
- for (const workerState of this.workers) {
83
- void workerState.worker.terminate();
36
+ const pending = this.queue.splice(0);
37
+ for (const task of pending) {
38
+ this.cleanupTask(task);
39
+ task.reject(new Error('Worker pool shutting down'));
84
40
  }
85
- for (const [id, pending] of this.pending.entries()) {
86
- pending.reject(new Error('Transform worker pool shut down'));
87
- this.pending.delete(id);
41
+ for (const slot of this.slots) {
42
+ if (slot.current) {
43
+ const task = slot.current;
44
+ slot.current = undefined;
45
+ slot.busy = false;
46
+ this.cleanupTask(task);
47
+ task.reject(new Error('Worker pool shutting down'));
48
+ }
88
49
  }
89
- this.queue.length = 0;
90
- }
91
- createWorker() {
92
- const worker = new Worker(this.workerUrl);
93
- worker.unref();
94
- const state = { worker, busy: false, currentJobId: undefined };
95
- worker.on('message', (message) => {
96
- this.handleMessage(state, message);
97
- });
98
- worker.on('error', (error) => {
99
- this.handleWorkerError(state, error);
50
+ await Promise.allSettled(this.slots.map((slot) => slot.worker.terminate()));
51
+ this.slots = [];
52
+ }
53
+ dispatch() {
54
+ if (this.destroyed)
55
+ return;
56
+ const idle = this.slots.find((slot) => !slot.busy);
57
+ if (!idle)
58
+ return;
59
+ const task = this.queue.shift();
60
+ if (!task)
61
+ return;
62
+ task.status = 'running';
63
+ idle.busy = true;
64
+ idle.current = task;
65
+ try {
66
+ idle.worker.postMessage(task.request);
67
+ }
68
+ catch (error) {
69
+ this.failTask(idle, error);
70
+ }
71
+ }
72
+ createTask(request, resolve, reject, signal) {
73
+ const id = this.nextId;
74
+ this.nextId += 1;
75
+ return {
76
+ id,
77
+ request: { ...request, id },
78
+ resolve,
79
+ reject,
80
+ signal,
81
+ abortHandler: undefined,
82
+ status: 'queued',
83
+ };
84
+ }
85
+ attachAbortHandler(task, signal) {
86
+ if (!signal)
87
+ return;
88
+ const onAbort = () => {
89
+ if (task.status === 'queued') {
90
+ this.removeQueuedTask(task);
91
+ task.reject(new Error('Aborted'));
92
+ return;
93
+ }
94
+ this.abortRunningTask(task);
95
+ };
96
+ task.abortHandler = onAbort;
97
+ signal.addEventListener('abort', onAbort, { once: true });
98
+ }
99
+ enqueueTask(task) {
100
+ this.queue.push(task);
101
+ this.dispatch();
102
+ }
103
+ attachWorker(slot) {
104
+ slot.worker.on('message', (message) => {
105
+ this.handleMessage(slot, message);
100
106
  });
101
- worker.on('exit', (code) => {
102
- this.handleWorkerExit(state, code);
107
+ slot.worker.on('error', (error) => {
108
+ this.handleWorkerFailure(slot, error);
103
109
  });
104
- return state;
105
- }
106
- handleMessage(state, message) {
107
- const pending = this.pending.get(message.id);
108
- if (pending) {
109
- this.pending.delete(message.id);
110
- if (message.ok) {
111
- pending.resolve(message.result);
110
+ slot.worker.on('exit', (code) => {
111
+ if (code !== 0) {
112
+ this.handleWorkerFailure(slot, new Error(`Worker exited with code ${code}`));
112
113
  }
113
- else {
114
- pending.reject(new Error(message.error));
115
- }
116
- }
117
- state.busy = false;
118
- state.currentJobId = undefined;
119
- this.schedule();
114
+ });
120
115
  }
121
- handleWorkerError(state, error) {
122
- this.failCurrentJob(state, error);
123
- this.replaceWorker(state);
116
+ spawnWorker() {
117
+ const slot = {
118
+ worker: new Worker(this.workerUrl),
119
+ busy: false,
120
+ current: undefined,
121
+ };
122
+ this.attachWorker(slot);
123
+ return slot;
124
124
  }
125
- handleWorkerExit(state, code) {
126
- if (code !== 0) {
127
- this.failCurrentJob(state, new Error(`Transform worker exited with code ${code}`));
125
+ handleMessage(slot, message) {
126
+ const task = slot.current;
127
+ if (!task)
128
+ return;
129
+ if (!isWorkerResponse(message) || message.id !== task.id) {
130
+ this.handleWorkerFailure(slot, new Error('Unexpected worker response'));
131
+ return;
132
+ }
133
+ slot.current = undefined;
134
+ slot.busy = false;
135
+ this.cleanupTask(task);
136
+ if (message.ok) {
137
+ task.resolve(message.result);
128
138
  }
129
- this.replaceWorker(state);
139
+ else {
140
+ task.reject(new Error(message.error));
141
+ }
142
+ this.dispatch();
130
143
  }
131
- failCurrentJob(state, error) {
132
- if (!state.currentJobId)
133
- return;
134
- const pending = this.pending.get(state.currentJobId);
135
- if (pending) {
136
- pending.reject(error);
137
- this.pending.delete(state.currentJobId);
144
+ handleWorkerFailure(slot, error) {
145
+ const task = slot.current;
146
+ if (task) {
147
+ slot.current = undefined;
148
+ slot.busy = false;
149
+ this.cleanupTask(task);
150
+ task.reject(error instanceof Error ? error : new Error(getErrorMessage(error)));
138
151
  }
139
- state.currentJobId = undefined;
140
- state.busy = false;
152
+ logWarn('Worker thread failure', {
153
+ error: getErrorMessage(error),
154
+ });
155
+ this.replaceWorker(slot);
156
+ this.dispatch();
141
157
  }
142
- replaceWorker(state) {
143
- if (this.destroyed)
144
- return;
145
- const index = this.workers.indexOf(state);
146
- if (index === -1)
158
+ replaceWorker(slot) {
159
+ try {
160
+ void slot.worker.terminate();
161
+ }
162
+ catch {
163
+ // Best-effort cleanup.
164
+ }
165
+ slot.worker = new Worker(this.workerUrl);
166
+ slot.busy = false;
167
+ slot.current = undefined;
168
+ this.attachWorker(slot);
169
+ }
170
+ failTask(slot, error) {
171
+ const task = slot.current;
172
+ if (!task)
147
173
  return;
148
- this.workers[index] = this.createWorker();
149
- this.schedule();
174
+ slot.current = undefined;
175
+ slot.busy = false;
176
+ this.cleanupTask(task);
177
+ task.reject(error instanceof Error ? error : new Error(String(error)));
178
+ this.dispatch();
150
179
  }
151
- schedule() {
152
- if (this.destroyed)
180
+ abortRunningTask(task) {
181
+ const slot = this.slots.find((s) => s.current?.id === task.id);
182
+ if (!slot)
153
183
  return;
154
- for (const workerState of this.workers) {
155
- if (this.queue.length === 0)
156
- return;
157
- if (workerState.busy)
158
- continue;
159
- const job = this.queue.shift();
160
- if (!job)
161
- return;
162
- workerState.busy = true;
163
- workerState.currentJobId = job.id;
164
- workerState.worker.postMessage(job);
184
+ this.handleWorkerFailure(slot, new Error('Aborted'));
185
+ }
186
+ removeQueuedTask(task) {
187
+ const index = this.queue.findIndex((queued) => queued.id === task.id);
188
+ if (index >= 0) {
189
+ this.queue.splice(index, 1);
165
190
  }
191
+ this.cleanupTask(task);
166
192
  }
193
+ cleanupTask(task) {
194
+ if (task.signal && task.abortHandler) {
195
+ task.signal.removeEventListener('abort', task.abortHandler);
196
+ }
197
+ }
198
+ }
199
+ let pool = null;
200
+ function getPool() {
201
+ if (pool)
202
+ return pool;
203
+ pool = new TransformWorkerPool(new URL('../workers/content-transform.worker.js', import.meta.url), config.workers.poolSize);
204
+ return pool;
205
+ }
206
+ export async function transformInWorker(request, signal) {
207
+ return getPool().run(request, signal);
208
+ }
209
+ export async function destroyTransformWorkers() {
210
+ if (!pool)
211
+ return;
212
+ const current = pool;
213
+ pool = null;
214
+ await current.destroy();
167
215
  }
@@ -0,0 +1,32 @@
1
+ import type { Worker } from 'node:worker_threads';
2
+ import type { MarkdownTransformResult, TransformOptions } from '../config/types/content.js';
3
+ export interface WorkerTransformRequest {
4
+ id: number;
5
+ html: string;
6
+ url: string;
7
+ options: TransformOptions;
8
+ }
9
+ export type WorkerTransformResponse = {
10
+ id: number;
11
+ ok: true;
12
+ result: MarkdownTransformResult;
13
+ } | {
14
+ id: number;
15
+ ok: false;
16
+ error: string;
17
+ };
18
+ export interface TransformTask {
19
+ id: number;
20
+ request: WorkerTransformRequest;
21
+ resolve: (result: MarkdownTransformResult) => void;
22
+ reject: (error: Error) => void;
23
+ signal: AbortSignal | undefined;
24
+ abortHandler: (() => void) | undefined;
25
+ status: 'queued' | 'running';
26
+ }
27
+ export interface WorkerSlot {
28
+ worker: Worker;
29
+ busy: boolean;
30
+ current: TransformTask | undefined;
31
+ }
32
+ export declare function isWorkerResponse(value: unknown): value is WorkerTransformResponse;
@@ -0,0 +1,14 @@
1
+ import { isRecord } from '../utils/guards.js';
2
+ export function isWorkerResponse(value) {
3
+ if (!isRecord(value))
4
+ return false;
5
+ if (typeof value.id !== 'number')
6
+ return false;
7
+ if (value.ok === true) {
8
+ return 'result' in value;
9
+ }
10
+ if (value.ok === false) {
11
+ return typeof value.error === 'string';
12
+ }
13
+ return false;
14
+ }
@@ -1,12 +1,11 @@
1
1
  import type { FetchMarkdownInput, ToolResponseBase } from '../../config/types/tools.js';
2
- import { transformHtmlToMarkdownAsync } from '../utils/content-transform-async.js';
2
+ import { transformHtmlToMarkdown } from '../utils/content-transform.js';
3
3
  import { performSharedFetch } from './fetch-single.shared.js';
4
4
  export declare const FETCH_MARKDOWN_TOOL_NAME = "fetch-markdown";
5
- export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter and content length limits";
5
+ export declare const FETCH_MARKDOWN_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format with optional frontmatter";
6
6
  interface FetchMarkdownDeps {
7
7
  readonly performSharedFetch?: typeof performSharedFetch;
8
- readonly transformHtmlToMarkdown?: typeof transformHtmlToMarkdownAsync;
8
+ readonly transformHtmlToMarkdown?: typeof transformHtmlToMarkdown;
9
9
  }
10
10
  export declare function createFetchMarkdownToolHandler(deps?: FetchMarkdownDeps): (input: FetchMarkdownInput) => Promise<ToolResponseBase>;
11
- export declare const fetchMarkdownToolHandler: (input: FetchMarkdownInput) => Promise<ToolResponseBase>;
12
11
  export {};
@@ -1,10 +1,10 @@
1
1
  import { config } from '../../config/index.js';
2
2
  import { logDebug, logError } from '../../services/logger.js';
3
3
  import { createToolErrorResponse, handleToolError, } from '../../utils/tool-error-handler.js';
4
- import { transformHtmlToMarkdownAsync } from '../utils/content-transform-async.js';
5
- import { applyInlineResultToStructuredContent, buildToolContentBlocks, getFileDownloadInfo, getInlineErrorResponse, performSharedFetch, } from './fetch-single.shared.js';
4
+ import { transformHtmlToMarkdown } from '../utils/content-transform.js';
5
+ import { buildToolContentBlocks, performSharedFetch, } from './fetch-single.shared.js';
6
6
  export const FETCH_MARKDOWN_TOOL_NAME = 'fetch-markdown';
7
- export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter and content length limits';
7
+ export const FETCH_MARKDOWN_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format with optional frontmatter';
8
8
  function isRecord(value) {
9
9
  return value !== null && typeof value === 'object';
10
10
  }
@@ -13,22 +13,18 @@ function deserializeMarkdownPipelineResult(cached) {
13
13
  const parsed = JSON.parse(cached);
14
14
  if (!isRecord(parsed))
15
15
  return undefined;
16
- const { content, markdown, title, truncated } = parsed;
16
+ const { content, markdown, title } = parsed;
17
17
  if (typeof content !== 'string')
18
18
  return undefined;
19
19
  if (typeof markdown !== 'string')
20
20
  return undefined;
21
21
  if (title !== undefined && typeof title !== 'string')
22
22
  return undefined;
23
- if (truncated !== undefined && typeof truncated !== 'boolean') {
24
- return undefined;
25
- }
26
- const resolvedTitle = typeof title === 'string' ? title : undefined;
27
23
  return {
28
24
  content,
29
25
  markdown,
30
- title: resolvedTitle,
31
- truncated: truncated ?? false,
26
+ title: typeof title === 'string' ? title : undefined,
27
+ truncated: false,
32
28
  };
33
29
  }
34
30
  catch {
@@ -37,35 +33,15 @@ function deserializeMarkdownPipelineResult(cached) {
37
33
  }
38
34
  function resolveMarkdownOptions(input) {
39
35
  return {
40
- extractMainContent: input.extractMainContent ?? config.extraction.extractMainContent,
41
36
  includeMetadata: input.includeMetadata ?? config.extraction.includeMetadata,
42
- ...(input.maxContentLength !== undefined && {
43
- maxContentLength: input.maxContentLength,
44
- }),
45
37
  };
46
38
  }
47
- function buildFetchMarkdownErrorDetails() {
39
+ function buildMarkdownStructuredContent(pipeline, inlineResult) {
48
40
  return {
49
- fetchedAt: new Date().toISOString(),
50
- cached: false,
51
- };
52
- }
53
- function buildMarkdownStructuredContent(pipeline, inlineResult, fileDownload) {
54
- const structuredContent = {
55
41
  url: pipeline.url,
56
42
  title: pipeline.data.title,
57
- fetchedAt: pipeline.fetchedAt,
58
- contentSize: inlineResult.contentSize,
59
- cached: pipeline.fromCache,
43
+ markdown: inlineResult.content,
60
44
  };
61
- if (pipeline.data.truncated || inlineResult.truncated) {
62
- structuredContent.truncated = true;
63
- }
64
- applyInlineResultToStructuredContent(structuredContent, inlineResult, 'markdown');
65
- if (fileDownload) {
66
- structuredContent.file = fileDownload;
67
- }
68
- return structuredContent;
69
45
  }
70
46
  function logFetchMarkdownStart(url, options) {
71
47
  logDebug('Fetching markdown', { url, ...options });
@@ -76,27 +52,18 @@ function buildMarkdownTransform(options, transform) {
76
52
  return { ...markdownResult, content: markdownResult.markdown };
77
53
  };
78
54
  }
79
- async function fetchMarkdownPipeline(url, input, options, transformOptions, performSharedFetchImpl, transformImpl) {
55
+ async function fetchMarkdownPipeline(url, options, performSharedFetchImpl, transformImpl) {
80
56
  const sharedOptions = {
81
57
  url,
82
58
  format: 'markdown',
83
- extractMainContent: options.extractMainContent,
84
59
  includeMetadata: options.includeMetadata,
85
- ...(options.maxContentLength !== undefined && {
86
- maxContentLength: options.maxContentLength,
87
- }),
88
- ...(input.customHeaders !== undefined && {
89
- customHeaders: input.customHeaders,
90
- }),
91
- ...(input.retries !== undefined && { retries: input.retries }),
92
- ...(input.timeout !== undefined && { timeout: input.timeout }),
93
- transform: buildMarkdownTransform(transformOptions, transformImpl),
60
+ transform: buildMarkdownTransform(options, transformImpl),
94
61
  deserialize: deserializeMarkdownPipelineResult,
95
62
  };
96
63
  return performSharedFetchImpl(sharedOptions);
97
64
  }
98
- function buildMarkdownResponse(pipeline, inlineResult, fileDownload) {
99
- const structuredContent = buildMarkdownStructuredContent(pipeline, inlineResult, fileDownload);
65
+ function buildMarkdownResponse(pipeline, inlineResult) {
66
+ const structuredContent = buildMarkdownStructuredContent(pipeline, inlineResult);
100
67
  return {
101
68
  content: buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched markdown', pipeline.cacheKey, pipeline.data.content, 'markdown', pipeline.url, pipeline.data.title),
102
69
  structuredContent,
@@ -104,46 +71,27 @@ function buildMarkdownResponse(pipeline, inlineResult, fileDownload) {
104
71
  }
105
72
  export function createFetchMarkdownToolHandler(deps = {}) {
106
73
  const performSharedFetchImpl = deps.performSharedFetch ?? performSharedFetch;
107
- const transformImpl = deps.transformHtmlToMarkdown ?? transformHtmlToMarkdownAsync;
74
+ const transformImpl = deps.transformHtmlToMarkdown ?? transformHtmlToMarkdown;
108
75
  return async (input) => {
109
76
  try {
110
77
  return await executeFetchMarkdown(input, performSharedFetchImpl, transformImpl);
111
78
  }
112
79
  catch (error) {
113
80
  logError('fetch-markdown tool error', error instanceof Error ? error : undefined);
114
- const errorDetails = buildFetchMarkdownErrorDetails();
115
- return handleToolError(error, input.url, 'Failed to fetch markdown', errorDetails);
81
+ return handleToolError(error, input.url, 'Failed to fetch markdown');
116
82
  }
117
83
  };
118
84
  }
119
- export const fetchMarkdownToolHandler = createFetchMarkdownToolHandler();
120
85
  async function executeFetchMarkdown(input, performSharedFetchImpl, transformImpl) {
121
86
  const { url } = input;
122
87
  if (!url) {
123
- return createToolErrorResponse('URL is required', '', 'VALIDATION_ERROR', buildFetchMarkdownErrorDetails());
88
+ return createToolErrorResponse('URL is required', '');
124
89
  }
125
90
  const options = resolveMarkdownOptions(input);
126
- const transformOptions = { ...options };
127
- logFetchMarkdownStart(url, transformOptions);
128
- const { pipeline, inlineResult } = await fetchMarkdownPipeline(url, input, options, transformOptions, performSharedFetchImpl, transformImpl);
129
- const inlineError = getInlineErrorResponse(inlineResult, url, buildFetchMarkdownErrorDetails());
130
- if (inlineError)
131
- return inlineError;
132
- let fileDownload = null;
133
- if (inlineResult.resourceUri) {
134
- const downloadContext = {
135
- cacheKey: pipeline.cacheKey ?? null,
136
- url: pipeline.url,
137
- };
138
- if (pipeline.data.title !== undefined) {
139
- fileDownload = getFileDownloadInfo({
140
- ...downloadContext,
141
- title: pipeline.data.title,
142
- });
143
- }
144
- else {
145
- fileDownload = getFileDownloadInfo(downloadContext);
146
- }
91
+ logFetchMarkdownStart(url, options);
92
+ const { pipeline, inlineResult } = await fetchMarkdownPipeline(url, options, performSharedFetchImpl, transformImpl);
93
+ if (inlineResult.error) {
94
+ return createToolErrorResponse(inlineResult.error, url);
147
95
  }
148
- return buildMarkdownResponse(pipeline, inlineResult, fileDownload);
96
+ return buildMarkdownResponse(pipeline, inlineResult);
149
97
  }
@@ -1,21 +1,10 @@
1
1
  import type { PipelineResult, ToolContentBlock } from '../../config/types/runtime.js';
2
- import type { FileDownloadInfo, ToolResponseBase } from '../../config/types/tools.js';
3
2
  import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
4
3
  import { applyInlineContentLimit } from '../utils/inline-content.js';
5
- type SharedFetchFormat = 'jsonl' | 'markdown';
6
4
  interface SharedFetchOptions<T extends {
7
5
  content: string;
8
6
  }> {
9
7
  readonly url: string;
10
- readonly format: SharedFetchFormat;
11
- readonly extractMainContent: boolean;
12
- readonly includeMetadata: boolean;
13
- readonly maxContentLength?: number;
14
- readonly includeContentBlocks?: boolean;
15
- readonly cacheVariant?: string;
16
- readonly customHeaders?: Record<string, string>;
17
- readonly retries?: number;
18
- readonly timeout?: number;
19
8
  readonly transform: (html: string, normalizedUrl: string) => T | Promise<T>;
20
9
  readonly serialize?: (result: T) => string;
21
10
  readonly deserialize?: (cached: string) => T | undefined;
@@ -30,13 +19,5 @@ export declare function performSharedFetch<T extends {
30
19
  inlineResult: ReturnType<typeof applyInlineContentLimit>;
31
20
  }>;
32
21
  export type InlineResult = ReturnType<typeof applyInlineContentLimit>;
33
- interface DownloadContext {
34
- cacheKey: string | null;
35
- url: string;
36
- title?: string;
37
- }
38
- export declare function getFileDownloadInfo(context: DownloadContext): FileDownloadInfo | null;
39
- export declare function getInlineErrorResponse(inlineResult: InlineResult, url: string, details?: Record<string, unknown>): ToolResponseBase | null;
40
- export declare function applyInlineResultToStructuredContent(structuredContent: Record<string, unknown>, inlineResult: InlineResult, contentKey: string): void;
41
- export declare function buildToolContentBlocks(structuredContent: Record<string, unknown>, fromCache: boolean, inlineResult: InlineResult, resourceName: string, cacheKey?: string | null, fullContent?: string, format?: SharedFetchFormat, url?: string, title?: string): ToolContentBlock[];
22
+ export declare function buildToolContentBlocks(structuredContent: Record<string, unknown>, fromCache: boolean, inlineResult: InlineResult, resourceName: string, cacheKey?: string | null, fullContent?: string, url?: string, title?: string): ToolContentBlock[];
42
23
  export {};