@j0hanz/superfetch 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/README.md +139 -46
  2. package/dist/cache.d.ts +42 -0
  3. package/dist/cache.js +565 -0
  4. package/dist/config/env-parsers.d.ts +1 -0
  5. package/dist/config/env-parsers.js +12 -0
  6. package/dist/config/index.d.ts +7 -0
  7. package/dist/config/index.js +20 -8
  8. package/dist/config/types/content.d.ts +1 -0
  9. package/dist/config.d.ts +77 -0
  10. package/dist/config.js +261 -0
  11. package/dist/crypto.d.ts +2 -0
  12. package/dist/crypto.js +32 -0
  13. package/dist/errors.d.ts +10 -0
  14. package/dist/errors.js +28 -0
  15. package/dist/fetch.d.ts +40 -0
  16. package/dist/fetch.js +910 -0
  17. package/dist/http/auth.js +161 -2
  18. package/dist/http/base-middleware.d.ts +7 -0
  19. package/dist/http/base-middleware.js +143 -0
  20. package/dist/http/cors.d.ts +0 -5
  21. package/dist/http/cors.js +0 -6
  22. package/dist/http/download-routes.js +6 -2
  23. package/dist/http/error-handler.d.ts +2 -0
  24. package/dist/http/error-handler.js +55 -0
  25. package/dist/http/host-allowlist.d.ts +3 -0
  26. package/dist/http/host-allowlist.js +117 -0
  27. package/dist/http/mcp-routes.d.ts +8 -2
  28. package/dist/http/mcp-routes.js +101 -8
  29. package/dist/http/mcp-session-eviction.d.ts +3 -0
  30. package/dist/http/mcp-session-eviction.js +24 -0
  31. package/dist/http/mcp-session-init.d.ts +7 -0
  32. package/dist/http/mcp-session-init.js +94 -0
  33. package/dist/http/mcp-session-slots.d.ts +17 -0
  34. package/dist/http/mcp-session-slots.js +55 -0
  35. package/dist/http/mcp-session-transport-init.d.ts +7 -0
  36. package/dist/http/mcp-session-transport-init.js +41 -0
  37. package/dist/http/mcp-session-types.d.ts +5 -0
  38. package/dist/http/mcp-session-types.js +1 -0
  39. package/dist/http/mcp-session.d.ts +9 -9
  40. package/dist/http/mcp-session.js +5 -114
  41. package/dist/http/mcp-sessions.d.ts +41 -0
  42. package/dist/http/mcp-sessions.js +392 -0
  43. package/dist/http/rate-limit.js +2 -2
  44. package/dist/http/server-middleware.d.ts +6 -1
  45. package/dist/http/server-middleware.js +3 -117
  46. package/dist/http/server-shutdown.js +1 -1
  47. package/dist/http/server-tuning.d.ts +9 -0
  48. package/dist/http/server-tuning.js +45 -0
  49. package/dist/http/server.js +206 -9
  50. package/dist/http/session-cleanup.js +8 -5
  51. package/dist/http.d.ts +78 -0
  52. package/dist/http.js +1437 -0
  53. package/dist/index.js +3 -3
  54. package/dist/mcp.d.ts +3 -0
  55. package/dist/mcp.js +94 -0
  56. package/dist/middleware/error-handler.d.ts +1 -1
  57. package/dist/middleware/error-handler.js +31 -30
  58. package/dist/observability.d.ts +16 -0
  59. package/dist/observability.js +78 -0
  60. package/dist/resources/cached-content-params.d.ts +5 -0
  61. package/dist/resources/cached-content-params.js +36 -0
  62. package/dist/resources/cached-content.js +33 -33
  63. package/dist/server.js +21 -6
  64. package/dist/services/cache-events.d.ts +8 -0
  65. package/dist/services/cache-events.js +19 -0
  66. package/dist/services/cache.d.ts +5 -4
  67. package/dist/services/cache.js +49 -45
  68. package/dist/services/context.d.ts +2 -0
  69. package/dist/services/context.js +3 -0
  70. package/dist/services/extractor.d.ts +1 -0
  71. package/dist/services/extractor.js +77 -40
  72. package/dist/services/fetcher/agents.js +1 -1
  73. package/dist/services/fetcher/dns-selection.js +1 -1
  74. package/dist/services/fetcher/interceptors.js +29 -60
  75. package/dist/services/fetcher/redirects.js +12 -4
  76. package/dist/services/fetcher/response.js +18 -8
  77. package/dist/services/fetcher.d.ts +23 -0
  78. package/dist/services/fetcher.js +553 -13
  79. package/dist/services/logger.js +4 -1
  80. package/dist/services/telemetry.d.ts +19 -0
  81. package/dist/services/telemetry.js +43 -0
  82. package/dist/services/transform-worker-pool.d.ts +10 -3
  83. package/dist/services/transform-worker-pool.js +213 -184
  84. package/dist/tools/handlers/fetch-single.shared.d.ts +11 -3
  85. package/dist/tools/handlers/fetch-single.shared.js +131 -2
  86. package/dist/tools/handlers/fetch-url.tool.d.ts +6 -0
  87. package/dist/tools/handlers/fetch-url.tool.js +56 -12
  88. package/dist/tools/index.d.ts +1 -0
  89. package/dist/tools/index.js +13 -1
  90. package/dist/tools/schemas.d.ts +2 -0
  91. package/dist/tools/schemas.js +8 -0
  92. package/dist/tools/utils/content-shaping.js +19 -4
  93. package/dist/tools/utils/content-transform-core.d.ts +5 -0
  94. package/dist/tools/utils/content-transform-core.js +180 -0
  95. package/dist/tools/utils/content-transform-workers.d.ts +1 -0
  96. package/dist/tools/utils/content-transform-workers.js +1 -0
  97. package/dist/tools/utils/content-transform.d.ts +2 -1
  98. package/dist/tools/utils/content-transform.js +37 -136
  99. package/dist/tools/utils/fetch-pipeline.js +47 -56
  100. package/dist/tools/utils/frontmatter.d.ts +3 -0
  101. package/dist/tools/utils/frontmatter.js +73 -0
  102. package/dist/tools/utils/markdown-heuristics.d.ts +1 -0
  103. package/dist/tools/utils/markdown-heuristics.js +19 -0
  104. package/dist/tools/utils/markdown-signals.d.ts +1 -0
  105. package/dist/tools/utils/markdown-signals.js +19 -0
  106. package/dist/tools/utils/raw-markdown-frontmatter.d.ts +3 -0
  107. package/dist/tools/utils/raw-markdown-frontmatter.js +73 -0
  108. package/dist/tools/utils/raw-markdown.d.ts +6 -0
  109. package/dist/tools/utils/raw-markdown.js +149 -0
  110. package/dist/tools.d.ts +104 -0
  111. package/dist/tools.js +421 -0
  112. package/dist/transform.d.ts +69 -0
  113. package/dist/transform.js +1509 -0
  114. package/dist/transformers/markdown/fenced-code-rule.d.ts +2 -0
  115. package/dist/transformers/markdown/fenced-code-rule.js +38 -0
  116. package/dist/transformers/markdown/frontmatter.d.ts +2 -0
  117. package/dist/transformers/markdown/frontmatter.js +45 -0
  118. package/dist/transformers/markdown/noise-rule.d.ts +2 -0
  119. package/dist/transformers/markdown/noise-rule.js +80 -0
  120. package/dist/transformers/markdown/turndown-instance.d.ts +2 -0
  121. package/dist/transformers/markdown/turndown-instance.js +19 -0
  122. package/dist/transformers/markdown.d.ts +5 -0
  123. package/dist/transformers/markdown.js +314 -0
  124. package/dist/transformers/markdown.transformer.js +2 -189
  125. package/dist/utils/cancellation.d.ts +1 -0
  126. package/dist/utils/cancellation.js +18 -0
  127. package/dist/utils/code-language-bash.d.ts +1 -0
  128. package/dist/utils/code-language-bash.js +48 -0
  129. package/dist/utils/code-language-core.d.ts +2 -0
  130. package/dist/utils/code-language-core.js +13 -0
  131. package/dist/utils/code-language-detectors.d.ts +5 -0
  132. package/dist/utils/code-language-detectors.js +142 -0
  133. package/dist/utils/code-language-helpers.d.ts +5 -0
  134. package/dist/utils/code-language-helpers.js +62 -0
  135. package/dist/utils/code-language-parsing.d.ts +5 -0
  136. package/dist/utils/code-language-parsing.js +62 -0
  137. package/dist/utils/code-language.js +250 -46
  138. package/dist/utils/error-details.d.ts +3 -0
  139. package/dist/utils/error-details.js +12 -0
  140. package/dist/utils/filename-generator.js +14 -3
  141. package/dist/utils/host-normalizer.d.ts +1 -0
  142. package/dist/utils/host-normalizer.js +37 -0
  143. package/dist/utils/ip-address.d.ts +4 -0
  144. package/dist/utils/ip-address.js +6 -0
  145. package/dist/utils/tool-error-handler.js +12 -17
  146. package/dist/utils/url-redactor.d.ts +1 -0
  147. package/dist/utils/url-redactor.js +13 -0
  148. package/dist/utils/url-validator.js +35 -20
  149. package/dist/workers/transform-worker.js +82 -38
  150. package/package.json +13 -10
@@ -1,215 +1,244 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import os from 'node:os';
1
3
  import { Worker } from 'node:worker_threads';
2
- import { config } from '../config/index.js';
3
- import { getErrorMessage } from '../utils/error-utils.js';
4
- import { logWarn } from './logger.js';
5
- import { isWorkerResponse } from './transform-worker-types.js';
6
- class TransformWorkerPool {
7
- workerUrl;
8
- slots = [];
4
+ import { FetchError } from '../errors/app-error.js';
5
+ import { getErrorMessage } from '../utils/error-details.js';
6
+ let pool = null;
7
+ function resolveDefaultWorkerCount() {
8
+ const parallelism = typeof os.availableParallelism === 'function'
9
+ ? os.availableParallelism()
10
+ : os.cpus().length;
11
+ // Leave 1 core for the event loop; cap to avoid runaway memory.
12
+ return Math.min(16, Math.max(1, parallelism - 1));
13
+ }
14
+ const DEFAULT_TIMEOUT_MS = 30000;
15
+ export function getOrCreateTransformWorkerPool() {
16
+ pool ??= new WorkerPool(resolveDefaultWorkerCount(), DEFAULT_TIMEOUT_MS);
17
+ return pool;
18
+ }
19
+ export async function shutdownTransformWorkerPool() {
20
+ if (!pool)
21
+ return;
22
+ await pool.close();
23
+ pool = null;
24
+ }
25
+ class WorkerPool {
26
+ workers = [];
9
27
  queue = [];
10
- nextId = 1;
11
- destroyed = false;
12
- constructor(workerUrl, size) {
13
- this.workerUrl = workerUrl;
14
- for (let i = 0; i < size; i += 1) {
15
- this.slots.push(this.spawnWorker());
16
- }
17
- }
18
- run(request, signal) {
19
- if (this.destroyed) {
20
- return Promise.reject(new Error('Worker pool is shut down'));
21
- }
22
- return new Promise((resolve, reject) => {
23
- if (signal?.aborted) {
24
- reject(new Error('Aborted'));
25
- return;
26
- }
27
- const task = this.createTask(request, resolve, reject, signal);
28
- this.attachAbortHandler(task, signal);
29
- this.enqueueTask(task);
30
- });
31
- }
32
- async destroy() {
33
- if (this.destroyed)
34
- return;
35
- this.destroyed = true;
36
- const pending = this.queue.splice(0);
37
- for (const task of pending) {
38
- this.cleanupTask(task);
39
- task.reject(new Error('Worker pool shutting down'));
40
- }
41
- for (const slot of this.slots) {
42
- if (slot.current) {
43
- const task = slot.current;
44
- slot.current = undefined;
45
- slot.busy = false;
46
- this.cleanupTask(task);
47
- task.reject(new Error('Worker pool shutting down'));
48
- }
49
- }
50
- await Promise.allSettled(this.slots.map((slot) => slot.worker.terminate()));
51
- this.slots = [];
52
- }
53
- dispatch() {
54
- if (this.destroyed)
55
- return;
56
- const idle = this.slots.find((slot) => !slot.busy);
57
- if (!idle)
58
- return;
59
- const task = this.queue.shift();
60
- if (!task)
61
- return;
62
- task.status = 'running';
63
- idle.busy = true;
64
- idle.current = task;
65
- try {
66
- idle.worker.postMessage(task.request);
67
- }
68
- catch (error) {
69
- this.failTask(idle, error);
28
+ inflight = new Map();
29
+ timeoutMs;
30
+ queueMax;
31
+ closed = false;
32
+ constructor(size, timeoutMs) {
33
+ const safeSize = Math.max(1, size);
34
+ this.timeoutMs = timeoutMs;
35
+ this.queueMax = safeSize * 2;
36
+ for (let index = 0; index < safeSize; index += 1) {
37
+ this.workers.push(this.spawnWorker(index));
70
38
  }
71
39
  }
72
- createTask(request, resolve, reject, signal) {
73
- const id = this.nextId;
74
- this.nextId += 1;
75
- return {
76
- id,
77
- request: { ...request, id },
78
- resolve,
79
- reject,
80
- signal,
81
- abortHandler: undefined,
82
- status: 'queued',
83
- };
84
- }
85
- attachAbortHandler(task, signal) {
86
- if (!signal)
87
- return;
88
- const onAbort = () => {
89
- if (task.status === 'queued') {
90
- this.removeQueuedTask(task);
91
- task.reject(new Error('Aborted'));
92
- return;
93
- }
94
- this.abortRunningTask(task);
40
+ spawnWorker(workerIndex) {
41
+ const worker = new Worker(new URL('../workers/transform-worker.js', import.meta.url));
42
+ // Workers must not keep the process alive by themselves.
43
+ worker.unref();
44
+ const slot = {
45
+ worker,
46
+ busy: false,
47
+ currentTaskId: null,
95
48
  };
96
- task.abortHandler = onAbort;
97
- signal.addEventListener('abort', onAbort, { once: true });
98
- }
99
- enqueueTask(task) {
100
- this.queue.push(task);
101
- this.dispatch();
102
- }
103
- attachWorker(slot) {
104
- slot.worker.on('message', (message) => {
105
- this.handleMessage(slot, message);
49
+ worker.on('message', (raw) => {
50
+ this.onWorkerMessage(workerIndex, raw);
106
51
  });
107
- slot.worker.on('error', (error) => {
108
- this.handleWorkerFailure(slot, error);
52
+ worker.on('error', (error) => {
53
+ this.onWorkerBroken(workerIndex, `Transform worker error: ${getErrorMessage(error)}`);
109
54
  });
110
- slot.worker.on('exit', (code) => {
111
- if (code !== 0) {
112
- this.handleWorkerFailure(slot, new Error(`Worker exited with code ${code}`));
113
- }
55
+ worker.on('exit', (code) => {
56
+ this.onWorkerBroken(workerIndex, `Transform worker exited (code ${code})`);
114
57
  });
115
- }
116
- spawnWorker() {
117
- const slot = {
118
- worker: new Worker(this.workerUrl),
119
- busy: false,
120
- current: undefined,
121
- };
122
- this.attachWorker(slot);
123
58
  return slot;
124
59
  }
125
- handleMessage(slot, message) {
126
- const task = slot.current;
127
- if (!task)
60
+ onWorkerBroken(workerIndex, message) {
61
+ if (this.closed)
128
62
  return;
129
- if (!isWorkerResponse(message) || message.id !== task.id) {
130
- this.handleWorkerFailure(slot, new Error('Unexpected worker response'));
63
+ const slot = this.workers[workerIndex];
64
+ if (!slot)
65
+ return;
66
+ if (slot.busy && slot.currentTaskId) {
67
+ this.failTask(slot.currentTaskId, new Error(message));
68
+ }
69
+ void slot.worker.terminate();
70
+ this.workers[workerIndex] = this.spawnWorker(workerIndex);
71
+ this.drainQueue();
72
+ }
73
+ onWorkerMessage(workerIndex, raw) {
74
+ if (!raw ||
75
+ typeof raw !== 'object' ||
76
+ !('type' in raw) ||
77
+ !('id' in raw) ||
78
+ typeof raw.id !== 'string' ||
79
+ typeof raw.type !== 'string') {
80
+ return;
81
+ }
82
+ const message = raw;
83
+ const inflight = this.inflight.get(message.id);
84
+ if (!inflight)
131
85
  return;
86
+ clearTimeout(inflight.timer);
87
+ if (inflight.signal && inflight.abortListener) {
88
+ inflight.signal.removeEventListener('abort', inflight.abortListener);
132
89
  }
133
- slot.current = undefined;
134
- slot.busy = false;
135
- this.cleanupTask(task);
136
- if (message.ok) {
137
- task.resolve(message.result);
90
+ this.inflight.delete(message.id);
91
+ const slot = this.workers[workerIndex];
92
+ if (slot) {
93
+ slot.busy = false;
94
+ slot.currentTaskId = null;
95
+ }
96
+ if (message.type === 'result') {
97
+ inflight.resolve(message.result);
138
98
  }
139
99
  else {
140
- task.reject(new Error(message.error));
100
+ const { error } = message;
101
+ if (error.name === 'FetchError') {
102
+ inflight.reject(new FetchError(error.message, error.url, error.statusCode, error.details ?? {}));
103
+ }
104
+ else {
105
+ inflight.reject(new Error(error.message));
106
+ }
141
107
  }
142
- this.dispatch();
108
+ this.drainQueue();
143
109
  }
144
- handleWorkerFailure(slot, error) {
145
- const task = slot.current;
146
- if (task) {
147
- slot.current = undefined;
110
+ failTask(id, error) {
111
+ const inflight = this.inflight.get(id);
112
+ if (!inflight)
113
+ return;
114
+ clearTimeout(inflight.timer);
115
+ if (inflight.signal && inflight.abortListener) {
116
+ inflight.signal.removeEventListener('abort', inflight.abortListener);
117
+ }
118
+ this.inflight.delete(id);
119
+ inflight.reject(error);
120
+ const slot = this.workers[inflight.workerIndex];
121
+ if (slot) {
148
122
  slot.busy = false;
149
- this.cleanupTask(task);
150
- task.reject(error instanceof Error ? error : new Error(getErrorMessage(error)));
123
+ slot.currentTaskId = null;
151
124
  }
152
- logWarn('Worker thread failure', {
153
- error: getErrorMessage(error),
154
- });
155
- this.replaceWorker(slot);
156
- this.dispatch();
157
125
  }
158
- replaceWorker(slot) {
159
- try {
160
- void slot.worker.terminate();
126
+ async transform(html, url, options) {
127
+ if (this.closed) {
128
+ throw new Error('Transform worker pool closed');
161
129
  }
162
- catch {
163
- // Best-effort cleanup.
130
+ if (this.queue.length >= this.queueMax) {
131
+ throw new Error('Transform worker queue is full');
164
132
  }
165
- slot.worker = new Worker(this.workerUrl);
166
- slot.busy = false;
167
- slot.current = undefined;
168
- this.attachWorker(slot);
133
+ return new Promise((resolve, reject) => {
134
+ this.queue.push({
135
+ id: randomUUID(),
136
+ html,
137
+ url,
138
+ includeMetadata: options.includeMetadata,
139
+ signal: options.signal,
140
+ resolve,
141
+ reject,
142
+ });
143
+ this.drainQueue();
144
+ });
169
145
  }
170
- failTask(slot, error) {
171
- const task = slot.current;
172
- if (!task)
146
+ drainQueue() {
147
+ if (this.queue.length === 0)
173
148
  return;
174
- slot.current = undefined;
175
- slot.busy = false;
176
- this.cleanupTask(task);
177
- task.reject(error instanceof Error ? error : new Error(String(error)));
178
- this.dispatch();
149
+ for (let workerIndex = 0; workerIndex < this.workers.length; workerIndex += 1) {
150
+ const slot = this.workers[workerIndex];
151
+ if (!slot || slot.busy)
152
+ continue;
153
+ const task = this.queue.shift();
154
+ if (!task)
155
+ return;
156
+ this.dispatch(workerIndex, slot, task);
157
+ if (this.queue.length === 0)
158
+ return;
159
+ }
179
160
  }
180
- abortRunningTask(task) {
181
- const slot = this.slots.find((s) => s.current?.id === task.id);
182
- if (!slot)
161
+ dispatch(workerIndex, slot, task) {
162
+ if (task.signal?.aborted) {
163
+ task.reject(new FetchError('Request was canceled', task.url, 499, {
164
+ reason: 'aborted',
165
+ stage: 'transform:dispatch',
166
+ }));
183
167
  return;
184
- this.handleWorkerFailure(slot, new Error('Aborted'));
185
- }
186
- removeQueuedTask(task) {
187
- const index = this.queue.findIndex((queued) => queued.id === task.id);
188
- if (index >= 0) {
189
- this.queue.splice(index, 1);
190
168
  }
191
- this.cleanupTask(task);
169
+ slot.busy = true;
170
+ slot.currentTaskId = task.id;
171
+ const timer = setTimeout(() => {
172
+ try {
173
+ slot.worker.postMessage({ type: 'cancel', id: task.id });
174
+ }
175
+ catch {
176
+ // ignore
177
+ }
178
+ const inflight = this.inflight.get(task.id);
179
+ if (!inflight)
180
+ return;
181
+ clearTimeout(inflight.timer);
182
+ if (inflight.signal && inflight.abortListener) {
183
+ inflight.signal.removeEventListener('abort', inflight.abortListener);
184
+ }
185
+ this.inflight.delete(task.id);
186
+ inflight.reject(new FetchError('Request timeout', task.url, 504, {
187
+ reason: 'timeout',
188
+ stage: 'transform:worker-timeout',
189
+ }));
190
+ if (!this.closed) {
191
+ void slot.worker.terminate();
192
+ this.workers[workerIndex] = this.spawnWorker(workerIndex);
193
+ this.drainQueue();
194
+ }
195
+ }, this.timeoutMs).unref();
196
+ let abortListener;
197
+ if (task.signal) {
198
+ abortListener = () => {
199
+ try {
200
+ slot.worker.postMessage({ type: 'cancel', id: task.id });
201
+ }
202
+ catch {
203
+ // ignore
204
+ }
205
+ };
206
+ task.signal.addEventListener('abort', abortListener, { once: true });
207
+ }
208
+ this.inflight.set(task.id, {
209
+ resolve: task.resolve,
210
+ reject: task.reject,
211
+ timer,
212
+ signal: task.signal,
213
+ abortListener,
214
+ workerIndex,
215
+ });
216
+ slot.worker.postMessage({
217
+ type: 'transform',
218
+ id: task.id,
219
+ html: task.html,
220
+ url: task.url,
221
+ includeMetadata: task.includeMetadata,
222
+ });
192
223
  }
193
- cleanupTask(task) {
194
- if (task.signal && task.abortHandler) {
195
- task.signal.removeEventListener('abort', task.abortHandler);
224
+ async close() {
225
+ if (this.closed)
226
+ return;
227
+ this.closed = true;
228
+ const terminations = this.workers.map((slot) => slot.worker.terminate());
229
+ this.workers.length = 0;
230
+ for (const [id, inflight] of this.inflight.entries()) {
231
+ clearTimeout(inflight.timer);
232
+ if (inflight.signal && inflight.abortListener) {
233
+ inflight.signal.removeEventListener('abort', inflight.abortListener);
234
+ }
235
+ inflight.reject(new Error('Transform worker pool closed'));
236
+ this.inflight.delete(id);
196
237
  }
238
+ for (const task of this.queue) {
239
+ task.reject(new Error('Transform worker pool closed'));
240
+ }
241
+ this.queue.length = 0;
242
+ await Promise.allSettled(terminations);
197
243
  }
198
244
  }
199
- let pool = null;
200
- function getPool() {
201
- if (pool)
202
- return pool;
203
- pool = new TransformWorkerPool(new URL('../workers/content-transform.worker.js', import.meta.url), config.workers.poolSize);
204
- return pool;
205
- }
206
- export async function transformInWorker(request, signal) {
207
- return getPool().run(request, signal);
208
- }
209
- export async function destroyTransformWorkers() {
210
- if (!pool)
211
- return;
212
- const current = pool;
213
- pool = null;
214
- await current.destroy();
215
- }
@@ -1,6 +1,4 @@
1
- import type { PipelineResult, ToolContentBlock } from '../../config/types/runtime.js';
2
- import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
3
- import { applyInlineContentLimit } from '../utils/inline-content.js';
1
+ import type { FetchPipelineOptions, PipelineResult, ToolContentBlock } from '../../config/types/runtime.js';
4
2
  interface SharedFetchOptions<T extends {
5
3
  content: string;
6
4
  }> {
@@ -20,4 +18,14 @@ export declare function performSharedFetch<T extends {
20
18
  }>;
21
19
  export type InlineResult = ReturnType<typeof applyInlineContentLimit>;
22
20
  export declare function buildToolContentBlocks(structuredContent: Record<string, unknown>, fromCache: boolean, inlineResult: InlineResult, resourceName: string, cacheKey?: string | null, fullContent?: string, url?: string, title?: string): ToolContentBlock[];
21
+ interface InlineContentResult {
22
+ content?: string;
23
+ contentSize: number;
24
+ resourceUri?: string;
25
+ resourceMimeType?: string;
26
+ error?: string;
27
+ truncated?: boolean;
28
+ }
29
+ declare function applyInlineContentLimit(content: string, cacheKey: string | null): InlineContentResult;
30
+ export declare function executeFetchPipeline<T>(options: FetchPipelineOptions<T>): Promise<PipelineResult<T>>;
23
31
  export {};
@@ -1,7 +1,13 @@
1
+ import { TRUNCATION_MARKER } from '../../config/formatting.js';
1
2
  import { config } from '../../config/index.js';
3
+ import * as cache from '../../services/cache.js';
4
+ import { createCacheKey, toResourceUri } from '../../services/cache-keys.js';
5
+ import { fetchNormalizedUrl } from '../../services/fetcher.js';
6
+ import { logDebug } from '../../services/logger.js';
2
7
  import { generateSafeFilename } from '../../utils/filename-generator.js';
3
- import { executeFetchPipeline } from '../utils/fetch-pipeline.js';
4
- import { applyInlineContentLimit } from '../utils/inline-content.js';
8
+ import { isRecord } from '../../utils/guards.js';
9
+ import { transformToRawUrl } from '../../utils/url-transformer.js';
10
+ import { normalizeUrl } from '../../utils/url-validator.js';
5
11
  function applyOptionalPipelineSerialization(pipelineOptions, options) {
6
12
  if (options.serialize !== undefined) {
7
13
  pipelineOptions.serialize = options.serialize;
@@ -92,3 +98,126 @@ export function buildToolContentBlocks(structuredContent, fromCache, inlineResul
92
98
  maybeAppendResourceLink(blocks, inlineResult, resourceName);
93
99
  return blocks;
94
100
  }
101
+ function applyInlineContentLimit(content, cacheKey) {
102
+ const contentSize = content.length;
103
+ const inlineLimit = config.constants.maxInlineContentChars;
104
+ if (contentSize <= inlineLimit) {
105
+ return { content, contentSize };
106
+ }
107
+ const resourceUri = resolveResourceUri(cacheKey);
108
+ if (!resourceUri) {
109
+ return buildTruncatedFallback(content, contentSize, inlineLimit);
110
+ }
111
+ return {
112
+ contentSize,
113
+ resourceUri,
114
+ resourceMimeType: 'text/markdown',
115
+ };
116
+ }
117
+ function resolveResourceUri(cacheKey) {
118
+ if (!config.cache.enabled || !cacheKey)
119
+ return null;
120
+ return toResourceUri(cacheKey);
121
+ }
122
+ function buildTruncatedFallback(content, contentSize, inlineLimit) {
123
+ const maxContentLength = Math.max(0, inlineLimit - TRUNCATION_MARKER.length);
124
+ const truncatedContent = content.length > inlineLimit
125
+ ? `${content.substring(0, maxContentLength)}${TRUNCATION_MARKER}`
126
+ : content;
127
+ return {
128
+ content: truncatedContent,
129
+ contentSize,
130
+ truncated: true,
131
+ };
132
+ }
133
+ function attemptCacheRetrieval({ cacheKey, deserialize, cacheNamespace, normalizedUrl, }) {
134
+ if (!cacheKey)
135
+ return null;
136
+ const cached = cache.get(cacheKey);
137
+ if (!cached)
138
+ return null;
139
+ if (!deserialize) {
140
+ logCacheMiss('missing deserializer', cacheNamespace, normalizedUrl);
141
+ return null;
142
+ }
143
+ const data = deserialize(cached.content);
144
+ if (data === undefined) {
145
+ logCacheMiss('deserialize failure', cacheNamespace, normalizedUrl);
146
+ return null;
147
+ }
148
+ logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
149
+ return {
150
+ data,
151
+ fromCache: true,
152
+ url: normalizedUrl,
153
+ fetchedAt: cached.fetchedAt,
154
+ cacheKey,
155
+ };
156
+ }
157
+ function resolveNormalizedUrl(url) {
158
+ const { normalizedUrl: validatedUrl } = normalizeUrl(url);
159
+ const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
160
+ return { normalizedUrl, originalUrl: validatedUrl, transformed };
161
+ }
162
+ export async function executeFetchPipeline(options) {
163
+ const resolvedUrl = resolveNormalizedUrl(options.url);
164
+ logRawUrlTransformation(resolvedUrl);
165
+ const cacheKey = createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
166
+ const cachedResult = attemptCacheRetrieval({
167
+ cacheKey,
168
+ deserialize: options.deserialize,
169
+ cacheNamespace: options.cacheNamespace,
170
+ normalizedUrl: resolvedUrl.normalizedUrl,
171
+ });
172
+ if (cachedResult)
173
+ return cachedResult;
174
+ logDebug('Fetching URL', { url: resolvedUrl.normalizedUrl });
175
+ const fetchOptions = options.signal === undefined ? {} : { signal: options.signal };
176
+ const html = await fetchNormalizedUrl(resolvedUrl.normalizedUrl, fetchOptions);
177
+ const data = await options.transform(html, resolvedUrl.normalizedUrl);
178
+ if (cache.isEnabled()) {
179
+ persistCache({
180
+ cacheKey,
181
+ data,
182
+ serialize: options.serialize,
183
+ normalizedUrl: resolvedUrl.normalizedUrl,
184
+ });
185
+ }
186
+ return {
187
+ data,
188
+ fromCache: false,
189
+ url: resolvedUrl.normalizedUrl,
190
+ fetchedAt: new Date().toISOString(),
191
+ cacheKey,
192
+ };
193
+ }
194
+ function persistCache({ cacheKey, data, serialize, normalizedUrl, }) {
195
+ if (!cacheKey)
196
+ return;
197
+ const serializer = serialize ?? JSON.stringify;
198
+ const title = extractTitle(data);
199
+ const metadata = {
200
+ url: normalizedUrl,
201
+ ...(title === undefined ? {} : { title }),
202
+ };
203
+ cache.set(cacheKey, serializer(data), metadata);
204
+ }
205
+ function extractTitle(value) {
206
+ if (!isRecord(value))
207
+ return undefined;
208
+ const { title } = value;
209
+ return typeof title === 'string' ? title : undefined;
210
+ }
211
+ function logCacheMiss(reason, cacheNamespace, normalizedUrl) {
212
+ logDebug(`Cache miss due to ${reason}`, {
213
+ namespace: cacheNamespace,
214
+ url: normalizedUrl,
215
+ });
216
+ }
217
+ function logRawUrlTransformation(resolvedUrl) {
218
+ if (!resolvedUrl.transformed)
219
+ return;
220
+ logDebug('Using transformed raw content URL', {
221
+ original: resolvedUrl.originalUrl,
222
+ });
223
+ }
@@ -1,4 +1,10 @@
1
+ import type { MarkdownTransformResult } from '../../config/types/content.js';
1
2
  import type { FetchUrlInput, ToolResponseBase } from '../../config/types/tools.js';
2
3
  export declare const FETCH_URL_TOOL_NAME = "fetch-url";
3
4
  export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format";
5
+ type MarkdownPipelineResult = MarkdownTransformResult & {
6
+ readonly content: string;
7
+ };
8
+ export declare function parseCachedMarkdownResult(cached: string): MarkdownPipelineResult | undefined;
4
9
  export declare function fetchUrlToolHandler(input: FetchUrlInput): Promise<ToolResponseBase>;
10
+ export {};