@j0hanz/fetch-url-mcp 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/dist/cache.d.ts +9 -3
  2. package/dist/cache.d.ts.map +1 -0
  3. package/dist/cache.js +44 -110
  4. package/dist/cache.js.map +1 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.d.ts.map +1 -0
  7. package/dist/cli.js +9 -4
  8. package/dist/cli.js.map +1 -0
  9. package/dist/config.d.ts +2 -3
  10. package/dist/config.d.ts.map +1 -0
  11. package/dist/config.js +18 -25
  12. package/dist/config.js.map +1 -0
  13. package/dist/crypto.d.ts +1 -0
  14. package/dist/crypto.d.ts.map +1 -0
  15. package/dist/crypto.js +1 -0
  16. package/dist/crypto.js.map +1 -0
  17. package/dist/dom-noise-removal.d.ts +2 -1
  18. package/dist/dom-noise-removal.d.ts.map +1 -0
  19. package/dist/dom-noise-removal.js +8 -4
  20. package/dist/dom-noise-removal.js.map +1 -0
  21. package/dist/download.d.ts +4 -0
  22. package/dist/download.d.ts.map +1 -0
  23. package/dist/download.js +106 -0
  24. package/dist/download.js.map +1 -0
  25. package/dist/errors.d.ts +1 -0
  26. package/dist/errors.d.ts.map +1 -0
  27. package/dist/errors.js +1 -0
  28. package/dist/errors.js.map +1 -0
  29. package/dist/examples/mcp-fetch-url-client.js +19 -3
  30. package/dist/examples/mcp-fetch-url-client.js.map +1 -1
  31. package/dist/fetch-content.d.ts +1 -0
  32. package/dist/fetch-content.d.ts.map +1 -0
  33. package/dist/fetch-content.js +14 -14
  34. package/dist/fetch-content.js.map +1 -0
  35. package/dist/fetch-stream.d.ts +1 -0
  36. package/dist/fetch-stream.d.ts.map +1 -0
  37. package/dist/fetch-stream.js +6 -3
  38. package/dist/fetch-stream.js.map +1 -0
  39. package/dist/fetch.d.ts +1 -0
  40. package/dist/fetch.d.ts.map +1 -0
  41. package/dist/fetch.js +120 -51
  42. package/dist/fetch.js.map +1 -0
  43. package/dist/host-normalization.d.ts +1 -0
  44. package/dist/host-normalization.d.ts.map +1 -0
  45. package/dist/host-normalization.js +19 -6
  46. package/dist/host-normalization.js.map +1 -0
  47. package/dist/http/auth.d.ts +35 -0
  48. package/dist/http/auth.d.ts.map +1 -0
  49. package/dist/http/auth.js +283 -0
  50. package/dist/http/auth.js.map +1 -0
  51. package/dist/http/health.d.ts +7 -0
  52. package/dist/http/health.d.ts.map +1 -0
  53. package/dist/http/health.js +166 -0
  54. package/dist/http/health.js.map +1 -0
  55. package/dist/http/helpers.d.ts +58 -0
  56. package/dist/http/helpers.d.ts.map +1 -0
  57. package/dist/http/helpers.js +372 -0
  58. package/dist/http/helpers.js.map +1 -0
  59. package/dist/{http-native.d.ts → http/native.d.ts} +1 -0
  60. package/dist/http/native.d.ts.map +1 -0
  61. package/dist/http/native.js +529 -0
  62. package/dist/http/native.js.map +1 -0
  63. package/dist/http/rate-limit.d.ts +13 -0
  64. package/dist/http/rate-limit.d.ts.map +1 -0
  65. package/dist/http/rate-limit.js +81 -0
  66. package/dist/http/rate-limit.js.map +1 -0
  67. package/dist/index.d.ts +1 -0
  68. package/dist/index.d.ts.map +1 -0
  69. package/dist/index.js +2 -1
  70. package/dist/index.js.map +1 -0
  71. package/dist/instructions.d.ts +2 -0
  72. package/dist/instructions.d.ts.map +1 -0
  73. package/dist/instructions.js +108 -0
  74. package/dist/instructions.js.map +1 -0
  75. package/dist/ip-blocklist.d.ts +1 -0
  76. package/dist/ip-blocklist.d.ts.map +1 -0
  77. package/dist/ip-blocklist.js +2 -0
  78. package/dist/ip-blocklist.js.map +1 -0
  79. package/dist/json.d.ts +2 -1
  80. package/dist/json.d.ts.map +1 -0
  81. package/dist/json.js +19 -6
  82. package/dist/json.js.map +1 -0
  83. package/dist/language-detection.d.ts +1 -0
  84. package/dist/language-detection.d.ts.map +1 -0
  85. package/dist/language-detection.js +1 -0
  86. package/dist/language-detection.js.map +1 -0
  87. package/dist/markdown-cleanup.d.ts +2 -1
  88. package/dist/markdown-cleanup.d.ts.map +1 -0
  89. package/dist/markdown-cleanup.js +51 -52
  90. package/dist/markdown-cleanup.js.map +1 -0
  91. package/dist/mcp-validator.d.ts +1 -0
  92. package/dist/mcp-validator.d.ts.map +1 -0
  93. package/dist/mcp-validator.js +16 -8
  94. package/dist/mcp-validator.js.map +1 -0
  95. package/dist/mcp.d.ts +2 -2
  96. package/dist/mcp.d.ts.map +1 -0
  97. package/dist/mcp.js +17 -333
  98. package/dist/mcp.js.map +1 -0
  99. package/dist/observability.d.ts +2 -0
  100. package/dist/observability.d.ts.map +1 -0
  101. package/dist/observability.js +30 -5
  102. package/dist/observability.js.map +1 -0
  103. package/dist/prompts.d.ts +1 -0
  104. package/dist/prompts.d.ts.map +1 -0
  105. package/dist/prompts.js +15 -3
  106. package/dist/prompts.js.map +1 -0
  107. package/dist/resources.d.ts +1 -0
  108. package/dist/resources.d.ts.map +1 -0
  109. package/dist/resources.js +30 -23
  110. package/dist/resources.js.map +1 -0
  111. package/dist/server-tuning.d.ts +1 -0
  112. package/dist/server-tuning.d.ts.map +1 -0
  113. package/dist/server-tuning.js +11 -15
  114. package/dist/server-tuning.js.map +1 -0
  115. package/dist/server.d.ts +1 -0
  116. package/dist/server.d.ts.map +1 -0
  117. package/dist/server.js +23 -23
  118. package/dist/server.js.map +1 -0
  119. package/dist/session.d.ts +1 -0
  120. package/dist/session.d.ts.map +1 -0
  121. package/dist/session.js +55 -28
  122. package/dist/session.js.map +1 -0
  123. package/dist/tasks/execution.d.ts +42 -0
  124. package/dist/tasks/execution.d.ts.map +1 -0
  125. package/dist/tasks/execution.js +232 -0
  126. package/dist/tasks/execution.js.map +1 -0
  127. package/dist/{tasks.d.ts → tasks/manager.d.ts} +6 -0
  128. package/dist/tasks/manager.d.ts.map +1 -0
  129. package/dist/{tasks.js → tasks/manager.js} +86 -37
  130. package/dist/tasks/manager.js.map +1 -0
  131. package/dist/tasks/owner.d.ts +33 -0
  132. package/dist/tasks/owner.d.ts.map +1 -0
  133. package/dist/tasks/owner.js +99 -0
  134. package/dist/tasks/owner.js.map +1 -0
  135. package/dist/timer-utils.d.ts +1 -0
  136. package/dist/timer-utils.d.ts.map +1 -0
  137. package/dist/timer-utils.js +12 -5
  138. package/dist/timer-utils.js.map +1 -0
  139. package/dist/tool-errors.d.ts +12 -0
  140. package/dist/tool-errors.d.ts.map +1 -0
  141. package/dist/tool-errors.js +52 -0
  142. package/dist/tool-errors.js.map +1 -0
  143. package/dist/tool-pipeline.d.ts +72 -0
  144. package/dist/tool-pipeline.d.ts.map +1 -0
  145. package/dist/tool-pipeline.js +407 -0
  146. package/dist/tool-pipeline.js.map +1 -0
  147. package/dist/tool-progress.d.ts +32 -0
  148. package/dist/tool-progress.d.ts.map +1 -0
  149. package/dist/tool-progress.js +123 -0
  150. package/dist/tool-progress.js.map +1 -0
  151. package/dist/tools.d.ts +35 -111
  152. package/dist/tools.d.ts.map +1 -0
  153. package/dist/tools.js +93 -566
  154. package/dist/tools.js.map +1 -0
  155. package/dist/{transform.d.ts → transform/transform.d.ts} +2 -1
  156. package/dist/transform/transform.d.ts.map +1 -0
  157. package/dist/{transform.js → transform/transform.js} +73 -769
  158. package/dist/transform/transform.js.map +1 -0
  159. package/dist/{transform-types.d.ts → transform/types.d.ts} +1 -0
  160. package/dist/transform/types.d.ts.map +1 -0
  161. package/dist/{transform-types.js → transform/types.js} +1 -0
  162. package/dist/transform/types.js.map +1 -0
  163. package/dist/transform/worker-pool.d.ts +93 -0
  164. package/dist/transform/worker-pool.d.ts.map +1 -0
  165. package/dist/transform/worker-pool.js +759 -0
  166. package/dist/transform/worker-pool.js.map +1 -0
  167. package/dist/transform/workers/transform-child.d.ts +2 -0
  168. package/dist/transform/workers/transform-child.d.ts.map +1 -0
  169. package/dist/{workers → transform/workers}/transform-child.js +3 -1
  170. package/dist/transform/workers/transform-child.js.map +1 -0
  171. package/dist/transform/workers/transform-worker.d.ts +2 -0
  172. package/dist/transform/workers/transform-worker.d.ts.map +1 -0
  173. package/dist/{workers → transform/workers}/transform-worker.js +2 -1
  174. package/dist/transform/workers/transform-worker.js.map +1 -0
  175. package/dist/type-guards.d.ts +1 -0
  176. package/dist/type-guards.d.ts.map +1 -0
  177. package/dist/type-guards.js +1 -0
  178. package/dist/type-guards.js.map +1 -0
  179. package/package.json +6 -7
  180. package/dist/AGENTS.md +0 -152
  181. package/dist/http-native.js +0 -1320
  182. package/dist/instructions.md +0 -113
  183. package/dist/workers/transform-child.d.ts +0 -1
  184. package/dist/workers/transform-worker.d.ts +0 -1
@@ -0,0 +1,759 @@
1
+ import { AsyncLocalStorage, AsyncResource } from 'node:async_hooks';
2
+ import { Buffer } from 'node:buffer';
3
+ import { fork } from 'node:child_process';
4
+ import { availableParallelism } from 'node:os';
5
+ import { fileURLToPath } from 'node:url';
6
+ import { isSharedArrayBuffer } from 'node:util/types';
7
+ import { Worker, } from 'node:worker_threads';
8
+ import { config } from '../config.js';
9
+ import { FetchError, getErrorMessage } from '../errors.js';
10
+ import { logWarn } from '../observability.js';
11
+ import { createUnrefTimeout } from '../timer-utils.js';
12
+ import { isObject } from '../type-guards.js';
13
+ // ---------------------------------------------------------------------------
14
+ // Abort helper (inlined to avoid circular dependency with transform.ts)
15
+ // ---------------------------------------------------------------------------
16
/**
 * Builds the error used to reject a transform request that was canceled.
 *
 * @param url - URL of the request being transformed.
 * @param stage - Label of the pipeline stage where the cancellation was observed.
 * @returns A FetchError with status 499 whose details carry `reason: 'aborted'`
 *   and the given stage.
 */
function createAbortError(url, stage) {
    const details = { reason: 'aborted', stage };
    return new FetchError('Request was canceled', url, 499, details);
}
22
+ // ---------------------------------------------------------------------------
23
+ // Worker message validation
24
+ // ---------------------------------------------------------------------------
25
/**
 * Checks that a value has the shape of a successful worker result.
 *
 * Required: `markdown` (string) and `truncated` (boolean). Optional: `title`
 * (string) and `metadata` (validated by isExtractedMetadataPayload).
 *
 * @param value - Untrusted value received from a worker.
 * @returns true when the value matches the result shape.
 */
function isWorkerResultPayload(value) {
    if (!isObject(value)) {
        return false;
    }
    const { markdown, metadata, title, truncated } = value;
    // metadata may be absent, but when supplied it has to be an object...
    if (metadata !== undefined && !isObject(metadata)) {
        return false;
    }
    // ...and every field inside it must pass the metadata shape check.
    if (metadata && !isExtractedMetadataPayload(metadata)) {
        return false;
    }
    if (typeof markdown !== 'string') {
        return false;
    }
    if (typeof truncated !== 'boolean') {
        return false;
    }
    return title === undefined || typeof title === 'string';
}
39
/**
 * Checks that a value looks like extracted page metadata: an object whose
 * known fields are each either absent or a string.
 *
 * @param value - Untrusted value received from a worker.
 * @returns true when every known metadata field is undefined or a string.
 */
function isExtractedMetadataPayload(value) {
    if (!isObject(value)) {
        return false;
    }
    const { author, description, favicon, image, modifiedAt, publishedAt, title, } = value;
    const optionalStrings = [
        title,
        description,
        author,
        image,
        favicon,
        publishedAt,
        modifiedAt,
    ];
    return optionalStrings.every((field) => field === undefined || typeof field === 'string');
}
51
/**
 * Checks that a value has the shape of a serialized worker error.
 *
 * Required: `name`, `message` and `url` (strings). Optional: `statusCode`
 * (number) and `details` (object).
 *
 * @param value - Untrusted value received from a worker.
 * @returns true when the value matches the error shape.
 */
function isWorkerErrorPayload(value) {
    if (!isObject(value)) {
        return false;
    }
    const { details, message, name, statusCode, url } = value;
    if (typeof name !== 'string') {
        return false;
    }
    if (typeof message !== 'string') {
        return false;
    }
    if (typeof url !== 'string') {
        return false;
    }
    if (statusCode !== undefined && typeof statusCode !== 'number') {
        return false;
    }
    return details === undefined || isObject(details);
}
61
/**
 * Validates a raw message received from a worker.
 *
 * A response must be an object with a string `id` and a `type` of
 * 'result' (with a valid `result` payload), 'error' (with a valid `error`
 * payload) or 'cancelled' (no payload required).
 *
 * @param raw - Untrusted message value.
 * @returns true when the message is a well-formed worker response.
 */
function isWorkerResponse(raw) {
    if (!isObject(raw) || typeof raw['id'] !== 'string') {
        return false;
    }
    switch (raw['type']) {
        case 'result':
            return isWorkerResultPayload(raw['result']);
        case 'error':
            return isWorkerErrorPayload(raw['error']);
        case 'cancelled':
            return true;
        default:
            return false;
    }
}
77
/**
 * Captures the async context that is current at call time so a callback can
 * later be executed inside it.
 *
 * @returns An object with:
 *   - `run(fn)`: invokes `fn` inside the captured AsyncLocalStorage snapshot
 *     and the task's AsyncResource scope; the return value of `fn` is dropped.
 *   - `dispose()`: emits the AsyncResource destroy hook; repeated calls are
 *     ignored.
 */
function createTaskContext() {
    const restoreStores = AsyncLocalStorage.snapshot();
    const resource = new AsyncResource('fetch-url-mcp.transform.task');
    let isDisposed = false;
    function run(fn) {
        restoreStores(() => {
            resource.runInAsyncScope(fn);
        });
    }
    function dispose() {
        if (!isDisposed) {
            // Flag first so emitDestroy() is never invoked a second time.
            isDisposed = true;
            resource.emitDestroy();
        }
    }
    return { run, dispose };
}
95
/**
 * Returns a buffer whose bytes exactly span its backing ArrayBuffer.
 *
 * If `buffer` already starts at offset 0 and covers its whole backing store
 * it is returned unchanged. Otherwise its bytes are copied into a freshly
 * allocated, unpooled Buffer.
 *
 * @param buffer - Buffer (or Uint8Array view) holding the bytes.
 * @returns The same instance when already tight, otherwise a tight Buffer copy.
 */
function ensureTightBuffer(buffer) {
    if (buffer.byteOffset === 0 &&
        buffer.byteLength === buffer.buffer.byteLength) {
        return buffer;
    }
    // Buffer.from(buffer) may carve small copies out of Node's shared Buffer
    // pool, which would yield another non-tight view. allocUnsafeSlow never
    // uses the pool, and every byte is overwritten by set() below.
    const tight = Buffer.allocUnsafeSlow(buffer.byteLength);
    tight.set(buffer);
    return tight;
}
102
/**
 * Builds the 'transform' message posted to a worker for a queued task.
 *
 * @param task - Pending task; reads `id`, `url`, `includeMetadata`,
 *   `skipNoiseRemoval`, `inputTruncated`, `html`, `htmlBuffer`, `encoding`.
 * @param supportsTransferList - Whether the worker host accepts a transfer
 *   list alongside the message.
 * @returns `{ message }`, plus `transferList` when the HTML bytes are handed
 *   over as a dedicated ArrayBuffer.
 */
function buildWorkerDispatchPayload(task, supportsTransferList) {
    const message = {
        type: 'transform',
        id: task.id,
        url: task.url,
        includeMetadata: task.includeMetadata,
        ...(task.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
        ...(task.inputTruncated ? { inputTruncated: true } : {}),
    };
    // String input: nothing to copy or transfer.
    if (!task.htmlBuffer) {
        message.html = task.html;
        return { message };
    }
    if (!supportsTransferList) {
        // Without a transfer list the buffer is sent as part of the message;
        // make sure it does not drag along a larger backing store.
        message.htmlBuffer = ensureTightBuffer(task.htmlBuffer);
        if (task.encoding)
            message.encoding = task.encoding;
        return { message };
    }
    // Uint8Array.from() always produces a private copy with its own backing
    // store, so the caller's buffer is never detached by the transfer and no
    // intermediate tight copy is needed first.
    const transferableHtmlBuffer = Uint8Array.from(task.htmlBuffer);
    message.htmlBuffer = transferableHtmlBuffer;
    if (task.encoding)
        message.encoding = task.encoding;
    const backingBuffer = transferableHtmlBuffer.buffer;
    // Defensive: a SharedArrayBuffer must not be placed in a transfer list.
    if (isSharedArrayBuffer(backingBuffer))
        return { message };
    return { message, transferList: [backingBuffer] };
}
131
+ // ---------------------------------------------------------------------------
132
+ // Pool sizing & constants
133
+ // ---------------------------------------------------------------------------
134
+ /**
135
+ * Worker Pool Sizing Configuration
136
+ *
137
+ * Default: min(4, floor(availableParallelism() / 2)), constrained to [2, N]
138
+ *
139
+ * Tuning Guidance:
140
+ * - **Default behavior**: Appropriate for most deployments. Uses half of available
141
+ * CPU threads (capped at 4) to balance throughput with system resource availability.
142
+ *
143
+ * - **CPU-limited containers**: If running in a container with strict CPU limits
144
+ * (e.g., Docker with --cpus=2), the default may over-subscribe. Consider setting
145
+ * maxWorkerScale to match the container's CPU limit.
146
+ *
147
+ * - **High-concurrency workloads**: For dedicated servers handling many concurrent
148
+ * fetch requests, increasing maxWorkerScale to (availableParallelism() + 2) may
149
+ * improve throughput by overlapping I/O wait with computation.
150
+ *
151
+ * - **Memory-constrained environments**: Each worker allocates ~50-100MB for DOM
152
+ * parsing. If memory is limited, reduce maxWorkerScale to (availableParallelism() / 2)
153
+ * or lower to prevent OOM errors.
154
+ *
155
+ * - **Shared hosting**: On shared systems where CPU is contested, reducing the pool
156
+ * size prevents starving other processes. Consider maxWorkerScale = 2 or using
157
+ * process-based workers (TRANSFORM_WORKER_MODE=process) for better isolation.
158
+ *
159
+ * Configuration:
160
+ * - TRANSFORM_MAX_WORKER_SCALE env var (default: availableParallelism())
161
+ * - TRANSFORM_WORKER_MODE env var: 'threads' (default) or 'process'
162
+ *
163
+ * See config.ts for full worker configuration options.
164
+ */
165
// Half of the available parallelism, clamped to the range [2, 4].
const POOL_MIN_WORKERS = Math.min(4, Math.max(2, Math.floor(availableParallelism() / 2)));
// Upper bound on pool capacity and default per-task timeout, both from config.
const { maxWorkerScale: POOL_MAX_WORKERS, timeoutMs: DEFAULT_TIMEOUT_MS, } = config.transform;
// Capacity grows by one when the queue depth exceeds capacity * threshold.
const POOL_SCALE_THRESHOLD = 0.5;
// Worker names are `${WORKER_NAME_PREFIX}-<1-based index>`.
const WORKER_NAME_PREFIX = 'fetch-url-mcp-transform';
// Absolute filesystem path of the child-process entry script.
const TRANSFORM_CHILD_URL = new URL('./workers/transform-child.js', import.meta.url);
const TRANSFORM_CHILD_PATH = fileURLToPath(TRANSFORM_CHILD_URL);
171
+ // ---------------------------------------------------------------------------
172
+ // Worker host spawners
173
+ // ---------------------------------------------------------------------------
174
/**
 * Spawns a transform worker as a worker thread and wraps it in the host
 * interface used by the pool.
 *
 * @param _workerIndex - Slot index; unused by the thread host.
 * @param name - Name passed to the Worker constructor.
 * @returns A host of kind 'thread' that supports transfer lists.
 */
function createThreadWorkerHost(_workerIndex, name) {
    const workerOptions = { name };
    const limits = config.transform.workerResourceLimits;
    if (limits) {
        workerOptions.resourceLimits = limits;
    }
    const scriptUrl = new URL('./workers/transform-worker.js', import.meta.url);
    const worker = new Worker(scriptUrl, workerOptions);
    return {
        kind: 'thread',
        supportsTransferList: true,
        threadId: worker.threadId,
        postMessage(message, transferList) {
            worker.postMessage(message, transferList);
        },
        async terminate() {
            await worker.terminate();
        },
        unref() {
            worker.unref();
        },
        onMessage(handler) {
            worker.on('message', handler);
        },
        onError(handler) {
            // Both failure channels are routed to the same handler.
            worker.on('error', handler);
            worker.on('messageerror', handler);
        },
        onExit(handler) {
            // Threads have no exit signal, so the second argument is always null.
            worker.on('exit', (exitCode) => {
                handler(exitCode, null);
            });
        },
    };
}
207
/**
 * Spawns a transform worker as a forked child process and wraps it in the
 * host interface used by the pool.
 *
 * The child is forked with an IPC channel using 'advanced' serialization and
 * receives its index and name through environment variables.
 *
 * @param workerIndex - Slot index, exported as FETCH_URL_MCP_WORKER_INDEX.
 * @param name - Worker name, exported as FETCH_URL_MCP_WORKER_NAME.
 * @returns A host of kind 'process' that does not support transfer lists.
 * @throws Error when the child has no pid after fork.
 */
function createProcessWorkerHost(workerIndex, name) {
    const child = fork(TRANSFORM_CHILD_PATH, [], {
        stdio: ['ignore', 'ignore', 'ignore', 'ipc'],
        serialization: 'advanced',
        env: {
            ...process.env,
            FETCH_URL_MCP_WORKER_INDEX: String(workerIndex),
            FETCH_URL_MCP_WORKER_NAME: name,
        },
    });
    if (child.pid === undefined) {
        // A failed spawn is also reported through an asynchronous 'error'
        // event. The failure is surfaced by the throw below, so absorb the
        // event here; without a listener it would become an uncaught exception.
        child.once('error', () => undefined);
        throw new Error('Failed to fork process');
    }
    return {
        kind: 'process',
        supportsTransferList: false,
        pid: child.pid,
        postMessage: (message) => {
            if (!child.connected) {
                throw new Error('Transform worker IPC channel is closed');
            }
            child.send(message);
        },
        // Resolves once the child has exited (or immediately if it already
        // exited, was already signalled, or kill() throws).
        terminate: () => new Promise((resolve) => {
            if (child.exitCode !== null || child.killed) {
                resolve();
                return;
            }
            child.once('exit', () => {
                resolve();
            });
            try {
                child.kill();
            }
            catch {
                resolve();
            }
        }),
        unref: () => {
            child.unref();
        },
        onMessage: (handler) => {
            child.on('message', handler);
        },
        onError: (handler) => {
            child.on('error', handler);
        },
        onExit: (handler) => {
            child.on('exit', (code, signal) => {
                handler(code, signal);
            });
        },
    };
}
261
+ // ---------------------------------------------------------------------------
262
+ // WorkerPool
263
+ // ---------------------------------------------------------------------------
264
/**
 * Pool of transform workers fed from a FIFO task queue.
 *
 * Workers are spawned lazily (one per drain pass) up to `capacity`, which can
 * grow up to `maxCapacity` when the queue backs up. Each dispatched task is
 * tracked in `inflight` with a timeout; tasks can be aborted through an
 * AbortSignal while queued or while running. A worker that errors, exits,
 * times out or is aborted is terminated and replaced in the same slot.
 */
class WorkerPool {
    static CLOSED_MESSAGE = 'Transform worker pool closed';
    // Worker slots ({ host, busy, currentTaskId, name }), indexed by worker index.
    workers = [];
    capacity;
    minCapacity = POOL_MIN_WORKERS;
    maxCapacity = POOL_MAX_WORKERS;
    // Pending tasks; entries before `queueHead` have already been taken.
    queue = [];
    queueHead = 0;
    // Dispatched tasks keyed by task id.
    inflight = new Map();
    // Pending cancel acknowledgements keyed by task id.
    cancelAcks = new Map();
    timeoutMs;
    queueMax;
    spawnWorkerImpl;
    closed = false;
    taskIdSeq = 0;
    /**
     * @param size - Requested capacity; 0 yields capacity 0, any other value
     *   is clamped to [minCapacity, maxCapacity].
     * @param timeoutMs - Per-task timeout passed to createUnrefTimeout.
     * @param spawnWorker - Factory `(workerIndex, name) => host`.
     */
    constructor(size, timeoutMs, spawnWorker) {
        if (size === 0) {
            this.capacity = 0;
        }
        else {
            this.capacity = Math.max(this.minCapacity, Math.min(size, this.maxCapacity));
        }
        this.timeoutMs = timeoutMs;
        this.queueMax = this.maxCapacity * 32;
        this.spawnWorkerImpl = spawnWorker;
    }
    /**
     * Queues a transform and resolves with the worker's result.
     *
     * @param htmlOrBuffer - HTML as a string, or raw bytes.
     * @param url - URL the HTML was fetched from.
     * @param options - Reads `signal`, `includeMetadata`, `skipNoiseRemoval`,
     *   `inputTruncated` and `encoding`.
     * @returns Promise of `{ markdown, truncated, title, metadata? }`.
     * @throws Error when the pool is closed; FetchError 499 when the signal is
     *   already aborted; FetchError 503 when the queue is full.
     */
    async transform(htmlOrBuffer, url, options) {
        this.ensureOpen();
        if (options.signal?.aborted)
            throw createAbortError(url, 'transform:enqueue');
        if (this.getQueueDepth() >= this.queueMax) {
            throw new FetchError('Transform worker queue is full', url, 503, {
                reason: 'queue_full',
                stage: 'transform:enqueue',
            });
        }
        return new Promise((resolve, reject) => {
            const task = this.createPendingTask(htmlOrBuffer, url, options, resolve, reject);
            this.queue.push(task);
            this.drainQueue();
        });
    }
    /** Number of tasks still waiting in the queue (never negative). */
    getQueueDepth() {
        const depth = this.queue.length - this.queueHead;
        return depth > 0 ? depth : 0;
    }
    /** Number of worker slots currently marked busy. */
    getActiveWorkers() {
        return this.workers.filter((s) => s?.busy).length;
    }
    /** Current target capacity of the pool. */
    getCapacity() {
        return this.capacity;
    }
    /**
     * Changes the target capacity (clamped to [minCapacity, maxCapacity]) and
     * drains the queue if the value changed.
     */
    resize(size) {
        const newCapacity = Math.max(this.minCapacity, Math.min(size, this.maxCapacity));
        if (newCapacity === this.capacity)
            return;
        this.capacity = newCapacity;
        this.drainQueue();
    }
    /**
     * Closes the pool: terminates every worker, rejects all in-flight and
     * queued tasks with the closed message, and waits for the terminations to
     * settle. Calling it again is a no-op.
     */
    async close() {
        if (this.closed)
            return;
        this.closed = true;
        const terminations = this.workers
            .map((slot) => slot?.host.terminate())
            .filter((p) => p !== undefined);
        this.workers.fill(undefined);
        this.workers.length = 0;
        for (const id of Array.from(this.inflight.keys())) {
            const inflight = this.takeInflight(id);
            if (!inflight)
                continue;
            this.finalizeTask(inflight.context, () => {
                inflight.reject(new Error(WorkerPool.CLOSED_MESSAGE));
            });
        }
        for (let i = this.queueHead; i < this.queue.length; i += 1) {
            const task = this.queue[i];
            if (!task)
                continue;
            this.clearAbortListener(task.signal, task.abortListener);
            this.finalizeTask(task.context, () => {
                task.reject(new Error(WorkerPool.CLOSED_MESSAGE));
            });
        }
        this.queue.length = 0;
        this.queueHead = 0;
        await Promise.allSettled(terminations);
    }
    /** Throws the closed-pool error when the pool has been closed. */
    ensureOpen() {
        if (this.closed)
            throw new Error(WorkerPool.CLOSED_MESSAGE);
    }
    /**
     * Creates the queue entry for a transform request: assigns a task id,
     * captures the caller's async context and, when a signal is supplied,
     * registers a one-shot abort listener.
     */
    createPendingTask(htmlOrBuffer, url, options, resolve, reject) {
        const id = (this.taskIdSeq++).toString(36);
        // Preserve request context for resolve/reject even when callbacks fire
        // from worker thread events.
        const context = createTaskContext();
        let abortListener;
        if (options.signal) {
            abortListener = () => {
                this.onAbortSignal(id, url, context, reject);
            };
            options.signal.addEventListener('abort', abortListener, { once: true });
        }
        const task = {
            id,
            url,
            includeMetadata: options.includeMetadata,
            ...(options.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
            ...(options.inputTruncated ? { inputTruncated: true } : {}),
            signal: options.signal,
            abortListener,
            context,
            resolve,
            reject,
        };
        if (typeof htmlOrBuffer === 'string') {
            task.html = htmlOrBuffer;
        }
        else {
            task.htmlBuffer = htmlOrBuffer;
            if (options.encoding) {
                task.encoding = options.encoding;
            }
        }
        return task;
    }
    /**
     * Abort-signal handler for a task. Rejects with the closed message when
     * the pool is closed, starts the in-flight abort sequence when the task
     * has been dispatched, or removes and rejects it when still queued.
     */
    onAbortSignal(id, url, context, reject) {
        if (this.closed) {
            this.finalizeTask(context, () => {
                reject(new Error(WorkerPool.CLOSED_MESSAGE));
            });
            return;
        }
        const inflight = this.inflight.get(id);
        if (inflight) {
            void this.abortInflight(id, url, inflight.workerIndex);
            return;
        }
        const queuedIndex = this.findQueuedIndex(id);
        if (queuedIndex !== null) {
            const task = this.queue[queuedIndex];
            if (task)
                this.clearAbortListener(task.signal, task.abortListener);
            this.queue.splice(queuedIndex, 1);
            if (task) {
                this.finalizeTask(task.context, () => {
                    task.reject(createAbortError(url, 'transform:queued-abort'));
                });
            }
            else {
                this.finalizeTask(context, () => {
                    reject(createAbortError(url, 'transform:queued-abort'));
                });
            }
            this.maybeCompactQueue();
        }
    }
    /** Settles the pending cancel acknowledgement for a task, if any. */
    resolveCancelAck(id) {
        const pending = this.cancelAcks.get(id);
        if (!pending)
            return;
        pending.timeout.cancel();
        pending.resolve();
    }
    /**
     * Returns a promise that settles when the worker acknowledges the
     * cancellation of task `id` or when the cancel-ack timeout elapses,
     * whichever comes first. Repeated calls for one id share the promise.
     */
    waitForCancelAck(id) {
        const existing = this.cancelAcks.get(id);
        if (existing) {
            return existing.promise;
        }
        let resolve = () => { };
        const timeout = createUnrefTimeout(config.transform.cancelAckTimeoutMs, undefined);
        const racePromise = new Promise((finish) => {
            resolve = finish;
        });
        const promise = Promise.race([racePromise, timeout.promise]).finally(() => {
            this.cancelAcks.delete(id);
            timeout.cancel();
        });
        this.cancelAcks.set(id, { promise, resolve, timeout });
        return promise;
    }
    /**
     * Aborts a dispatched task: flags it as cancel-pending, asks the worker to
     * cancel, waits for the acknowledgement (or its timeout), rejects the task
     * with an abort error and replaces the worker that was running it.
     */
    async abortInflight(id, url, workerIndex) {
        const slot = this.workers[workerIndex];
        const inflight = this.inflight.get(id);
        if (inflight) {
            inflight.cancelPending = true;
        }
        if (slot) {
            try {
                slot.host.postMessage({ type: 'cancel', id });
            }
            catch {
                // Worker may be unavailable; failure is acceptable during abort
            }
        }
        await this.waitForCancelAck(id);
        const taken = this.failTask(id, createAbortError(url, 'transform:signal-abort'));
        if (taken && slot)
            this.restartWorker(workerIndex, slot);
    }
    /** Removes a task's abort listener from its signal, if both exist. */
    clearAbortListener(signal, listener) {
        if (!signal || !listener)
            return;
        try {
            signal.removeEventListener('abort', listener);
        }
        catch {
            // Defensive: removeEventListener should not throw, but handle edge cases
        }
    }
    /**
     * Spawns a worker host for the given slot index and wires its events.
     *
     * @returns A fresh idle slot `{ host, busy, currentTaskId, name }`; the
     *   caller is responsible for storing it in `workers`.
     */
    spawnWorker(workerIndex) {
        const name = `${WORKER_NAME_PREFIX}-${workerIndex + 1}`;
        const host = this.spawnWorkerImpl(workerIndex, name);
        host.unref();
        // Handlers are keyed by slot index, but a slot outlives its hosts: when
        // a host is replaced, its late 'error'/'exit' events (including the
        // exit caused by our own terminate()) must not be attributed to the
        // replacement, or the new worker would be failed and restarted in turn.
        const isCurrentHost = () => this.workers[workerIndex]?.host === host;
        host.onMessage((raw) => {
            this.onWorkerMessage(workerIndex, raw);
        });
        host.onError((error) => {
            if (!isCurrentHost())
                return;
            this.onWorkerBroken(workerIndex, `Transform worker error: ${getErrorMessage(error)}`);
        });
        host.onExit((code, signal) => {
            if (!isCurrentHost())
                return;
            const suffix = signal ? `signal ${signal}` : `code ${code ?? 'unknown'}`;
            this.onWorkerBroken(workerIndex, `Transform worker exited (${suffix})`);
        });
        return { host, busy: false, currentTaskId: null, name };
    }
    /**
     * Handles a worker failure: logs a warning, rejects the task the worker
     * was running (FetchError 503, reason 'worker_exit') and restarts the slot.
     */
    onWorkerBroken(workerIndex, message) {
        if (this.closed)
            return;
        const slot = this.workers[workerIndex];
        if (!slot)
            return;
        logWarn('Transform worker unavailable; restarting', {
            reason: message,
            workerIndex,
            workerKind: slot.host.kind,
            workerName: slot.name,
            ...(slot.host.kind === 'process'
                ? { pid: slot.host.pid }
                : { threadId: slot.host.threadId }),
        });
        if (slot.busy && slot.currentTaskId) {
            this.failTask(slot.currentTaskId, new FetchError(message, '', 503, { reason: 'worker_exit' }));
        }
        this.restartWorker(workerIndex, slot);
    }
    /**
     * Terminates the given slot's host (or the one currently stored at
     * `workerIndex`), installs a newly spawned worker in that slot and drains
     * the queue. Does nothing once the pool is closed.
     */
    restartWorker(workerIndex, slot) {
        if (this.closed)
            return;
        const target = slot ?? this.workers[workerIndex];
        if (target) {
            target.host.terminate().catch(() => undefined);
        }
        this.workers[workerIndex] = this.spawnWorker(workerIndex);
        this.drainQueue();
    }
    /**
     * Handles a message from a worker. Malformed messages are ignored.
     * 'cancelled' messages (and any response for a cancel-pending task) only
     * settle the cancel acknowledgement; 'result' and 'error' messages settle
     * the matching in-flight task, free the worker and drain the queue.
     */
    onWorkerMessage(workerIndex, raw) {
        if (!isWorkerResponse(raw))
            return;
        const message = raw;
        if (message.type === 'cancelled') {
            this.resolveCancelAck(message.id);
            return;
        }
        const inflightPeek = this.inflight.get(message.id);
        if (inflightPeek?.cancelPending) {
            // The abort path owns this task; treat any response as the ack.
            this.resolveCancelAck(message.id);
            return;
        }
        const inflight = this.takeInflight(message.id);
        if (!inflight)
            return;
        this.markIdle(workerIndex);
        if (message.type === 'result') {
            this.finalizeTask(inflight.context, () => {
                inflight.resolve({
                    markdown: message.result.markdown,
                    truncated: message.result.truncated,
                    title: message.result.title,
                    ...(message.result.metadata
                        ? { metadata: message.result.metadata }
                        : {}),
                });
            });
        }
        else {
            const err = message.error;
            if (err.name === 'FetchError') {
                this.finalizeTask(inflight.context, () => {
                    inflight.reject(new FetchError(err.message, err.url, err.statusCode, err.details ?? {}));
                });
            }
            else {
                this.finalizeTask(inflight.context, () => {
                    inflight.reject(new Error(err.message));
                });
            }
        }
        this.drainQueue();
    }
    /**
     * Removes a task from `inflight`, cancelling its timeout and detaching its
     * abort listener.
     *
     * @returns The in-flight record, or null when the id is unknown.
     */
    takeInflight(id) {
        const inflight = this.inflight.get(id);
        if (!inflight)
            return null;
        inflight.timeout.cancel();
        this.clearAbortListener(inflight.signal, inflight.abortListener);
        this.inflight.delete(id);
        return inflight;
    }
    /** Marks the slot at `workerIndex` as idle with no current task. */
    markIdle(workerIndex) {
        const slot = this.workers[workerIndex];
        if (!slot)
            return;
        slot.busy = false;
        slot.currentTaskId = null;
    }
    /**
     * Rejects an in-flight task with `error` and frees its worker slot.
     *
     * @returns true when the task was in flight, false otherwise.
     */
    failTask(id, error) {
        const inflight = this.takeInflight(id);
        if (!inflight)
            return false;
        this.finalizeTask(inflight.context, () => {
            inflight.reject(error);
        });
        this.markIdle(inflight.workerIndex);
        return true;
    }
    /**
     * Raises capacity by one (up to maxCapacity) when the queue depth exceeds
     * capacity * POOL_SCALE_THRESHOLD.
     */
    maybeScaleUp() {
        if (this.getQueueDepth() > this.capacity * POOL_SCALE_THRESHOLD &&
            this.capacity < this.maxCapacity) {
            this.capacity += 1;
        }
    }
    /**
     * Hands queued tasks to idle workers. If tasks remain and the pool is
     * below capacity, spawns one additional worker and schedules another pass
     * via setImmediate while more workers may still be added.
     */
    drainQueue() {
        if (this.closed || this.getQueueDepth() === 0)
            return;
        this.maybeScaleUp();
        for (let i = 0; i < this.workers.length; i += 1) {
            const slot = this.workers[i];
            if (slot && !slot.busy) {
                this.dispatchFromQueue(i, slot);
                if (this.getQueueDepth() === 0)
                    return;
            }
        }
        if (this.workers.length < this.capacity && this.getQueueDepth() > 0) {
            const workerIndex = this.workers.length;
            const slot = this.spawnWorker(workerIndex);
            this.workers.push(slot);
            this.dispatchFromQueue(workerIndex, slot);
            if (this.workers.length < this.capacity && this.getQueueDepth() > 0) {
                setImmediate(() => {
                    this.drainQueue();
                });
            }
        }
    }
    /**
     * Pops the next task from the queue head, skipping empty entries.
     *
     * @returns The next task, or null when the queue is exhausted.
     */
    takeNextQueuedTask() {
        while (this.queueHead < this.queue.length) {
            const task = this.queue[this.queueHead];
            this.queueHead += 1;
            if (task) {
                this.maybeCompactQueue();
                return task;
            }
        }
        this.maybeCompactQueue();
        return null;
    }
    /**
     * Takes the next queued task and dispatches it to the given worker slot.
     *
     * A task found closed or already aborted is rejected without being sent.
     * Otherwise the slot is marked busy, a timeout is armed (on expiry the
     * worker is asked to cancel, the task is rejected with FetchError 504 and
     * the worker is restarted), the task is recorded in `inflight` and the
     * message is posted. A failure to post rejects the task and restarts the
     * worker.
     */
    dispatchFromQueue(workerIndex, slot) {
        const task = this.takeNextQueuedTask();
        if (!task)
            return;
        if (this.closed) {
            this.clearAbortListener(task.signal, task.abortListener);
            this.finalizeTask(task.context, () => {
                task.reject(new Error(WorkerPool.CLOSED_MESSAGE));
            });
            return;
        }
        if (task.signal?.aborted) {
            this.clearAbortListener(task.signal, task.abortListener);
            this.finalizeTask(task.context, () => {
                task.reject(createAbortError(task.url, 'transform:dispatch'));
            });
            return;
        }
        slot.busy = true;
        slot.currentTaskId = task.id;
        const timeout = createUnrefTimeout(this.timeoutMs, null);
        void timeout.promise
            .then(() => {
            try {
                slot.host.postMessage({ type: 'cancel', id: task.id });
            }
            catch {
                // Worker may be unavailable; proceed with timeout handling
            }
            const inflight = this.takeInflight(task.id);
            if (!inflight)
                return;
            this.finalizeTask(inflight.context, () => {
                inflight.reject(new FetchError('Request timeout', task.url, 504, {
                    reason: 'timeout',
                    stage: 'transform:worker-timeout',
                }));
            });
            this.restartWorker(workerIndex, slot);
        })
            .catch((error) => {
            this.failTask(task.id, error);
        });
        this.inflight.set(task.id, {
            resolve: task.resolve,
            reject: task.reject,
            timeout,
            signal: task.signal,
            abortListener: task.abortListener,
            workerIndex,
            context: task.context,
            cancelPending: false,
        });
        try {
            const { message, transferList } = buildWorkerDispatchPayload(task, slot.host.supportsTransferList);
            slot.host.postMessage(message, transferList);
        }
        catch (error) {
            timeout.cancel();
            this.clearAbortListener(task.signal, task.abortListener);
            this.inflight.delete(task.id);
            this.markIdle(workerIndex);
            this.finalizeTask(task.context, () => {
                task.reject(error instanceof Error
                    ? error
                    : new Error('Failed to dispatch transform worker message'));
            });
            this.restartWorker(workerIndex, slot);
        }
    }
    /**
     * Runs a settle callback inside the task's captured async context and then
     * disposes the context, even if the callback throws.
     */
    finalizeTask(context, fn) {
        try {
            context.run(fn);
        }
        finally {
            context.dispose();
        }
    }
    /**
     * Finds the queue position of a pending task.
     *
     * @returns The index into `queue`, or null when the task is not queued.
     */
    findQueuedIndex(id) {
        for (let i = this.queueHead; i < this.queue.length; i += 1) {
            const task = this.queue[i];
            if (task?.id === id)
                return i;
        }
        return null;
    }
    /**
     * Drops already-consumed entries from the front of the queue once the
     * queue is fully consumed, or once more than 1024 entries and more than
     * half of the array are consumed.
     */
    maybeCompactQueue() {
        if (this.queueHead === 0)
            return;
        if (this.queueHead >= this.queue.length ||
            (this.queueHead > 1024 && this.queueHead > this.queue.length / 2)) {
            this.queue.splice(0, this.queueHead);
            this.queueHead = 0;
        }
    }
}
730
+ // ---------------------------------------------------------------------------
731
+ // Pool singleton management
732
+ // ---------------------------------------------------------------------------
733
// Lazily created module-wide pool instance; null until first use and again
// after shutdown.
let workerPool = null;
/**
 * Picks the worker host factory according to `config.transform.workerMode`:
 * the process-based factory for 'process', the thread-based one otherwise.
 */
function resolveWorkerSpawner() {
    if (config.transform.workerMode === 'process') {
        return createProcessWorkerHost;
    }
    return createThreadWorkerHost;
}
739
/**
 * Returns the shared worker pool, creating it on first use.
 *
 * The initial size is 0 when `config.transform.maxWorkerScale` is 0 and
 * POOL_MIN_WORKERS otherwise.
 */
export function getOrCreateWorkerPool() {
    const initialSize = config.transform.maxWorkerScale === 0 ? 0 : POOL_MIN_WORKERS;
    if (workerPool === null || workerPool === undefined) {
        workerPool = new WorkerPool(initialSize, DEFAULT_TIMEOUT_MS, resolveWorkerSpawner());
    }
    return workerPool;
}
744
/**
 * Reports a snapshot of the shared pool's load.
 *
 * @returns `{ queueDepth, activeWorkers, capacity }`, or null when no pool
 *   currently exists.
 */
export function getWorkerPoolStats() {
    const pool = workerPool;
    return pool
        ? {
            queueDepth: pool.getQueueDepth(),
            activeWorkers: pool.getActiveWorkers(),
            capacity: pool.getCapacity(),
        }
        : null;
}
753
/**
 * Closes the shared worker pool, if one exists, and clears the module-level
 * reference once closing has finished.
 */
export async function shutdownWorkerPool() {
    const pool = workerPool;
    if (!pool) {
        return;
    }
    await pool.close();
    workerPool = null;
}
759
+ //# sourceMappingURL=worker-pool.js.map