@j0hanz/fetch-url-mcp 1.12.7 → 1.12.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. package/dist/http/auth.d.ts +2 -2
  2. package/dist/http/auth.d.ts.map +1 -1
  3. package/dist/http/auth.js +4 -5
  4. package/dist/http/index.d.ts +6 -0
  5. package/dist/http/index.d.ts.map +1 -0
  6. package/dist/http/index.js +5 -0
  7. package/dist/http/native.d.ts +73 -0
  8. package/dist/http/native.d.ts.map +1 -1
  9. package/dist/http/native.js +554 -10
  10. package/dist/http/rate-limit.d.ts +1 -1
  11. package/dist/http/rate-limit.d.ts.map +1 -1
  12. package/dist/http/rate-limit.js +3 -4
  13. package/dist/index.d.ts +17 -0
  14. package/dist/index.d.ts.map +1 -1
  15. package/dist/index.js +67 -6
  16. package/dist/lib/config.js +2 -2
  17. package/dist/lib/core.d.ts +56 -4
  18. package/dist/lib/core.d.ts.map +1 -1
  19. package/dist/lib/core.js +155 -4
  20. package/dist/lib/error/classes.d.ts +19 -0
  21. package/dist/lib/error/classes.d.ts.map +1 -0
  22. package/dist/lib/error/classes.js +107 -0
  23. package/dist/lib/error/classify.d.ts +4 -0
  24. package/dist/lib/error/classify.d.ts.map +1 -0
  25. package/dist/lib/error/classify.js +154 -0
  26. package/dist/lib/error/codes.d.ts +23 -0
  27. package/dist/lib/error/codes.d.ts.map +1 -0
  28. package/dist/lib/error/codes.js +22 -0
  29. package/dist/lib/error/index.d.ts +6 -0
  30. package/dist/lib/error/index.d.ts.map +1 -0
  31. package/dist/lib/error/index.js +5 -0
  32. package/dist/lib/{error-messages.d.ts → error/messages.d.ts} +2 -2
  33. package/dist/lib/error/messages.d.ts.map +1 -0
  34. package/dist/lib/{error-messages.js → error/messages.js} +2 -2
  35. package/dist/lib/{tool-errors.d.ts → error/payload.d.ts} +7 -13
  36. package/dist/lib/error/payload.d.ts.map +1 -0
  37. package/dist/lib/error/payload.js +108 -0
  38. package/dist/lib/mcp-interop.d.ts.map +1 -1
  39. package/dist/lib/mcp-interop.js +4 -6
  40. package/dist/lib/net/http.d.ts.map +1 -0
  41. package/dist/lib/{http.js → net/http.js} +4 -7
  42. package/dist/lib/net/index.d.ts +4 -0
  43. package/dist/lib/net/index.d.ts.map +1 -0
  44. package/dist/lib/net/index.js +3 -0
  45. package/dist/lib/{fetch-pipeline.d.ts → net/pipeline.d.ts} +3 -3
  46. package/dist/lib/net/pipeline.d.ts.map +1 -0
  47. package/dist/lib/{fetch-pipeline.js → net/pipeline.js} +3 -5
  48. package/dist/lib/{url.d.ts → net/url.d.ts} +1 -1
  49. package/dist/lib/net/url.d.ts.map +1 -0
  50. package/dist/lib/{url.js → net/url.js} +3 -5
  51. package/dist/lib/utils.d.ts +2 -18
  52. package/dist/lib/utils.d.ts.map +1 -1
  53. package/dist/lib/utils.js +29 -104
  54. package/dist/resources/index.d.ts.map +1 -1
  55. package/dist/resources/index.js +8 -5
  56. package/dist/schemas.d.ts +1 -1
  57. package/dist/server.d.ts.map +1 -1
  58. package/dist/server.js +7 -9
  59. package/dist/tasks/index.d.ts +2 -0
  60. package/dist/tasks/index.d.ts.map +1 -0
  61. package/dist/tasks/index.js +1 -0
  62. package/dist/tasks/manager.d.ts +123 -1
  63. package/dist/tasks/manager.d.ts.map +1 -1
  64. package/dist/tasks/manager.js +745 -10
  65. package/dist/tools/{fetch-url.d.ts → index.d.ts} +4 -5
  66. package/dist/tools/index.d.ts.map +1 -0
  67. package/dist/tools/{fetch-url.js → index.js} +6 -8
  68. package/dist/transform/index.d.ts +279 -0
  69. package/dist/transform/index.d.ts.map +1 -0
  70. package/dist/transform/index.js +5234 -0
  71. package/package.json +2 -2
  72. package/dist/cli.d.ts +0 -19
  73. package/dist/cli.d.ts.map +0 -1
  74. package/dist/cli.js +0 -65
  75. package/dist/http/health.d.ts +0 -8
  76. package/dist/http/health.d.ts.map +0 -1
  77. package/dist/http/health.js +0 -152
  78. package/dist/http/helpers.d.ts +0 -68
  79. package/dist/http/helpers.d.ts.map +0 -1
  80. package/dist/http/helpers.js +0 -402
  81. package/dist/lib/error-codes.d.ts +0 -13
  82. package/dist/lib/error-codes.d.ts.map +0 -1
  83. package/dist/lib/error-codes.js +0 -12
  84. package/dist/lib/error-messages.d.ts.map +0 -1
  85. package/dist/lib/fetch-pipeline.d.ts.map +0 -1
  86. package/dist/lib/http.d.ts.map +0 -1
  87. package/dist/lib/logger-names.d.ts +0 -16
  88. package/dist/lib/logger-names.d.ts.map +0 -1
  89. package/dist/lib/logger-names.js +0 -15
  90. package/dist/lib/session.d.ts +0 -44
  91. package/dist/lib/session.d.ts.map +0 -1
  92. package/dist/lib/session.js +0 -137
  93. package/dist/lib/tool-errors.d.ts.map +0 -1
  94. package/dist/lib/tool-errors.js +0 -253
  95. package/dist/lib/url.d.ts.map +0 -1
  96. package/dist/lib/zod.d.ts +0 -3
  97. package/dist/lib/zod.d.ts.map +0 -1
  98. package/dist/lib/zod.js +0 -27
  99. package/dist/tasks/call-contract.d.ts +0 -25
  100. package/dist/tasks/call-contract.d.ts.map +0 -1
  101. package/dist/tasks/call-contract.js +0 -59
  102. package/dist/tasks/execution.d.ts +0 -16
  103. package/dist/tasks/execution.d.ts.map +0 -1
  104. package/dist/tasks/execution.js +0 -241
  105. package/dist/tasks/handlers.d.ts +0 -11
  106. package/dist/tasks/handlers.d.ts.map +0 -1
  107. package/dist/tasks/handlers.js +0 -157
  108. package/dist/tasks/owner.d.ts +0 -43
  109. package/dist/tasks/owner.d.ts.map +0 -1
  110. package/dist/tasks/owner.js +0 -144
  111. package/dist/tasks/registry.d.ts +0 -20
  112. package/dist/tasks/registry.d.ts.map +0 -1
  113. package/dist/tasks/registry.js +0 -40
  114. package/dist/tasks/waiters.d.ts +0 -27
  115. package/dist/tasks/waiters.d.ts.map +0 -1
  116. package/dist/tasks/waiters.js +0 -114
  117. package/dist/tools/fetch-url.d.ts.map +0 -1
  118. package/dist/transform/dom-prep.d.ts +0 -16
  119. package/dist/transform/dom-prep.d.ts.map +0 -1
  120. package/dist/transform/dom-prep.js +0 -1287
  121. package/dist/transform/html-translators.d.ts +0 -5
  122. package/dist/transform/html-translators.d.ts.map +0 -1
  123. package/dist/transform/html-translators.js +0 -697
  124. package/dist/transform/markdown-cleanup.d.ts +0 -10
  125. package/dist/transform/markdown-cleanup.d.ts.map +0 -1
  126. package/dist/transform/markdown-cleanup.js +0 -542
  127. package/dist/transform/metadata.d.ts +0 -18
  128. package/dist/transform/metadata.d.ts.map +0 -1
  129. package/dist/transform/metadata.js +0 -462
  130. package/dist/transform/next-flight.d.ts +0 -2
  131. package/dist/transform/next-flight.d.ts.map +0 -1
  132. package/dist/transform/next-flight.js +0 -374
  133. package/dist/transform/shared.d.ts +0 -8
  134. package/dist/transform/shared.d.ts.map +0 -1
  135. package/dist/transform/shared.js +0 -137
  136. package/dist/transform/transform.d.ts +0 -38
  137. package/dist/transform/transform.d.ts.map +0 -1
  138. package/dist/transform/transform.js +0 -1042
  139. package/dist/transform/types.d.ts +0 -124
  140. package/dist/transform/types.d.ts.map +0 -1
  141. package/dist/transform/types.js +0 -5
  142. package/dist/transform/worker-pool.d.ts +0 -76
  143. package/dist/transform/worker-pool.d.ts.map +0 -1
  144. package/dist/transform/worker-pool.js +0 -725
  145. /package/dist/lib/{http.d.ts → net/http.d.ts} +0 -0
@@ -1,725 +0,0 @@
1
- import { AsyncLocalStorage } from 'node:async_hooks';
2
- import { availableParallelism } from 'node:os';
3
- import process from 'node:process';
4
- import { isSharedArrayBuffer } from 'node:util/types';
5
- import { isMainThread, isMarkedAsUntransferable, parentPort, Worker, } from 'node:worker_threads';
6
- import { z } from 'zod';
7
- import { config, logDebug, logInfo, logWarn } from '../lib/core.js';
8
- import { SystemErrors } from '../lib/error-codes.js';
9
- import { Loggers } from '../lib/logger-names.js';
10
- import { createAbortError, createUnrefTimeout, FetchError, getErrorMessage, } from '../lib/utils.js';
11
- import { formatZodError } from '../lib/zod.js';
12
- import { extractedMetadataSchema } from '../schemas.js';
13
- import { createTransformMessageHandler } from './shared.js';
14
- import { transformHtmlToMarkdownInProcess } from './transform.js';
15
// Worker message validation
// Messages received from a worker are parsed with these schemas before the
// pool acts on them.

// Serialized error reported by a worker for a failed transform.
const workerErrorPayloadSchema = z.strictObject({
  name: z.string(),
  message: z.string(),
  url: z.string(),
  statusCode: z.number().int().optional(),
  details: z.record(z.string(), z.unknown()).optional(),
});

// Output of a successful transform.
const workerResultPayloadSchema = z.strictObject({
  markdown: z.string(),
  title: z.string().optional(),
  metadata: extractedMetadataSchema.optional(),
  truncated: z.boolean(),
});

// Envelope for every worker -> pool message, discriminated on `type`:
//   'result'    - transform finished, carries the payload
//   'error'     - transform failed, carries the serialized error
//   'cancelled' - worker acknowledged a cancel request for `id`
const workerResponseSchema = z.discriminatedUnion('type', [
  z.strictObject({
    type: z.literal('result'),
    id: z.string(),
    result: workerResultPayloadSchema,
  }),
  z.strictObject({
    type: z.literal('error'),
    id: z.string(),
    error: workerErrorPayloadSchema,
  }),
  z.strictObject({
    type: z.literal('cancelled'),
    id: z.string(),
  }),
]);
45
/**
 * Captures the async context that is active right now so that callbacks fired
 * later (for example from worker events) can run inside it.
 *
 * @returns {{ run: (fn: () => void) => void }} `run` invokes `fn` inside the
 *   captured context; the return value of `fn` is discarded.
 */
function createTaskContext() {
  const restoreContext = AsyncLocalStorage.snapshot();
  const run = (fn) => {
    restoreContext(fn);
  };
  return { run };
}
53
/**
 * Returns a byte array whose backing buffer contains exactly the view's bytes.
 *
 * A view that already starts at offset 0 and spans its whole backing buffer is
 * returned unchanged; any other view is copied into a fresh, exactly-sized
 * Uint8Array.
 *
 * @param {Uint8Array} buffer - Source bytes (possibly a window into a larger buffer).
 * @returns {Uint8Array} The same view, or a tight copy of it.
 */
function ensureTightBuffer(buffer) {
  const spansWholeBacking =
    buffer.byteOffset === 0 && buffer.byteLength === buffer.buffer.byteLength;
  return spansWholeBacking ? buffer : new Uint8Array(buffer);
}
61
- function getTransferableBuffer(buffer) {
62
- const backingBuffer = buffer.buffer;
63
- if (isSharedArrayBuffer(backingBuffer))
64
- return null;
65
- if (!(backingBuffer instanceof ArrayBuffer))
66
- return null;
67
- return isMarkedAsUntransferable(backingBuffer) ? null : backingBuffer;
68
- }
69
/**
 * Builds the 'transform' message for a worker and, when possible, the transfer
 * list that hands the HTML bytes over without copying.
 *
 * @param {object} task - Pending task; reads `id`, `url`,
 *   `includeMetadataFooter`, `inputTruncated`, and either `html` or
 *   `htmlBuffer` (+ optional `encoding`).
 * @returns {{ message: object, transferList?: ArrayBuffer[] }} `transferList`
 *   is present only when the buffer's backing store is transferable.
 */
function buildWorkerDispatchPayload(task) {
  const { id, url, includeMetadataFooter } = task;
  const message = { type: 'transform', id, url, includeMetadataFooter };
  if (task.inputTruncated) {
    message.inputTruncated = true;
  }

  // String input: nothing to transfer.
  if (!task.htmlBuffer) {
    message.html = task.html;
    return { message };
  }

  // Binary input: tighten the view first so a transfer moves only these bytes.
  const tightBytes = ensureTightBuffer(task.htmlBuffer);
  message.htmlBuffer = tightBytes;
  if (task.encoding) {
    message.encoding = task.encoding;
  }

  const transferable = getTransferableBuffer(tightBytes);
  if (!transferable) {
    return { message };
  }
  return { message, transferList: [transferable] };
}
90
// Pool sizing & constants

// HTTP status codes attached to errors raised by the pool.
const HTTP_SERVICE_UNAVAILABLE = 503;
const HTTP_GATEWAY_TIMEOUT = 504;

// Queue tuning: the queue holds up to (max workers x multiplier) tasks, and the
// consumed prefix is spliced away only once it grows past the head threshold.
const QUEUE_CAPACITY_MULTIPLIER = 4;
const COMPACTION_HEAD_THRESHOLD = 1024;

// Worker identity: workers are named `<prefix>-<n>` and re-execute this module.
const WORKER_NAME_PREFIX = 'fetch-url-mcp-transform';
const TRANSFORM_WORKER_PATH = new URL(import.meta.url);

// Core tuning: baseline is half of the available CPUs clamped to [2, 4]; the
// upper bound and the per-task timeout come from config.
const POOL_MIN_WORKERS = Math.min(4, Math.max(2, Math.floor(availableParallelism() / 2)));
const POOL_MAX_WORKERS = config.transform.maxWorkerScale;
const POOL_SCALE_THRESHOLD = 0.5;
const DEFAULT_TIMEOUT_MS = config.transform.timeoutMs;
102
// TaskQueue — array-deque with auto-compaction
/**
 * FIFO queue backed by a plain array plus a moving `head` index.
 *
 * Removing an item by id leaves an `undefined` hole that `dequeue` skips, and
 * `activeCount` tracks how many live items remain. The consumed prefix is
 * spliced off lazily by `compact`.
 */
class TaskQueue {
  items = [];
  head = 0;
  activeCount = 0;

  /** Number of live (not yet dequeued or removed) items. */
  get depth() {
    return this.activeCount;
  }

  /** Appends `item` to the tail of the queue. */
  enqueue(item) {
    this.activeCount += 1;
    this.items.push(item);
  }

  /**
   * Removes and returns the oldest live item, skipping holes.
   * @returns {object | null} The item, or `null` when the queue is empty.
   */
  dequeue() {
    const { items } = this;
    let next = null;
    while (next === null && this.head < items.length) {
      const candidate = items[this.head];
      this.head += 1;
      if (candidate) {
        next = candidate;
        this.activeCount -= 1;
      }
    }
    this.compact();
    return next;
  }

  /**
   * Removes the first queued item whose `id` matches, leaving a hole behind.
   * @returns {object | undefined} The removed item, or `undefined` if absent.
   */
  removeById(id) {
    const { items } = this;
    let index = this.head;
    while (index < items.length && items[index]?.id !== id) {
      index += 1;
    }
    if (index >= items.length) {
      return undefined;
    }
    const removed = items[index];
    items[index] = undefined;
    this.activeCount -= 1;
    this.compact();
    return removed;
  }

  /** Calls `callback` for every live item in order, then empties the queue. */
  drain(callback) {
    let cursor = this.head;
    while (cursor < this.items.length) {
      const item = this.items[cursor];
      cursor += 1;
      if (item) {
        callback(item);
      }
    }
    this.activeCount = 0;
    this.head = 0;
    this.items.length = 0;
  }

  /**
   * Drops the consumed prefix when everything has been consumed, or when the
   * prefix is both past the threshold and more than half of the array.
   */
  compact() {
    const { head, items } = this;
    if (head === 0) {
      return;
    }
    const shouldCompact =
      head >= items.length ||
      (head > COMPACTION_HEAD_THRESHOLD && head > items.length / 2);
    if (!shouldCompact) {
      return;
    }
    items.splice(0, head);
    this.head = 0;
  }
}
161
// CancelAckTracker — isolates the cancel-acknowledgement protocol
/**
 * Tracks acknowledgements for cancel requests, keyed by task id.
 *
 * `wait(id)` returns a promise that settles when `resolve(id)` is called or
 * when the timeout promise settles, whichever comes first. An acknowledgement
 * that arrives before anyone waits is remembered and consumed by the next
 * `wait` for that id.
 */
class CancelAckTracker {
  pending = new Map();
  earlyResolutions = new Set();

  /** Records an acknowledgement for `id`, waking a waiter if there is one. */
  resolve(id) {
    const waiter = this.pending.get(id);
    if (waiter) {
      waiter.timeout.cancel();
      waiter.resolve();
      return;
    }
    this.earlyResolutions.add(id);
  }

  /**
   * Waits for the acknowledgement of `id`, for at most `timeoutMs`.
   * Concurrent waits on the same id share a single promise.
   * @returns {Promise<void>}
   */
  wait(id, timeoutMs) {
    // Set#delete reports whether the ack had already arrived.
    if (this.earlyResolutions.delete(id)) {
      return Promise.resolve();
    }
    const inProgress = this.pending.get(id);
    if (inProgress) {
      return inProgress.promise;
    }

    const timeout = createUnrefTimeout(timeoutMs, undefined);
    let acknowledge;
    const acknowledged = new Promise((resolveAck) => {
      acknowledge = resolveAck;
    });
    const forget = () => {
      this.pending.delete(id);
      timeout.cancel();
    };
    const promise = Promise.race([acknowledged, timeout.promise])
      .finally(forget)
      .then(() => undefined);

    this.pending.set(id, { promise, resolve: acknowledge, timeout });
    return promise;
  }

  /** Releases every waiter and forgets all recorded acknowledgements. */
  dispose() {
    this.pending.forEach((waiter) => {
      waiter.timeout.cancel();
      waiter.resolve();
    });
    this.pending.clear();
    this.earlyResolutions.clear();
  }
}
208
// WorkerPool
/**
 * Pool of worker threads that run HTML -> Markdown transforms.
 *
 * Tasks are queued, dispatched to idle workers, and settled from worker
 * messages. A worker is replaced when it errors, exits, sends an invalid
 * message, times out, or has a task aborted; repeated restarts of the same
 * slot are delayed with exponential backoff.
 *
 * Worker events are only honoured while the emitting worker still occupies
 * its slot. Without that check, the 'exit' event of a worker that was just
 * terminated and replaced would be attributed to its replacement and tear the
 * replacement down as well.
 */
class WorkerPool {
  static CLOSED_MESSAGE = 'Transform worker pool closed';
  // Slots by worker index. A slot is `undefined` while a delayed restart is
  // pending for it (and after close()).
  workers = [];
  capacity;
  minCapacity = POOL_MIN_WORKERS;
  maxCapacity = POOL_MAX_WORKERS;
  queue = new TaskQueue();
  // taskId -> bookkeeping for a task currently assigned to a worker.
  inflight = new Map();
  cancelAcks = new CancelAckTracker();
  timeoutMs;
  queueMax;
  closed = false;
  taskIdSeq = 0;
  busyCount = 0;
  // Re-entrancy guard for drainQueue().
  draining = false;
  // workerIndex -> number of restarts since the slot last delivered a result.
  restartBackoff = new Map();

  /**
   * @param {number} size - Requested capacity; 0 keeps capacity at 0, any
   *   other value is clamped to [minCapacity, maxCapacity].
   * @param {number} timeoutMs - Per-task timeout once dispatched to a worker.
   */
  constructor(size, timeoutMs) {
    this.capacity =
      size === 0
        ? 0
        : Math.max(this.minCapacity, Math.min(size, this.maxCapacity));
    this.timeoutMs = timeoutMs;
    this.queueMax = this.maxCapacity * QUEUE_CAPACITY_MULTIPLIER;
  }

  /**
   * Queues a transform and resolves with the worker's result.
   *
   * @param {string | Uint8Array} htmlOrBuffer - HTML text or raw bytes.
   * @param {string} url - Source URL, forwarded to the worker and to errors.
   * @param {object} options - Reads `signal`, `includeMetadataFooter`,
   *   `inputTruncated`, and `encoding`.
   * @returns {Promise<object>} `{ markdown, truncated, title, metadata? }`.
   * @throws Rejects when the pool is closed, the signal is already aborted,
   *   or the queue is full (FetchError 503).
   */
  async transform(htmlOrBuffer, url, options) {
    this.ensureOpen();
    if (options.signal?.aborted) {
      throw createAbortError(url, 'transform:enqueue');
    }
    if (this.queue.depth >= this.queueMax) {
      logWarn('Transform worker queue capacity reached', {
        queueDepth: this.queue.depth,
        queueMax: this.queueMax,
        activeWorkers: this.busyCount,
        capacity: this.capacity,
        url,
      }, Loggers.LOG_TRANSFORM);
      throw new FetchError('Transform worker queue is full', url, HTTP_SERVICE_UNAVAILABLE, {
        reason: SystemErrors.QUEUE_FULL,
        stage: 'transform:enqueue',
      });
    }
    return new Promise((resolve, reject) => {
      const task = this.createPendingTask(htmlOrBuffer, url, options, resolve, reject);
      this.queue.enqueue(task);
      this.drainQueue();
    });
  }

  /** Number of tasks waiting for a worker. */
  getQueueDepth() {
    return this.queue.depth;
  }

  /** Number of workers currently assigned a task. */
  getActiveWorkers() {
    return this.busyCount;
  }

  /** Current maximum number of worker slots. */
  getCapacity() {
    return this.capacity;
  }

  /** Sets capacity (clamped to [minCapacity, maxCapacity]) and re-drains. */
  resize(size) {
    const newCapacity = Math.max(this.minCapacity, Math.min(size, this.maxCapacity));
    if (newCapacity === this.capacity) {
      return;
    }
    this.capacity = newCapacity;
    this.drainQueue();
  }

  /**
   * Terminates every worker and rejects all inflight and queued tasks with
   * CLOSED_MESSAGE. Idempotent.
   */
  async close() {
    if (this.closed) {
      return;
    }
    this.closed = true;
    logInfo('Shutting down transform worker pool', {
      workers: this.workers.length,
      activeWorkers: this.busyCount,
      queueDepth: this.queue.depth,
      inflight: this.inflight.size,
    }, Loggers.LOG_TRANSFORM);
    const terminations = this.workers
      .map((slot) => slot?.worker.terminate().catch(() => undefined))
      .filter((p) => p !== undefined);
    this.workers.fill(undefined);
    this.workers.length = 0;
    this.busyCount = 0;
    this.cancelAcks.dispose();
    for (const id of Array.from(this.inflight.keys())) {
      const inflight = this.takeInflight(id);
      if (!inflight) {
        continue;
      }
      this.abortAndCleanTask(inflight, new Error(WorkerPool.CLOSED_MESSAGE));
    }
    this.queue.drain((task) => {
      this.abortAndCleanTask(task, new Error(WorkerPool.CLOSED_MESSAGE));
    });
    await Promise.allSettled(terminations);
  }

  /** Throws when the pool has been closed. */
  ensureOpen() {
    if (this.closed) {
      throw new Error(WorkerPool.CLOSED_MESSAGE);
    }
  }

  /** Builds the queued-task record and wires up the caller's abort signal. */
  createPendingTask(htmlOrBuffer, url, options, resolve, reject) {
    const id = (this.taskIdSeq++).toString(36);
    // Preserve request context for resolve/reject even when callbacks fire
    // from worker thread events.
    const context = createTaskContext();
    let abortListener;
    if (options.signal) {
      abortListener = () => {
        this.onAbortSignal(id, url, context, reject);
      };
      options.signal.addEventListener('abort', abortListener, { once: true });
    }
    const task = {
      id,
      url,
      includeMetadataFooter: options.includeMetadataFooter,
      ...(options.inputTruncated
        ? { inputTruncated: options.inputTruncated }
        : {}),
      signal: options.signal,
      abortListener,
      context,
      resolve,
      reject,
    };
    if (typeof htmlOrBuffer === 'string') {
      task.html = htmlOrBuffer;
    } else {
      task.htmlBuffer = htmlOrBuffer;
      if (options.encoding) {
        task.encoding = options.encoding;
      }
    }
    return task;
  }

  /** Abort-signal handler: cancels the task wherever it currently is. */
  onAbortSignal(id, url, context, reject) {
    if (this.closed) {
      this.finalizeTask(context, () => {
        reject(new Error(WorkerPool.CLOSED_MESSAGE));
      });
      return;
    }
    const inflight = this.inflight.get(id);
    if (inflight) {
      // Fire-and-forget, but never leave the rejection unhandled: make sure
      // the task still settles if the abort sequence itself fails.
      this.abortInflight(id, url, inflight.workerIndex).catch((error) => {
        logWarn('Transform abort handling failed', {
          taskId: id,
          url,
          error: getErrorMessage(error),
        }, Loggers.LOG_TRANSFORM);
        this.failTask(id, createAbortError(url, 'transform:signal-abort'));
      });
      return;
    }
    const queuedTask = this.queue.removeById(id);
    if (queuedTask) {
      this.abortAndCleanTask(queuedTask, createAbortError(url, 'transform:queued-abort'));
    }
  }

  /**
   * Asks the worker to cancel, waits (bounded) for its acknowledgement, then
   * rejects the task and replaces the worker.
   */
  async abortInflight(id, url, workerIndex) {
    const slot = this.workers[workerIndex];
    const inflight = this.inflight.get(id);
    if (inflight) {
      // From here on any worker message for this id counts as the ack.
      inflight.cancelPending = true;
    }
    if (slot) {
      try {
        slot.worker.postMessage({ type: 'cancel', id });
      } catch {
        // Worker may be unavailable; failure is acceptable during abort
      }
    }
    await this.cancelAcks.wait(id, config.transform.cancelAckTimeoutMs);
    const taken = this.failTask(id, createAbortError(url, 'transform:signal-abort'));
    if (taken && slot) {
      this.restartWorker(workerIndex, slot);
    }
  }

  /** Detaches a task's abort listener, if it has one. */
  clearAbortListener(signal, listener) {
    if (!signal || !listener) {
      return;
    }
    try {
      signal.removeEventListener('abort', listener);
    } catch {
      // Defensive: removeEventListener should not throw, but handle edge cases
    }
  }

  /** True while `worker` is the worker occupying slot `workerIndex`. */
  isCurrentWorker(workerIndex, worker) {
    return this.workers[workerIndex]?.worker === worker;
  }

  /**
   * Starts a worker for slot `workerIndex` and returns its slot record. The
   * caller is responsible for storing the record in `this.workers`.
   */
  spawnWorker(workerIndex) {
    const name = `${WORKER_NAME_PREFIX}-${workerIndex + 1}`;
    const resourceLimits = config.transform.workerResourceLimits;
    const worker = new Worker(TRANSFORM_WORKER_PATH, {
      name,
      ...(resourceLimits ? { resourceLimits } : {}),
    });
    logDebug('Spawned transform worker', {
      workerIndex,
      workerName: name,
    }, Loggers.LOG_TRANSFORM);
    worker.unref();
    // Every handler checks identity first: a replaced worker still emits
    // events (notably 'exit' after terminate()), and those must not be
    // applied to whichever worker now lives in the same slot.
    worker.on('message', (raw) => {
      if (this.isCurrentWorker(workerIndex, worker)) {
        this.onWorkerMessage(workerIndex, raw);
      }
    });
    worker.on('error', (error) => {
      if (this.isCurrentWorker(workerIndex, worker)) {
        this.onWorkerBroken(workerIndex, `Transform worker error: ${getErrorMessage(error)}`);
      }
    });
    worker.on('messageerror', (error) => {
      if (this.isCurrentWorker(workerIndex, worker)) {
        this.onWorkerBroken(workerIndex, `Transform worker error: ${getErrorMessage(error)}`);
      }
    });
    worker.on('exit', (code) => {
      if (this.isCurrentWorker(workerIndex, worker)) {
        this.onWorkerBroken(workerIndex, `Transform worker exited (code ${code ?? 'unknown'})`);
      }
    });
    return { worker, busy: false, currentTaskId: null, name };
  }

  /** Fails the slot's current task (if any) and replaces the worker. */
  onWorkerBroken(workerIndex, message) {
    if (this.closed) {
      return;
    }
    const slot = this.workers[workerIndex];
    if (!slot) {
      return;
    }
    logWarn('Transform worker unavailable; restarting', {
      reason: message,
      workerIndex,
      workerName: slot.name,
      threadId: slot.worker.threadId,
    }, Loggers.LOG_TRANSFORM);
    if (slot.busy && slot.currentTaskId) {
      // Report the URL of the task that was lost, when it is still known.
      const url = this.inflight.get(slot.currentTaskId)?.url ?? '';
      try {
        this.failTask(slot.currentTaskId, new FetchError(message, url, HTTP_SERVICE_UNAVAILABLE, {
          reason: 'worker_exit',
        }));
      } catch {
        this.markIdle(workerIndex);
      }
    }
    this.restartWorker(workerIndex, slot);
  }

  /**
   * Terminates the given slot's worker and installs a replacement: at once
   * for the first restart, after an exponential delay (1s, 2s, ... capped at
   * 30s) for consecutive restarts of the same slot.
   */
  restartWorker(workerIndex, slot) {
    if (this.closed) {
      return;
    }
    const current = this.workers[workerIndex];
    const target = slot ?? current;
    target?.worker.terminate().catch(() => undefined);
    // The slot has already been handed to another worker; leave it alone.
    if (target && target !== current) {
      return;
    }
    // Keep busyCount in step before the slot record is discarded.
    this.markIdle(workerIndex);
    const attempts = this.restartBackoff.get(workerIndex) ?? 0;
    this.restartBackoff.set(workerIndex, attempts + 1);
    if (attempts === 0) {
      this.workers[workerIndex] = this.spawnWorker(workerIndex);
      this.drainQueue();
      return;
    }
    // Vacate the slot during the delay so drainQueue() cannot dispatch work
    // to the terminated worker.
    this.workers[workerIndex] = undefined;
    const delayMs = Math.min(1000 * 2 ** (attempts - 1), 30_000);
    logWarn('Scheduling transform worker restart with backoff', {
      workerIndex,
      delayMs,
      attempt: attempts + 1,
    }, Loggers.LOG_TRANSFORM);
    setTimeout(() => {
      // Skip if the pool closed or the slot was refilled in the meantime.
      if (this.closed || this.workers[workerIndex]) {
        return;
      }
      this.workers[workerIndex] = this.spawnWorker(workerIndex);
      this.drainQueue();
    }, delayMs).unref();
  }

  /** Validates a worker message and settles the matching task. */
  onWorkerMessage(workerIndex, raw) {
    const parsed = workerResponseSchema.safeParse(raw);
    if (!parsed.success) {
      this.onWorkerBroken(workerIndex, `Transform worker sent invalid message: ${formatZodError(parsed.error)}`);
      return;
    }
    const message = parsed.data;
    if (message.type === 'cancelled') {
      this.cancelAcks.resolve(message.id);
      return;
    }
    // A result/error for a task being cancelled only serves as the ack;
    // abortInflight() rejects the task itself.
    const inflightPeek = this.inflight.get(message.id);
    if (inflightPeek?.cancelPending) {
      this.cancelAcks.resolve(message.id);
      return;
    }
    const inflight = this.takeInflight(message.id);
    if (!inflight) {
      return;
    }
    // A delivered response proves the slot is healthy again.
    this.restartBackoff.delete(workerIndex);
    this.markIdle(workerIndex);
    this.resolveWorkerResult(inflight, message);
    this.drainQueue();
  }

  /** Resolves or rejects the task from a 'result' / 'error' message. */
  resolveWorkerResult(inflight, message) {
    this.finalizeTask(inflight.context, () => {
      if (message.type === 'result') {
        inflight.resolve({
          markdown: message.result.markdown,
          truncated: message.result.truncated,
          title: message.result.title,
          ...(message.result.metadata
            ? { metadata: message.result.metadata }
            : {}),
        });
        return;
      }
      const err = message.error;
      inflight.reject(err.name === 'FetchError'
        ? new FetchError(err.message, err.url, err.statusCode, err.details ?? {})
        : new Error(err.message));
    });
  }

  /**
   * Removes the inflight record for `id`, cancelling its timeout and abort
   * listener. Returns the record, or `null` if the task is not inflight.
   */
  takeInflight(id) {
    const inflight = this.inflight.get(id);
    if (!inflight) {
      return null;
    }
    inflight.timeout.cancel();
    this.clearAbortListener(inflight.signal, inflight.abortListener);
    this.inflight.delete(id);
    return inflight;
  }

  /** Marks the slot busy with `taskId` (counted once per busy period). */
  markBusy(workerIndex, taskId) {
    const slot = this.workers[workerIndex];
    if (!slot) {
      return;
    }
    if (!slot.busy) {
      slot.busy = true;
      this.busyCount += 1;
    }
    slot.currentTaskId = taskId;
  }

  /** Marks the slot idle; safe to call repeatedly or on an empty slot. */
  markIdle(workerIndex) {
    const slot = this.workers[workerIndex];
    if (!slot) {
      return;
    }
    if (slot.busy) {
      slot.busy = false;
      this.busyCount -= 1;
    }
    slot.currentTaskId = null;
  }

  /**
   * Rejects the inflight task `id` with `error` and frees its worker slot.
   * @returns {boolean} `false` when the task was no longer inflight.
   */
  failTask(id, error) {
    const inflight = this.takeInflight(id);
    if (!inflight) {
      return false;
    }
    this.abortAndCleanTask(inflight, error);
    this.markIdle(inflight.workerIndex);
    return true;
  }

  /** Grows capacity by one when the backlog exceeds the scale threshold. */
  maybeScaleUp() {
    const backlog = this.getQueueDepth();
    if (backlog <= this.capacity * POOL_SCALE_THRESHOLD ||
      this.capacity >= this.maxCapacity) {
      return;
    }
    const previousCapacity = this.capacity;
    this.capacity += 1;
    logInfo('Scaled transform worker pool', {
      fromCapacity: previousCapacity,
      toCapacity: this.capacity,
      queueDepth: this.getQueueDepth(),
    }, Loggers.LOG_TRANSFORM);
  }

  /**
   * Hands queued tasks to idle workers, spawning at most one new worker per
   * pass (further spawns are deferred with setImmediate).
   */
  drainQueue() {
    if (this.closed || this.queue.depth === 0 || this.draining) {
      return;
    }
    this.draining = true;
    try {
      this.maybeScaleUp();
      for (let i = 0; i < this.workers.length; i += 1) {
        const slot = this.workers[i];
        if (slot && !slot.busy) {
          this.dispatchFromQueue(i, slot);
          if (this.queue.depth === 0) {
            return;
          }
        }
      }
      if (this.workers.length < this.capacity && this.queue.depth > 0) {
        const workerIndex = this.workers.length;
        const slot = this.spawnWorker(workerIndex);
        this.workers.push(slot);
        this.dispatchFromQueue(workerIndex, slot);
        if (this.workers.length < this.capacity && this.queue.depth > 0) {
          setImmediate(() => {
            this.drainQueue();
          });
        }
      }
    } finally {
      this.draining = false;
    }
  }

  /**
   * Sends the next non-aborted queued task to the given worker. Tasks whose
   * signal is already aborted are rejected and skipped.
   */
  dispatchFromQueue(workerIndex, slot) {
    let task = this.queue.dequeue();
    while (task) {
      if (this.closed) {
        this.abortAndCleanTask(task, new Error(WorkerPool.CLOSED_MESSAGE));
        return;
      }
      if (!task.signal?.aborted) {
        break;
      }
      this.abortAndCleanTask(task, createAbortError(task.url, 'transform:dispatch'));
      task = this.queue.dequeue();
    }
    if (!task) {
      return;
    }
    this.markBusy(workerIndex, task.id);
    const timeout = this.registerInflight(task, workerIndex, slot);
    this.sendToWorker(task, slot, workerIndex, timeout);
  }

  /**
   * Records the task as inflight and arms its timeout. On timeout the worker
   * is told to cancel, the task is rejected (FetchError 504), and the worker
   * is replaced.
   */
  registerInflight(task, workerIndex, slot) {
    const timeout = createUnrefTimeout(this.timeoutMs, null);
    void timeout.promise
      .then(() => {
        try {
          slot.worker.postMessage({ type: 'cancel', id: task.id });
        } catch {
          // Worker may be unavailable; proceed with timeout handling
        }
        const inflight = this.takeInflight(task.id);
        if (!inflight) {
          return;
        }
        logWarn('Transform worker task timed out', {
          taskId: task.id,
          url: task.url,
          workerIndex,
          timeoutMs: this.timeoutMs,
        }, Loggers.LOG_TRANSFORM);
        this.abortAndCleanTask(inflight, new FetchError('Request timeout', task.url, HTTP_GATEWAY_TIMEOUT, {
          reason: 'timeout',
          stage: 'transform:worker-timeout',
        }));
        this.markIdle(workerIndex);
        this.restartWorker(workerIndex, slot);
      })
      .catch((error) => {
        this.failTask(task.id, error);
      });
    this.inflight.set(task.id, {
      resolve: task.resolve,
      reject: task.reject,
      timeout,
      signal: task.signal,
      abortListener: task.abortListener,
      workerIndex,
      context: task.context,
      cancelPending: false,
      // Kept so a worker crash can be reported against the right URL.
      url: task.url,
    });
    return timeout;
  }

  /** Posts the task to the worker; on failure rejects it and restarts the worker. */
  sendToWorker(task, slot, workerIndex, timeout) {
    try {
      const { message, transferList } = buildWorkerDispatchPayload(task);
      slot.worker.postMessage(message, transferList);
    } catch (error) {
      timeout.cancel();
      this.inflight.delete(task.id);
      this.markIdle(workerIndex);
      this.abortAndCleanTask(task, error instanceof Error
        ? error
        : new Error('Failed to dispatch transform worker message'));
      this.restartWorker(workerIndex, slot);
    }
  }

  /** Runs `fn` inside the async context captured when the task was created. */
  finalizeTask(context, fn) {
    context.run(fn);
  }

  /** Detaches the abort listener and rejects the task with `error`. */
  abortAndCleanTask(task, error) {
    this.clearAbortListener(task.signal, task.abortListener);
    this.finalizeTask(task.context, () => {
      task.reject(error);
    });
  }
}
674
// Pool singleton management
let workerPool = null;

/**
 * Returns the shared transform worker pool, creating it on first use.
 *
 * The initial size is POOL_MIN_WORKERS, or 0 when
 * `config.transform.maxWorkerScale` is 0.
 *
 * @returns {WorkerPool} The process-wide pool instance.
 */
export function getOrCreateWorkerPool() {
  if (workerPool) {
    return workerPool;
  }
  const initialSize = config.transform.maxWorkerScale === 0 ? 0 : POOL_MIN_WORKERS;
  workerPool = new WorkerPool(initialSize, DEFAULT_TIMEOUT_MS);
  logInfo('Initialized transform worker pool', {
    initialCapacity: workerPool.getCapacity(),
    timeoutMs: DEFAULT_TIMEOUT_MS,
  }, Loggers.LOG_TRANSFORM);
  return workerPool;
}
687
/**
 * Reports the shared pool's current load without creating the pool.
 *
 * @returns {{ queueDepth: number, activeWorkers: number, capacity: number } | null}
 *   `null` when no pool exists.
 */
export function getWorkerPoolStats() {
  const pool = workerPool;
  return pool
    ? {
        queueDepth: pool.getQueueDepth(),
        activeWorkers: pool.getActiveWorkers(),
        capacity: pool.getCapacity(),
      }
    : null;
}
696
/**
 * Closes the shared transform worker pool, if one exists, and clears the
 * singleton so a later getOrCreateWorkerPool() builds a fresh pool.
 *
 * The singleton is cleared even when close() rejects: a pool whose close()
 * has started is already unusable, so keeping the reference would hand every
 * later caller a dead pool.
 *
 * @returns {Promise<void>} Settles once the pool has finished closing.
 * @throws Re-throws whatever the pool's close() rejects with.
 */
export async function shutdownWorkerPool() {
  const pool = workerPool;
  if (!pool) {
    return;
  }
  try {
    await pool.close();
  } finally {
    // Only clear the binding if it still refers to the pool we closed.
    if (workerPool === pool) {
      workerPool = null;
    }
  }
}
702
// Worker thread message handling
/**
 * Wires the transform message handler to whichever parent channel exists.
 *
 * Inside a worker thread the handler talks over `parentPort`; otherwise, when
 * `process.send` is available, it talks over the process message channel.
 * With neither channel present this is a no-op.
 */
function bootstrapWorkerThread() {
  let source;
  let sendMessage;
  if (!isMainThread && parentPort) {
    const port = parentPort;
    source = port;
    sendMessage = (message) => {
      port.postMessage(message);
    };
  } else if (process.send) {
    const send = process.send.bind(process);
    source = process;
    sendMessage = (message) => {
      send(message);
    };
  } else {
    return;
  }
  const onMessage = createTransformMessageHandler({
    sendMessage,
    runTransform: transformHtmlToMarkdownInProcess,
  });
  source.on('message', onMessage);
}

// Runs on import so that a worker spawned from this module starts listening.
bootstrapWorkerThread();