@j0hanz/fetch-url-mcp 1.9.1 → 1.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/http/auth.d.ts +0 -1
  2. package/dist/http/auth.d.ts.map +1 -1
  3. package/dist/http/auth.js +1 -13
  4. package/dist/http/native.d.ts.map +1 -1
  5. package/dist/http/native.js +2 -5
  6. package/dist/lib/content.d.ts.map +1 -1
  7. package/dist/lib/content.js +301 -350
  8. package/dist/lib/core.d.ts +78 -71
  9. package/dist/lib/core.d.ts.map +1 -1
  10. package/dist/lib/core.js +308 -372
  11. package/dist/lib/fetch-pipeline.d.ts +2 -6
  12. package/dist/lib/fetch-pipeline.d.ts.map +1 -1
  13. package/dist/lib/fetch-pipeline.js +51 -137
  14. package/dist/lib/http.d.ts.map +1 -1
  15. package/dist/lib/http.js +188 -130
  16. package/dist/lib/mcp-tools.d.ts +3 -5
  17. package/dist/lib/mcp-tools.d.ts.map +1 -1
  18. package/dist/lib/mcp-tools.js +22 -58
  19. package/dist/lib/task-handlers.js +4 -4
  20. package/dist/lib/utils.d.ts +6 -0
  21. package/dist/lib/utils.d.ts.map +1 -1
  22. package/dist/lib/utils.js +23 -0
  23. package/dist/resources/index.js +1 -1
  24. package/dist/schemas.d.ts +0 -1
  25. package/dist/schemas.d.ts.map +1 -1
  26. package/dist/schemas.js +4 -6
  27. package/dist/server.js +1 -1
  28. package/dist/tasks/owner.d.ts +1 -1
  29. package/dist/tasks/owner.d.ts.map +1 -1
  30. package/dist/tasks/tool-registry.d.ts +1 -1
  31. package/dist/tasks/tool-registry.d.ts.map +1 -1
  32. package/dist/tools/fetch-url.d.ts +2 -3
  33. package/dist/tools/fetch-url.d.ts.map +1 -1
  34. package/dist/tools/fetch-url.js +89 -152
  35. package/dist/transform/transform.d.ts +8 -0
  36. package/dist/transform/transform.d.ts.map +1 -1
  37. package/dist/transform/transform.js +109 -108
  38. package/dist/transform/worker-pool.d.ts +3 -6
  39. package/dist/transform/worker-pool.d.ts.map +1 -1
  40. package/dist/transform/worker-pool.js +148 -118
  41. package/package.json +2 -1
@@ -5,11 +5,8 @@ import process from 'node:process';
5
5
  import { isSharedArrayBuffer } from 'node:util/types';
6
6
  import { isMainThread, parentPort, Worker, } from 'node:worker_threads';
7
7
  import { z } from 'zod';
8
- import { config } from '../lib/core.js';
9
- import { logWarn } from '../lib/core.js';
10
- import { createAbortError } from '../lib/utils.js';
11
- import { FetchError, getErrorMessage } from '../lib/utils.js';
12
- import { createUnrefTimeout } from '../lib/utils.js';
8
+ import { config, logWarn } from '../lib/core.js';
9
+ import { createAbortError, createUnrefTimeout, FetchError, getErrorMessage, } from '../lib/utils.js';
13
10
  import { normalizeExtractedMetadata } from '../schemas.js';
14
11
  import { createTransformMessageHandler } from './shared.js';
15
12
  import { transformHtmlToMarkdownInProcess } from './transform.js';
@@ -107,6 +104,93 @@ const POOL_SCALE_THRESHOLD = 0.5;
107
104
  const WORKER_NAME_PREFIX = 'fetch-url-mcp-transform';
108
105
  const DEFAULT_TIMEOUT_MS = config.transform.timeoutMs;
109
106
  const TRANSFORM_WORKER_PATH = new URL(import.meta.url);
107
+ // TaskQueue — array-deque with auto-compaction
108
/**
 * FIFO queue stored in a plain array with a moving `head` cursor.
 *
 * Dequeuing advances `head` instead of shifting the array; the consumed
 * prefix is physically removed later by `compact()`. Slots may be falsy
 * (already consumed), so every reader skips falsy entries.
 */
class TaskQueue {
    /** Backing storage; slots before `head` have already been consumed. */
    items = [];
    /** Index of the next slot `dequeue()` will look at. */
    head = 0;
    /** Number of slots from `head` to the end of the array, never negative. */
    get depth() {
        const d = this.items.length - this.head;
        return d > 0 ? d : 0;
    }
    /**
     * Appends an item to the tail of the queue.
     * @param {object} item - Task to queue; `removeById` matches on its `id`.
     */
    enqueue(item) {
        this.items.push(item);
    }
    /**
     * Removes and returns the oldest queued item.
     * @returns {object | null} The next item, or `null` when nothing is queued.
     */
    dequeue() {
        while (this.head < this.items.length) {
            const item = this.items[this.head];
            // Release the slot immediately. Without this the array keeps the
            // dequeued task reachable until compact() runs, which can be up
            // to 1024 dequeues later on a queue that never fully empties.
            this.items[this.head] = undefined;
            this.head += 1;
            if (item) {
                this.compact();
                return item;
            }
        }
        this.compact();
        return null;
    }
    /**
     * Removes the first still-queued item whose `id` matches.
     * @param {*} id - Identifier compared with `===` against `item.id`.
     * @returns {object | undefined} The removed item, or `undefined` if absent.
     */
    removeById(id) {
        for (let i = this.head; i < this.items.length; i += 1) {
            const item = this.items[i];
            if (item?.id === id) {
                this.items.splice(i, 1);
                this.compact();
                return item;
            }
        }
        return undefined;
    }
    /**
     * Invokes `callback` for every still-queued item in FIFO order, then
     * empties the queue.
     * @param {(item: object) => void} callback - Called once per queued item.
     */
    drain(callback) {
        for (let i = this.head; i < this.items.length; i += 1) {
            const item = this.items[i];
            if (item)
                callback(item);
        }
        this.items.length = 0;
        this.head = 0;
    }
    /**
     * Drops the consumed prefix when the queue is exhausted, or when more
     * than 1024 consumed slots make up over half of the array.
     */
    compact() {
        if (this.head === 0)
            return;
        if (this.head >= this.items.length ||
            (this.head > 1024 && this.head > this.items.length / 2)) {
            this.items.splice(0, this.head);
            this.head = 0;
        }
    }
}
160
+ // CancelAckTracker — isolates the cancel-acknowledgement protocol
161
/**
 * Tracks tasks that are waiting for a cancel acknowledgement.
 *
 * Each id maps to one pending wait made of a promise, the function that
 * settles it, and the timeout handle racing against it.
 */
class CancelAckTracker {
    /** id -> { promise, resolve, timeout } for every wait not yet cleaned up. */
    pending = new Map();

    /**
     * Marks `id` as acknowledged: stops its timeout and settles its wait.
     * Unknown ids are ignored.
     * @param {*} id - Key previously passed to `wait`.
     */
    resolve(id) {
        const waiter = this.pending.get(id);
        if (waiter) {
            waiter.timeout.cancel();
            waiter.resolve();
        }
    }

    /**
     * Returns a promise that settles when `id` is acknowledged or when the
     * timeout created for `timeoutMs` settles, whichever comes first.
     * Repeated calls for an id that is still tracked share one promise.
     * @param {*} id - Key identifying the wait.
     * @param {number} timeoutMs - Passed straight to `createUnrefTimeout`.
     * @returns {Promise<void>}
     */
    wait(id, timeoutMs) {
        const tracked = this.pending.get(id);
        if (tracked) {
            return tracked.promise;
        }
        const timeout = createUnrefTimeout(timeoutMs, undefined);
        // Placeholder; the executor below runs synchronously and replaces it.
        let acknowledge = () => { };
        const acknowledged = new Promise((settle) => {
            acknowledge = settle;
        });
        const cleanUp = () => {
            this.pending.delete(id);
            timeout.cancel();
        };
        const promise = Promise.race([acknowledged, timeout.promise]).finally(cleanUp);
        this.pending.set(id, { promise, resolve: acknowledge, timeout });
        return promise;
    }

    /** Settles every outstanding wait, stops its timeout, and forgets it. */
    dispose() {
        for (const { timeout, resolve } of this.pending.values()) {
            timeout.cancel();
            resolve();
        }
        this.pending.clear();
    }
}
110
194
  // WorkerPool
111
195
  class WorkerPool {
112
196
  static CLOSED_MESSAGE = 'Transform worker pool closed';
@@ -114,14 +198,14 @@ class WorkerPool {
114
198
  capacity;
115
199
  minCapacity = POOL_MIN_WORKERS;
116
200
  maxCapacity = POOL_MAX_WORKERS;
117
- queue = [];
118
- queueHead = 0;
201
+ queue = new TaskQueue();
119
202
  inflight = new Map();
120
- cancelAcks = new Map();
203
+ cancelAcks = new CancelAckTracker();
121
204
  timeoutMs;
122
205
  queueMax;
123
206
  closed = false;
124
207
  taskIdSeq = 0;
208
+ busyCount = 0;
125
209
  constructor(size, timeoutMs) {
126
210
  this.capacity =
127
211
  size === 0
@@ -134,7 +218,7 @@ class WorkerPool {
134
218
  this.ensureOpen();
135
219
  if (options.signal?.aborted)
136
220
  throw createAbortError(url, 'transform:enqueue');
137
- if (this.getQueueDepth() >= this.queueMax) {
221
+ if (this.queue.depth >= this.queueMax) {
138
222
  throw new FetchError('Transform worker queue is full', url, 503, {
139
223
  reason: 'queue_full',
140
224
  stage: 'transform:enqueue',
@@ -142,16 +226,15 @@ class WorkerPool {
142
226
  }
143
227
  return new Promise((resolve, reject) => {
144
228
  const task = this.createPendingTask(htmlOrBuffer, url, options, resolve, reject);
145
- this.queue.push(task);
229
+ this.queue.enqueue(task);
146
230
  this.drainQueue();
147
231
  });
148
232
  }
149
233
  getQueueDepth() {
150
- const depth = this.queue.length - this.queueHead;
151
- return depth > 0 ? depth : 0;
234
+ return this.queue.depth;
152
235
  }
153
236
  getActiveWorkers() {
154
- return this.workers.filter((s) => s?.busy).length;
237
+ return this.busyCount;
155
238
  }
156
239
  getCapacity() {
157
240
  return this.capacity;
@@ -172,6 +255,8 @@ class WorkerPool {
172
255
  .filter((p) => p !== undefined);
173
256
  this.workers.fill(undefined);
174
257
  this.workers.length = 0;
258
+ this.busyCount = 0;
259
+ this.cancelAcks.dispose();
175
260
  for (const id of Array.from(this.inflight.keys())) {
176
261
  const inflight = this.takeInflight(id);
177
262
  if (!inflight)
@@ -180,17 +265,12 @@ class WorkerPool {
180
265
  inflight.reject(new Error(WorkerPool.CLOSED_MESSAGE));
181
266
  });
182
267
  }
183
- for (let i = this.queueHead; i < this.queue.length; i += 1) {
184
- const task = this.queue[i];
185
- if (!task)
186
- continue;
268
+ this.queue.drain((task) => {
187
269
  this.clearAbortListener(task.signal, task.abortListener);
188
270
  this.finalizeTask(task.context, () => {
189
271
  task.reject(new Error(WorkerPool.CLOSED_MESSAGE));
190
272
  });
191
- }
192
- this.queue.length = 0;
193
- this.queueHead = 0;
273
+ });
194
274
  await Promise.allSettled(terminations);
195
275
  }
196
276
  ensureOpen() {
@@ -243,48 +323,13 @@ class WorkerPool {
243
323
  void this.abortInflight(id, url, inflight.workerIndex);
244
324
  return;
245
325
  }
246
- const queuedIndex = this.findQueuedIndex(id);
247
- if (queuedIndex !== null) {
248
- const task = this.queue[queuedIndex];
249
- if (task)
250
- this.clearAbortListener(task.signal, task.abortListener);
251
- this.queue.splice(queuedIndex, 1);
252
- if (task) {
253
- this.finalizeTask(task.context, () => {
254
- task.reject(createAbortError(url, 'transform:queued-abort'));
255
- });
256
- }
257
- else {
258
- this.finalizeTask(context, () => {
259
- reject(createAbortError(url, 'transform:queued-abort'));
260
- });
261
- }
262
- this.maybeCompactQueue();
263
- }
264
- }
265
- resolveCancelAck(id) {
266
- const pending = this.cancelAcks.get(id);
267
- if (!pending)
268
- return;
269
- pending.timeout.cancel();
270
- pending.resolve();
271
- }
272
- waitForCancelAck(id) {
273
- const existing = this.cancelAcks.get(id);
274
- if (existing) {
275
- return existing.promise;
326
+ const queuedTask = this.queue.removeById(id);
327
+ if (queuedTask) {
328
+ this.clearAbortListener(queuedTask.signal, queuedTask.abortListener);
329
+ this.finalizeTask(queuedTask.context, () => {
330
+ queuedTask.reject(createAbortError(url, 'transform:queued-abort'));
331
+ });
276
332
  }
277
- let resolve = () => { };
278
- const timeout = createUnrefTimeout(config.transform.cancelAckTimeoutMs, undefined);
279
- const racePromise = new Promise((finish) => {
280
- resolve = finish;
281
- });
282
- const promise = Promise.race([racePromise, timeout.promise]).finally(() => {
283
- this.cancelAcks.delete(id);
284
- timeout.cancel();
285
- });
286
- this.cancelAcks.set(id, { promise, resolve, timeout });
287
- return promise;
288
333
  }
289
334
  async abortInflight(id, url, workerIndex) {
290
335
  const slot = this.workers[workerIndex];
@@ -300,7 +345,7 @@ class WorkerPool {
300
345
  // Worker may be unavailable; failure is acceptable during abort
301
346
  }
302
347
  }
303
- await this.waitForCancelAck(id);
348
+ await this.cancelAcks.wait(id, config.transform.cancelAckTimeoutMs);
304
349
  const taken = this.failTask(id, createAbortError(url, 'transform:signal-abort'));
305
350
  if (taken && slot)
306
351
  this.restartWorker(workerIndex, slot);
@@ -369,12 +414,12 @@ class WorkerPool {
369
414
  if (!message)
370
415
  return;
371
416
  if (message.type === 'cancelled') {
372
- this.resolveCancelAck(message.id);
417
+ this.cancelAcks.resolve(message.id);
373
418
  return;
374
419
  }
375
420
  const inflightPeek = this.inflight.get(message.id);
376
421
  if (inflightPeek?.cancelPending) {
377
- this.resolveCancelAck(message.id);
422
+ this.cancelAcks.resolve(message.id);
378
423
  return;
379
424
  }
380
425
  const inflight = this.takeInflight(message.id);
@@ -421,7 +466,10 @@ class WorkerPool {
421
466
  const slot = this.workers[workerIndex];
422
467
  if (!slot)
423
468
  return;
424
- slot.busy = false;
469
+ if (slot.busy) {
470
+ slot.busy = false;
471
+ this.busyCount -= 1;
472
+ }
425
473
  slot.currentTaskId = null;
426
474
  }
427
475
  failTask(id, error) {
@@ -441,43 +489,31 @@ class WorkerPool {
441
489
  }
442
490
  }
443
491
  drainQueue() {
444
- if (this.closed || this.getQueueDepth() === 0)
492
+ if (this.closed || this.queue.depth === 0)
445
493
  return;
446
494
  this.maybeScaleUp();
447
495
  for (let i = 0; i < this.workers.length; i += 1) {
448
496
  const slot = this.workers[i];
449
497
  if (slot && !slot.busy) {
450
498
  this.dispatchFromQueue(i, slot);
451
- if (this.getQueueDepth() === 0)
499
+ if (this.queue.depth === 0)
452
500
  return;
453
501
  }
454
502
  }
455
- if (this.workers.length < this.capacity && this.getQueueDepth() > 0) {
503
+ if (this.workers.length < this.capacity && this.queue.depth > 0) {
456
504
  const workerIndex = this.workers.length;
457
505
  const slot = this.spawnWorker(workerIndex);
458
506
  this.workers.push(slot);
459
507
  this.dispatchFromQueue(workerIndex, slot);
460
- if (this.workers.length < this.capacity && this.getQueueDepth() > 0) {
508
+ if (this.workers.length < this.capacity && this.queue.depth > 0) {
461
509
  setImmediate(() => {
462
510
  this.drainQueue();
463
511
  });
464
512
  }
465
513
  }
466
514
  }
467
- takeNextQueuedTask() {
468
- while (this.queueHead < this.queue.length) {
469
- const task = this.queue[this.queueHead];
470
- this.queueHead += 1;
471
- if (task) {
472
- this.maybeCompactQueue();
473
- return task;
474
- }
475
- }
476
- this.maybeCompactQueue();
477
- return null;
478
- }
479
515
  dispatchFromQueue(workerIndex, slot) {
480
- const task = this.takeNextQueuedTask();
516
+ const task = this.queue.dequeue();
481
517
  if (!task)
482
518
  return;
483
519
  if (this.closed) {
@@ -496,6 +532,11 @@ class WorkerPool {
496
532
  }
497
533
  slot.busy = true;
498
534
  slot.currentTaskId = task.id;
535
+ this.busyCount += 1;
536
+ const timeout = this.registerInflight(task, workerIndex, slot);
537
+ this.sendToWorker(task, slot, workerIndex, timeout);
538
+ }
539
+ registerInflight(task, workerIndex, slot) {
499
540
  const timeout = createUnrefTimeout(this.timeoutMs, null);
500
541
  void timeout.promise
501
542
  .then(() => {
@@ -529,6 +570,9 @@ class WorkerPool {
529
570
  context: task.context,
530
571
  cancelPending: false,
531
572
  });
573
+ return timeout;
574
+ }
575
+ sendToWorker(task, slot, workerIndex, timeout) {
532
576
  try {
533
577
  const { message, transferList } = buildWorkerDispatchPayload(task);
534
578
  slot.worker.postMessage(message, transferList);
@@ -554,23 +598,6 @@ class WorkerPool {
554
598
  context.dispose();
555
599
  }
556
600
  }
557
- findQueuedIndex(id) {
558
- for (let i = this.queueHead; i < this.queue.length; i += 1) {
559
- const task = this.queue[i];
560
- if (task?.id === id)
561
- return i;
562
- }
563
- return null;
564
- }
565
- maybeCompactQueue() {
566
- if (this.queueHead === 0)
567
- return;
568
- if (this.queueHead >= this.queue.length ||
569
- (this.queueHead > 1024 && this.queueHead > this.queue.length / 2)) {
570
- this.queue.splice(0, this.queueHead);
571
- this.queueHead = 0;
572
- }
573
- }
574
601
  }
575
602
  // Pool singleton management
576
603
  let workerPool = null;
@@ -595,23 +622,26 @@ export async function shutdownWorkerPool() {
595
622
  workerPool = null;
596
623
  }
597
624
  // Worker thread message handling
598
- if (!isMainThread && parentPort) {
599
- const port = parentPort;
600
- const onMessage = createTransformMessageHandler({
601
- sendMessage: (message) => {
602
- port.postMessage(message);
603
- },
604
- runTransform: transformHtmlToMarkdownInProcess,
605
- });
606
- port.on('message', onMessage);
607
- }
608
- else if (process.send) {
609
- const send = process.send.bind(process);
610
- const onMessage = createTransformMessageHandler({
611
- sendMessage: (message) => {
612
- send(message);
613
- },
614
- runTransform: transformHtmlToMarkdownInProcess,
615
- });
616
- process.on('message', onMessage);
625
/**
 * Wires the transform message handler to whichever parent channel exists.
 *
 * Inside a worker thread the handler listens on `parentPort` and replies
 * with `postMessage`. Otherwise, when `process.send` is defined, it listens
 * on `process` and replies through `process.send`. With neither channel
 * available it does nothing.
 */
function bootstrapWorkerThread() {
    let source = null;
    let reply = null;
    if (!isMainThread && parentPort) {
        const port = parentPort;
        source = port;
        reply = (message) => {
            port.postMessage(message);
        };
    }
    else if (process.send) {
        // Bind once so the reply closure does not depend on a later `this`.
        const send = process.send.bind(process);
        source = process;
        reply = (message) => {
            send(message);
        };
    }
    if (source === null || reply === null) {
        return;
    }
    const onMessage = createTransformMessageHandler({
        sendMessage: reply,
        runTransform: transformHtmlToMarkdownInProcess,
    });
    source.on('message', onMessage);
}
647
+ bootstrapWorkerThread();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/fetch-url-mcp",
3
- "version": "1.9.1",
3
+ "version": "1.9.2",
4
4
  "mcpName": "io.github.j0hanz/fetch-url-mcp",
5
5
  "description": "A web content fetcher MCP server that converts HTML to clean, AI and human readable markdown.",
6
6
  "type": "module",
@@ -74,6 +74,7 @@
74
74
  "@mozilla/readability": "^0.6.0",
75
75
  "linkedom": "^0.18.12",
76
76
  "node-html-markdown": "^2.0.0",
77
+ "ts-morph": "^27.0.2",
77
78
  "undici": "^7.24.1",
78
79
  "zod": "^4.3.6"
79
80
  },