@j0hanz/fetch-url-mcp 1.9.1 → 1.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/http/auth.d.ts +0 -1
- package/dist/http/auth.d.ts.map +1 -1
- package/dist/http/auth.js +1 -13
- package/dist/http/native.d.ts.map +1 -1
- package/dist/http/native.js +2 -5
- package/dist/lib/content.d.ts.map +1 -1
- package/dist/lib/content.js +301 -350
- package/dist/lib/core.d.ts +78 -71
- package/dist/lib/core.d.ts.map +1 -1
- package/dist/lib/core.js +308 -372
- package/dist/lib/fetch-pipeline.d.ts +2 -6
- package/dist/lib/fetch-pipeline.d.ts.map +1 -1
- package/dist/lib/fetch-pipeline.js +51 -137
- package/dist/lib/http.d.ts.map +1 -1
- package/dist/lib/http.js +188 -130
- package/dist/lib/mcp-tools.d.ts +3 -5
- package/dist/lib/mcp-tools.d.ts.map +1 -1
- package/dist/lib/mcp-tools.js +22 -58
- package/dist/lib/task-handlers.js +4 -4
- package/dist/lib/utils.d.ts +6 -0
- package/dist/lib/utils.d.ts.map +1 -1
- package/dist/lib/utils.js +23 -0
- package/dist/resources/index.js +1 -1
- package/dist/schemas.d.ts +0 -1
- package/dist/schemas.d.ts.map +1 -1
- package/dist/schemas.js +4 -6
- package/dist/server.js +1 -1
- package/dist/tasks/owner.d.ts +1 -1
- package/dist/tasks/owner.d.ts.map +1 -1
- package/dist/tasks/tool-registry.d.ts +1 -1
- package/dist/tasks/tool-registry.d.ts.map +1 -1
- package/dist/tools/fetch-url.d.ts +2 -3
- package/dist/tools/fetch-url.d.ts.map +1 -1
- package/dist/tools/fetch-url.js +89 -152
- package/dist/transform/transform.d.ts +8 -0
- package/dist/transform/transform.d.ts.map +1 -1
- package/dist/transform/transform.js +109 -108
- package/dist/transform/worker-pool.d.ts +3 -6
- package/dist/transform/worker-pool.d.ts.map +1 -1
- package/dist/transform/worker-pool.js +148 -118
- package/package.json +2 -1
|
@@ -5,11 +5,8 @@ import process from 'node:process';
|
|
|
5
5
|
import { isSharedArrayBuffer } from 'node:util/types';
|
|
6
6
|
import { isMainThread, parentPort, Worker, } from 'node:worker_threads';
|
|
7
7
|
import { z } from 'zod';
|
|
8
|
-
import { config } from '../lib/core.js';
|
|
9
|
-
import {
|
|
10
|
-
import { createAbortError } from '../lib/utils.js';
|
|
11
|
-
import { FetchError, getErrorMessage } from '../lib/utils.js';
|
|
12
|
-
import { createUnrefTimeout } from '../lib/utils.js';
|
|
8
|
+
import { config, logWarn } from '../lib/core.js';
|
|
9
|
+
import { createAbortError, createUnrefTimeout, FetchError, getErrorMessage, } from '../lib/utils.js';
|
|
13
10
|
import { normalizeExtractedMetadata } from '../schemas.js';
|
|
14
11
|
import { createTransformMessageHandler } from './shared.js';
|
|
15
12
|
import { transformHtmlToMarkdownInProcess } from './transform.js';
|
|
@@ -107,6 +104,93 @@ const POOL_SCALE_THRESHOLD = 0.5;
|
|
|
107
104
|
const WORKER_NAME_PREFIX = 'fetch-url-mcp-transform';
|
|
108
105
|
const DEFAULT_TIMEOUT_MS = config.transform.timeoutMs;
|
|
109
106
|
const TRANSFORM_WORKER_PATH = new URL(import.meta.url);
|
|
107
|
+
// TaskQueue — array-deque with auto-compaction
|
|
108
|
+
/**
 * FIFO queue backed by a plain array and a moving `head` index, so taking
 * the next entry does not shift the whole array on every call.
 *
 * Slots below `head` are dead space; `compact()` reclaims them lazily.
 * Entries are expected to be objects carrying an `id` property (that is
 * what `removeById` matches on).
 */
class TaskQueue {
    /** Backing storage; live entries occupy `head .. items.length - 1`. */
    items = [];
    /** Index of the next slot `dequeue()` / `drain()` will inspect. */
    head = 0;
    /**
     * Number of entries still waiting in the queue.
     * @returns {number} Never negative.
     */
    get depth() {
        return Math.max(this.items.length - this.head, 0);
    }
    /**
     * Appends an entry to the tail of the queue.
     * @param {object} item Entry to queue.
     */
    enqueue(item) {
        this.items.push(item);
    }
    /**
     * Removes and returns the oldest queued entry, skipping empty slots.
     * @returns {object | null} The next entry, or `null` when the queue is empty.
     */
    dequeue() {
        while (this.head < this.items.length) {
            const item = this.items[this.head];
            // Drop the array's reference immediately. Compaction can be
            // deferred for a long time while a backlog exists, and the
            // consumed entry must not stay reachable from here until then.
            this.items[this.head] = undefined;
            this.head += 1;
            if (item) {
                this.compact();
                return item;
            }
        }
        this.compact();
        return null;
    }
    /**
     * Removes the first queued entry whose `id` equals the given id.
     * @param {*} id Identifier to look for (compared with `===`).
     * @returns {object | undefined} The removed entry, or `undefined` if absent.
     */
    removeById(id) {
        for (let i = this.head; i < this.items.length; i += 1) {
            const item = this.items[i];
            if (item?.id === id) {
                this.items.splice(i, 1);
                this.compact();
                return item;
            }
        }
        return undefined;
    }
    /**
     * Hands every queued entry to `callback` in FIFO order and empties the queue.
     *
     * Each entry is detached from the queue *before* its callback runs, so an
     * entry is delivered at most once even if the callback throws or mutates
     * the queue. When the callback throws, the error propagates and the
     * entries not yet visited stay queued.
     * @param {(item: object) => void} callback Invoked once per queued entry.
     */
    drain(callback) {
        try {
            while (this.head < this.items.length) {
                const item = this.items[this.head];
                this.items[this.head] = undefined;
                this.head += 1;
                if (item)
                    callback(item);
            }
        }
        finally {
            // On normal completion head === items.length, so this resets the
            // queue to an empty array with head 0.
            this.compact();
        }
    }
    /**
     * Reclaims the dead prefix below `head` when the queue is fully consumed,
     * or when the prefix exceeds 1024 slots and more than half of the array.
     */
    compact() {
        if (this.head === 0)
            return;
        const fullyConsumed = this.head >= this.items.length;
        const mostlyDead = this.head > 1024 && this.head > this.items.length / 2;
        if (fullyConsumed || mostlyDead) {
            this.items.splice(0, this.head);
            this.head = 0;
        }
    }
}
|
|
160
|
+
// CancelAckTracker — isolates the cancel-acknowledgement protocol
|
|
161
|
+
/**
 * Tracks outstanding cancel acknowledgements, keyed by task id.
 *
 * `wait(id, timeoutMs)` returns a promise that settles when `resolve(id)` is
 * called or when the timeout created by `createUnrefTimeout` settles,
 * whichever happens first.
 */
class CancelAckTracker {
    /** id -> { promise, resolve, timeout } for every wait still in progress. */
    pending = new Map();
    /**
     * Signals that the acknowledgement for `id` has arrived.
     * Does nothing when no wait is registered for that id.
     * @param {*} id Task identifier.
     */
    resolve(id) {
        const entry = this.pending.get(id);
        if (!entry)
            return;
        entry.timeout.cancel();
        // The map entry is removed by the promise's own cleanup handler, so a
        // wait(id) issued before that handler runs still shares this promise.
        entry.resolve();
    }
    /**
     * Returns a promise for the acknowledgement of `id`, creating it on first
     * use. Repeated calls for the same id share one promise.
     * @param {*} id Task identifier.
     * @param {number} timeoutMs Passed to `createUnrefTimeout` as the delay.
     * @returns {Promise<*>} Settles on acknowledgement or timeout.
     */
    wait(id, timeoutMs) {
        const existing = this.pending.get(id);
        if (existing)
            return existing.promise;
        let resolve = () => { };
        const timeout = createUnrefTimeout(timeoutMs, undefined);
        const acknowledged = new Promise((finish) => {
            resolve = finish;
        });
        const entry = { promise: undefined, resolve, timeout };
        entry.promise = Promise.race([acknowledged, timeout.promise]).finally(() => {
            // This handler runs asynchronously. Only evict the entry it
            // belongs to: after dispose() a newer wait may already be
            // registered under the same id and must stay resolvable.
            if (this.pending.get(id) === entry) {
                this.pending.delete(id);
            }
            timeout.cancel();
        });
        this.pending.set(id, entry);
        return entry.promise;
    }
    /**
     * Cancels every timeout, releases every waiter, and forgets all entries.
     */
    dispose() {
        for (const entry of this.pending.values()) {
            entry.timeout.cancel();
            entry.resolve();
        }
        this.pending.clear();
    }
}
|
|
110
194
|
// WorkerPool
|
|
111
195
|
class WorkerPool {
|
|
112
196
|
static CLOSED_MESSAGE = 'Transform worker pool closed';
|
|
@@ -114,14 +198,14 @@ class WorkerPool {
|
|
|
114
198
|
capacity;
|
|
115
199
|
minCapacity = POOL_MIN_WORKERS;
|
|
116
200
|
maxCapacity = POOL_MAX_WORKERS;
|
|
117
|
-
queue =
|
|
118
|
-
queueHead = 0;
|
|
201
|
+
queue = new TaskQueue();
|
|
119
202
|
inflight = new Map();
|
|
120
|
-
cancelAcks = new
|
|
203
|
+
cancelAcks = new CancelAckTracker();
|
|
121
204
|
timeoutMs;
|
|
122
205
|
queueMax;
|
|
123
206
|
closed = false;
|
|
124
207
|
taskIdSeq = 0;
|
|
208
|
+
busyCount = 0;
|
|
125
209
|
constructor(size, timeoutMs) {
|
|
126
210
|
this.capacity =
|
|
127
211
|
size === 0
|
|
@@ -134,7 +218,7 @@ class WorkerPool {
|
|
|
134
218
|
this.ensureOpen();
|
|
135
219
|
if (options.signal?.aborted)
|
|
136
220
|
throw createAbortError(url, 'transform:enqueue');
|
|
137
|
-
if (this.
|
|
221
|
+
if (this.queue.depth >= this.queueMax) {
|
|
138
222
|
throw new FetchError('Transform worker queue is full', url, 503, {
|
|
139
223
|
reason: 'queue_full',
|
|
140
224
|
stage: 'transform:enqueue',
|
|
@@ -142,16 +226,15 @@ class WorkerPool {
|
|
|
142
226
|
}
|
|
143
227
|
return new Promise((resolve, reject) => {
|
|
144
228
|
const task = this.createPendingTask(htmlOrBuffer, url, options, resolve, reject);
|
|
145
|
-
this.queue.
|
|
229
|
+
this.queue.enqueue(task);
|
|
146
230
|
this.drainQueue();
|
|
147
231
|
});
|
|
148
232
|
}
|
|
149
233
|
getQueueDepth() {
|
|
150
|
-
|
|
151
|
-
return depth > 0 ? depth : 0;
|
|
234
|
+
return this.queue.depth;
|
|
152
235
|
}
|
|
153
236
|
getActiveWorkers() {
|
|
154
|
-
return this.
|
|
237
|
+
return this.busyCount;
|
|
155
238
|
}
|
|
156
239
|
getCapacity() {
|
|
157
240
|
return this.capacity;
|
|
@@ -172,6 +255,8 @@ class WorkerPool {
|
|
|
172
255
|
.filter((p) => p !== undefined);
|
|
173
256
|
this.workers.fill(undefined);
|
|
174
257
|
this.workers.length = 0;
|
|
258
|
+
this.busyCount = 0;
|
|
259
|
+
this.cancelAcks.dispose();
|
|
175
260
|
for (const id of Array.from(this.inflight.keys())) {
|
|
176
261
|
const inflight = this.takeInflight(id);
|
|
177
262
|
if (!inflight)
|
|
@@ -180,17 +265,12 @@ class WorkerPool {
|
|
|
180
265
|
inflight.reject(new Error(WorkerPool.CLOSED_MESSAGE));
|
|
181
266
|
});
|
|
182
267
|
}
|
|
183
|
-
|
|
184
|
-
const task = this.queue[i];
|
|
185
|
-
if (!task)
|
|
186
|
-
continue;
|
|
268
|
+
this.queue.drain((task) => {
|
|
187
269
|
this.clearAbortListener(task.signal, task.abortListener);
|
|
188
270
|
this.finalizeTask(task.context, () => {
|
|
189
271
|
task.reject(new Error(WorkerPool.CLOSED_MESSAGE));
|
|
190
272
|
});
|
|
191
|
-
}
|
|
192
|
-
this.queue.length = 0;
|
|
193
|
-
this.queueHead = 0;
|
|
273
|
+
});
|
|
194
274
|
await Promise.allSettled(terminations);
|
|
195
275
|
}
|
|
196
276
|
ensureOpen() {
|
|
@@ -243,48 +323,13 @@ class WorkerPool {
|
|
|
243
323
|
void this.abortInflight(id, url, inflight.workerIndex);
|
|
244
324
|
return;
|
|
245
325
|
}
|
|
246
|
-
const
|
|
247
|
-
if (
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
if (task) {
|
|
253
|
-
this.finalizeTask(task.context, () => {
|
|
254
|
-
task.reject(createAbortError(url, 'transform:queued-abort'));
|
|
255
|
-
});
|
|
256
|
-
}
|
|
257
|
-
else {
|
|
258
|
-
this.finalizeTask(context, () => {
|
|
259
|
-
reject(createAbortError(url, 'transform:queued-abort'));
|
|
260
|
-
});
|
|
261
|
-
}
|
|
262
|
-
this.maybeCompactQueue();
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
resolveCancelAck(id) {
|
|
266
|
-
const pending = this.cancelAcks.get(id);
|
|
267
|
-
if (!pending)
|
|
268
|
-
return;
|
|
269
|
-
pending.timeout.cancel();
|
|
270
|
-
pending.resolve();
|
|
271
|
-
}
|
|
272
|
-
waitForCancelAck(id) {
|
|
273
|
-
const existing = this.cancelAcks.get(id);
|
|
274
|
-
if (existing) {
|
|
275
|
-
return existing.promise;
|
|
326
|
+
const queuedTask = this.queue.removeById(id);
|
|
327
|
+
if (queuedTask) {
|
|
328
|
+
this.clearAbortListener(queuedTask.signal, queuedTask.abortListener);
|
|
329
|
+
this.finalizeTask(queuedTask.context, () => {
|
|
330
|
+
queuedTask.reject(createAbortError(url, 'transform:queued-abort'));
|
|
331
|
+
});
|
|
276
332
|
}
|
|
277
|
-
let resolve = () => { };
|
|
278
|
-
const timeout = createUnrefTimeout(config.transform.cancelAckTimeoutMs, undefined);
|
|
279
|
-
const racePromise = new Promise((finish) => {
|
|
280
|
-
resolve = finish;
|
|
281
|
-
});
|
|
282
|
-
const promise = Promise.race([racePromise, timeout.promise]).finally(() => {
|
|
283
|
-
this.cancelAcks.delete(id);
|
|
284
|
-
timeout.cancel();
|
|
285
|
-
});
|
|
286
|
-
this.cancelAcks.set(id, { promise, resolve, timeout });
|
|
287
|
-
return promise;
|
|
288
333
|
}
|
|
289
334
|
async abortInflight(id, url, workerIndex) {
|
|
290
335
|
const slot = this.workers[workerIndex];
|
|
@@ -300,7 +345,7 @@ class WorkerPool {
|
|
|
300
345
|
// Worker may be unavailable; failure is acceptable during abort
|
|
301
346
|
}
|
|
302
347
|
}
|
|
303
|
-
await this.
|
|
348
|
+
await this.cancelAcks.wait(id, config.transform.cancelAckTimeoutMs);
|
|
304
349
|
const taken = this.failTask(id, createAbortError(url, 'transform:signal-abort'));
|
|
305
350
|
if (taken && slot)
|
|
306
351
|
this.restartWorker(workerIndex, slot);
|
|
@@ -369,12 +414,12 @@ class WorkerPool {
|
|
|
369
414
|
if (!message)
|
|
370
415
|
return;
|
|
371
416
|
if (message.type === 'cancelled') {
|
|
372
|
-
this.
|
|
417
|
+
this.cancelAcks.resolve(message.id);
|
|
373
418
|
return;
|
|
374
419
|
}
|
|
375
420
|
const inflightPeek = this.inflight.get(message.id);
|
|
376
421
|
if (inflightPeek?.cancelPending) {
|
|
377
|
-
this.
|
|
422
|
+
this.cancelAcks.resolve(message.id);
|
|
378
423
|
return;
|
|
379
424
|
}
|
|
380
425
|
const inflight = this.takeInflight(message.id);
|
|
@@ -421,7 +466,10 @@ class WorkerPool {
|
|
|
421
466
|
const slot = this.workers[workerIndex];
|
|
422
467
|
if (!slot)
|
|
423
468
|
return;
|
|
424
|
-
slot.busy
|
|
469
|
+
if (slot.busy) {
|
|
470
|
+
slot.busy = false;
|
|
471
|
+
this.busyCount -= 1;
|
|
472
|
+
}
|
|
425
473
|
slot.currentTaskId = null;
|
|
426
474
|
}
|
|
427
475
|
failTask(id, error) {
|
|
@@ -441,43 +489,31 @@ class WorkerPool {
|
|
|
441
489
|
}
|
|
442
490
|
}
|
|
443
491
|
drainQueue() {
|
|
444
|
-
if (this.closed || this.
|
|
492
|
+
if (this.closed || this.queue.depth === 0)
|
|
445
493
|
return;
|
|
446
494
|
this.maybeScaleUp();
|
|
447
495
|
for (let i = 0; i < this.workers.length; i += 1) {
|
|
448
496
|
const slot = this.workers[i];
|
|
449
497
|
if (slot && !slot.busy) {
|
|
450
498
|
this.dispatchFromQueue(i, slot);
|
|
451
|
-
if (this.
|
|
499
|
+
if (this.queue.depth === 0)
|
|
452
500
|
return;
|
|
453
501
|
}
|
|
454
502
|
}
|
|
455
|
-
if (this.workers.length < this.capacity && this.
|
|
503
|
+
if (this.workers.length < this.capacity && this.queue.depth > 0) {
|
|
456
504
|
const workerIndex = this.workers.length;
|
|
457
505
|
const slot = this.spawnWorker(workerIndex);
|
|
458
506
|
this.workers.push(slot);
|
|
459
507
|
this.dispatchFromQueue(workerIndex, slot);
|
|
460
|
-
if (this.workers.length < this.capacity && this.
|
|
508
|
+
if (this.workers.length < this.capacity && this.queue.depth > 0) {
|
|
461
509
|
setImmediate(() => {
|
|
462
510
|
this.drainQueue();
|
|
463
511
|
});
|
|
464
512
|
}
|
|
465
513
|
}
|
|
466
514
|
}
|
|
467
|
-
takeNextQueuedTask() {
|
|
468
|
-
while (this.queueHead < this.queue.length) {
|
|
469
|
-
const task = this.queue[this.queueHead];
|
|
470
|
-
this.queueHead += 1;
|
|
471
|
-
if (task) {
|
|
472
|
-
this.maybeCompactQueue();
|
|
473
|
-
return task;
|
|
474
|
-
}
|
|
475
|
-
}
|
|
476
|
-
this.maybeCompactQueue();
|
|
477
|
-
return null;
|
|
478
|
-
}
|
|
479
515
|
dispatchFromQueue(workerIndex, slot) {
|
|
480
|
-
const task = this.
|
|
516
|
+
const task = this.queue.dequeue();
|
|
481
517
|
if (!task)
|
|
482
518
|
return;
|
|
483
519
|
if (this.closed) {
|
|
@@ -496,6 +532,11 @@ class WorkerPool {
|
|
|
496
532
|
}
|
|
497
533
|
slot.busy = true;
|
|
498
534
|
slot.currentTaskId = task.id;
|
|
535
|
+
this.busyCount += 1;
|
|
536
|
+
const timeout = this.registerInflight(task, workerIndex, slot);
|
|
537
|
+
this.sendToWorker(task, slot, workerIndex, timeout);
|
|
538
|
+
}
|
|
539
|
+
registerInflight(task, workerIndex, slot) {
|
|
499
540
|
const timeout = createUnrefTimeout(this.timeoutMs, null);
|
|
500
541
|
void timeout.promise
|
|
501
542
|
.then(() => {
|
|
@@ -529,6 +570,9 @@ class WorkerPool {
|
|
|
529
570
|
context: task.context,
|
|
530
571
|
cancelPending: false,
|
|
531
572
|
});
|
|
573
|
+
return timeout;
|
|
574
|
+
}
|
|
575
|
+
sendToWorker(task, slot, workerIndex, timeout) {
|
|
532
576
|
try {
|
|
533
577
|
const { message, transferList } = buildWorkerDispatchPayload(task);
|
|
534
578
|
slot.worker.postMessage(message, transferList);
|
|
@@ -554,23 +598,6 @@ class WorkerPool {
|
|
|
554
598
|
context.dispose();
|
|
555
599
|
}
|
|
556
600
|
}
|
|
557
|
-
findQueuedIndex(id) {
|
|
558
|
-
for (let i = this.queueHead; i < this.queue.length; i += 1) {
|
|
559
|
-
const task = this.queue[i];
|
|
560
|
-
if (task?.id === id)
|
|
561
|
-
return i;
|
|
562
|
-
}
|
|
563
|
-
return null;
|
|
564
|
-
}
|
|
565
|
-
maybeCompactQueue() {
|
|
566
|
-
if (this.queueHead === 0)
|
|
567
|
-
return;
|
|
568
|
-
if (this.queueHead >= this.queue.length ||
|
|
569
|
-
(this.queueHead > 1024 && this.queueHead > this.queue.length / 2)) {
|
|
570
|
-
this.queue.splice(0, this.queueHead);
|
|
571
|
-
this.queueHead = 0;
|
|
572
|
-
}
|
|
573
|
-
}
|
|
574
601
|
}
|
|
575
602
|
// Pool singleton management
|
|
576
603
|
let workerPool = null;
|
|
@@ -595,23 +622,26 @@ export async function shutdownWorkerPool() {
|
|
|
595
622
|
workerPool = null;
|
|
596
623
|
}
|
|
597
624
|
// Worker thread message handling
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
625
|
+
/**
 * Wires the transform message handler to whichever parent channel exists.
 *
 * Inside a worker thread with a `parentPort`, messages arrive on and replies
 * go out through that port. Otherwise, when `process.send` exists, the
 * process-level `message` event and `process.send` are used. When neither
 * applies the function does nothing.
 */
function bootstrapWorkerThread() {
    // Builds the handler around `sendMessage`, then passes it to `subscribe`.
    const wire = (sendMessage, subscribe) => {
        const onMessage = createTransformMessageHandler({
            sendMessage,
            runTransform: transformHtmlToMarkdownInProcess,
        });
        subscribe(onMessage);
    };
    if (!isMainThread && parentPort) {
        const port = parentPort;
        wire((message) => {
            port.postMessage(message);
        }, (onMessage) => {
            port.on('message', onMessage);
        });
        return;
    }
    if (process.send) {
        // Bound once so the reply callback does not depend on `this`.
        const send = process.send.bind(process);
        wire((message) => {
            send(message);
        }, (onMessage) => {
            process.on('message', onMessage);
        });
    }
}
|
|
647
|
+
bootstrapWorkerThread();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@j0hanz/fetch-url-mcp",
|
|
3
|
-
"version": "1.9.1",
|
|
3
|
+
"version": "1.9.2",
|
|
4
4
|
"mcpName": "io.github.j0hanz/fetch-url-mcp",
|
|
5
5
|
"description": "A web content fetcher MCP server that converts HTML to clean, AI and human readable markdown.",
|
|
6
6
|
"type": "module",
|
|
@@ -74,6 +74,7 @@
|
|
|
74
74
|
"@mozilla/readability": "^0.6.0",
|
|
75
75
|
"linkedom": "^0.18.12",
|
|
76
76
|
"node-html-markdown": "^2.0.0",
|
|
77
|
+
"ts-morph": "^27.0.2",
|
|
77
78
|
"undici": "^7.24.1",
|
|
78
79
|
"zod": "^4.3.6"
|
|
79
80
|
},
|