@j0hanz/superfetch 2.4.4 → 2.4.6

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/dist/tasks.js CHANGED
@@ -1,13 +1,26 @@
1
1
  import { randomUUID } from 'node:crypto';
2
+ import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
2
3
  const DEFAULT_TTL_MS = 60000;
3
4
  const DEFAULT_POLL_INTERVAL_MS = 1000;
5
+ const DEFAULT_OWNER_KEY = 'default';
6
+ const DEFAULT_PAGE_SIZE = 50;
7
+ const TERMINAL_STATUSES = new Set([
8
+ 'completed',
9
+ 'failed',
10
+ 'cancelled',
11
+ ]);
12
+ function isTerminalStatus(status) {
13
+ return TERMINAL_STATUSES.has(status);
14
+ }
4
15
  export class TaskManager {
5
16
  tasks = new Map();
6
- createTask(options, statusMessage = 'Task started') {
17
+ waiters = new Map();
18
+ createTask(options, statusMessage = 'Task started', ownerKey = DEFAULT_OWNER_KEY) {
7
19
  const taskId = randomUUID();
8
20
  const now = new Date().toISOString();
9
21
  const task = {
10
22
  taskId,
23
+ ownerKey,
11
24
  status: 'working',
12
25
  statusMessage,
13
26
  createdAt: now,
@@ -18,26 +31,40 @@ export class TaskManager {
18
31
  this.tasks.set(taskId, task);
19
32
  return task;
20
33
  }
21
- getTask(taskId) {
22
- return this.tasks.get(taskId);
34
+ getTask(taskId, ownerKey) {
35
+ const task = this.tasks.get(taskId);
36
+ if (!task)
37
+ return undefined;
38
+ if (ownerKey && task.ownerKey !== ownerKey)
39
+ return undefined;
40
+ if (this.isExpired(task)) {
41
+ this.tasks.delete(taskId);
42
+ return undefined;
43
+ }
44
+ return task;
23
45
  }
24
46
  updateTask(taskId, updates) {
25
47
  const task = this.tasks.get(taskId);
26
48
  if (!task)
27
49
  return;
50
+ if (updates.status && task.status !== updates.status) {
51
+ if (isTerminalStatus(task.status))
52
+ return;
53
+ }
28
54
  Object.assign(task, {
29
55
  ...updates,
30
56
  lastUpdatedAt: new Date().toISOString(),
31
57
  });
58
+ this.notifyWaiters(task);
32
59
  }
33
- cancelTask(taskId) {
34
- const task = this.tasks.get(taskId);
60
+ cancelTask(taskId, ownerKey) {
61
+ const task = this.getTask(taskId, ownerKey);
35
62
  if (!task)
36
63
  return undefined;
37
64
  if (task.status === 'completed' ||
38
65
  task.status === 'failed' ||
39
66
  task.status === 'cancelled') {
40
- throw new Error(`Cannot cancel task: already in terminal status '${task.status}'`);
67
+ throw new McpError(ErrorCode.InvalidParams, `Cannot cancel task: already in terminal status '${task.status}'`);
41
68
  }
42
69
  this.updateTask(taskId, {
43
70
  status: 'cancelled',
@@ -45,8 +72,26 @@ export class TaskManager {
45
72
  });
46
73
  return this.tasks.get(taskId);
47
74
  }
48
- listTasks() {
49
- return Array.from(this.tasks.values());
75
+ listTasks(options) {
76
+ const { ownerKey, cursor, limit } = options;
77
+ const pageSize = limit && limit > 0 ? limit : DEFAULT_PAGE_SIZE;
78
+ const startIndex = cursor ? this.decodeCursor(cursor) : 0;
79
+ if (startIndex === null) {
80
+ throw new McpError(ErrorCode.InvalidParams, 'Invalid cursor');
81
+ }
82
+ const allTasks = Array.from(this.tasks.values()).filter((task) => {
83
+ if (task.ownerKey !== ownerKey)
84
+ return false;
85
+ if (this.isExpired(task)) {
86
+ this.tasks.delete(task.taskId);
87
+ return false;
88
+ }
89
+ return true;
90
+ });
91
+ const page = allTasks.slice(startIndex, startIndex + pageSize);
92
+ const nextIndex = startIndex + page.length;
93
+ const nextCursor = nextIndex < allTasks.length ? this.encodeCursor(nextIndex) : undefined;
94
+ return nextCursor ? { tasks: page, nextCursor } : { tasks: page };
50
95
  }
51
96
  // Helper to check if task is expired and could be cleaned up
52
97
  // In a real implementation, this would be called by a periodic job
@@ -62,5 +107,77 @@ export class TaskManager {
62
107
  }
63
108
  return count;
64
109
  }
110
+ async waitForTerminalTask(taskId, ownerKey, signal) {
111
+ const task = this.getTask(taskId, ownerKey);
112
+ if (!task)
113
+ return undefined;
114
+ if (isTerminalStatus(task.status))
115
+ return task;
116
+ return new Promise((resolve, reject) => {
117
+ const onAbort = () => {
118
+ cleanup();
119
+ removeWaiter();
120
+ reject(new McpError(ErrorCode.ConnectionClosed, 'Request was cancelled'));
121
+ };
122
+ const cleanup = () => {
123
+ if (signal) {
124
+ signal.removeEventListener('abort', onAbort);
125
+ }
126
+ };
127
+ const removeWaiter = () => {
128
+ const waiters = this.waiters.get(taskId);
129
+ if (!waiters)
130
+ return;
131
+ waiters.delete(waiter);
132
+ if (waiters.size === 0)
133
+ this.waiters.delete(taskId);
134
+ };
135
+ const waiter = (updated) => {
136
+ cleanup();
137
+ resolve(updated);
138
+ };
139
+ if (signal?.aborted) {
140
+ onAbort();
141
+ return;
142
+ }
143
+ const waiters = this.waiters.get(taskId) ?? new Set();
144
+ waiters.add(waiter);
145
+ this.waiters.set(taskId, waiters);
146
+ if (signal) {
147
+ signal.addEventListener('abort', onAbort, { once: true });
148
+ }
149
+ });
150
+ }
151
+ notifyWaiters(task) {
152
+ if (!isTerminalStatus(task.status))
153
+ return;
154
+ const waiters = this.waiters.get(task.taskId);
155
+ if (!waiters)
156
+ return;
157
+ this.waiters.delete(task.taskId);
158
+ for (const waiter of waiters)
159
+ waiter(task);
160
+ }
161
+ isExpired(task) {
162
+ const createdAt = Date.parse(task.createdAt);
163
+ if (!Number.isFinite(createdAt))
164
+ return false;
165
+ return Date.now() - createdAt > task.ttl;
166
+ }
167
+ encodeCursor(index) {
168
+ return Buffer.from(String(index)).toString('base64');
169
+ }
170
+ decodeCursor(cursor) {
171
+ try {
172
+ const decoded = Buffer.from(cursor, 'base64').toString('utf8');
173
+ const value = Number.parseInt(decoded, 10);
174
+ if (!Number.isFinite(value) || value < 0)
175
+ return null;
176
+ return value;
177
+ }
178
+ catch {
179
+ return null;
180
+ }
181
+ }
65
182
  }
66
183
  export const taskManager = new TaskManager();
package/dist/tools.d.ts CHANGED
@@ -1,6 +1,5 @@
1
1
  import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
2
  import type { CallToolResult, ContentBlock } from '@modelcontextprotocol/sdk/types.js';
3
- import * as cache from './cache.js';
4
3
  import type { MarkdownTransformResult } from './transform-types.js';
5
4
  export interface FetchUrlInput {
6
5
  url: string;
@@ -73,7 +72,7 @@ export interface ToolHandlerExtra {
73
72
  sendNotification?: (notification: ProgressNotification) => Promise<void>;
74
73
  }
75
74
  export declare const FETCH_URL_TOOL_NAME = "fetch-url";
76
- export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format";
75
+ export declare const FETCH_URL_TOOL_DESCRIPTION: string;
77
76
  interface ProgressReporter {
78
77
  report: (progress: number, message: string) => Promise<void>;
79
78
  }
@@ -119,5 +118,5 @@ type MarkdownPipelineResult = MarkdownTransformResult & {
119
118
  export declare function parseCachedMarkdownResult(cached: string): MarkdownPipelineResult | undefined;
120
119
  export declare function fetchUrlToolHandler(input: FetchUrlInput, extra?: ToolHandlerExtra): Promise<ToolResponseBase>;
121
120
  export declare function withRequestContextIfMissing<TParams, TResult, TExtra = unknown>(handler: (params: TParams, extra?: TExtra) => Promise<TResult>): (params: TParams, extra?: TExtra) => Promise<TResult>;
122
- export declare function registerTools(server: McpServer, serverIcons?: cache.McpIcon[]): void;
121
+ export declare function registerTools(server: McpServer): void;
123
122
  export {};
package/dist/tools.js CHANGED
@@ -15,7 +15,7 @@ const fetchUrlInputSchema = z.strictObject({
15
15
  .url({ protocol: /^https?$/i })
16
16
  .min(1)
17
17
  .max(config.constants.maxUrlLength)
18
- .describe('The URL to fetch'),
18
+ .describe('The URL of the webpage to fetch and convert to Markdown'),
19
19
  });
20
20
  const fetchUrlOutputSchema = z.strictObject({
21
21
  url: z
@@ -46,24 +46,52 @@ const fetchUrlOutputSchema = z.strictObject({
46
46
  .describe('Error message if the request failed'),
47
47
  });
48
48
  export const FETCH_URL_TOOL_NAME = 'fetch-url';
49
- export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format';
49
+ export const FETCH_URL_TOOL_DESCRIPTION = `
50
+ Fetches a webpage and converts it to clean Markdown format optimized for LLM context.
51
+
52
+ This tool is useful for:
53
+ - Reading documentation, blog posts, or articles.
54
+ - Extracting main content while removing navigation and ads (noise removal).
55
+ - Caching content to speed up repeated queries.
56
+
57
+ Limitations:
58
+ - Returns truncated content if it exceeds ${config.constants.maxInlineContentChars} characters.
59
+ - Does not execute complex client-side JavaScript interactions.
60
+ `.trim();
61
+ // Specific icon for the fetch-url tool (download cloud / web)
62
+ const TOOL_ICON = {
63
+ src: '',
64
+ mimeType: 'image/svg+xml',
65
+ };
50
66
  /* -------------------------------------------------------------------------------------------------
51
67
  * Progress reporting
52
68
  * ------------------------------------------------------------------------------------------------- */
69
+ function resolveRelatedTaskMeta(meta) {
70
+ if (!meta)
71
+ return undefined;
72
+ const related = meta['io.modelcontextprotocol/related-task'];
73
+ if (!isObject(related))
74
+ return undefined;
75
+ const { taskId } = related;
76
+ return typeof taskId === 'string' ? { taskId } : undefined;
77
+ }
53
78
  class ToolProgressReporter {
54
79
  token;
55
80
  sendNotification;
56
- constructor(token, sendNotification) {
81
+ relatedTaskMeta;
82
+ constructor(token, sendNotification, relatedTaskMeta) {
57
83
  this.token = token;
58
84
  this.sendNotification = sendNotification;
85
+ this.relatedTaskMeta = relatedTaskMeta;
59
86
  }
60
87
  static create(extra) {
61
88
  const token = extra?._meta?.progressToken ?? null;
62
89
  const sendNotification = extra?.sendNotification;
90
+ const relatedTaskMeta = resolveRelatedTaskMeta(extra?._meta);
63
91
  if (token === null || !sendNotification) {
64
92
  return { report: async () => { } };
65
93
  }
66
- return new ToolProgressReporter(token, sendNotification);
94
+ return new ToolProgressReporter(token, sendNotification, relatedTaskMeta);
67
95
  }
68
96
  async report(progress, message) {
69
97
  try {
@@ -75,6 +103,13 @@ class ToolProgressReporter {
75
103
  progress,
76
104
  total: FETCH_PROGRESS_TOTAL,
77
105
  message,
106
+ ...(this.relatedTaskMeta
107
+ ? {
108
+ _meta: {
109
+ 'io.modelcontextprotocol/related-task': this.relatedTaskMeta,
110
+ },
111
+ }
112
+ : {}),
78
113
  },
79
114
  }),
80
115
  new Promise((_, reject) => {
@@ -461,7 +496,6 @@ async function executeFetch(input, extra) {
461
496
  await progress.report(1, 'Validating URL');
462
497
  logDebug('Fetching URL', { url });
463
498
  await progress.report(2, 'Fetching content');
464
- await progress.report(2, 'Fetching content'); // preserve existing behavior
465
499
  const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress);
466
500
  if (pipeline.fromCache) {
467
501
  await progress.report(3, 'Using cached content');
@@ -485,6 +519,9 @@ const TOOL_DEFINITION = {
485
519
  inputSchema: fetchUrlInputSchema,
486
520
  outputSchema: fetchUrlOutputSchema,
487
521
  handler: fetchUrlToolHandler,
522
+ execution: {
523
+ taskSupport: 'optional',
524
+ },
488
525
  annotations: {
489
526
  readOnlyHint: true,
490
527
  destructiveHint: false,
@@ -499,7 +536,12 @@ export function withRequestContextIfMissing(handler) {
499
536
  return handler(params, extra);
500
537
  }
501
538
  const derivedRequestId = resolveRequestIdFromExtra(extra) ?? randomUUID();
502
- return runWithRequestContext({ requestId: derivedRequestId, operationId: derivedRequestId }, () => handler(params, extra));
539
+ const derivedSessionId = resolveSessionIdFromExtra(extra);
540
+ return runWithRequestContext({
541
+ requestId: derivedRequestId,
542
+ operationId: derivedRequestId,
543
+ ...(derivedSessionId ? { sessionId: derivedSessionId } : {}),
544
+ }, () => handler(params, extra));
503
545
  };
504
546
  }
505
547
  function resolveRequestIdFromExtra(extra) {
@@ -512,13 +554,32 @@ function resolveRequestIdFromExtra(extra) {
512
554
  return String(requestId);
513
555
  return undefined;
514
556
  }
515
- export function registerTools(server, serverIcons) {
516
- server.registerTool(TOOL_DEFINITION.name, {
517
- title: TOOL_DEFINITION.title,
518
- description: TOOL_DEFINITION.description,
519
- inputSchema: TOOL_DEFINITION.inputSchema,
520
- outputSchema: TOOL_DEFINITION.outputSchema,
521
- annotations: TOOL_DEFINITION.annotations,
522
- ...(serverIcons ? { icons: serverIcons } : {}),
523
- }, withRequestContextIfMissing(TOOL_DEFINITION.handler));
557
+ function resolveSessionIdFromExtra(extra) {
558
+ if (!isObject(extra))
559
+ return undefined;
560
+ const { sessionId } = extra;
561
+ if (typeof sessionId === 'string')
562
+ return sessionId;
563
+ const { requestInfo } = extra;
564
+ if (!isObject(requestInfo))
565
+ return undefined;
566
+ const { headers } = requestInfo;
567
+ if (!isObject(headers))
568
+ return undefined;
569
+ const headerValue = headers['mcp-session-id'];
570
+ return typeof headerValue === 'string' ? headerValue : undefined;
571
+ }
572
+ export function registerTools(server) {
573
+ if (config.tools.enabled.includes(FETCH_URL_TOOL_NAME)) {
574
+ server.registerTool(TOOL_DEFINITION.name, {
575
+ title: TOOL_DEFINITION.title,
576
+ description: TOOL_DEFINITION.description,
577
+ inputSchema: TOOL_DEFINITION.inputSchema,
578
+ outputSchema: TOOL_DEFINITION.outputSchema,
579
+ annotations: TOOL_DEFINITION.annotations,
580
+ execution: TOOL_DEFINITION.execution,
581
+ // Use specific tool icon here
582
+ icons: [TOOL_ICON],
583
+ }, withRequestContextIfMissing(TOOL_DEFINITION.handler));
584
+ }
524
585
  }
package/dist/transform.js CHANGED
@@ -876,8 +876,8 @@ function buildContentSource(params) {
876
876
  return { sourceHtml: article.content, title: article.title, metadata };
877
877
  }
878
878
  if (document) {
879
- const cleanedHtml = removeNoiseFromHtml(html, undefined, url);
880
- const { document: cleanedDoc } = parseHTML(cleanedHtml);
879
+ removeNoiseFromHtml(html, document, url);
880
+ const cleanedDoc = document;
881
881
  const contentRoot = findContentRoot(cleanedDoc);
882
882
  if (contentRoot) {
883
883
  logDebug('Using content root fallback instead of full HTML', {
@@ -993,6 +993,7 @@ class WorkerPool {
993
993
  minCapacity = POOL_MIN_WORKERS;
994
994
  maxCapacity = POOL_MAX_WORKERS;
995
995
  queue = [];
996
+ queueHead = 0;
996
997
  inflight = new Map();
997
998
  timeoutMs;
998
999
  queueMax;
@@ -1006,7 +1007,7 @@ class WorkerPool {
1006
1007
  this.ensureOpen();
1007
1008
  if (options.signal?.aborted)
1008
1009
  throw abortPolicy.createAbortError(url, 'transform:enqueue');
1009
- if (this.queue.length >= this.queueMax) {
1010
+ if (this.getQueueDepth() >= this.queueMax) {
1010
1011
  throw new FetchError('Transform worker queue is full', url, 503, {
1011
1012
  reason: 'queue_full',
1012
1013
  stage: 'transform:enqueue',
@@ -1019,7 +1020,8 @@ class WorkerPool {
1019
1020
  });
1020
1021
  }
1021
1022
  getQueueDepth() {
1022
- return this.queue.length;
1023
+ const depth = this.queue.length - this.queueHead;
1024
+ return depth > 0 ? depth : 0;
1023
1025
  }
1024
1026
  getActiveWorkers() {
1025
1027
  return this.workers.filter((s) => s?.busy).length;
@@ -1042,9 +1044,13 @@ class WorkerPool {
1042
1044
  inflight.reject(new Error('Transform worker pool closed'));
1043
1045
  this.inflight.delete(id);
1044
1046
  }
1045
- for (const task of this.queue)
1046
- task.reject(new Error('Transform worker pool closed'));
1047
+ for (let i = this.queueHead; i < this.queue.length; i += 1) {
1048
+ const task = this.queue[i];
1049
+ if (task)
1050
+ task.reject(new Error('Transform worker pool closed'));
1051
+ }
1047
1052
  this.queue.length = 0;
1053
+ this.queueHead = 0;
1048
1054
  await Promise.allSettled(terminations);
1049
1055
  }
1050
1056
  ensureOpen() {
@@ -1081,10 +1087,11 @@ class WorkerPool {
1081
1087
  this.abortInflight(id, url, inflight.workerIndex);
1082
1088
  return;
1083
1089
  }
1084
- const queuedIndex = this.queue.findIndex((t) => t.id === id);
1085
- if (queuedIndex !== -1) {
1090
+ const queuedIndex = this.findQueuedIndex(id);
1091
+ if (queuedIndex !== null) {
1086
1092
  this.queue.splice(queuedIndex, 1);
1087
1093
  reject(abortPolicy.createAbortError(url, 'transform:queued-abort'));
1094
+ this.maybeCompactQueue();
1088
1095
  }
1089
1096
  }
1090
1097
  abortInflight(id, url, workerIndex) {
@@ -1196,29 +1203,29 @@ class WorkerPool {
1196
1203
  this.markIdle(inflight.workerIndex);
1197
1204
  }
1198
1205
  maybeScaleUp() {
1199
- if (this.queue.length > this.capacity * POOL_SCALE_THRESHOLD &&
1206
+ if (this.getQueueDepth() > this.capacity * POOL_SCALE_THRESHOLD &&
1200
1207
  this.capacity < this.maxCapacity) {
1201
1208
  this.capacity += 1;
1202
1209
  }
1203
1210
  }
1204
1211
  drainQueue() {
1205
- if (this.closed || this.queue.length === 0)
1212
+ if (this.closed || this.getQueueDepth() === 0)
1206
1213
  return;
1207
1214
  this.maybeScaleUp();
1208
1215
  for (let i = 0; i < this.workers.length; i += 1) {
1209
1216
  const slot = this.workers[i];
1210
1217
  if (slot && !slot.busy) {
1211
1218
  this.dispatchFromQueue(i, slot);
1212
- if (this.queue.length === 0)
1219
+ if (this.getQueueDepth() === 0)
1213
1220
  return;
1214
1221
  }
1215
1222
  }
1216
- if (this.workers.length < this.capacity && this.queue.length > 0) {
1223
+ if (this.workers.length < this.capacity && this.getQueueDepth() > 0) {
1217
1224
  const workerIndex = this.workers.length;
1218
1225
  const slot = this.spawnWorker(workerIndex);
1219
1226
  this.workers.push(slot);
1220
1227
  this.dispatchFromQueue(workerIndex, slot);
1221
- if (this.workers.length < this.capacity && this.queue.length > 0) {
1228
+ if (this.workers.length < this.capacity && this.getQueueDepth() > 0) {
1222
1229
  setImmediate(() => {
1223
1230
  this.drainQueue();
1224
1231
  });
@@ -1226,9 +1233,11 @@ class WorkerPool {
1226
1233
  }
1227
1234
  }
1228
1235
  dispatchFromQueue(workerIndex, slot) {
1229
- const task = this.queue.shift();
1236
+ const task = this.queue[this.queueHead];
1230
1237
  if (!task)
1231
1238
  return;
1239
+ this.queueHead += 1;
1240
+ this.maybeCompactQueue();
1232
1241
  if (this.closed) {
1233
1242
  task.reject(new Error('Transform worker pool closed'));
1234
1243
  return;
@@ -1285,6 +1294,23 @@ class WorkerPool {
1285
1294
  this.restartWorker(workerIndex, slot);
1286
1295
  }
1287
1296
  }
1297
+ findQueuedIndex(id) {
1298
+ for (let i = this.queueHead; i < this.queue.length; i += 1) {
1299
+ const task = this.queue[i];
1300
+ if (task?.id === id)
1301
+ return i;
1302
+ }
1303
+ return null;
1304
+ }
1305
+ maybeCompactQueue() {
1306
+ if (this.queueHead === 0)
1307
+ return;
1308
+ if (this.queueHead >= this.queue.length ||
1309
+ (this.queueHead > 1024 && this.queueHead > this.queue.length / 2)) {
1310
+ this.queue.splice(0, this.queueHead);
1311
+ this.queueHead = 0;
1312
+ }
1313
+ }
1288
1314
  }
1289
1315
  class TransformWorkerPoolManager {
1290
1316
  pool = null;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/superfetch",
3
- "version": "2.4.4",
3
+ "version": "2.4.6",
4
4
  "mcpName": "io.github.j0hanz/superfetch",
5
5
  "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
6
6
  "type": "module",