@j0hanz/superfetch 2.1.2 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cache.js CHANGED
@@ -452,7 +452,7 @@ function registerCacheContentResource(server) {
452
452
  }), {
453
453
  title: 'Cached Content',
454
454
  description: 'Access previously fetched web content from cache. Namespace: markdown. UrlHash: SHA-256 hash of the URL.',
455
- mimeType: 'text/plain',
455
+ mimeType: 'text/markdown',
456
456
  }, (uri, params) => {
457
457
  const { namespace, urlHash } = resolveCacheParams(params);
458
458
  const cacheKey = `${namespace}:${urlHash}`;
package/dist/http.d.ts CHANGED
@@ -47,6 +47,7 @@ interface McpSessionOptions {
47
47
  readonly sessionStore: SessionStore;
48
48
  readonly maxSessions: number;
49
49
  }
50
+ type JsonRpcId = string | number | null;
50
51
  export declare function createSessionStore(sessionTtlMs: number): SessionStore;
51
52
  export declare function reserveSessionSlot(store: SessionStore, maxSessions: number): boolean;
52
53
  interface SlotTracker {
@@ -55,11 +56,12 @@ interface SlotTracker {
55
56
  readonly isInitialized: () => boolean;
56
57
  }
57
58
  export declare function createSlotTracker(): SlotTracker;
58
- export declare function ensureSessionCapacity({ store, maxSessions, res, evictOldest, }: {
59
+ export declare function ensureSessionCapacity({ store, maxSessions, res, evictOldest, requestId, }: {
59
60
  store: SessionStore;
60
61
  maxSessions: number;
61
62
  res: Response;
62
63
  evictOldest: (store: SessionStore) => boolean;
64
+ requestId?: JsonRpcId;
63
65
  }): boolean;
64
66
  type CloseHandler = (() => void) | undefined;
65
67
  export declare function composeCloseHandlers(first: CloseHandler, second: CloseHandler): CloseHandler;
package/dist/http.js CHANGED
@@ -864,6 +864,13 @@ function sendJsonRpcError(res, code, message, status = 400, id = null) {
864
864
  id,
865
865
  });
866
866
  }
867
+ function sendJsonRpcErrorOrNoContent(res, code, message, status, id) {
868
+ if (id === null) {
869
+ res.sendStatus(204);
870
+ return;
871
+ }
872
+ sendJsonRpcError(res, code, message, status, id ?? null);
873
+ }
867
874
  function getSessionId(req) {
868
875
  const header = req.headers['mcp-session-id'];
869
876
  return Array.isArray(header) ? header[0] : header;
@@ -965,21 +972,21 @@ function tryEvictSlot(store, maxSessions, evictOldest) {
965
972
  currentSize - 1 + inFlightSessions < maxSessions;
966
973
  return canFreeSlot && evictOldest(store);
967
974
  }
968
- export function ensureSessionCapacity({ store, maxSessions, res, evictOldest, }) {
975
+ export function ensureSessionCapacity({ store, maxSessions, res, evictOldest, requestId, }) {
969
976
  if (!isServerAtCapacity(store, maxSessions)) {
970
977
  return true;
971
978
  }
972
979
  if (tryEvictSlot(store, maxSessions, evictOldest)) {
973
980
  return !isServerAtCapacity(store, maxSessions);
974
981
  }
975
- respondServerBusy(res);
982
+ respondServerBusy(res, requestId);
976
983
  return false;
977
984
  }
978
- function respondServerBusy(res) {
979
- sendJsonRpcError(res, -32000, 'Server busy: maximum sessions reached', 503, null);
985
+ function respondServerBusy(res, requestId) {
986
+ sendJsonRpcErrorOrNoContent(res, -32000, 'Server busy: maximum sessions reached', 503, requestId);
980
987
  }
981
988
  function respondBadRequest(res, id) {
982
- sendJsonRpcError(res, -32000, 'Bad Request: Missing session ID or not an initialize request', 400, id);
989
+ sendJsonRpcErrorOrNoContent(res, -32000, 'Bad Request: Missing session ID or not an initialize request', 400, id);
983
990
  }
984
991
  function createTimeoutController() {
985
992
  let initTimeout = null;
@@ -1132,13 +1139,15 @@ function evictOldestSessionWithClose(store) {
1132
1139
  });
1133
1140
  return true;
1134
1141
  }
1135
- function reserveSessionIfPossible({ options, res, }) {
1136
- if (!ensureSessionCapacity({
1142
+ function reserveSessionIfPossible({ options, res, requestId, }) {
1143
+ const capacityArgs = {
1137
1144
  store: options.sessionStore,
1138
1145
  maxSessions: options.maxSessions,
1139
1146
  res,
1140
1147
  evictOldest: evictOldestSessionWithClose,
1141
- })) {
1148
+ ...(requestId !== undefined ? { requestId } : {}),
1149
+ };
1150
+ if (!ensureSessionCapacity(capacityArgs)) {
1142
1151
  return false;
1143
1152
  }
1144
1153
  if (!reserveSessionSlot(options.sessionStore, options.maxSessions)) {
@@ -1154,7 +1163,7 @@ function resolveExistingSessionTransport(store, sessionId, res, requestId) {
1154
1163
  return existingSession.transport;
1155
1164
  }
1156
1165
  // Client supplied a session id but it doesn't exist; Streamable HTTP: invalid session IDs => 404.
1157
- sendJsonRpcError(res, -32600, 'Session not found', 404, requestId);
1166
+ sendJsonRpcErrorOrNoContent(res, -32600, 'Session not found', 404, requestId);
1158
1167
  return null;
1159
1168
  }
1160
1169
  function createSessionContext() {
@@ -1163,12 +1172,12 @@ function createSessionContext() {
1163
1172
  const transport = createSessionTransport({ tracker, timeoutController });
1164
1173
  return { tracker, timeoutController, transport };
1165
1174
  }
1166
- function finalizeSessionIfValid({ store, transport, tracker, clearInitTimeout, res, }) {
1175
+ function finalizeSessionIfValid({ store, transport, tracker, clearInitTimeout, res, requestId, }) {
1167
1176
  const { sessionId } = transport;
1168
1177
  if (typeof sessionId !== 'string') {
1169
1178
  clearInitTimeout();
1170
1179
  tracker.releaseSlot();
1171
- respondBadRequest(res, null);
1180
+ respondBadRequest(res, requestId ?? null);
1172
1181
  return false;
1173
1182
  }
1174
1183
  finalizeSession({
@@ -1197,8 +1206,13 @@ function finalizeSession({ store, transport, sessionId, tracker, clearInitTimeou
1197
1206
  });
1198
1207
  logInfo('Session initialized');
1199
1208
  }
1200
- async function createAndConnectTransport({ options, res, }) {
1201
- if (!reserveSessionIfPossible({ options, res }))
1209
+ async function createAndConnectTransport({ options, res, requestId, }) {
1210
+ const reserveArgs = {
1211
+ options,
1212
+ res,
1213
+ ...(requestId !== undefined ? { requestId } : {}),
1214
+ };
1215
+ if (!reserveSessionIfPossible(reserveArgs))
1202
1216
  return null;
1203
1217
  const { tracker, timeoutController, transport } = createSessionContext();
1204
1218
  await connectTransportOrThrow({
@@ -1212,6 +1226,7 @@ async function createAndConnectTransport({ options, res, }) {
1212
1226
  tracker,
1213
1227
  clearInitTimeout: timeoutController.clear,
1214
1228
  res,
1229
+ ...(requestId !== undefined ? { requestId } : {}),
1215
1230
  })) {
1216
1231
  return null;
1217
1232
  }
@@ -1227,7 +1242,7 @@ export async function resolveTransportForPost({ res, body, sessionId, options, }
1227
1242
  return null;
1228
1243
  }
1229
1244
  evictExpiredSessionsWithClose(options.sessionStore);
1230
- return createAndConnectTransport({ options, res });
1245
+ return createAndConnectTransport({ options, res, requestId });
1231
1246
  }
1232
1247
  function startSessionCleanupLoop(store, sessionTtlMs) {
1233
1248
  const controller = new AbortController();
@@ -1,66 +1,39 @@
1
- # superFetch MCP Server — AI Usage Instructions
1
+ # superFetch Instructions
2
2
 
3
- Use this server to fetch single public http(s) URLs, extract readable content, and return clean Markdown suitable for summarization, RAG ingestion, and citation. Prefer these tools over "remembering" state in chat.
3
+ > **Guidance for the Agent:** These instructions are available as a resource (`internal://instructions`). Load them when you are confused about tool usage.
4
4
 
5
- ## Operating Rules
5
+ ## 1. Core Capability
6
6
 
7
- - Only fetch sources that are necessary and likely authoritative.
8
- - Cite using `resolvedUrl` (when present) and keep `fetchedAt`/metadata intact.
9
- - If content is missing/truncated, check for a `resource_link` in the output and read the cache resource.
10
- - If request is vague, ask clarifying questions.
7
+ - **Domain:** Fetch public http(s) URLs, extract readable content, and return clean Markdown.
8
+ - **Primary Resources:** `fetch-url` output (`markdown`, `title`, `url`) and cache resources (`superfetch://cache/markdown/{urlHash}`).
11
9
 
12
- ### Strategies
10
+ ## 2. The "Golden Path" Workflows (Critical)
13
11
 
14
- - **Discovery:** Use `fetch-url` to retrieve content. Review the output for `resource_link` if the page is large.
15
- - **Action:** Read the Markdown content directly from the tool output or the referenced resource.
12
+ ### Workflow A: Fetch and Read
16
13
 
17
- ## Data Model
14
+ 1. Call `fetch-url` with a public http(s) URL.
15
+ 2. Read `structuredContent.markdown` and `structuredContent.title`.
16
+ 3. Cite using `resolvedUrl` or `url` from the response.
18
17
 
19
- - **Markdown Content:** `markdown` content, `title`, and `url` metadata.
20
- - **Resources:** Cached content accessible via `superfetch://cache/{namespace}/{hash}`.
18
+ ### Workflow B: Large Content / Cache Resource
21
19
 
22
- ## Workflows
20
+ 1. If the response includes a `resource_link`, read that resource URI.
21
+ 2. If content is missing, list resources and select the matching `superfetch://cache/markdown/{urlHash}` entry.
22
+ > **Constraint:** Never guess resource URIs. Use the returned `resource_link` or list resources first.
23
23
 
24
- ### 1) Fetch and Read
24
+ ## 3. Tool Nuances & "Gotchas"
25
25
 
26
- ```text
27
- fetch-url(url) Get markdown content
28
- If content truncated read resource(superfetch://cache/...)
29
- ```
26
+ - **`fetch-url`**:
27
+ - **Latency:** Network-bound; expect slower responses for large pages.
28
+ - **Side Effects:** Calls external websites (open-world).
29
+ - **Input:** `url` must be public http/https. Private/internal addresses are blocked.
30
+ - **Output:** Large content may return a `resource_link` instead of full inline markdown.
31
+ - **Cache resources (`superfetch://cache/markdown/{urlHash}`)**:
32
+ - **Namespace:** Only `markdown` is valid.
33
+ - **Discovery:** Use resource listing or the `resource_link` returned by `fetch-url`.
30
34
 
31
- ## Tools
35
+ ## 4. Error Handling Strategy
32
36
 
33
- ### fetch-url
34
-
35
- Fetches a webpage and converts it to clean Markdown format (HTML Readability → Markdown).
36
-
37
- - **Use when:** You need the text content of a specific public URL.
38
- - **Args:**
39
- - `url` (string, required): The URL to fetch (must be http/https).
40
- - **Returns:**
41
- - `structuredContent` with `markdown`, `title`, `url`.
42
- - Content block with standard text.
43
- - Or `resource_link` block if content exceeds inline limits.
44
-
45
- ## Response Shape
46
-
47
- Success: `{ "content": [...], "structuredContent": { "markdown": "...", "title": "...", "url": "..." } }`
48
- Error: `{ "isError": true, "structuredContent": { "error": "...", "url": "..." } }`
49
-
50
- ### Common Errors
51
-
52
- | Code | Meaning | Resolution |
53
- | ------------------ | -------------------- | ------------------------------- |
54
- | `VALIDATION_ERROR` | Invalid input URL | Ensure URL is valid http/https |
55
- | `FETCH_ERROR` | Network/HTTP failure | Verify URL is public/accessible |
56
-
57
- ## Limits
58
-
59
- - **Max Inline Characters:** 20000
60
- - **Max Content Size:** 10MB
61
- - **Fetch Timeout:** 15000ms
62
-
63
- ## Security
64
-
65
- - Server blocks private/internal IP ranges (localhost, 127.x, 192.168.x, metadata services).
66
- - Do not attempt to fetch internal network targets.
37
+ - **`VALIDATION_ERROR`**: URL is invalid or blocked. Confirm it is a public http(s) URL.
38
+ - **`FETCH_ERROR`**: Network/HTTP failure. Retry or verify the site is reachable.
39
+ - **Cache miss (`Content not found`)**: Re-run `fetch-url` or verify the cache entry exists.
package/dist/mcp.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { readFileSync } from 'node:fs';
2
- import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import { McpServer, ResourceTemplate, } from '@modelcontextprotocol/sdk/server/mcp.js';
3
3
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
4
  import { registerCachedContentResource } from './cache.js';
5
5
  import { config } from './config.js';
@@ -17,7 +17,6 @@ function createServerCapabilities() {
17
17
  return {
18
18
  tools: { listChanged: false },
19
19
  resources: { listChanged: true, subscribe: true },
20
- logging: {},
21
20
  };
22
21
  }
23
22
  function createServerInstructions(serverVersion) {
@@ -32,6 +31,21 @@ function createServerInstructions(serverVersion) {
32
31
  return `superFetch MCP server |${serverVersion}| A high-performance web content fetching and processing server.`;
33
32
  }
34
33
  }
34
+ function registerInstructionsResource(server) {
35
+ server.registerResource('instructions', new ResourceTemplate('internal://instructions', { list: undefined }), {
36
+ title: 'Server Instructions',
37
+ description: 'Usage guidance for the superFetch MCP server.',
38
+ mimeType: 'text/markdown',
39
+ }, (uri) => ({
40
+ contents: [
41
+ {
42
+ uri: uri.href,
43
+ mimeType: 'text/markdown',
44
+ text: createServerInstructions(config.server.version),
45
+ },
46
+ ],
47
+ }));
48
+ }
35
49
  export function createMcpServer() {
36
50
  const server = new McpServer(createServerInfo(), {
37
51
  capabilities: createServerCapabilities(),
@@ -39,6 +53,7 @@ export function createMcpServer() {
39
53
  });
40
54
  registerTools(server);
41
55
  registerCachedContentResource(server);
56
+ registerInstructionsResource(server);
42
57
  return server;
43
58
  }
44
59
  function attachServerErrorHandler(server) {
package/dist/tools.d.ts CHANGED
@@ -62,9 +62,27 @@ export interface PipelineResult<T> {
62
62
  fetchedAt: string;
63
63
  cacheKey?: string | null;
64
64
  }
65
+ export type ProgressToken = string | number;
66
+ export interface RequestMeta {
67
+ progressToken?: ProgressToken | undefined;
68
+ [key: string]: unknown;
69
+ }
70
+ export interface ProgressNotificationParams {
71
+ progressToken: ProgressToken;
72
+ progress: number;
73
+ total?: number;
74
+ message?: string;
75
+ _meta?: Record<string, unknown>;
76
+ }
77
+ export interface ProgressNotification {
78
+ method: 'notifications/progress';
79
+ params: ProgressNotificationParams;
80
+ }
65
81
  export interface ToolHandlerExtra {
66
82
  signal?: AbortSignal;
67
83
  requestId?: string | number;
84
+ _meta?: RequestMeta;
85
+ sendNotification?: (notification: ProgressNotification) => Promise<void>;
68
86
  }
69
87
  export declare const FETCH_URL_TOOL_NAME = "fetch-url";
70
88
  export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format";
package/dist/tools.js CHANGED
@@ -2,12 +2,13 @@ import { randomUUID } from 'node:crypto';
2
2
  import { z } from 'zod';
3
3
  import * as cache from './cache.js';
4
4
  import { config } from './config.js';
5
- import { FetchError, isSystemError } from './errors.js';
5
+ import { FetchError, getErrorMessage, isSystemError } from './errors.js';
6
6
  import { fetchNormalizedUrl, normalizeUrl, transformToRawUrl, } from './fetch.js';
7
7
  import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from './observability.js';
8
8
  import { transformHtmlToMarkdown, } from './transform.js';
9
9
  import { isRecord } from './utils.js';
10
10
  const TRUNCATION_MARKER = '...[truncated]';
11
+ const FETCH_PROGRESS_TOTAL = 4;
11
12
  const fetchUrlInputSchema = z.strictObject({
12
13
  url: z.url({ protocol: /^https?$/i }).describe('The URL to fetch'),
13
14
  });
@@ -30,6 +31,33 @@ const fetchUrlOutputSchema = z.strictObject({
30
31
  });
31
32
  export const FETCH_URL_TOOL_NAME = 'fetch-url';
32
33
  export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format';
34
+ function createProgressReporter(extra) {
35
+ const token = extra?._meta?.progressToken ?? null;
36
+ const sendNotification = extra?.sendNotification;
37
+ if (token === null || !sendNotification) {
38
+ return { report: async () => { } };
39
+ }
40
+ return {
41
+ report: async (progress, message) => {
42
+ try {
43
+ await sendNotification({
44
+ method: 'notifications/progress',
45
+ params: {
46
+ progressToken: token,
47
+ progress,
48
+ total: FETCH_PROGRESS_TOTAL,
49
+ message,
50
+ },
51
+ });
52
+ }
53
+ catch (error) {
54
+ logWarn('Failed to send progress notification', {
55
+ error: getErrorMessage(error),
56
+ });
57
+ }
58
+ },
59
+ };
60
+ }
33
61
  function serializeStructuredContent(structuredContent, fromCache) {
34
62
  return JSON.stringify(structuredContent, fromCache ? undefined : null, fromCache ? undefined : 2);
35
63
  }
@@ -354,11 +382,16 @@ function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
354
382
  function logFetchStart(url) {
355
383
  logDebug('Fetching URL', { url });
356
384
  }
357
- async function fetchPipeline(url, signal) {
385
+ async function fetchPipeline(url, signal, progress) {
358
386
  return performSharedFetch({
359
387
  url,
360
388
  ...(signal === undefined ? {} : { signal }),
361
- transform: (html, normalizedUrl) => buildMarkdownTransform()(html, normalizedUrl, signal),
389
+ transform: async (html, normalizedUrl) => {
390
+ if (progress) {
391
+ await progress.report(3, 'Transforming content');
392
+ }
393
+ return buildMarkdownTransform()(html, normalizedUrl, signal);
394
+ },
362
395
  serialize: serializeMarkdownResult,
363
396
  deserialize: deserializeMarkdownResult,
364
397
  });
@@ -376,11 +409,18 @@ async function executeFetch(input, extra) {
376
409
  if (!url) {
377
410
  return createToolErrorResponse('URL is required', '');
378
411
  }
412
+ const progress = createProgressReporter(extra);
413
+ await progress.report(1, 'Validating URL');
379
414
  logFetchStart(url);
380
- const { pipeline, inlineResult } = await fetchPipeline(url, extra?.signal);
415
+ await progress.report(2, 'Fetching content');
416
+ const { pipeline, inlineResult } = await fetchPipeline(url, extra?.signal, progress);
417
+ if (pipeline.fromCache) {
418
+ await progress.report(3, 'Using cached content');
419
+ }
381
420
  if (inlineResult.error) {
382
421
  return createToolErrorResponse(inlineResult.error, url);
383
422
  }
423
+ await progress.report(4, 'Finalizing response');
384
424
  return buildResponse(pipeline, inlineResult, url);
385
425
  }
386
426
  export async function fetchUrlToolHandler(input, extra) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/superfetch",
3
- "version": "2.1.2",
3
+ "version": "2.1.3",
4
4
  "mcpName": "io.github.j0hanz/superfetch",
5
5
  "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
6
6
  "type": "module",