@j0hanz/superfetch 1.2.5 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/README.md +131 -156
  2. package/dist/config/auth-config.d.ts +16 -0
  3. package/dist/config/auth-config.js +53 -0
  4. package/dist/config/constants.d.ts +11 -13
  5. package/dist/config/constants.js +1 -3
  6. package/dist/config/env-parsers.d.ts +7 -0
  7. package/dist/config/env-parsers.js +84 -0
  8. package/dist/config/formatting.d.ts +2 -2
  9. package/dist/config/index.d.ts +47 -53
  10. package/dist/config/index.js +35 -64
  11. package/dist/config/types/content.d.ts +1 -49
  12. package/dist/config/types/runtime.d.ts +8 -16
  13. package/dist/config/types/tools.d.ts +2 -28
  14. package/dist/http/accept-policy.d.ts +3 -0
  15. package/dist/http/accept-policy.js +45 -0
  16. package/dist/http/async-handler.d.ts +2 -0
  17. package/dist/http/async-handler.js +5 -0
  18. package/dist/http/auth-introspection.d.ts +2 -0
  19. package/dist/http/auth-introspection.js +141 -0
  20. package/dist/http/auth-static.d.ts +2 -0
  21. package/dist/http/auth-static.js +23 -0
  22. package/dist/http/auth.d.ts +3 -2
  23. package/dist/http/auth.js +254 -23
  24. package/dist/http/cors.d.ts +6 -6
  25. package/dist/http/cors.js +7 -42
  26. package/dist/http/download-routes.d.ts +0 -12
  27. package/dist/http/download-routes.js +21 -58
  28. package/dist/http/host-allowlist.d.ts +3 -0
  29. package/dist/http/host-allowlist.js +117 -0
  30. package/dist/http/jsonrpc-http.d.ts +2 -0
  31. package/dist/http/jsonrpc-http.js +10 -0
  32. package/dist/http/mcp-routes.d.ts +8 -3
  33. package/dist/http/mcp-routes.js +137 -31
  34. package/dist/http/mcp-session-eviction.d.ts +3 -0
  35. package/dist/http/mcp-session-eviction.js +24 -0
  36. package/dist/http/mcp-session-helpers.d.ts +0 -1
  37. package/dist/http/mcp-session-helpers.js +1 -1
  38. package/dist/http/mcp-session-init.d.ts +7 -0
  39. package/dist/http/mcp-session-init.js +94 -0
  40. package/dist/http/mcp-session-slots.d.ts +17 -0
  41. package/dist/http/mcp-session-slots.js +55 -0
  42. package/dist/http/mcp-session-transport-init.d.ts +7 -0
  43. package/dist/http/mcp-session-transport-init.js +41 -0
  44. package/dist/http/mcp-session-transport.d.ts +7 -0
  45. package/dist/http/mcp-session-transport.js +57 -0
  46. package/dist/http/mcp-session-types.d.ts +5 -0
  47. package/dist/http/mcp-session-types.js +1 -0
  48. package/dist/http/mcp-session.d.ts +9 -9
  49. package/dist/http/mcp-session.js +15 -137
  50. package/dist/http/mcp-sessions.d.ts +43 -0
  51. package/dist/http/mcp-sessions.js +392 -0
  52. package/dist/http/mcp-validation.d.ts +1 -0
  53. package/dist/http/mcp-validation.js +11 -10
  54. package/dist/http/protocol-policy.d.ts +2 -0
  55. package/dist/http/protocol-policy.js +31 -0
  56. package/dist/http/rate-limit.js +7 -4
  57. package/dist/http/server-config.d.ts +1 -0
  58. package/dist/http/server-config.js +40 -0
  59. package/dist/http/server-middleware.d.ts +7 -9
  60. package/dist/http/server-middleware.js +9 -70
  61. package/dist/http/server-shutdown.d.ts +4 -0
  62. package/dist/http/server-shutdown.js +43 -0
  63. package/dist/http/server.d.ts +10 -0
  64. package/dist/http/server.js +546 -61
  65. package/dist/http/session-cleanup.js +8 -5
  66. package/dist/middleware/error-handler.d.ts +1 -1
  67. package/dist/middleware/error-handler.js +32 -33
  68. package/dist/resources/cached-content-params.d.ts +5 -0
  69. package/dist/resources/cached-content-params.js +36 -0
  70. package/dist/resources/cached-content.js +67 -125
  71. package/dist/resources/index.js +0 -82
  72. package/dist/server.js +50 -29
  73. package/dist/services/cache-events.d.ts +8 -0
  74. package/dist/services/cache-events.js +19 -0
  75. package/dist/services/cache-keys.d.ts +7 -0
  76. package/dist/services/cache-keys.js +57 -0
  77. package/dist/services/cache.d.ts +4 -9
  78. package/dist/services/cache.js +77 -139
  79. package/dist/services/context.d.ts +0 -1
  80. package/dist/services/context.js +0 -7
  81. package/dist/services/extractor.js +55 -116
  82. package/dist/services/fetcher/agents.d.ts +2 -2
  83. package/dist/services/fetcher/agents.js +35 -96
  84. package/dist/services/fetcher/dns-selection.d.ts +2 -0
  85. package/dist/services/fetcher/dns-selection.js +72 -0
  86. package/dist/services/fetcher/interceptors.d.ts +0 -22
  87. package/dist/services/fetcher/interceptors.js +18 -32
  88. package/dist/services/fetcher/redirects.js +16 -7
  89. package/dist/services/fetcher/response.js +79 -34
  90. package/dist/services/fetcher.d.ts +22 -3
  91. package/dist/services/fetcher.js +544 -44
  92. package/dist/services/fifo-queue.d.ts +8 -0
  93. package/dist/services/fifo-queue.js +25 -0
  94. package/dist/services/logger.js +2 -2
  95. package/dist/services/metadata-collector.d.ts +1 -9
  96. package/dist/services/metadata-collector.js +71 -2
  97. package/dist/services/transform-worker-pool.d.ts +4 -14
  98. package/dist/services/transform-worker-pool.js +177 -129
  99. package/dist/services/transform-worker-types.d.ts +32 -0
  100. package/dist/services/transform-worker-types.js +14 -0
  101. package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
  102. package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
  103. package/dist/tools/handlers/fetch-single.shared.d.ts +11 -22
  104. package/dist/tools/handlers/fetch-single.shared.js +175 -89
  105. package/dist/tools/handlers/fetch-url.tool.d.ts +7 -1
  106. package/dist/tools/handlers/fetch-url.tool.js +84 -119
  107. package/dist/tools/index.js +21 -40
  108. package/dist/tools/schemas.d.ts +1 -51
  109. package/dist/tools/schemas.js +1 -107
  110. package/dist/tools/utils/cached-markdown.d.ts +5 -0
  111. package/dist/tools/utils/cached-markdown.js +46 -0
  112. package/dist/tools/utils/content-shaping.d.ts +4 -0
  113. package/dist/tools/utils/content-shaping.js +67 -0
  114. package/dist/tools/utils/content-transform.d.ts +5 -17
  115. package/dist/tools/utils/content-transform.js +134 -114
  116. package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
  117. package/dist/tools/utils/fetch-pipeline.js +57 -63
  118. package/dist/tools/utils/frontmatter.d.ts +3 -0
  119. package/dist/tools/utils/frontmatter.js +73 -0
  120. package/dist/tools/utils/inline-content.d.ts +1 -2
  121. package/dist/tools/utils/inline-content.js +4 -7
  122. package/dist/tools/utils/markdown-heuristics.d.ts +1 -0
  123. package/dist/tools/utils/markdown-heuristics.js +19 -0
  124. package/dist/tools/utils/markdown-signals.d.ts +1 -0
  125. package/dist/tools/utils/markdown-signals.js +19 -0
  126. package/dist/tools/utils/raw-markdown-frontmatter.d.ts +3 -0
  127. package/dist/tools/utils/raw-markdown-frontmatter.js +73 -0
  128. package/dist/tools/utils/raw-markdown.d.ts +6 -0
  129. package/dist/tools/utils/raw-markdown.js +135 -0
  130. package/dist/transformers/markdown/fenced-code-rule.d.ts +2 -0
  131. package/dist/transformers/markdown/fenced-code-rule.js +38 -0
  132. package/dist/transformers/markdown/frontmatter.d.ts +2 -0
  133. package/dist/transformers/markdown/frontmatter.js +45 -0
  134. package/dist/transformers/markdown/noise-rule.d.ts +2 -0
  135. package/dist/transformers/markdown/noise-rule.js +80 -0
  136. package/dist/transformers/markdown/turndown-instance.d.ts +2 -0
  137. package/dist/transformers/markdown/turndown-instance.js +19 -0
  138. package/dist/transformers/markdown.d.ts +2 -0
  139. package/dist/transformers/markdown.js +185 -0
  140. package/dist/transformers/markdown.transformer.js +5 -117
  141. package/dist/utils/cached-payload.d.ts +7 -0
  142. package/dist/utils/cached-payload.js +36 -0
  143. package/dist/utils/code-language-bash.d.ts +1 -0
  144. package/dist/utils/code-language-bash.js +48 -0
  145. package/dist/utils/code-language-core.d.ts +2 -0
  146. package/dist/utils/code-language-core.js +13 -0
  147. package/dist/utils/code-language-detectors.d.ts +5 -0
  148. package/dist/utils/code-language-detectors.js +142 -0
  149. package/dist/utils/code-language-helpers.d.ts +5 -0
  150. package/dist/utils/code-language-helpers.js +62 -0
  151. package/dist/utils/code-language-parsing.d.ts +5 -0
  152. package/dist/utils/code-language-parsing.js +62 -0
  153. package/dist/utils/code-language.d.ts +9 -0
  154. package/dist/utils/code-language.js +250 -46
  155. package/dist/utils/error-details.d.ts +3 -0
  156. package/dist/utils/error-details.js +12 -0
  157. package/dist/utils/error-utils.js +1 -1
  158. package/dist/utils/filename-generator.js +34 -12
  159. package/dist/utils/guards.d.ts +1 -0
  160. package/dist/utils/guards.js +3 -0
  161. package/dist/utils/header-normalizer.d.ts +0 -3
  162. package/dist/utils/header-normalizer.js +3 -3
  163. package/dist/utils/ip-address.d.ts +4 -0
  164. package/dist/utils/ip-address.js +6 -0
  165. package/dist/utils/tool-error-handler.d.ts +2 -2
  166. package/dist/utils/tool-error-handler.js +14 -46
  167. package/dist/utils/url-transformer.d.ts +7 -0
  168. package/dist/utils/url-transformer.js +147 -0
  169. package/dist/utils/url-validator.d.ts +1 -2
  170. package/dist/utils/url-validator.js +53 -114
  171. package/dist/workers/content-transform.worker.d.ts +1 -0
  172. package/dist/workers/content-transform.worker.js +40 -0
  173. package/package.json +17 -18
package/README.md CHANGED
@@ -10,9 +10,9 @@
10
10
 
11
11
  [![Install in Cursor](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/install-mcp?name=superfetch&config=eyJjb21tYW5kIjoibnB4IiwiYXJncyI6WyIteSIsIkBqMGhhbnovc3VwZXJmZXRjaEBsYXRlc3QiLCItLXN0ZGlvIl19)
12
12
 
13
- A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches web pages, extracts readable content with Mozilla Readability, and returns AI-friendly JSONL or Markdown.
13
+ A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches web pages, extracts readable content with Mozilla Readability, and returns AI-friendly Markdown.
14
14
 
15
- [Quick Start](#quick-start) | [How to Choose a Tool](#how-to-choose-a-tool) | [Tools](#available-tools) | [Resources](#resources) | [Configuration](#configuration) | [Security](#security) | [Development](#development)
15
+ [Quick Start](#quick-start) | [Tool](#available-tools) | [Resources](#resources) | [Configuration](#configuration) | [Security](#security) | [Development](#development)
16
16
 
17
17
  > **Published to [MCP Registry](https://registry.modelcontextprotocol.io/)** - Search for `io.github.j0hanz/superfetch`
18
18
 
@@ -23,45 +23,15 @@ A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that f
23
23
 
24
24
  ## Features
25
25
 
26
- | Feature | Description |
27
- | ------------------ | ------------------------------------------------------------------------- |
28
- | Smart extraction | Mozilla Readability removes ads, navigation, and boilerplate when enabled |
29
- | JSONL + Markdown | JSONL semantic blocks or clean Markdown with frontmatter |
30
- | Structured blocks | Headings, paragraphs, lists, code, tables, images, blockquotes |
31
- | Built-in caching | In-memory cache with TTL, max keys, and resource subscriptions |
32
- | Resilient fetching | Redirect handling plus retry with exponential backoff + jitter |
33
- | Security first | URL validation, SSRF/DNS/IP blocklists, header sanitization |
34
- | HTTP mode | API key auth, session management, rate limiting, CORS |
35
-
36
- ---
37
-
38
- ## How to Choose a Tool
39
-
40
- Use this guide to select the right tool for your web content extraction needs.
41
-
42
- ### Decision Tree
43
-
44
- ```text
45
- Need web content for AI?
46
- - Want structured JSONL blocks -> fetch-url (format: jsonl)
47
- - Want clean Markdown -> fetch-markdown
48
- - Want Markdown but also need contentBlocks count -> fetch-url (format: markdown)
49
- ```
50
-
51
- ### Quick Reference Table
52
-
53
- | Tool | Best For | Output Format | Use When |
54
- | ---------------- | ---------------------------------- | -------------------------------- | ----------------------------------------- |
55
- | `fetch-url` | Single page with structured blocks | JSONL (or Markdown via `format`) | RAG pipelines, content parsing, analytics |
56
- | `fetch-markdown` | Single page in readable format | Markdown + frontmatter | Documentation, summaries, human review |
57
-
58
- ### Common Use Cases
59
-
60
- | Task | Recommended Tool | Why |
61
- | ------------------------ | ---------------------------------------- | ---------------------------------------------------- |
62
- | Parse a blog post for AI | `fetch-url` | Returns semantic blocks (headings, paragraphs, code) |
63
- | Generate documentation | `fetch-markdown` | Clean markdown with frontmatter |
64
- | Extract article for RAG | `fetch-url` + `extractMainContent: true` | Removes ads/nav, keeps main content |
26
+ | Feature | Description |
27
+ | -------------------- | ------------------------------------------------------------------------------------------------------------------ |
28
+ | Smart extraction | Mozilla Readability with quality gates to strip boilerplate when it improves results |
29
+ | Clean Markdown | Markdown output with optional YAML frontmatter (title + source) |
30
+ | Raw content handling | Preserves raw markdown/text, detects common text extensions, and rewrites GitHub/GitLab/Bitbucket/Gist URLs to raw |
31
+ | Built-in caching | In-memory cache with TTL, max keys, and resource subscriptions |
32
+ | Resilient fetching | Redirect handling with validation, timeouts, and response size limits |
33
+ | Security first | URL validation plus SSRF/DNS/IP blocklists |
34
+ | HTTP mode | Static token or OAuth auth, session management, rate limiting, host/origin validation |
65
35
 
66
36
  ---
67
37
 
@@ -230,7 +200,7 @@ npm install -g @j0hanz/superfetch
230
200
  # Run in stdio mode
231
201
  superfetch --stdio
232
202
 
233
- # Run HTTP server (requires API_KEY)
203
+ # Run HTTP server (requires auth token)
234
204
  superfetch
235
205
  ```
236
206
 
@@ -257,7 +227,7 @@ node dist/index.js --stdio
257
227
  <details>
258
228
  <summary><strong>HTTP Mode</strong> (default)</summary>
259
229
 
260
- HTTP mode requires `API_KEY` and only binds to loopback addresses unless `ALLOW_REMOTE=true`.
230
+ HTTP mode requires authentication. By default it binds to `127.0.0.1`. To listen on all interfaces, set `HOST=0.0.0.0` or `HOST=::` and configure OAuth (remote bindings require OAuth). Other non-loopback `HOST` values are rejected.
261
231
 
262
232
  ```bash
263
233
  API_KEY=supersecret npx -y @j0hanz/superfetch@latest
@@ -271,13 +241,17 @@ $env:API_KEY = "supersecret"
271
241
  npx -y @j0hanz/superfetch@latest
272
242
  ```
273
243
 
274
- Endpoints (all require `Authorization: Bearer <API_KEY>` or `X-API-Key: <API_KEY>`):
244
+ For multiple static tokens, set `ACCESS_TOKENS` (comma/space separated).
245
+
246
+ Auth is required for `/mcp` and `/mcp/downloads` via `Authorization: Bearer <token>` (static mode also accepts `X-API-Key`).
247
+
248
+ Endpoints:
275
249
 
276
- - `GET /health`
277
- - `POST /mcp`
278
- - `GET /mcp` (SSE stream)
279
- - `DELETE /mcp`
280
- - `GET /mcp/downloads/:namespace/:hash`
250
+ - `GET /health` (no auth; returns status, name, version, uptime)
251
+ - `POST /mcp` (auth required)
252
+ - `GET /mcp` (auth required; SSE stream; requires `Accept: text/event-stream`)
253
+ - `DELETE /mcp` (auth required)
254
+ - `GET /mcp/downloads/:namespace/:hash` (auth required)
281
255
 
282
256
  Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#http-mode-details)).
283
257
 
@@ -289,111 +263,62 @@ Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#h
289
263
 
290
264
  ### Tool Response Notes
291
265
 
292
- Both tools return:
266
+ The tool returns `structuredContent` with `url`, optional `title`, and `markdown` when inline content is available. On errors, `error` is included instead of content.
293
267
 
294
- - `structuredContent` for machine-readable fields (includes `contentSize`, `cached`, and optional `resourceUri`/`resourceMimeType`/`truncated`; Markdown responses may also include `file`)
295
- - `content` blocks that include:
296
- - a `text` block containing JSON of `structuredContent`
297
- - in stdio mode, a `resource` block with a `file:///...` URI embedding the full content
298
- - in HTTP mode, a `resource` block when inline content is available
299
- - when content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is enabled, a `resource_link` block points to `superfetch://cache/...` and `structuredContent.resourceUri` is set
268
+ The response includes:
300
269
 
301
- If content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is disabled, the server truncates output, appends `...[truncated]`, and sets `truncated: true`.
270
+ - a `text` block containing JSON of `structuredContent`
271
+ - a `resource` block embedding markdown when inline content is available (always in stdio mode)
272
+ - when content exceeds the inline limit and cache is enabled, a `resource_link` block pointing to `superfetch://cache/...` (inline markdown may be omitted)
273
+ - on errors, `isError: true` plus a `structuredContent` containing `error` and `url`
302
274
 
303
275
  ---
304
276
 
305
277
  ### `fetch-url`
306
278
 
307
- Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. You can also request Markdown with `format: "markdown"`.
279
+ Fetches a webpage and converts it to clean Markdown format with optional frontmatter.
308
280
 
309
- | Parameter | Type | Default | Description |
310
- | ---------------------- | --------------------- | ---------------------------------- | ------------------------------------------------------ |
311
- | `url` | string | required | URL to fetch |
312
- | `format` | "jsonl" \| "markdown" | `"jsonl"` | Output format |
313
- | `includeContentBlocks` | boolean | `true` (jsonl), `false` (markdown) | Include content block counts when `format: "markdown"` |
314
- | `extractMainContent` | boolean | `true` | Use Readability to extract main content |
315
- | `includeMetadata` | boolean | `true` | Include page metadata |
316
- | `maxContentLength` | number | - | Maximum content length in characters (max 5,242,880) |
317
- | `customHeaders` | object | - | Custom HTTP headers (sanitized) |
318
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
319
- | `retries` | number | `3` | Number of retry attempts (1-10) |
320
-
321
- When `format: "markdown"` and `includeContentBlocks` is `false`, `contentBlocks` will be `0`.
281
+ | Parameter | Type | Default | Description |
282
+ | --------- | ------ | -------- | ------------ |
283
+ | `url` | string | required | URL to fetch |
322
284
 
323
285
  **Example `structuredContent`:**
324
286
 
325
287
  ```json
326
288
  {
327
- "url": "https://example.com/article",
328
- "title": "Example Article",
329
- "contentBlocks": 42,
330
- "fetchedAt": "2025-12-11T10:30:00.000Z",
331
- "format": "jsonl",
332
- "contentSize": 12345,
333
- "cached": false,
334
- "content": "{\"type\":\"metadata\",\"title\":\"Example Article\",\"url\":\"https://example.com/article\"}\n{\"type\":\"heading\",\"level\":1,\"text\":\"Introduction\"}"
289
+ "url": "https://example.com/docs",
290
+ "title": "Documentation",
291
+ "markdown": "---\ntitle: Documentation\n---\n\n# Getting Started\n\nWelcome..."
335
292
  }
336
293
  ```
337
294
 
338
- ---
339
-
340
- ### `fetch-markdown`
341
-
342
- Fetches a webpage and converts it to clean Markdown with optional frontmatter.
343
-
344
- | Parameter | Type | Default | Description |
345
- | -------------------- | ------- | -------- | ---------------------------------------------------- |
346
- | `url` | string | required | URL to fetch |
347
- | `extractMainContent` | boolean | `true` | Extract main content only |
348
- | `includeMetadata` | boolean | `true` | Include YAML frontmatter |
349
- | `maxContentLength` | number | - | Maximum content length in characters (max 5,242,880) |
350
- | `customHeaders` | object | - | Custom HTTP headers (sanitized) |
351
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
352
- | `retries` | number | `3` | Number of retry attempts (1-10) |
353
-
354
- **Example `structuredContent`:**
295
+ **Error response:**
355
296
 
356
297
  ```json
357
298
  {
358
- "url": "https://example.com/docs",
359
- "title": "Documentation",
360
- "fetchedAt": "2025-12-11T10:30:00.000Z",
361
- "markdown": "---\ntitle: Documentation\nsource: \"https://example.com/docs\"\n---\n\n# Getting Started\n\nWelcome...",
362
- "contentSize": 9876,
363
- "cached": false,
364
- "truncated": false,
365
- "file": {
366
- "downloadUrl": "/mcp/downloads/markdown/abc123def456",
367
- "fileName": "documentation.md",
368
- "expiresAt": "2025-12-11T11:30:00.000Z"
369
- }
299
+ "url": "https://example.com/broken",
300
+ "error": "Failed to fetch: 404 Not Found"
370
301
  }
371
302
  ```
372
303
 
373
- `file` is included only in HTTP mode when content is cached and too large to inline.
374
-
375
304
  ---
376
305
 
377
306
  ### Large Content Handling
378
307
 
379
- - Inline limit is configurable via `MAX_INLINE_CONTENT_CHARS` (see `CONFIGURATION.md`).
380
- - If content exceeds the limit and cache is enabled, responses include `resourceUri`/`resourceMimeType` and a `resource_link` block.
381
- - If cache is disabled, content is truncated with `...[truncated]` and `truncated: true`.
382
- - Use `maxContentLength` per request to enforce a lower limit (hard cap: 5,242,880 characters).
308
+ - Inline markdown is capped at 20,000 characters (`maxInlineContentChars`).
309
+ - **Stdio mode:** full markdown is embedded as a `resource` block.
310
+ - **HTTP mode:** if content exceeds the inline limit and cache is enabled, the response includes a `resource_link` to `superfetch://cache/...` (no embedded markdown). If cache is disabled, the inline markdown is truncated with `...[truncated]`.
383
311
  - Upstream fetch size is capped at 10 MB of HTML; larger responses fail.
384
312
 
385
313
  ---
386
314
 
387
315
  ## Resources
388
316
 
389
- | URI | Description |
390
- | ------------------------------------------ | -------------------------------------------------------------------------- |
391
- | `superfetch://health` | Real-time server health and memory checks |
392
- | `superfetch://stats` | Server stats and cache metrics |
393
- | `superfetch://cache/list` | List cached entries and their resource URIs |
394
- | `superfetch://cache/{namespace}/{urlHash}` | Cached content entry (`namespace`: `url`, `markdown`; `links` is reserved) |
317
+ | URI | Description |
318
+ | ------------------------------------------ | ---------------------------------------------- |
319
+ | `superfetch://cache/{namespace}/{urlHash}` | Cached content entry (`namespace`: `markdown`) |
395
320
 
396
- Resource subscriptions notify clients when cache entries update.
321
+ Resource listings enumerate cached entries, and subscriptions notify clients when cache entries update.
397
322
 
398
323
  ---
399
324
 
@@ -407,21 +332,21 @@ When running in HTTP mode, cached content can be downloaded directly. Downloads
407
332
  GET /mcp/downloads/:namespace/:hash
408
333
  ```
409
334
 
410
- - `namespace`: `markdown` or `url`
411
- - Auth required (`Authorization: Bearer <API_KEY>` or `X-API-Key: <API_KEY>`)
335
+ - `namespace`: `markdown`
336
+ - Auth required (`Authorization: Bearer <token>`; in static token mode, `X-API-Key` is accepted)
412
337
 
413
338
  ### Response Headers
414
339
 
415
- | Header | Value |
416
- | --------------------- | ----------------------------------------------------------------------- |
417
- | `Content-Type` | `text/markdown; charset=utf-8` or `application/x-ndjson; charset=utf-8` |
418
- | `Content-Disposition` | `attachment; filename="<name>"` |
419
- | `Cache-Control` | `private, max-age=<CACHE_TTL>` |
340
+ | Header | Value |
341
+ | --------------------- | ------------------------------- |
342
+ | `Content-Type` | `text/markdown; charset=utf-8` |
343
+ | `Content-Disposition` | `attachment; filename="<name>"` |
344
+ | `Cache-Control` | `private, max-age=<CACHE_TTL>` |
420
345
 
421
346
  ### Example Usage
422
347
 
423
348
  ```bash
424
- curl -H "Authorization: Bearer $API_KEY" \
349
+ curl -H "Authorization: Bearer $TOKEN" \
425
350
  http://localhost:3000/mcp/downloads/markdown/abc123.def456 \
426
351
  -o article.md
427
352
  ```
@@ -438,7 +363,66 @@ curl -H "Authorization: Bearer $API_KEY" \
438
363
 
439
364
  ## Configuration
440
365
 
441
- Configuration details live in `CONFIGURATION.md`, including all environment variables, defaults, ranges, presets, and dev-only flags.
366
+ Set environment variables in your MCP client `env` or in the shell before starting the server.
367
+
368
+ ### Core Server Settings
369
+
370
+ | Variable | Default | Description |
371
+ | --------------- | -------------------- | ------------------------------------------------------------- |
372
+ | `HOST` | `127.0.0.1` | HTTP bind address |
373
+ | `PORT` | `3000` | HTTP server port (1024-65535) |
374
+ | `USER_AGENT` | `superFetch-MCP/2.0` | User-Agent header for outgoing requests |
375
+ | `CACHE_ENABLED` | `true` | Enable response caching |
376
+ | `CACHE_TTL` | `3600` | Cache TTL in seconds (60-86400) |
377
+ | `LOG_LEVEL` | `info` | `debug`, `info`, `warn`, `error` |
378
+ | `ALLOWED_HOSTS` | (empty) | Additional allowed Host/Origin values (comma/space separated) |
379
+
380
+ ### Auth (HTTP Mode)
381
+
382
+ | Variable | Default | Description |
383
+ | --------------- | ------- | ------------------------------------------------------------ |
384
+ | `AUTH_MODE` | auto | `static` or `oauth`. Auto-selects OAuth if any OAuth URL variable is set |
385
+ | `ACCESS_TOKENS` | (empty) | Comma/space-separated static bearer tokens |
386
+ | `API_KEY` | (empty) | Adds a static bearer token and enables `X-API-Key` header |
387
+
388
+ Static mode requires at least one token (`ACCESS_TOKENS` or `API_KEY`).
389
+
390
+ ### OAuth (HTTP Mode)
391
+
392
+ Required when `AUTH_MODE=oauth` (or auto-selected by presence of OAuth URLs):
393
+
394
+ | Variable | Default | Description |
395
+ | ------------------------- | ------- | ---------------------- |
396
+ | `OAUTH_ISSUER_URL` | - | OAuth issuer |
397
+ | `OAUTH_AUTHORIZATION_URL` | - | Authorization endpoint |
398
+ | `OAUTH_TOKEN_URL` | - | Token endpoint |
399
+ | `OAUTH_INTROSPECTION_URL` | - | Introspection endpoint |
400
+
401
+ Optional:
402
+
403
+ | Variable | Default | Description |
404
+ | -------------------------------- | -------------------------- | --------------------------------------- |
405
+ | `OAUTH_REVOCATION_URL` | - | Revocation endpoint |
406
+ | `OAUTH_REGISTRATION_URL` | - | Dynamic client registration endpoint |
407
+ | `OAUTH_RESOURCE_URL` | `http://<host>:<port>/mcp` | Protected resource URL |
408
+ | `OAUTH_REQUIRED_SCOPES` | (empty) | Required scopes (comma/space separated) |
409
+ | `OAUTH_CLIENT_ID` | - | Client ID for introspection |
410
+ | `OAUTH_CLIENT_SECRET` | - | Client secret for introspection |
411
+ | `OAUTH_INTROSPECTION_TIMEOUT_MS` | `5000` | Introspection timeout (1000-30000) |
412
+
413
+ ### Fixed Limits (Not Configurable via env)
414
+
415
+ - Fetch timeout: 15 seconds
416
+ - Max redirects: 5
417
+ - Max HTML response size: 10 MB
418
+ - Inline markdown limit: 20,000 characters
419
+ - Cache max entries: 100
420
+ - Session TTL: 30 minutes
421
+ - Session init timeout: 10 seconds
422
+ - Max sessions: 200
423
+ - Rate limit: 100 req/min per IP (60s window)
424
+
425
+ See `CONFIGURATION.md` for preset examples and quick-start snippets.
442
426
 
443
427
  ---
444
428
 
@@ -450,28 +434,18 @@ HTTP mode uses the MCP Streamable HTTP transport. The workflow is:
450
434
  2. The server returns `mcp-session-id` in the response headers.
451
435
  3. Use that header for subsequent `POST /mcp`, `GET /mcp`, and `DELETE /mcp` requests.
452
436
 
453
- `GET /mcp` and `DELETE /mcp` require `mcp-session-id`. `POST /mcp` without an `initialize` request will return 400.
454
-
455
- If `MAX_SESSIONS` is reached, the server evicts the oldest session when possible, otherwise returns a 503.
437
+ If the `mcp-protocol-version` header is missing, the server defaults it to `2025-03-26`. Supported versions are `2025-03-26` and `2025-11-25`.
456
438
 
457
- Host header validation is always enforced in HTTP mode. When binding to `0.0.0.0` or `::`, set `ALLOWED_HOSTS` to the hostnames clients will send. If an `Origin` header is present, it must be allowed by `ALLOWED_ORIGINS` or `CORS_ALLOW_ALL`.
439
+ `GET /mcp` and `DELETE /mcp` require `mcp-session-id`. `POST /mcp` without an `initialize` request will return 400.
458
440
 
459
- ---
441
+ Additional HTTP transport notes:
460
442
 
461
- ## Content Block Types
443
+ - `GET /mcp` requires `Accept: text/event-stream` (otherwise 406).
444
+ - JSON-RPC batch requests are not supported (400).
462
445
 
463
- JSONL output includes semantic content blocks:
446
+ If the server reaches its session cap (200), it evicts the oldest session when possible; otherwise it returns a 503.
464
447
 
465
- | Type | Description |
466
- | ------------ | ---------------------------------------- |
467
- | `metadata` | Minimal page metadata (type, title, url) |
468
- | `heading` | Headings (h1-h6) with level indicator |
469
- | `paragraph` | Text paragraphs |
470
- | `list` | Ordered/unordered lists |
471
- | `code` | Code blocks with optional language |
472
- | `table` | Tables with headers and rows |
473
- | `image` | Images with src and alt text |
474
- | `blockquote` | Block quote text |
448
+ Host and Origin headers are always validated. Allowed values include loopback hosts, the configured `HOST` (if not a wildcard), and any entries in `ALLOWED_HOSTS`. When binding to `0.0.0.0` or `::`, set `ALLOWED_HOSTS` to the hostnames clients will send.
475
449
 
476
450
  ---
477
451
 
@@ -498,13 +472,14 @@ DNS resolution is performed and blocked if any resolved IP matches a blocked ran
498
472
  - Max URL length: 2048 characters
499
473
  - Hostnames ending in `.local` or `.internal` are rejected
500
474
 
501
- ### Header Sanitization
475
+ ### Host/Origin Validation (HTTP Mode)
502
476
 
503
- Blocked headers: `host`, `authorization`, `cookie`, `x-forwarded-for`, `x-real-ip`, `proxy-authorization`
477
+ - Host header must match loopback, configured `HOST` (if not a wildcard), or `ALLOWED_HOSTS`
478
+ - Origin header (when present) is validated against the same allow-list
504
479
 
505
480
  ### Rate Limiting
506
481
 
507
- Rate limiting applies to `/mcp` and `/mcp/downloads` and is configurable via `RATE_LIMIT_ENABLED`, `RATE_LIMIT_MAX`, `RATE_LIMIT_WINDOW_MS`, and `RATE_LIMIT_CLEANUP_MS` (see `CONFIGURATION.md`).
482
+ Rate limiting applies to `/mcp` and `/mcp/downloads` (100 req/min per IP, 60s window). OPTIONS requests are not rate-limited.
508
483
 
509
484
  ---
510
485
 
@@ -518,14 +493,14 @@ Rate limiting applies to `/mcp` and `/mcp/downloads` and is configurable via `RA
518
493
  | `npm run build` | Compile TypeScript |
519
494
  | `npm start` | Production server |
520
495
  | `npm run lint` | Run ESLint |
496
+ | `npm run lint:fix` | Auto-fix lint issues |
521
497
  | `npm run type-check` | TypeScript type checking |
522
498
  | `npm run format` | Format with Prettier |
523
499
  | `npm test` | Run Node test runner (builds dist) |
524
500
  | `npm run test:coverage` | Run tests with experimental coverage |
525
- | `npm run bench` | Run minimal performance benchmark |
526
- | `npm run release` | Create new release |
527
501
  | `npm run knip` | Find unused exports/dependencies |
528
502
  | `npm run knip:fix` | Auto-fix unused code |
503
+ | `npm run inspector` | Launch MCP Inspector |
529
504
 
530
505
  > **Note:** Tests run via `node --test` with `--experimental-transform-types` to execute `.ts` test files. Node will emit an experimental warning.
531
506
 
@@ -535,12 +510,12 @@ Rate limiting applies to `/mcp` and `/mcp/downloads` and is configurable via `RA
535
510
  | ------------------ | --------------------------------- |
536
511
  | Runtime | Node.js >=20.12 |
537
512
  | Language | TypeScript 5.9 |
538
- | MCP SDK | @modelcontextprotocol/sdk ^1.25.1 |
513
+ | MCP SDK | @modelcontextprotocol/sdk ^1.25.2 |
539
514
  | Content Extraction | @mozilla/readability ^0.6.0 |
540
- | HTML Parsing | Cheerio ^1.1.2, LinkeDOM ^0.18.12 |
515
+ | HTML Parsing | linkedom ^0.18.12 |
541
516
  | Markdown | Turndown ^7.2.2 |
542
- | HTTP | Express ^5.2.1, undici ^6.22.0 |
543
- | Validation | Zod ^4.3.4 |
517
+ | HTTP | Express ^5.2.1, undici ^6.23.0 |
518
+ | Validation | Zod ^4.3.5 |
544
519
 
545
520
  ---
546
521
 
@@ -0,0 +1,16 @@
1
+ export interface AuthConfig {
2
+ mode: 'oauth' | 'static';
3
+ issuerUrl: URL | undefined;
4
+ authorizationUrl: URL | undefined;
5
+ tokenUrl: URL | undefined;
6
+ revocationUrl: URL | undefined;
7
+ registrationUrl: URL | undefined;
8
+ introspectionUrl: URL | undefined;
9
+ resourceUrl: URL;
10
+ requiredScopes: string[];
11
+ clientId: string | undefined;
12
+ clientSecret: string | undefined;
13
+ introspectionTimeoutMs: number;
14
+ staticTokens: string[];
15
+ }
16
+ export declare function buildAuthConfig(baseUrl: URL): AuthConfig;
@@ -0,0 +1,53 @@
1
+ import { parseInteger, parseList, parseUrlEnv } from './env-parsers.js';
2
+ function readCoreOAuthUrls() {
3
+ return {
4
+ issuerUrl: parseUrlEnv(process.env.OAUTH_ISSUER_URL, 'OAUTH_ISSUER_URL'),
5
+ authorizationUrl: parseUrlEnv(process.env.OAUTH_AUTHORIZATION_URL, 'OAUTH_AUTHORIZATION_URL'),
6
+ tokenUrl: parseUrlEnv(process.env.OAUTH_TOKEN_URL, 'OAUTH_TOKEN_URL'),
7
+ };
8
+ }
9
+ function readOptionalOAuthUrls(baseUrl) {
10
+ return {
11
+ revocationUrl: parseUrlEnv(process.env.OAUTH_REVOCATION_URL, 'OAUTH_REVOCATION_URL'),
12
+ registrationUrl: parseUrlEnv(process.env.OAUTH_REGISTRATION_URL, 'OAUTH_REGISTRATION_URL'),
13
+ introspectionUrl: parseUrlEnv(process.env.OAUTH_INTROSPECTION_URL, 'OAUTH_INTROSPECTION_URL'),
14
+ resourceUrl: parseUrlEnv(process.env.OAUTH_RESOURCE_URL, 'OAUTH_RESOURCE_URL') ??
15
+ new URL('/mcp', baseUrl),
16
+ };
17
+ }
18
+ function readOAuthUrls(baseUrl) {
19
+ return { ...readCoreOAuthUrls(), ...readOptionalOAuthUrls(baseUrl) };
20
+ }
21
+ function resolveAuthMode(authModeEnv, urls) {
22
+ if (authModeEnv === 'oauth')
23
+ return 'oauth';
24
+ if (authModeEnv === 'static')
25
+ return 'static';
26
+ const oauthConfigured = [
27
+ urls.issuerUrl,
28
+ urls.authorizationUrl,
29
+ urls.tokenUrl,
30
+ urls.introspectionUrl,
31
+ ].some((value) => value !== undefined);
32
+ return oauthConfigured ? 'oauth' : 'static';
33
+ }
34
+ function collectStaticTokens() {
35
+ const staticTokens = new Set(parseList(process.env.ACCESS_TOKENS));
36
+ if (process.env.API_KEY) {
37
+ staticTokens.add(process.env.API_KEY);
38
+ }
39
+ return Array.from(staticTokens);
40
+ }
41
+ export function buildAuthConfig(baseUrl) {
42
+ const urls = readOAuthUrls(baseUrl);
43
+ const mode = resolveAuthMode(process.env.AUTH_MODE?.toLowerCase(), urls);
44
+ return {
45
+ mode,
46
+ ...urls,
47
+ requiredScopes: parseList(process.env.OAUTH_REQUIRED_SCOPES),
48
+ clientId: process.env.OAUTH_CLIENT_ID,
49
+ clientSecret: process.env.OAUTH_CLIENT_SECRET,
50
+ introspectionTimeoutMs: parseInteger(process.env.OAUTH_INTROSPECTION_TIMEOUT_MS, 5000, 1000, 30000),
51
+ staticTokens: collectStaticTokens(),
52
+ };
53
+ }
@@ -1,19 +1,17 @@
1
1
  export declare const SIZE_LIMITS: {
2
- readonly ONE_MB: number;
3
- readonly FIVE_MB: number;
4
- readonly TEN_MB: number;
5
- readonly FIFTY_MB: number;
6
- readonly HUNDRED_MB: number;
2
+ ONE_MB: number;
3
+ FIVE_MB: number;
4
+ TEN_MB: number;
5
+ FIFTY_MB: number;
6
+ HUNDRED_MB: number;
7
7
  };
8
8
  export declare const CACHE_HASH: {
9
- readonly URL_HASH_LENGTH: 16;
10
- readonly VARY_HASH_LENGTH: 12;
9
+ URL_HASH_LENGTH: number;
10
+ VARY_HASH_LENGTH: number;
11
11
  };
12
12
  export declare const TIMEOUT: {
13
- readonly MIN_FETCH_TIMEOUT_MS: 5000;
14
- readonly DEFAULT_FETCH_TIMEOUT_MS: 30000;
15
- readonly MAX_FETCH_TIMEOUT_MS: 120000;
16
- readonly MIN_SESSION_TTL_MS: number;
17
- readonly DEFAULT_SESSION_TTL_MS: number;
18
- readonly MAX_SESSION_TTL_MS: number;
13
+ DEFAULT_FETCH_TIMEOUT_MS: number;
14
+ MIN_SESSION_TTL_MS: number;
15
+ DEFAULT_SESSION_TTL_MS: number;
16
+ MAX_SESSION_TTL_MS: number;
19
17
  };
@@ -15,9 +15,7 @@ export const CACHE_HASH = {
15
15
  VARY_HASH_LENGTH: 12,
16
16
  };
17
17
  export const TIMEOUT = {
18
- MIN_FETCH_TIMEOUT_MS: 5000,
19
- DEFAULT_FETCH_TIMEOUT_MS: 30000,
20
- MAX_FETCH_TIMEOUT_MS: 120000,
18
+ DEFAULT_FETCH_TIMEOUT_MS: 15000,
21
19
  MIN_SESSION_TTL_MS: 60 * 1000,
22
20
  DEFAULT_SESSION_TTL_MS: 30 * 60 * 1000,
23
21
  MAX_SESSION_TTL_MS: 24 * 60 * 60 * 1000,
@@ -0,0 +1,7 @@
1
+ import type { LogLevel } from './types/runtime.js';
2
+ export declare function parseInteger(envValue: string | undefined, defaultValue: number, min?: number, max?: number): number;
3
+ export declare function parseBoolean(envValue: string | undefined, defaultValue: boolean): boolean;
4
+ export declare function parseList(envValue: string | undefined): string[];
5
+ export declare function parseUrlEnv(value: string | undefined, name: string): URL | undefined;
6
+ export declare function parseAllowedHosts(envValue: string | undefined): Set<string>;
7
+ export declare function parseLogLevel(envValue: string | undefined): LogLevel;
@@ -0,0 +1,84 @@
1
/**
 * Normalize a single host entry to a bare, lower-cased hostname.
 *
 * Accepted shapes:
 *  - `example.com` / `example.com:8080`  -> `example.com`
 *  - `[::1]` / `[::1]:3000`               -> `::1`
 *  - `::1` / `fe80::1` (unbracketed IPv6) -> returned unchanged
 *
 * @param {string} value Raw host entry (may carry whitespace, a port, or brackets).
 * @returns {string | null} The normalized host, or `null` when the entry is
 *   empty or malformed (blank, unterminated `[`, empty brackets, port only).
 */
function normalizeHostValue(value) {
  const host = value.trim().toLowerCase();
  if (!host) return null;

  if (host.startsWith('[')) {
    const closing = host.indexOf(']');
    if (closing === -1) return null;
    // `[]` yields an empty host; report it as invalid instead of returning ''.
    return host.slice(1, closing) || null;
  }

  const firstColon = host.indexOf(':');
  if (firstColon === -1) return host;

  // Two or more colons cannot be `host:port`; it is an unbracketed IPv6
  // literal, which must not be truncated at its first colon.
  if (firstColon !== host.lastIndexOf(':')) return host;

  // Exactly one colon: strip the port. A bare `:8080` has no host part.
  return host.slice(0, firstColon) || null;
}
17
+ const ALLOWED_LOG_LEVELS = new Set([
18
+ 'debug',
19
+ 'info',
20
+ 'warn',
21
+ 'error',
22
+ ]);
23
+ function isLogLevel(value) {
24
+ return ALLOWED_LOG_LEVELS.has(value);
25
+ }
26
+ function isBelowMin(value, min) {
27
+ if (min === undefined)
28
+ return false;
29
+ return value < min;
30
+ }
31
+ function isAboveMax(value, max) {
32
+ if (max === undefined)
33
+ return false;
34
+ return value > max;
35
+ }
36
/**
 * Parse an integer from an environment variable, falling back to a default.
 *
 * The value must be a plain base-10 integer (optional sign, digits only,
 * surrounding whitespace tolerated). Anything else, such as `"12abc"`,
 * `"1.5"` or `"1e3"`, is treated as invalid rather than partially parsed.
 * Out-of-range values are NOT clamped; they also yield the default.
 *
 * @param {string | undefined} envValue Raw environment value.
 * @param {number} defaultValue Returned when the value is missing, malformed,
 *   unsafe, or outside `[min, max]`.
 * @param {number} [min] Inclusive lower bound (unbounded when omitted).
 * @param {number} [max] Inclusive upper bound (unbounded when omitted).
 * @returns {number} The parsed integer or `defaultValue`.
 */
export function parseInteger(envValue, defaultValue, min, max) {
  if (!envValue) return defaultValue;

  const text = envValue.trim();
  // parseInt alone would accept a numeric prefix and ignore trailing junk.
  if (!/^[+-]?\d+$/.test(text)) return defaultValue;

  const parsed = Number.parseInt(text, 10);
  // Reject values too large to be represented exactly.
  if (!Number.isSafeInteger(parsed)) return defaultValue;
  if (min !== undefined && parsed < min) return defaultValue;
  if (max !== undefined && parsed > max) return defaultValue;
  return parsed;
}
48
/**
 * Parse a boolean flag from an environment variable.
 *
 * Matching ignores case and surrounding whitespace. The tokens `false`, `0`,
 * `no` and `off` disable the flag; any other non-blank value enables it.
 * A missing or blank value yields `defaultValue`.
 *
 * @param {string | undefined} envValue Raw environment value.
 * @param {boolean} defaultValue Returned when the value is missing or blank.
 * @returns {boolean} The parsed flag.
 */
export function parseBoolean(envValue, defaultValue) {
  if (!envValue) return defaultValue;

  const token = envValue.trim().toLowerCase();
  if (!token) return defaultValue;

  // Exact comparison with 'false' alone would treat 'FALSE' or '0' as enabled.
  return !['false', '0', 'no', 'off'].includes(token);
}
53
+ export function parseList(envValue) {
54
+ if (!envValue)
55
+ return [];
56
+ return envValue
57
+ .split(/[\s,]+/)
58
+ .map((entry) => entry.trim())
59
+ .filter((entry) => entry.length > 0);
60
+ }
61
+ export function parseUrlEnv(value, name) {
62
+ if (!value)
63
+ return undefined;
64
+ if (!URL.canParse(value)) {
65
+ throw new Error(`Invalid ${name} value: ${value}`);
66
+ }
67
+ return new URL(value);
68
+ }
69
+ export function parseAllowedHosts(envValue) {
70
+ const hosts = new Set();
71
+ for (const entry of parseList(envValue)) {
72
+ const normalized = normalizeHostValue(entry);
73
+ if (normalized) {
74
+ hosts.add(normalized);
75
+ }
76
+ }
77
+ return hosts;
78
+ }
79
+ export function parseLogLevel(envValue) {
80
+ const level = envValue?.toLowerCase();
81
+ if (!level)
82
+ return 'info';
83
+ return isLogLevel(level) ? level : 'info';
84
+ }
@@ -1,7 +1,7 @@
1
1
  export declare const TRUNCATION_MARKER = "...[truncated]";
2
2
  export declare const CODE_BLOCK: {
3
- readonly fence: "```";
4
- readonly format: (code: string, language?: string) => string;
3
+ fence: string;
4
+ format: (code: string, language?: string) => string;
5
5
  };
6
6
  export declare const FRONTMATTER_DELIMITER = "---";
7
7
  export declare const joinLines: (lines: readonly string[]) => string;