@j0hanz/superfetch 1.1.9 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/README.md +258 -362
  2. package/dist/config/constants.d.ts +20 -0
  3. package/dist/config/constants.d.ts.map +1 -0
  4. package/dist/config/constants.js +25 -0
  5. package/dist/config/constants.js.map +1 -0
  6. package/dist/config/formatting.d.ts +0 -1
  7. package/dist/config/formatting.d.ts.map +1 -1
  8. package/dist/config/formatting.js +1 -1
  9. package/dist/config/formatting.js.map +1 -1
  10. package/dist/config/index.d.ts +8 -1
  11. package/dist/config/index.d.ts.map +1 -1
  12. package/dist/config/index.js +14 -5
  13. package/dist/config/index.js.map +1 -1
  14. package/dist/config/types/content.d.ts +1 -19
  15. package/dist/config/types/content.d.ts.map +1 -1
  16. package/dist/config/types/runtime.d.ts +7 -4
  17. package/dist/config/types/runtime.d.ts.map +1 -1
  18. package/dist/config/types/tools.d.ts +5 -49
  19. package/dist/config/types/tools.d.ts.map +1 -1
  20. package/dist/http/auth.d.ts.map +1 -1
  21. package/dist/http/auth.js +17 -12
  22. package/dist/http/auth.js.map +1 -1
  23. package/dist/http/cors.js +4 -0
  24. package/dist/http/cors.js.map +1 -1
  25. package/dist/http/download-routes.d.ts +15 -0
  26. package/dist/http/download-routes.d.ts.map +1 -0
  27. package/dist/http/download-routes.js +132 -0
  28. package/dist/http/download-routes.js.map +1 -0
  29. package/dist/http/mcp-routes.d.ts +1 -1
  30. package/dist/http/mcp-routes.d.ts.map +1 -1
  31. package/dist/http/mcp-routes.js +1 -1
  32. package/dist/http/mcp-routes.js.map +1 -1
  33. package/dist/http/mcp-session-helpers.d.ts +14 -0
  34. package/dist/http/mcp-session-helpers.d.ts.map +1 -0
  35. package/dist/http/mcp-session-helpers.js +65 -0
  36. package/dist/http/mcp-session-helpers.js.map +1 -0
  37. package/dist/http/mcp-session.d.ts +0 -1
  38. package/dist/http/mcp-session.d.ts.map +1 -1
  39. package/dist/http/mcp-session.js +7 -70
  40. package/dist/http/mcp-session.js.map +1 -1
  41. package/dist/http/server-middleware.d.ts +10 -0
  42. package/dist/http/server-middleware.d.ts.map +1 -0
  43. package/dist/http/server-middleware.js +56 -0
  44. package/dist/http/server-middleware.js.map +1 -0
  45. package/dist/http/server.d.ts.map +1 -1
  46. package/dist/http/server.js +20 -98
  47. package/dist/http/server.js.map +1 -1
  48. package/dist/http/session-cleanup.d.ts +3 -0
  49. package/dist/http/session-cleanup.d.ts.map +1 -0
  50. package/dist/http/session-cleanup.js +38 -0
  51. package/dist/http/session-cleanup.js.map +1 -0
  52. package/dist/index.js +13 -5
  53. package/dist/index.js.map +1 -1
  54. package/dist/resources/cached-content.d.ts.map +1 -1
  55. package/dist/resources/cached-content.js +76 -11
  56. package/dist/resources/cached-content.js.map +1 -1
  57. package/dist/services/cache.d.ts +6 -2
  58. package/dist/services/cache.d.ts.map +1 -1
  59. package/dist/services/cache.js +86 -25
  60. package/dist/services/cache.js.map +1 -1
  61. package/dist/services/context.d.ts +2 -1
  62. package/dist/services/context.d.ts.map +1 -1
  63. package/dist/services/extractor.d.ts.map +1 -1
  64. package/dist/services/extractor.js +45 -17
  65. package/dist/services/extractor.js.map +1 -1
  66. package/dist/services/fetcher/agents.d.ts.map +1 -1
  67. package/dist/services/fetcher/agents.js +3 -6
  68. package/dist/services/fetcher/agents.js.map +1 -1
  69. package/dist/services/fetcher/headers.d.ts.map +1 -1
  70. package/dist/services/fetcher/headers.js +2 -24
  71. package/dist/services/fetcher/headers.js.map +1 -1
  72. package/dist/services/fetcher/interceptors.d.ts +2 -1
  73. package/dist/services/fetcher/interceptors.d.ts.map +1 -1
  74. package/dist/services/fetcher/interceptors.js +30 -20
  75. package/dist/services/fetcher/interceptors.js.map +1 -1
  76. package/dist/services/fetcher/redirects.d.ts +0 -1
  77. package/dist/services/fetcher/redirects.d.ts.map +1 -1
  78. package/dist/services/fetcher/redirects.js +19 -16
  79. package/dist/services/fetcher/redirects.js.map +1 -1
  80. package/dist/services/fetcher/retry-policy.d.ts +1 -27
  81. package/dist/services/fetcher/retry-policy.d.ts.map +1 -1
  82. package/dist/services/fetcher/retry-policy.js +119 -125
  83. package/dist/services/fetcher/retry-policy.js.map +1 -1
  84. package/dist/services/fetcher.d.ts.map +1 -1
  85. package/dist/services/fetcher.js +15 -9
  86. package/dist/services/fetcher.js.map +1 -1
  87. package/dist/services/parser.d.ts +0 -1
  88. package/dist/services/parser.d.ts.map +1 -1
  89. package/dist/services/parser.js +5 -38
  90. package/dist/services/parser.js.map +1 -1
  91. package/dist/tools/handlers/fetch-links/link-extractor.d.ts.map +1 -1
  92. package/dist/tools/handlers/fetch-links/link-extractor.js +15 -19
  93. package/dist/tools/handlers/fetch-links/link-extractor.js.map +1 -1
  94. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  95. package/dist/tools/handlers/fetch-links.tool.js +0 -2
  96. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  97. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  98. package/dist/tools/handlers/fetch-markdown.tool.js +16 -17
  99. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  100. package/dist/tools/handlers/fetch-single.shared.d.ts +11 -2
  101. package/dist/tools/handlers/fetch-single.shared.d.ts.map +1 -1
  102. package/dist/tools/handlers/fetch-single.shared.js +61 -2
  103. package/dist/tools/handlers/fetch-single.shared.js.map +1 -1
  104. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  105. package/dist/tools/handlers/fetch-url.tool.js +3 -14
  106. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  107. package/dist/tools/handlers/fetch-urls/validation.d.ts +0 -1
  108. package/dist/tools/handlers/fetch-urls/validation.d.ts.map +1 -1
  109. package/dist/tools/handlers/fetch-urls/validation.js +1 -1
  110. package/dist/tools/handlers/fetch-urls/validation.js.map +1 -1
  111. package/dist/tools/index.d.ts.map +1 -1
  112. package/dist/tools/index.js +1 -19
  113. package/dist/tools/index.js.map +1 -1
  114. package/dist/tools/schemas.d.ts +44 -236
  115. package/dist/tools/schemas.d.ts.map +1 -1
  116. package/dist/tools/schemas.js +38 -197
  117. package/dist/tools/schemas.js.map +1 -1
  118. package/dist/tools/utils/cache-vary.d.ts +0 -1
  119. package/dist/tools/utils/cache-vary.d.ts.map +1 -1
  120. package/dist/tools/utils/cache-vary.js +11 -25
  121. package/dist/tools/utils/cache-vary.js.map +1 -1
  122. package/dist/tools/utils/common.d.ts +1 -2
  123. package/dist/tools/utils/common.d.ts.map +1 -1
  124. package/dist/tools/utils/common.js.map +1 -1
  125. package/dist/tools/utils/content-transform.d.ts.map +1 -1
  126. package/dist/tools/utils/content-transform.js +28 -13
  127. package/dist/tools/utils/content-transform.js.map +1 -1
  128. package/dist/tools/utils/fetch-pipeline.js +14 -3
  129. package/dist/tools/utils/fetch-pipeline.js.map +1 -1
  130. package/dist/tools/utils/inline-content.d.ts +3 -2
  131. package/dist/tools/utils/inline-content.d.ts.map +1 -1
  132. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  133. package/dist/transformers/markdown.transformer.js +3 -6
  134. package/dist/transformers/markdown.transformer.js.map +1 -1
  135. package/dist/utils/code-language.d.ts +3 -0
  136. package/dist/utils/code-language.d.ts.map +1 -0
  137. package/dist/utils/code-language.js +57 -0
  138. package/dist/utils/code-language.js.map +1 -0
  139. package/dist/utils/content-cleaner.d.ts +0 -1
  140. package/dist/utils/content-cleaner.d.ts.map +1 -1
  141. package/dist/utils/content-cleaner.js +0 -3
  142. package/dist/utils/content-cleaner.js.map +1 -1
  143. package/dist/utils/crypto.d.ts +3 -0
  144. package/dist/utils/crypto.d.ts.map +1 -0
  145. package/dist/utils/crypto.js +33 -0
  146. package/dist/utils/crypto.js.map +1 -0
  147. package/dist/utils/download-url.d.ts +9 -0
  148. package/dist/utils/download-url.d.ts.map +1 -0
  149. package/dist/utils/download-url.js +28 -0
  150. package/dist/utils/download-url.js.map +1 -0
  151. package/dist/utils/error-utils.d.ts +4 -0
  152. package/dist/utils/error-utils.d.ts.map +1 -0
  153. package/dist/utils/error-utils.js +14 -0
  154. package/dist/utils/error-utils.js.map +1 -0
  155. package/dist/utils/filename-generator.d.ts +2 -0
  156. package/dist/utils/filename-generator.d.ts.map +1 -0
  157. package/dist/utils/filename-generator.js +60 -0
  158. package/dist/utils/filename-generator.js.map +1 -0
  159. package/dist/utils/header-normalizer.d.ts +7 -3
  160. package/dist/utils/header-normalizer.d.ts.map +1 -1
  161. package/dist/utils/header-normalizer.js +23 -16
  162. package/dist/utils/header-normalizer.js.map +1 -1
  163. package/dist/utils/tool-error-handler.d.ts +0 -1
  164. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  165. package/dist/utils/tool-error-handler.js +11 -5
  166. package/dist/utils/tool-error-handler.js.map +1 -1
  167. package/dist/utils/url-sanitizer.d.ts +2 -0
  168. package/dist/utils/url-sanitizer.d.ts.map +1 -0
  169. package/dist/utils/url-sanitizer.js +12 -0
  170. package/dist/utils/url-sanitizer.js.map +1 -0
  171. package/dist/utils/url-validator.d.ts.map +1 -1
  172. package/dist/utils/url-validator.js +46 -44
  173. package/dist/utils/url-validator.js.map +1 -1
  174. package/package.json +4 -6
package/README.md CHANGED
@@ -1,8 +1,8 @@
1
- # 🚀 superFetch MCP Server
1
+ # superFetch MCP Server
2
2
 
3
3
  <img src="docs/logo.png" alt="SuperFetch MCP Logo" width="200">
4
4
 
5
- [![npm version](https://img.shields.io/npm/v/@j0hanz/superfetch.svg)](https://www.npmjs.com/package/@j0hanz/superfetch) [![Node.js](https://img.shields.io/badge/Node.js-≥20.0.0-339933?logo=nodedotjs&logoColor=white)](https://nodejs.org/) [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
5
+ [![npm version](https://img.shields.io/npm/v/@j0hanz/superfetch.svg)](https://www.npmjs.com/package/@j0hanz/superfetch) [![Node.js](https://img.shields.io/badge/Node.js-%3E=20.12-339933?logo=nodedotjs&logoColor=white)](https://nodejs.org/) [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
6
6
 
7
7
  ## One-Click Install
8
8
 
@@ -10,71 +10,64 @@
10
10
 
11
11
  [![Install in Cursor](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/install-mcp?name=superfetch&config=eyJjb21tYW5kIjoibnB4IiwiYXJncyI6WyIteSIsIkBqMGhhbnovc3VwZXJmZXRjaEBsYXRlc3QiLCItLXN0ZGlvIl19)
12
12
 
13
- A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches, extracts, and transforms web content into AI-optimized formats using Mozilla Readability.
13
+ A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches web pages, extracts readable content with Mozilla Readability, and returns AI-friendly JSONL or Markdown.
14
14
 
15
- [Quick Start](#quick-start) · [How to Choose a Tool](#-how-to-choose-a-tool) · [Tools](#available-tools) · [Configuration](#configuration) · [Contributing](#contributing)
15
+ [Quick Start](#quick-start) | [How to Choose a Tool](#how-to-choose-a-tool) | [Tools](#available-tools) | [Resources](#resources) | [Configuration](#configuration) | [Security](#security) | [Development](#development)
16
16
 
17
- > 📦 **Published to [MCP Registry](https://registry.modelcontextprotocol.io/)** — Search for `io.github.j0hanz/superfetch`
17
+ > **Published to [MCP Registry](https://registry.modelcontextprotocol.io/)** - Search for `io.github.j0hanz/superfetch`
18
18
 
19
19
  ---
20
20
 
21
21
  > [!CAUTION]
22
22
  > This server can access URLs on behalf of AI assistants. Built-in SSRF protection blocks private IP ranges and cloud metadata endpoints, but exercise caution when deploying in sensitive environments.
23
23
 
24
- ## Features
24
+ ## Features
25
25
 
26
- | Feature | Description |
27
- | ------------------------- | ------------------------------------------------------------- |
28
- | 🧠 **Smart Extraction** | Mozilla Readability removes ads, navigation, and boilerplate |
29
- | 📄 **Multiple Formats** | JSONL semantic blocks or clean Markdown with YAML frontmatter |
30
- | 🔗 **Link Discovery** | Extract and classify internal/external links |
31
- | ⚡ **Built-in Caching** | Configurable TTL and max entries |
32
- | 🛡️ **Security First** | SSRF protection, URL validation, header sanitization |
33
- | 🔄 **Resilient Fetching** | Exponential backoff with jitter |
34
- | 📊 **Monitoring** | Stats resource for cache performance and health |
26
+ | Feature | Description |
27
+ | ------------------ | ------------------------------------------------------------------------- |
28
+ | Smart extraction | Mozilla Readability removes ads, navigation, and boilerplate when enabled |
29
+ | JSONL + Markdown | JSONL semantic blocks or clean Markdown with frontmatter |
30
+ | Structured blocks | Headings, paragraphs, lists, code, tables, images, blockquotes |
31
+ | Built-in caching | In-memory cache with TTL, max keys, and resource subscriptions |
32
+ | Resilient fetching | Redirect handling plus retry with exponential backoff + jitter |
33
+ | Security first | URL validation, SSRF/DNS/IP blocklists, header sanitization |
34
+ | HTTP mode | API key auth, session management, rate limiting, CORS |
35
35
 
36
36
  ---
37
37
 
38
- ## 🎯 How to Choose a Tool
38
+ ## How to Choose a Tool
39
39
 
40
- Use this guide to select the right tool for your web content extraction needs:
40
+ Use this guide to select the right tool for your web content extraction needs.
41
41
 
42
42
  ### Decision Tree
43
43
 
44
44
  ```text
45
45
  Need web content for AI?
46
- ├─ Single URL?
47
- │ ├─ Need structured semantic blocks → fetch-url (JSONL)
48
- │ ├─ Need readable markdown → fetch-markdown
49
- │ └─ Need links only → fetch-links
50
- └─ Multiple URLs?
51
- └─ Use fetch-urls (batch processing)
46
+ - Want structured JSONL blocks -> fetch-url (format: jsonl)
47
+ - Want clean Markdown -> fetch-markdown
48
+ - Want Markdown but also need contentBlocks count -> fetch-url (format: markdown)
52
49
  ```
53
50
 
54
51
  ### Quick Reference Table
55
52
 
56
- | Tool | Best For | Output Format | Use When |
57
- | ---------------- | -------------------------------- | ----------------------- | ------------------------------------------- |
58
- | `fetch-url` | Single page structured content | JSONL semantic blocks | AI analysis, RAG pipelines, content parsing |
59
- | `fetch-markdown` | Single page readable format | Clean Markdown + TOC | Documentation, human-readable output |
60
- | `fetch-links` | Link discovery & classification | URL array with types | Sitemap building, finding related pages |
61
- | `fetch-urls` | Batch processing multiple pages | Multiple JSONL/Markdown | Comparing pages, bulk extraction |
53
+ | Tool | Best For | Output Format | Use When |
54
+ | ---------------- | ---------------------------------- | -------------------------------- | ----------------------------------------- |
55
+ | `fetch-url` | Single page with structured blocks | JSONL (or Markdown via `format`) | RAG pipelines, content parsing, analytics |
56
+ | `fetch-markdown` | Single page in readable format | Markdown + frontmatter | Documentation, summaries, human review |
62
57
 
63
58
  ### Common Use Cases
64
59
 
65
60
  | Task | Recommended Tool | Why |
66
61
  | ------------------------ | ---------------------------------------- | ---------------------------------------------------- |
67
62
  | Parse a blog post for AI | `fetch-url` | Returns semantic blocks (headings, paragraphs, code) |
68
- | Generate documentation | `fetch-markdown` | Clean markdown with optional TOC |
69
- | Build a sitemap | `fetch-links` | Extracts and classifies all links |
70
- | Compare multiple docs | `fetch-urls` | Parallel fetching with concurrency control |
63
+ | Generate documentation | `fetch-markdown` | Clean markdown with frontmatter |
71
64
  | Extract article for RAG | `fetch-url` + `extractMainContent: true` | Removes ads/nav, keeps main content |
72
65
 
73
66
  ---
74
67
 
75
68
  ## Quick Start
76
69
 
77
- Add superFetch to your MCP client configuration — no installation required!
70
+ Add superFetch to your MCP client configuration - no installation required.
78
71
 
79
72
  ### Claude Desktop
80
73
 
@@ -126,7 +119,7 @@ Configure SuperFetch behavior by adding environment variables to the `env` prope
126
119
  }
127
120
  ```
128
121
 
129
- See [Configuration](#configuration) section below for all available options and presets.
122
+ See [Configuration](#configuration) for all available options.
130
123
 
131
124
  ### Cursor
132
125
 
@@ -146,7 +139,7 @@ See [Configuration](#configuration) section below for all available options and
146
139
  }
147
140
  ```
148
141
 
149
- > **Tip:** On Windows, if you encounter issues, try: `cmd /c "npx -y @j0hanz/superfetch@latest --stdio"`
142
+ > **Tip (Windows):** If you encounter issues, try: `cmd /c "npx -y @j0hanz/superfetch@latest --stdio"`
150
143
 
151
144
  <details>
152
145
  <summary><strong>Codex IDE</strong></summary>
@@ -170,7 +163,7 @@ args = ["-y", "@j0hanz/superfetch@latest", "--stdio"]
170
163
  env = { CACHE_TTL = "7200", LOG_LEVEL = "debug", FETCH_TIMEOUT = "60000" }
171
164
  ```
172
165
 
173
- > **Access config file:** Click the gear icon → "Codex Settings &gt; Open config.toml"
166
+ > **Access config file:** Click the gear icon -> "Codex Settings > Open config.toml"
174
167
  >
175
168
  > **Documentation:** [Codex MCP Guide](https://codex.com/docs/mcp)
176
169
 
@@ -261,7 +254,7 @@ npm install -g @j0hanz/superfetch
261
254
  # Run in stdio mode
262
255
  superfetch --stdio
263
256
 
264
- # Run HTTP server
257
+ # Run HTTP server (requires API_KEY)
265
258
  superfetch
266
259
  ```
267
260
 
@@ -277,216 +270,263 @@ npm run build
277
270
  ### Running the Server
278
271
 
279
272
  <details>
280
- <summary><strong>HTTP Mode</strong> (default)</summary>
273
+ <summary><strong>stdio Mode</strong> (direct MCP integration)</summary>
281
274
 
282
275
  ```bash
283
- # Development with hot reload
284
- npm run dev
285
-
286
- # Production
287
- npm start
276
+ node dist/index.js --stdio
288
277
  ```
289
278
 
290
- Server runs at `http://127.0.0.1:3000`:
291
-
292
- - Health check: `GET /health`
293
- - MCP endpoint: `POST /mcp`
294
-
295
279
  </details>
296
280
 
297
281
  <details>
298
- <summary><strong>stdio Mode</strong> (direct MCP integration)</summary>
282
+ <summary><strong>HTTP Mode</strong> (default)</summary>
283
+
284
+ HTTP mode requires `API_KEY` and only binds to loopback addresses unless `ALLOW_REMOTE=true`.
299
285
 
300
286
  ```bash
301
- node dist/index.js --stdio
287
+ API_KEY=supersecret npx -y @j0hanz/superfetch@latest
288
+ # Server runs at http://127.0.0.1:3000
302
289
  ```
303
290
 
291
+ **Windows (PowerShell):**
292
+
293
+ ```powershell
294
+ $env:API_KEY = "supersecret"
295
+ npx -y @j0hanz/superfetch@latest
296
+ ```
297
+
298
+ Endpoints (all require `Authorization: Bearer <API_KEY>` or `X-API-Key: <API_KEY>`):
299
+
300
+ - `GET /health`
301
+ - `POST /mcp`
302
+ - `GET /mcp` (SSE stream)
303
+ - `DELETE /mcp`
304
+ - `GET /mcp/downloads/:namespace/:hash`
305
+
306
+ Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#http-mode-details)).
307
+
304
308
  </details>
305
309
 
306
310
  ---
307
311
 
308
312
  ## Available Tools
309
313
 
310
- > **Note:** If extracted content exceeds `MAX_INLINE_CONTENT_CHARS`, the tool response includes a `resourceUri` and a `resource_link` content block instead of embedding the full text.
314
+ ### Tool Response Notes
315
+
316
+ Both tools return:
317
+
318
+ - `structuredContent` for machine-readable fields
319
+ - `content` blocks that include:
320
+ - a `text` block containing JSON of `structuredContent`
321
+ - a `resource` block with a `file:///...` URI containing the full content (stdio-friendly)
322
+ - a `resource_link` block when content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is enabled
323
+
324
+ If content is too large and cache is disabled, the server truncates output and appends `...[truncated]`.
325
+
326
+ ---
311
327
 
312
328
  ### `fetch-url`
313
329
 
314
- Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks.
330
+ Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. You can also request Markdown with `format: "markdown"`.
315
331
 
316
- | Parameter | Type | Default | Description |
317
- | -------------------- | ------- | ---------- | -------------------------------------------- |
318
- | `url` | string | _required_ | URL to fetch |
319
- | `extractMainContent` | boolean | `true` | Use Readability to extract main content |
320
- | `includeMetadata` | boolean | `true` | Include page metadata (title, description) |
321
- | `maxContentLength` | number | – | Maximum content length in characters |
322
- | `customHeaders` | object | – | Custom HTTP headers for the request |
323
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
324
- | `retries` | number | `3` | Number of retry attempts (1-10) |
332
+ | Parameter | Type | Default | Description |
333
+ | -------------------- | --------------------- | --------- | --------------------------------------------- |
334
+ | `url` | string | required | URL to fetch |
335
+ | `format` | "jsonl" \| "markdown" | `"jsonl"` | Output format |
336
+ | `extractMainContent` | boolean | `true` | Use Readability to extract main content |
337
+ | `includeMetadata` | boolean | `true` | Include page metadata |
338
+ | `maxContentLength` | number | - | Maximum content length in characters |
339
+ | `customHeaders` | object | - | Custom HTTP headers (sanitized) |
340
+ | `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
341
+ | `retries` | number | `3` | Number of retry attempts (1-10) |
325
342
 
326
- **Example Response:**
343
+ **Example `structuredContent`:**
327
344
 
328
345
  ```json
329
346
  {
330
347
  "url": "https://example.com/article",
331
348
  "title": "Example Article",
349
+ "contentBlocks": 42,
332
350
  "fetchedAt": "2025-12-11T10:30:00.000Z",
333
- "contentBlocks": [
334
- {
335
- "type": "metadata",
336
- "title": "Example Article",
337
- "description": "A sample article"
338
- },
339
- { "type": "heading", "level": 1, "text": "Introduction" },
340
- {
341
- "type": "paragraph",
342
- "text": "This is the main content of the article..."
343
- },
344
- {
345
- "type": "code",
346
- "language": "javascript",
347
- "content": "console.log('Hello');"
348
- }
349
- ],
350
- "cached": false
351
+ "format": "jsonl",
352
+ "contentSize": 12345,
353
+ "cached": false,
354
+ "content": "{\"type\":\"metadata\",\"title\":\"Example Article\",\"url\":\"https://example.com/article\"}\n{\"type\":\"heading\",\"level\":1,\"text\":\"Introduction\"}"
351
355
  }
352
356
  ```
353
357
 
354
- ### `fetch-links`
358
+ ---
359
+
360
+ ### `fetch-markdown`
355
361
 
356
- Extracts hyperlinks from a webpage with classification. Supports filtering, image links, and link limits.
362
+ Fetches a webpage and converts it to clean Markdown with optional frontmatter.
357
363
 
358
- | Parameter | Type | Default | Description |
359
- | ----------------- | ------- | ---------- | -------------------------------------------- |
360
- | `url` | string | _required_ | URL to extract links from |
361
- | `includeExternal` | boolean | `true` | Include external links |
362
- | `includeInternal` | boolean | `true` | Include internal links |
363
- | `includeImages` | boolean | `false` | Include image links (img src attributes) |
364
- | `maxLinks` | number | – | Maximum number of links to return (1-1000) |
365
- | `filterPattern` | string | – | Regex pattern to filter links (matches href) |
366
- | `customHeaders` | object | – | Custom HTTP headers for the request |
367
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
368
- | `retries` | number | `3` | Number of retry attempts (1-10) |
364
+ | Parameter | Type | Default | Description |
365
+ | -------------------- | ------- | -------- | --------------------------------------------- |
366
+ | `url` | string | required | URL to fetch |
367
+ | `extractMainContent` | boolean | `true` | Extract main content only |
368
+ | `includeMetadata` | boolean | `true` | Include YAML frontmatter |
369
+ | `maxContentLength` | number | - | Maximum content length in characters |
370
+ | `customHeaders` | object | - | Custom HTTP headers (sanitized) |
371
+ | `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
372
+ | `retries` | number | `3` | Number of retry attempts (1-10) |
369
373
 
370
- **Example Response:**
374
+ **Example `structuredContent`:**
371
375
 
372
376
  ```json
373
377
  {
374
- "url": "https://example.com/",
375
- "linkCount": 15,
376
- "links": [
377
- {
378
- "href": "https://example.com/about",
379
- "text": "About Us",
380
- "type": "internal"
381
- },
382
- {
383
- "href": "https://github.com/example",
384
- "text": "GitHub",
385
- "type": "external"
386
- },
387
- { "href": "https://example.com/logo.png", "text": "", "type": "image" }
388
- ],
378
+ "url": "https://example.com/docs",
379
+ "title": "Documentation",
380
+ "fetchedAt": "2025-12-11T10:30:00.000Z",
381
+ "markdown": "---\ntitle: Documentation\nsource: \"https://example.com/docs\"\n---\n\n# Getting Started\n\nWelcome...",
382
+ "contentSize": 9876,
389
383
  "cached": false,
390
- "truncated": false
384
+ "truncated": false,
385
+ "file": {
386
+ "downloadUrl": "/mcp/downloads/markdown/abc123def456",
387
+ "fileName": "documentation.md",
388
+ "expiresAt": "2025-12-11T11:30:00.000Z"
389
+ }
391
390
  }
392
391
  ```
393
392
 
394
- ### `fetch-markdown`
393
+ `file` is included only in HTTP mode when content is cached and too large to inline.
395
394
 
396
- Fetches a webpage and converts it to clean Markdown with optional table of contents.
395
+ ---
397
396
 
398
- | Parameter | Type | Default | Description |
399
- | -------------------- | ------- | ---------- | -------------------------------------------- |
400
- | `url` | string | _required_ | URL to fetch |
401
- | `extractMainContent` | boolean | `true` | Extract main content only |
402
- | `includeMetadata` | boolean | `true` | Include YAML frontmatter |
403
- | `maxContentLength` | number | – | Maximum content length in characters |
404
- | `generateToc` | boolean | `false` | Generate table of contents from headings |
405
- | `customHeaders` | object | – | Custom HTTP headers for the request |
406
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
407
- | `retries` | number | `3` | Number of retry attempts (1-10) |
397
+ ### Large Content Handling
408
398
 
409
- **Example Response:**
399
+ - Inline limit: `MAX_INLINE_CONTENT_CHARS` (default `20000`).
400
+ - If content exceeds the limit and cache is enabled, responses include `resourceUri` and a `resource_link` block.
401
+ - If cache is disabled, content is truncated with `...[truncated]`.
402
+ - Use `maxContentLength` per request to enforce a lower limit.
410
403
 
411
- ````json
412
- {
413
- "url": "https://example.com/docs",
414
- "title": "Documentation",
415
- "fetchedAt": "2025-12-11T10:30:00.000Z",
416
- "markdown": "---\ntitle: Documentation\nsource: \"https://example.com/docs\"\n---\n\n# Getting Started\n\nWelcome to our documentation...\n\n## Installation\n\n```bash\nnpm install example\n```",
417
- "toc": [
418
- { "level": 1, "text": "Getting Started", "slug": "getting-started" },
419
- { "level": 2, "text": "Installation", "slug": "installation" }
420
- ],
421
- "cached": false,
422
- "truncated": false
423
- }
424
- ````
404
+ ---
425
405
 
426
- ### `fetch-urls` (Batch)
406
+ ## Resources
427
407
 
428
- Fetches multiple URLs in parallel with concurrency control. Ideal for comparing content or processing multiple pages efficiently.
408
+ | URI | Description |
409
+ | ------------------------------------------ | ----------------------------------------------------- |
410
+ | `superfetch://health` | Real-time server health and memory checks |
411
+ | `superfetch://stats` | Server stats and cache metrics |
412
+ | `superfetch://cache/list` | List cached entries and their resource URIs |
413
+ | `superfetch://cache/{namespace}/{urlHash}` | Cached content entry (`namespace`: `url`, `markdown`) |
429
414
 
430
- | Parameter | Type | Default | Description |
431
- | -------------------- | -------- | ---------- | -------------------------------------------- |
432
- | `urls` | string[] | _required_ | Array of URLs to fetch (1-10 URLs) |
433
- | `extractMainContent` | boolean | `true` | Use Readability to extract main content |
434
- | `includeMetadata` | boolean | `true` | Include page metadata |
435
- | `maxContentLength` | number | – | Maximum content length per URL in characters |
436
- | `format` | string | `'jsonl'` | Output format: `'jsonl'` or `'markdown'` |
437
- | `concurrency` | number | `3` | Maximum concurrent requests (1-5) |
438
- | `continueOnError` | boolean | `true` | Continue processing if some URLs fail |
439
- | `customHeaders` | object | – | Custom HTTP headers for all requests |
440
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
441
- | `retries` | number | `3` | Number of retry attempts (1-10) |
415
+ Resource subscriptions notify clients when cache entries update.
442
416
 
443
- **Example Output:**
417
+ ---
444
418
 
445
- ```json
446
- {
447
- "results": [
448
- {
449
- "url": "https://example.com",
450
- "success": true,
451
- "title": "Example",
452
- "content": "...",
453
- "cached": false
454
- },
455
- {
456
- "url": "https://example.org",
457
- "success": true,
458
- "title": "Example Org",
459
- "content": "...",
460
- "cached": false
461
- }
462
- ],
463
- "summary": {
464
- "total": 2,
465
- "successful": 2,
466
- "failed": 0,
467
- "cached": 0,
468
- "totalContentBlocks": 15
469
- },
470
- "fetchedAt": "2024-12-11T10:30:00.000Z"
471
- }
419
+ ## Download Endpoint (HTTP Mode)
420
+
421
+ When running in HTTP mode, cached content can be downloaded directly.
422
+
423
+ ### Endpoint
424
+
425
+ ```text
426
+ GET /mcp/downloads/:namespace/:hash
472
427
  ```
473
428
 
474
- ### Resources
429
+ - `namespace`: `markdown` or `url`
430
+ - Auth required (`Authorization: Bearer <API_KEY>` or `X-API-Key: <API_KEY>`)
431
+
432
+ ### Response Headers
433
+
434
+ | Header | Value |
435
+ | --------------------- | ----------------------------------------------------------------------- |
436
+ | `Content-Type` | `text/markdown; charset=utf-8` or `application/x-ndjson; charset=utf-8` |
437
+ | `Content-Disposition` | `attachment; filename="<name>"` |
438
+ | `Cache-Control` | `private, max-age=<CACHE_TTL>` |
475
439
 
476
- | URI | Description |
477
- | --------------------- | --------------------------------------------------- |
478
- | `superfetch://stats` | Server statistics and cache metrics |
479
- | `superfetch://health` | Real-time server health and dependency status |
480
- | Dynamic resources | Cached content available via resource subscriptions |
440
+ ### Example Usage
441
+
442
+ ```bash
443
+ curl -H "Authorization: Bearer $API_KEY" \
444
+ http://localhost:3000/mcp/downloads/markdown/abc123.def456 \
445
+ -o article.md
446
+ ```
447
+
448
+ ### Error Responses
449
+
450
+ | Status | Code | Description |
451
+ | ------ | --------------------- | -------------------------------- |
452
+ | 400 | `BAD_REQUEST` | Invalid namespace or hash format |
453
+ | 404 | `NOT_FOUND` | Content not found or expired |
454
+ | 503 | `SERVICE_UNAVAILABLE` | Download service disabled |
481
455
 
482
456
  ---
483
457
 
484
458
  ## Configuration
485
459
 
460
+ Configure SuperFetch behavior by adding environment variables to your MCP client configuration's `env` property.
461
+
462
+ ### Fetcher Settings
463
+
464
+ | Variable | Default | Valid Values | Description |
465
+ | --------------- | -------------------- | -------------------- | ------------------------------- |
466
+ | `FETCH_TIMEOUT` | `30000` | `5000`-`120000` | Request timeout in milliseconds |
467
+ | `USER_AGENT` | `superFetch-MCP/1.0` | Any valid user agent | Custom user agent |
468
+
469
+ ### Cache Settings
470
+
471
+ | Variable | Default | Valid Values | Description |
472
+ | ---------------- | ------- | ---------------- | ------------------------- |
473
+ | `CACHE_ENABLED` | `true` | `true` / `false` | Enable response caching |
474
+ | `CACHE_TTL` | `3600` | `60`-`86400` | Cache lifetime in seconds |
475
+ | `CACHE_MAX_KEYS` | `100` | `10`-`1000` | Maximum cached entries |
476
+
477
+ ### Output Settings
478
+
479
+ | Variable | Default | Valid Values | Description |
480
+ | -------------------------- | ------- | --------------- | ----------------------------------------- |
481
+ | `MAX_INLINE_CONTENT_CHARS` | `20000` | `1000`-`200000` | Inline content limit before a `resource_link` is returned instead |
482
+
483
+ ### Logging Settings
484
+
485
+ | Variable | Default | Valid Values | Description |
486
+ | ---------------- | ------- | ----------------------------------- | ---------------------- |
487
+ | `LOG_LEVEL` | `info` | `debug` / `info` / `warn` / `error` | Logging verbosity |
488
+ | `ENABLE_LOGGING` | `true` | `true` / `false` | Enable/disable logging |
489
+
490
+ ### Extraction Settings
491
+
492
+ | Variable | Default | Valid Values | Description |
493
+ | ---------------------- | ------- | ---------------- | --------------------------------------- |
494
+ | `EXTRACT_MAIN_CONTENT` | `true` | `true` / `false` | Use Readability to extract main content |
495
+ | `INCLUDE_METADATA` | `true` | `true` / `false` | Include metadata/frontmatter |
496
+
497
+ ### HTTP Server Settings
498
+
499
+ | Variable | Default | Description |
500
+ | ------------------------- | ----------- | -------------------------------------------- |
501
+ | `API_KEY` | - | **Required for HTTP mode** |
502
+ | `HOST` | `127.0.0.1` | HTTP server host |
503
+ | `PORT` | `3000` | HTTP server port |
504
+ | `ALLOW_REMOTE` | `false` | Allow binding to non-loopback interfaces |
505
+ | `TRUST_PROXY` | `false` | Trust proxy headers for client IP resolution |
506
+ | `SESSION_TTL_MS` | `1800000` | Session TTL in milliseconds (30 min) |
507
+ | `SESSION_INIT_TIMEOUT_MS` | `10000` | Time allowed for session initialization in milliseconds |
508
+ | `MAX_SESSIONS` | `200` | Maximum active sessions |
509
+
510
+ ### CORS Settings
511
+
512
+ | Variable | Default | Description |
513
+ | ----------------- | ------- | --------------------------------------- |
514
+ | `ALLOWED_ORIGINS` | `[]` | Comma-separated list of allowed origins |
515
+ | `CORS_ALLOW_ALL` | `false` | Allow all origins (dev only) |
516
+
517
+ ### Rate Limiting
518
+
519
+ | Variable | Default | Valid Values | Description |
520
+ | ----------------------- | ------- | ----------------- | ------------------------------------ |
521
+ | `RATE_LIMIT_ENABLED` | `true` | `true` / `false` | Enable/disable HTTP rate limiting |
522
+ | `RATE_LIMIT_MAX` | `100` | `1`-`10000` | Max requests per window per IP |
523
+ | `RATE_LIMIT_WINDOW_MS` | `60000` | `1000`-`3600000` | Rate limit window in milliseconds |
524
+ | `RATE_LIMIT_CLEANUP_MS` | `60000` | `10000`-`3600000` | Cleanup interval for limiter entries |
525
+
486
526
  ### Configuration Presets
487
527
 
488
528
  <details open>
489
- <summary><strong>Default (Recommended)</strong> No configuration needed</summary>
529
+ <summary><strong>Default (Recommended)</strong> - No configuration needed</summary>
490
530
 
491
531
  ```json
492
532
  {
@@ -502,9 +542,7 @@ Fetches multiple URLs in parallel with concurrency control. Ideal for comparing
502
542
  </details>
503
543
 
504
544
  <details>
505
- <summary><strong>Debug Mode</strong> Verbose logging and no cache</summary>
506
-
507
- **VS Code** (`.vscode/mcp.json`):
545
+ <summary><strong>Debug Mode</strong> - Verbose logging and no cache</summary>
508
546
 
509
547
  ```json
510
548
  {
@@ -521,44 +559,10 @@ Fetches multiple URLs in parallel with concurrency control. Ideal for comparing
521
559
  }
522
560
  ```
523
561
 
524
- **Claude Desktop** (`claude_desktop_config.json`):
525
-
526
- ```json
527
- {
528
- "mcpServers": {
529
- "superFetch": {
530
- "command": "npx",
531
- "args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
532
- "env": {
533
- "LOG_LEVEL": "debug",
534
- "CACHE_ENABLED": "false"
535
- }
536
- }
537
- }
538
- }
539
- ```
540
-
541
- **Cursor** (MCP settings):
542
-
543
- ```json
544
- {
545
- "mcpServers": {
546
- "superFetch": {
547
- "command": "npx",
548
- "args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
549
- "env": {
550
- "LOG_LEVEL": "debug",
551
- "CACHE_ENABLED": "false"
552
- }
553
- }
554
- }
555
- }
556
- ```
557
-
558
562
  </details>
559
563
 
560
564
  <details>
561
- <summary><strong>Performance Mode</strong> Aggressive caching for speed</summary>
565
+ <summary><strong>Performance Mode</strong> - Aggressive caching for speed</summary>
562
566
 
563
567
  ```json
564
568
  {
@@ -579,7 +583,7 @@ Fetches multiple URLs in parallel with concurrency control. Ideal for comparing
579
583
  </details>
580
584
 
581
585
  <details>
582
- <summary><strong>Custom User Agent</strong> For sites that block bots</summary>
586
+ <summary><strong>Custom User Agent</strong> - For sites that block bots</summary>
583
587
 
584
588
  ```json
585
589
  {
@@ -598,7 +602,7 @@ Fetches multiple URLs in parallel with concurrency control. Ideal for comparing
598
602
  </details>
599
603
 
600
604
  <details>
601
- <summary><strong>Slow Networks / CI/CD</strong> Extended timeouts</summary>
605
+ <summary><strong>Slow Networks / CI</strong> - Extended timeouts</summary>
602
606
 
603
607
  ```json
604
608
  {
@@ -618,118 +622,17 @@ Fetches multiple URLs in parallel with concurrency control. Ideal for comparing
618
622
 
619
623
  </details>
620
624
 
621
- ### Available Environment Variables
622
-
623
- Configure SuperFetch behavior by adding environment variables to your MCP client configuration's `env` property.
624
-
625
- #### 🌐 Fetcher Settings
626
-
627
- | Variable | Default | Valid Values | Description |
628
- | --------------- | -------------------- | -------------------- | --------------------------------------------------------------- |
629
- | `FETCH_TIMEOUT` | `30000` | `5000`-`120000` | Request timeout in milliseconds (5s-2min) |
630
- | `USER_AGENT` | `superFetch-MCP/1.0` | Any valid user agent | Custom user agent for requests (useful for sites blocking bots) |
631
-
632
- #### 💾 Cache Settings
633
-
634
- | Variable | Default | Valid Values | Description |
635
- | ---------------- | ------- | ---------------- | -------------------------------------- |
636
- | `CACHE_ENABLED` | `true` | `true` / `false` | Enable response caching |
637
- | `CACHE_TTL` | `3600` | `60`-`86400` | Cache lifetime in seconds (1min-24hrs) |
638
- | `CACHE_MAX_KEYS` | `100` | `10`-`1000` | Maximum number of cached entries |
639
-
640
- #### 📦 Output Settings
641
-
642
- | Variable | Default | Valid Values | Description |
643
- | -------------------------- | ------- | --------------- | --------------------------------------------------------------- |
644
- | `MAX_INLINE_CONTENT_CHARS` | `20000` | `1000`-`200000` | Inline content limit before returning a `resource_link` instead |
645
-
646
- #### 📝 Logging Settings
647
-
648
- | Variable | Default | Valid Values | Description |
649
- | ---------------- | ------- | ----------------------------------- | -------------------------- |
650
- | `LOG_LEVEL` | `info` | `debug` / `info` / `warn` / `error` | Logging verbosity level |
651
- | `ENABLE_LOGGING` | `true` | `true` / `false` | Enable/disable all logging |
652
-
653
- #### 🔍 Extraction Settings
654
-
655
- | Variable | Default | Valid Values | Description |
656
- | ---------------------- | ------- | ---------------- | -------------------------------------------------- |
657
- | `EXTRACT_MAIN_CONTENT` | `true` | `true` / `false` | Use Mozilla Readability to extract main content |
658
- | `INCLUDE_METADATA` | `true` | `true` / `false` | Include page metadata (title, description, author) |
659
-
660
- #### 🛡️ Security Settings
661
-
662
- | Variable | Default | Description |
663
- | -------------- | ------- | -------------------------------------------------------- |
664
- | `API_KEY` | - | API Key for HTTP authentication (required for HTTP mode) |
665
- | `ALLOW_REMOTE` | `false` | Allow binding to non-loopback interfaces |
666
-
667
- #### Rate Limiting
668
-
669
- | Variable | Default | Valid Values | Description |
670
- | ----------------------- | ------- | ----------------- | ------------------------------------ |
671
- | `RATE_LIMIT_ENABLED` | `true` | `true` / `false` | Enable/disable HTTP rate limiting |
672
- | `RATE_LIMIT_MAX` | `100` | `1`-`10000` | Max requests per window per IP |
673
- | `RATE_LIMIT_WINDOW_MS` | `60000` | `1000`-`3600000` | Rate limit window in milliseconds |
674
- | `RATE_LIMIT_CLEANUP_MS` | `60000` | `10000`-`3600000` | Cleanup interval for limiter entries |
675
-
676
- ### HTTP Mode Configuration
677
-
678
- <details>
679
- <summary><strong>HTTP Mode</strong> (Advanced) — For running as a standalone HTTP server</summary>
680
-
681
- SuperFetch can run as an HTTP server for custom integrations. HTTP mode requires additional configuration and an `API_KEY` for authenticated access (send `Authorization: Bearer <key>` or `X-API-Key: <key>`).
682
-
683
- #### Start HTTP Server
684
-
685
- ```bash
686
- npx -y @j0hanz/superfetch@latest
687
- # Server runs at http://127.0.0.1:3000
688
- ```
689
-
690
- #### HTTP-Specific Environment Variables
691
-
692
- | Variable | Default | Description |
693
- | ------------------------- | ----------- | ------------------------------------------------ |
694
- | `PORT` | `3000` | HTTP server port |
695
- | `HOST` | `127.0.0.1` | HTTP server host (`0.0.0.0` for Docker/K8s) |
696
- | `ALLOWED_ORIGINS` | `[]` | Comma-separated CORS origins |
697
- | `CORS_ALLOW_ALL` | `false` | Allow all CORS origins (dev only, security risk) |
698
- | `SESSION_TTL_MS` | `1800000` | Session time-to-live in milliseconds (30 mins) |
699
- | `SESSION_INIT_TIMEOUT_MS` | `10000` | Time allowed for session initialization (ms) |
700
- | `MAX_SESSIONS` | `200` | Maximum number of active sessions |
701
-
702
- #### VS Code HTTP Mode Setup
703
-
704
- ```json
705
- {
706
- "servers": {
707
- "superFetch": {
708
- "type": "http",
709
- "url": "http://127.0.0.1:3000/mcp"
710
- }
711
- }
712
- }
713
- ```
714
-
715
- #### Docker/Kubernetes Example
625
+ ---
716
626
 
717
- ```bash
718
- PORT=8080 HOST=0.0.0.0 ALLOWED_ORIGINS=https://myapp.com npx @j0hanz/superfetch@latest
719
- ```
627
+ ## HTTP Mode Details
720
628
 
721
- </details>
629
+ HTTP mode uses the MCP Streamable HTTP transport. The workflow is:
722
630
 
723
- ### Configuration Cookbook
631
+ 1. `POST /mcp` with an `initialize` request and **no** `mcp-session-id` header.
632
+ 2. The server returns `mcp-session-id` in the response headers.
633
+ 3. Use that header for subsequent `POST /mcp`, `GET /mcp`, and `DELETE /mcp` requests.
724
634
 
725
- | Use Case | Configuration |
726
- | ---------------------------- | -------------------------------------------------------------- |
727
- | 🐛 **Debugging issues** | `LOG_LEVEL=debug`, `CACHE_ENABLED=false` |
728
- | 🚀 **Maximum performance** | `CACHE_TTL=7200`, `CACHE_MAX_KEYS=500`, `LOG_LEVEL=error` |
729
- | 🌐 **Slow target sites** | `FETCH_TIMEOUT=60000` |
730
- | 🤖 **Bypass bot detection** | `USER_AGENT="Mozilla/5.0 (compatible; MyBot/1.0)"` |
731
- | 🔄 **CI/CD (always fresh)** | `CACHE_ENABLED=false`, `FETCH_TIMEOUT=60000`, `LOG_LEVEL=warn` |
732
- | 📊 **Production monitoring** | `LOG_LEVEL=warn` or `error` |
635
+ If `MAX_SESSIONS` is reached, the server evicts the oldest session when possible; otherwise, it returns a 503.
733
636
 
734
637
  ---
735
638
 
@@ -737,15 +640,16 @@ PORT=8080 HOST=0.0.0.0 ALLOWED_ORIGINS=https://myapp.com npx @j0hanz/superfetch@
737
640
 
738
641
  JSONL output includes semantic content blocks:
739
642
 
740
- | Type | Description |
741
- | ----------- | ----------------------------------------------- |
742
- | `metadata` | Page title, description, author, URL, timestamp |
743
- | `heading` | Headings (h1-h6) with level indicator |
744
- | `paragraph` | Text paragraphs |
745
- | `list` | Ordered/unordered lists |
746
- | `code` | Code blocks with language |
747
- | `table` | Tables with headers and rows |
748
- | `image` | Images with src and alt text |
643
+ | Type | Description |
644
+ | ------------ | ---------------------------------------- |
645
+ | `metadata` | Minimal page metadata (type, title, url) |
646
+ | `heading` | Headings (h1-h6) with level indicator |
647
+ | `paragraph` | Text paragraphs |
648
+ | `list` | Ordered/unordered lists |
649
+ | `code` | Code blocks with optional language |
650
+ | `table` | Tables with headers and rows |
651
+ | `image` | Images with src and alt text |
652
+ | `blockquote` | Block quote text |
749
653
 
750
654
  ---
751
655
 
@@ -753,12 +657,19 @@ JSONL output includes semantic content blocks:
753
657
 
754
658
  ### SSRF Protection
755
659
 
756
- Blocked destinations:
660
+ Blocked destinations include:
757
661
 
758
662
  - Localhost and loopback addresses
759
663
  - Private IP ranges (`10.x.x.x`, `172.16-31.x.x`, `192.168.x.x`)
760
664
  - Cloud metadata endpoints (AWS, GCP, Azure)
761
665
  - IPv6 link-local and unique local addresses
666
+ - Internal suffixes such as `.local` and `.internal`
667
+
668
+ ### URL Validation
669
+
670
+ - Only `http` and `https` URLs
671
+ - No embedded credentials in URLs
672
+ - Max URL length: 2048 characters
762
673
 
763
674
  ### Header Sanitization
764
675
 
@@ -766,21 +677,7 @@ Blocked headers: `host`, `authorization`, `cookie`, `x-forwarded-for`, `x-real-i
766
677
 
767
678
  ### Rate Limiting
768
679
 
769
- Default: **100 requests/minute** per IP. Configure with `RATE_LIMIT_MAX` and
770
- `RATE_LIMIT_WINDOW_MS`.
771
-
772
- ### HTTP Mode Endpoints
773
-
774
- When running without `--stdio`, the following endpoints are available:
775
-
776
- | Endpoint | Method | Description |
777
- | --------- | ------ | --------------------------------------- |
778
- | `/health` | GET | Health check with uptime and version |
779
- | `/mcp` | POST | MCP request handling (requires session) |
780
- | `/mcp` | GET | SSE stream for notifications |
781
- | `/mcp` | DELETE | Close session |
782
-
783
- Sessions are managed via `mcp-session-id` header with 30-minute TTL.
680
+ Default: **100 requests/minute** per IP (HTTP mode only). Configure with `RATE_LIMIT_MAX` and `RATE_LIMIT_WINDOW_MS`.
784
681
 
785
682
  ---
786
683
 
@@ -807,14 +704,13 @@ Sessions are managed via `mcp-session-id` header with 30-minute TTL.
807
704
 
808
705
  | Category | Technology |
809
706
  | ------------------ | --------------------------------- |
810
- | Runtime | Node.js 20.0.0 |
707
+ | Runtime | Node.js >=20.12 |
811
708
  | Language | TypeScript 5.9 |
812
709
  | MCP SDK | @modelcontextprotocol/sdk ^1.25.1 |
813
710
  | Content Extraction | @mozilla/readability ^0.6.0 |
814
711
  | HTML Parsing | Cheerio ^1.1.2, LinkeDOM ^0.18.12 |
815
712
  | Markdown | Turndown ^7.2.2 |
816
- | HTTP | Express ^5.2.1, Axios ^1.7.9 |
817
- | Caching | node-cache ^5.1.2 |
713
+ | HTTP | Express ^5.2.1, undici ^6.22.0 |
818
714
  | Validation | Zod ^3.24.1 |
819
715
 
820
716
  ---