@j0hanz/superfetch 1.1.9 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/README.md +179 -469
  2. package/dist/config/constants.d.ts +19 -0
  3. package/dist/config/constants.d.ts.map +1 -0
  4. package/dist/config/constants.js +24 -0
  5. package/dist/config/constants.js.map +1 -0
  6. package/dist/config/formatting.d.ts +0 -2
  7. package/dist/config/formatting.d.ts.map +1 -1
  8. package/dist/config/formatting.js +1 -3
  9. package/dist/config/formatting.js.map +1 -1
  10. package/dist/config/index.d.ts +9 -3
  11. package/dist/config/index.d.ts.map +1 -1
  12. package/dist/config/index.js +19 -16
  13. package/dist/config/index.js.map +1 -1
  14. package/dist/config/types/content.d.ts +1 -20
  15. package/dist/config/types/content.d.ts.map +1 -1
  16. package/dist/config/types/content.js +0 -1
  17. package/dist/config/types/runtime.d.ts +7 -5
  18. package/dist/config/types/runtime.d.ts.map +1 -1
  19. package/dist/config/types/runtime.js +0 -1
  20. package/dist/config/types/tools.d.ts +5 -50
  21. package/dist/config/types/tools.d.ts.map +1 -1
  22. package/dist/config/types/tools.js +0 -1
  23. package/dist/errors/app-error.d.ts +0 -1
  24. package/dist/errors/app-error.js +0 -1
  25. package/dist/http/auth.d.ts +0 -1
  26. package/dist/http/auth.d.ts.map +1 -1
  27. package/dist/http/auth.js +17 -13
  28. package/dist/http/auth.js.map +1 -1
  29. package/dist/http/cors.d.ts +0 -1
  30. package/dist/http/cors.js +4 -1
  31. package/dist/http/cors.js.map +1 -1
  32. package/dist/http/download-routes.d.ts +14 -0
  33. package/dist/http/download-routes.d.ts.map +1 -0
  34. package/dist/http/download-routes.js +131 -0
  35. package/dist/http/download-routes.js.map +1 -0
  36. package/dist/http/mcp-routes.d.ts +1 -2
  37. package/dist/http/mcp-routes.d.ts.map +1 -1
  38. package/dist/http/mcp-routes.js +1 -2
  39. package/dist/http/mcp-routes.js.map +1 -1
  40. package/dist/http/mcp-session-helpers.d.ts +13 -0
  41. package/dist/http/mcp-session-helpers.d.ts.map +1 -0
  42. package/dist/http/mcp-session-helpers.js +64 -0
  43. package/dist/http/mcp-session-helpers.js.map +1 -0
  44. package/dist/http/mcp-session.d.ts +1 -3
  45. package/dist/http/mcp-session.d.ts.map +1 -1
  46. package/dist/http/mcp-session.js +7 -71
  47. package/dist/http/mcp-session.js.map +1 -1
  48. package/dist/http/mcp-validation.d.ts +1 -2
  49. package/dist/http/mcp-validation.d.ts.map +1 -1
  50. package/dist/http/mcp-validation.js +6 -27
  51. package/dist/http/mcp-validation.js.map +1 -1
  52. package/dist/http/rate-limit.d.ts +1 -2
  53. package/dist/http/rate-limit.d.ts.map +1 -1
  54. package/dist/http/rate-limit.js +0 -1
  55. package/dist/http/rate-limit.js.map +1 -1
  56. package/dist/http/server-middleware.d.ts +9 -0
  57. package/dist/http/server-middleware.d.ts.map +1 -0
  58. package/dist/http/server-middleware.js +111 -0
  59. package/dist/http/server-middleware.js.map +1 -0
  60. package/dist/http/server.d.ts +0 -1
  61. package/dist/http/server.d.ts.map +1 -1
  62. package/dist/http/server.js +20 -99
  63. package/dist/http/server.js.map +1 -1
  64. package/dist/http/session-cleanup.d.ts +2 -0
  65. package/dist/http/session-cleanup.d.ts.map +1 -0
  66. package/dist/http/session-cleanup.js +37 -0
  67. package/dist/http/session-cleanup.js.map +1 -0
  68. package/dist/http/sessions.d.ts +1 -2
  69. package/dist/http/sessions.d.ts.map +1 -1
  70. package/dist/http/sessions.js +0 -1
  71. package/dist/index.d.ts +0 -1
  72. package/dist/index.js +13 -6
  73. package/dist/index.js.map +1 -1
  74. package/dist/middleware/error-handler.d.ts +0 -1
  75. package/dist/middleware/error-handler.js +0 -1
  76. package/dist/resources/cached-content.d.ts +0 -1
  77. package/dist/resources/cached-content.d.ts.map +1 -1
  78. package/dist/resources/cached-content.js +76 -12
  79. package/dist/resources/cached-content.js.map +1 -1
  80. package/dist/resources/index.d.ts +0 -1
  81. package/dist/resources/index.js +0 -1
  82. package/dist/server.d.ts +0 -1
  83. package/dist/server.d.ts.map +1 -1
  84. package/dist/server.js +8 -3
  85. package/dist/server.js.map +1 -1
  86. package/dist/services/cache.d.ts +7 -4
  87. package/dist/services/cache.d.ts.map +1 -1
  88. package/dist/services/cache.js +86 -26
  89. package/dist/services/cache.js.map +1 -1
  90. package/dist/services/context.d.ts +2 -2
  91. package/dist/services/context.d.ts.map +1 -1
  92. package/dist/services/context.js +0 -1
  93. package/dist/services/extractor.d.ts +1 -2
  94. package/dist/services/extractor.d.ts.map +1 -1
  95. package/dist/services/extractor.js +45 -18
  96. package/dist/services/extractor.js.map +1 -1
  97. package/dist/services/fetcher/agents.d.ts +0 -1
  98. package/dist/services/fetcher/agents.d.ts.map +1 -1
  99. package/dist/services/fetcher/agents.js +3 -7
  100. package/dist/services/fetcher/agents.js.map +1 -1
  101. package/dist/services/fetcher/errors.d.ts +0 -1
  102. package/dist/services/fetcher/errors.js +0 -1
  103. package/dist/services/fetcher/headers.d.ts.map +1 -1
  104. package/dist/services/fetcher/headers.js +2 -24
  105. package/dist/services/fetcher/headers.js.map +1 -1
  106. package/dist/services/fetcher/interceptors.d.ts +2 -2
  107. package/dist/services/fetcher/interceptors.d.ts.map +1 -1
  108. package/dist/services/fetcher/interceptors.js +30 -21
  109. package/dist/services/fetcher/interceptors.js.map +1 -1
  110. package/dist/services/fetcher/redirects.d.ts +0 -2
  111. package/dist/services/fetcher/redirects.d.ts.map +1 -1
  112. package/dist/services/fetcher/redirects.js +20 -18
  113. package/dist/services/fetcher/redirects.js.map +1 -1
  114. package/dist/services/fetcher/response.d.ts +0 -1
  115. package/dist/services/fetcher/response.js +4 -5
  116. package/dist/services/fetcher/retry-policy.d.ts +1 -28
  117. package/dist/services/fetcher/retry-policy.d.ts.map +1 -1
  118. package/dist/services/fetcher/retry-policy.js +119 -126
  119. package/dist/services/fetcher/retry-policy.js.map +1 -1
  120. package/dist/services/fetcher.d.ts +1 -2
  121. package/dist/services/fetcher.d.ts.map +1 -1
  122. package/dist/services/fetcher.js +18 -13
  123. package/dist/services/fetcher.js.map +1 -1
  124. package/dist/services/logger.d.ts +1 -2
  125. package/dist/services/logger.d.ts.map +1 -1
  126. package/dist/services/logger.js +0 -1
  127. package/dist/services/parser.d.ts +1 -3
  128. package/dist/services/parser.d.ts.map +1 -1
  129. package/dist/services/parser.js +5 -39
  130. package/dist/services/parser.js.map +1 -1
  131. package/dist/tools/handlers/fetch-links/link-extractor.d.ts.map +1 -1
  132. package/dist/tools/handlers/fetch-links/link-extractor.js +15 -19
  133. package/dist/tools/handlers/fetch-links/link-extractor.js.map +1 -1
  134. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  135. package/dist/tools/handlers/fetch-links.tool.js +0 -2
  136. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  137. package/dist/tools/handlers/fetch-markdown.tool.d.ts +1 -2
  138. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  139. package/dist/tools/handlers/fetch-markdown.tool.js +50 -20
  140. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  141. package/dist/tools/handlers/fetch-single.shared.d.ts +14 -3
  142. package/dist/tools/handlers/fetch-single.shared.d.ts.map +1 -1
  143. package/dist/tools/handlers/fetch-single.shared.js +66 -3
  144. package/dist/tools/handlers/fetch-single.shared.js.map +1 -1
  145. package/dist/tools/handlers/fetch-url.tool.d.ts +1 -2
  146. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  147. package/dist/tools/handlers/fetch-url.tool.js +39 -17
  148. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  149. package/dist/tools/handlers/fetch-urls/validation.d.ts +0 -1
  150. package/dist/tools/handlers/fetch-urls/validation.d.ts.map +1 -1
  151. package/dist/tools/handlers/fetch-urls/validation.js +1 -1
  152. package/dist/tools/handlers/fetch-urls/validation.js.map +1 -1
  153. package/dist/tools/index.d.ts +0 -1
  154. package/dist/tools/index.d.ts.map +1 -1
  155. package/dist/tools/index.js +1 -20
  156. package/dist/tools/index.js.map +1 -1
  157. package/dist/tools/schemas.d.ts +57 -250
  158. package/dist/tools/schemas.d.ts.map +1 -1
  159. package/dist/tools/schemas.js +38 -198
  160. package/dist/tools/schemas.js.map +1 -1
  161. package/dist/tools/utils/cache-vary.d.ts +0 -2
  162. package/dist/tools/utils/cache-vary.d.ts.map +1 -1
  163. package/dist/tools/utils/cache-vary.js +8 -40
  164. package/dist/tools/utils/cache-vary.js.map +1 -1
  165. package/dist/tools/utils/common.d.ts +2 -4
  166. package/dist/tools/utils/common.d.ts.map +1 -1
  167. package/dist/tools/utils/common.js +6 -7
  168. package/dist/tools/utils/common.js.map +1 -1
  169. package/dist/tools/utils/content-transform.d.ts +1 -3
  170. package/dist/tools/utils/content-transform.d.ts.map +1 -1
  171. package/dist/tools/utils/content-transform.js +65 -14
  172. package/dist/tools/utils/content-transform.js.map +1 -1
  173. package/dist/tools/utils/fetch-pipeline.d.ts +1 -2
  174. package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
  175. package/dist/tools/utils/fetch-pipeline.js +25 -21
  176. package/dist/tools/utils/fetch-pipeline.js.map +1 -1
  177. package/dist/tools/utils/inline-content.d.ts +3 -3
  178. package/dist/tools/utils/inline-content.d.ts.map +1 -1
  179. package/dist/tools/utils/inline-content.js +0 -1
  180. package/dist/transformers/jsonl.transformer.d.ts +1 -2
  181. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  182. package/dist/transformers/jsonl.transformer.js +0 -1
  183. package/dist/transformers/jsonl.transformer.js.map +1 -1
  184. package/dist/transformers/markdown.transformer.d.ts +1 -2
  185. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  186. package/dist/transformers/markdown.transformer.js +11 -7
  187. package/dist/transformers/markdown.transformer.js.map +1 -1
  188. package/dist/utils/code-language.d.ts +2 -0
  189. package/dist/utils/code-language.d.ts.map +1 -0
  190. package/dist/utils/code-language.js +56 -0
  191. package/dist/utils/code-language.js.map +1 -0
  192. package/dist/utils/content-cleaner.d.ts +0 -2
  193. package/dist/utils/content-cleaner.d.ts.map +1 -1
  194. package/dist/utils/content-cleaner.js +0 -4
  195. package/dist/utils/content-cleaner.js.map +1 -1
  196. package/dist/utils/crypto.d.ts +2 -0
  197. package/dist/utils/crypto.d.ts.map +1 -0
  198. package/dist/utils/crypto.js +32 -0
  199. package/dist/utils/crypto.js.map +1 -0
  200. package/dist/utils/download-url.d.ts +8 -0
  201. package/dist/utils/download-url.d.ts.map +1 -0
  202. package/dist/utils/download-url.js +27 -0
  203. package/dist/utils/download-url.js.map +1 -0
  204. package/dist/utils/error-utils.d.ts +3 -0
  205. package/dist/utils/error-utils.d.ts.map +1 -0
  206. package/dist/utils/error-utils.js +12 -0
  207. package/dist/utils/error-utils.js.map +1 -0
  208. package/dist/utils/filename-generator.d.ts +1 -0
  209. package/dist/utils/filename-generator.d.ts.map +1 -0
  210. package/dist/utils/filename-generator.js +59 -0
  211. package/dist/utils/filename-generator.js.map +1 -0
  212. package/dist/utils/header-normalizer.d.ts +7 -4
  213. package/dist/utils/header-normalizer.d.ts.map +1 -1
  214. package/dist/utils/header-normalizer.js +23 -17
  215. package/dist/utils/header-normalizer.js.map +1 -1
  216. package/dist/utils/html-truncator.d.ts +0 -1
  217. package/dist/utils/html-truncator.js +0 -1
  218. package/dist/utils/sanitizer.d.ts +0 -1
  219. package/dist/utils/sanitizer.js +0 -1
  220. package/dist/utils/tool-error-handler.d.ts +1 -3
  221. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  222. package/dist/utils/tool-error-handler.js +11 -6
  223. package/dist/utils/tool-error-handler.js.map +1 -1
  224. package/dist/utils/url-sanitizer.d.ts +2 -0
  225. package/dist/utils/url-sanitizer.d.ts.map +1 -0
  226. package/dist/utils/url-sanitizer.js +12 -0
  227. package/dist/utils/url-sanitizer.js.map +1 -0
  228. package/dist/utils/url-validator.d.ts +1 -3
  229. package/dist/utils/url-validator.d.ts.map +1 -1
  230. package/dist/utils/url-validator.js +89 -53
  231. package/dist/utils/url-validator.js.map +1 -1
  232. package/package.json +7 -9
package/README.md CHANGED
@@ -1,8 +1,8 @@
1
- # 🚀 superFetch MCP Server
1
+ # superFetch MCP Server
2
2
 
3
3
  <img src="docs/logo.png" alt="SuperFetch MCP Logo" width="200">
4
4
 
5
- [![npm version](https://img.shields.io/npm/v/@j0hanz/superfetch.svg)](https://www.npmjs.com/package/@j0hanz/superfetch) [![Node.js](https://img.shields.io/badge/Node.js-≥20.0.0-339933?logo=nodedotjs&logoColor=white)](https://nodejs.org/) [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
5
+ [![npm version](https://img.shields.io/npm/v/@j0hanz/superfetch.svg)](https://www.npmjs.com/package/@j0hanz/superfetch) [![Node.js](https://img.shields.io/badge/Node.js-%3E=20.12-339933?logo=nodedotjs&logoColor=white)](https://nodejs.org/) [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
6
6
 
7
7
  ## One-Click Install
8
8
 
@@ -10,71 +10,64 @@
10
10
 
11
11
  [![Install in Cursor](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/install-mcp?name=superfetch&config=eyJjb21tYW5kIjoibnB4IiwiYXJncyI6WyIteSIsIkBqMGhhbnovc3VwZXJmZXRjaEBsYXRlc3QiLCItLXN0ZGlvIl19)
12
12
 
13
- A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches, extracts, and transforms web content into AI-optimized formats using Mozilla Readability.
13
+ A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches web pages, extracts readable content with Mozilla Readability, and returns AI-friendly JSONL or Markdown.
14
14
 
15
- [Quick Start](#quick-start) · [How to Choose a Tool](#-how-to-choose-a-tool) · [Tools](#available-tools) · [Configuration](#configuration) · [Contributing](#contributing)
15
+ [Quick Start](#quick-start) | [How to Choose a Tool](#how-to-choose-a-tool) | [Tools](#available-tools) | [Resources](#resources) | [Configuration](#configuration) | [Security](#security) | [Development](#development)
16
16
 
17
- > 📦 **Published to [MCP Registry](https://registry.modelcontextprotocol.io/)** — Search for `io.github.j0hanz/superfetch`
17
+ > **Published to [MCP Registry](https://registry.modelcontextprotocol.io/)** - Search for `io.github.j0hanz/superfetch`
18
18
 
19
19
  ---
20
20
 
21
21
  > [!CAUTION]
22
22
  > This server can access URLs on behalf of AI assistants. Built-in SSRF protection blocks private IP ranges and cloud metadata endpoints, but exercise caution when deploying in sensitive environments.
23
23
 
24
- ## Features
24
+ ## Features
25
25
 
26
- | Feature | Description |
27
- | ------------------------- | ------------------------------------------------------------- |
28
- | 🧠 **Smart Extraction** | Mozilla Readability removes ads, navigation, and boilerplate |
29
- | 📄 **Multiple Formats** | JSONL semantic blocks or clean Markdown with YAML frontmatter |
30
- | 🔗 **Link Discovery** | Extract and classify internal/external links |
31
- | ⚡ **Built-in Caching** | Configurable TTL and max entries |
32
- | 🛡️ **Security First** | SSRF protection, URL validation, header sanitization |
33
- | 🔄 **Resilient Fetching** | Exponential backoff with jitter |
34
- | 📊 **Monitoring** | Stats resource for cache performance and health |
26
+ | Feature | Description |
27
+ | ------------------ | ------------------------------------------------------------------------- |
28
+ | Smart extraction | Mozilla Readability removes ads, navigation, and boilerplate when enabled |
29
+ | JSONL + Markdown | JSONL semantic blocks or clean Markdown with frontmatter |
30
+ | Structured blocks | Headings, paragraphs, lists, code, tables, images, blockquotes |
31
+ | Built-in caching | In-memory cache with TTL, max keys, and resource subscriptions |
32
+ | Resilient fetching | Redirect handling plus retry with exponential backoff + jitter |
33
+ | Security first | URL validation, SSRF/DNS/IP blocklists, header sanitization |
34
+ | HTTP mode | API key auth, session management, rate limiting, CORS |
35
35
 
36
36
  ---
37
37
 
38
- ## 🎯 How to Choose a Tool
38
+ ## How to Choose a Tool
39
39
 
40
- Use this guide to select the right tool for your web content extraction needs:
40
+ Use this guide to select the right tool for your web content extraction needs.
41
41
 
42
42
  ### Decision Tree
43
43
 
44
44
  ```text
45
45
  Need web content for AI?
46
- ├─ Single URL?
47
- │ ├─ Need structured semantic blocks → fetch-url (JSONL)
48
- │ ├─ Need readable markdown → fetch-markdown
49
- │ └─ Need links only → fetch-links
50
- └─ Multiple URLs?
51
- └─ Use fetch-urls (batch processing)
46
+ - Want structured JSONL blocks -> fetch-url (format: jsonl)
47
+ - Want clean Markdown -> fetch-markdown
48
+ - Want Markdown but also need contentBlocks count -> fetch-url (format: markdown)
52
49
  ```
53
50
 
54
51
  ### Quick Reference Table
55
52
 
56
- | Tool | Best For | Output Format | Use When |
57
- | ---------------- | -------------------------------- | ----------------------- | ------------------------------------------- |
58
- | `fetch-url` | Single page structured content | JSONL semantic blocks | AI analysis, RAG pipelines, content parsing |
59
- | `fetch-markdown` | Single page readable format | Clean Markdown + TOC | Documentation, human-readable output |
60
- | `fetch-links` | Link discovery & classification | URL array with types | Sitemap building, finding related pages |
61
- | `fetch-urls` | Batch processing multiple pages | Multiple JSONL/Markdown | Comparing pages, bulk extraction |
53
+ | Tool | Best For | Output Format | Use When |
54
+ | ---------------- | ---------------------------------- | -------------------------------- | ----------------------------------------- |
55
+ | `fetch-url` | Single page with structured blocks | JSONL (or Markdown via `format`) | RAG pipelines, content parsing, analytics |
56
+ | `fetch-markdown` | Single page in readable format | Markdown + frontmatter | Documentation, summaries, human review |
62
57
 
63
58
  ### Common Use Cases
64
59
 
65
60
  | Task | Recommended Tool | Why |
66
61
  | ------------------------ | ---------------------------------------- | ---------------------------------------------------- |
67
62
  | Parse a blog post for AI | `fetch-url` | Returns semantic blocks (headings, paragraphs, code) |
68
- | Generate documentation | `fetch-markdown` | Clean markdown with optional TOC |
69
- | Build a sitemap | `fetch-links` | Extracts and classifies all links |
70
- | Compare multiple docs | `fetch-urls` | Parallel fetching with concurrency control |
63
+ | Generate documentation | `fetch-markdown` | Clean markdown with frontmatter |
71
64
  | Extract article for RAG | `fetch-url` + `extractMainContent: true` | Removes ads/nav, keeps main content |
72
65
 
73
66
  ---
74
67
 
75
68
  ## Quick Start
76
69
 
77
- Add superFetch to your MCP client configuration — no installation required!
70
+ Add superFetch to your MCP client configuration - no installation required.
78
71
 
79
72
  ### Claude Desktop
80
73
 
@@ -108,25 +101,8 @@ Add to `.vscode/mcp.json` in your workspace:
108
101
 
109
102
  ### With Custom Configuration
110
103
 
111
- Configure SuperFetch behavior by adding environment variables to the `env` property:
112
-
113
- ```json
114
- {
115
- "servers": {
116
- "superFetch": {
117
- "command": "npx",
118
- "args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
119
- "env": {
120
- "CACHE_TTL": "7200",
121
- "LOG_LEVEL": "debug",
122
- "FETCH_TIMEOUT": "60000"
123
- }
124
- }
125
- }
126
- }
127
- ```
128
-
129
- See [Configuration](#configuration) section below for all available options and presets.
104
+ Add environment variables in your MCP client config under `env`.
105
+ See [Configuration](#configuration) or `CONFIGURATION.md` for all available options and presets.
130
106
 
131
107
  ### Cursor
132
108
 
@@ -146,7 +122,7 @@ See [Configuration](#configuration) section below for all available options and
146
122
  }
147
123
  ```
148
124
 
149
- > **Tip:** On Windows, if you encounter issues, try: `cmd /c "npx -y @j0hanz/superfetch@latest --stdio"`
125
+ > **Tip (Windows):** If you encounter issues, try: `cmd /c "npx -y @j0hanz/superfetch@latest --stdio"`
150
126
 
151
127
  <details>
152
128
  <summary><strong>Codex IDE</strong></summary>
@@ -161,16 +137,9 @@ command = "npx"
161
137
  args = ["-y", "@j0hanz/superfetch@latest", "--stdio"]
162
138
  ```
163
139
 
164
- **With Environment Variables:**
140
+ **With Environment Variables:** See `CONFIGURATION.md` for examples.
165
141
 
166
- ```toml
167
- [mcp_servers.superfetch]
168
- command = "npx"
169
- args = ["-y", "@j0hanz/superfetch@latest", "--stdio"]
170
- env = { CACHE_TTL = "7200", LOG_LEVEL = "debug", FETCH_TIMEOUT = "60000" }
171
- ```
172
-
173
- > **Access config file:** Click the gear icon → "Codex Settings &gt; Open config.toml"
142
+ > **Access config file:** Click the gear icon -> "Codex Settings > Open config.toml"
174
143
  >
175
144
  > **Documentation:** [Codex MCP Guide](https://codex.com/docs/mcp)
176
145
 
@@ -261,7 +230,7 @@ npm install -g @j0hanz/superfetch
261
230
  # Run in stdio mode
262
231
  superfetch --stdio
263
232
 
264
- # Run HTTP server
233
+ # Run HTTP server (requires API_KEY)
265
234
  superfetch
266
235
  ```
267
236
 
@@ -276,24 +245,6 @@ npm run build
276
245
 
277
246
  ### Running the Server
278
247
 
279
- <details>
280
- <summary><strong>HTTP Mode</strong> (default)</summary>
281
-
282
- ```bash
283
- # Development with hot reload
284
- npm run dev
285
-
286
- # Production
287
- npm start
288
- ```
289
-
290
- Server runs at `http://127.0.0.1:3000`:
291
-
292
- - Health check: `GET /health`
293
- - MCP endpoint: `POST /mcp`
294
-
295
- </details>
296
-
297
248
  <details>
298
249
  <summary><strong>stdio Mode</strong> (direct MCP integration)</summary>
299
250
 
@@ -303,433 +254,199 @@ node dist/index.js --stdio
303
254
 
304
255
  </details>
305
256
 
306
- ---
307
-
308
- ## Available Tools
309
-
310
- > **Note:** If extracted content exceeds `MAX_INLINE_CONTENT_CHARS`, the tool response includes a `resourceUri` and a `resource_link` content block instead of embedding the full text.
311
-
312
- ### `fetch-url`
313
-
314
- Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks.
315
-
316
- | Parameter | Type | Default | Description |
317
- | -------------------- | ------- | ---------- | -------------------------------------------- |
318
- | `url` | string | _required_ | URL to fetch |
319
- | `extractMainContent` | boolean | `true` | Use Readability to extract main content |
320
- | `includeMetadata` | boolean | `true` | Include page metadata (title, description) |
321
- | `maxContentLength` | number | – | Maximum content length in characters |
322
- | `customHeaders` | object | – | Custom HTTP headers for the request |
323
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
324
- | `retries` | number | `3` | Number of retry attempts (1-10) |
257
+ <details>
258
+ <summary><strong>HTTP Mode</strong> (default)</summary>
325
259
 
326
- **Example Response:**
260
+ HTTP mode requires `API_KEY` and only binds to loopback addresses unless `ALLOW_REMOTE=true`.
327
261
 
328
- ```json
329
- {
330
- "url": "https://example.com/article",
331
- "title": "Example Article",
332
- "fetchedAt": "2025-12-11T10:30:00.000Z",
333
- "contentBlocks": [
334
- {
335
- "type": "metadata",
336
- "title": "Example Article",
337
- "description": "A sample article"
338
- },
339
- { "type": "heading", "level": 1, "text": "Introduction" },
340
- {
341
- "type": "paragraph",
342
- "text": "This is the main content of the article..."
343
- },
344
- {
345
- "type": "code",
346
- "language": "javascript",
347
- "content": "console.log('Hello');"
348
- }
349
- ],
350
- "cached": false
351
- }
262
+ ```bash
263
+ API_KEY=supersecret npx -y @j0hanz/superfetch@latest
264
+ # Server runs at http://127.0.0.1:3000
352
265
  ```
353
266
 
354
- ### `fetch-links`
355
-
356
- Extracts hyperlinks from a webpage with classification. Supports filtering, image links, and link limits.
357
-
358
- | Parameter | Type | Default | Description |
359
- | ----------------- | ------- | ---------- | -------------------------------------------- |
360
- | `url` | string | _required_ | URL to extract links from |
361
- | `includeExternal` | boolean | `true` | Include external links |
362
- | `includeInternal` | boolean | `true` | Include internal links |
363
- | `includeImages` | boolean | `false` | Include image links (img src attributes) |
364
- | `maxLinks` | number | – | Maximum number of links to return (1-1000) |
365
- | `filterPattern` | string | – | Regex pattern to filter links (matches href) |
366
- | `customHeaders` | object | – | Custom HTTP headers for the request |
367
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
368
- | `retries` | number | `3` | Number of retry attempts (1-10) |
267
+ **Windows (PowerShell):**
369
268
 
370
- **Example Response:**
371
-
372
- ```json
373
- {
374
- "url": "https://example.com/",
375
- "linkCount": 15,
376
- "links": [
377
- {
378
- "href": "https://example.com/about",
379
- "text": "About Us",
380
- "type": "internal"
381
- },
382
- {
383
- "href": "https://github.com/example",
384
- "text": "GitHub",
385
- "type": "external"
386
- },
387
- { "href": "https://example.com/logo.png", "text": "", "type": "image" }
388
- ],
389
- "cached": false,
390
- "truncated": false
391
- }
269
+ ```powershell
270
+ $env:API_KEY = "supersecret"
271
+ npx -y @j0hanz/superfetch@latest
392
272
  ```
393
273
 
394
- ### `fetch-markdown`
274
+ Endpoints (all require `Authorization: Bearer <API_KEY>` or `X-API-Key: <API_KEY>`):
395
275
 
396
- Fetches a webpage and converts it to clean Markdown with optional table of contents.
276
+ - `GET /health`
277
+ - `POST /mcp`
278
+ - `GET /mcp` (SSE stream)
279
+ - `DELETE /mcp`
280
+ - `GET /mcp/downloads/:namespace/:hash`
397
281
 
398
- | Parameter | Type | Default | Description |
399
- | -------------------- | ------- | ---------- | -------------------------------------------- |
400
- | `url` | string | _required_ | URL to fetch |
401
- | `extractMainContent` | boolean | `true` | Extract main content only |
402
- | `includeMetadata` | boolean | `true` | Include YAML frontmatter |
403
- | `maxContentLength` | number | – | Maximum content length in characters |
404
- | `generateToc` | boolean | `false` | Generate table of contents from headings |
405
- | `customHeaders` | object | – | Custom HTTP headers for the request |
406
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
407
- | `retries` | number | `3` | Number of retry attempts (1-10) |
408
-
409
- **Example Response:**
410
-
411
- ````json
412
- {
413
- "url": "https://example.com/docs",
414
- "title": "Documentation",
415
- "fetchedAt": "2025-12-11T10:30:00.000Z",
416
- "markdown": "---\ntitle: Documentation\nsource: \"https://example.com/docs\"\n---\n\n# Getting Started\n\nWelcome to our documentation...\n\n## Installation\n\n```bash\nnpm install example\n```",
417
- "toc": [
418
- { "level": 1, "text": "Getting Started", "slug": "getting-started" },
419
- { "level": 2, "text": "Installation", "slug": "installation" }
420
- ],
421
- "cached": false,
422
- "truncated": false
423
- }
424
- ````
425
-
426
- ### `fetch-urls` (Batch)
427
-
428
- Fetches multiple URLs in parallel with concurrency control. Ideal for comparing content or processing multiple pages efficiently.
429
-
430
- | Parameter | Type | Default | Description |
431
- | -------------------- | -------- | ---------- | -------------------------------------------- |
432
- | `urls` | string[] | _required_ | Array of URLs to fetch (1-10 URLs) |
433
- | `extractMainContent` | boolean | `true` | Use Readability to extract main content |
434
- | `includeMetadata` | boolean | `true` | Include page metadata |
435
- | `maxContentLength` | number | – | Maximum content length per URL in characters |
436
- | `format` | string | `'jsonl'` | Output format: `'jsonl'` or `'markdown'` |
437
- | `concurrency` | number | `3` | Maximum concurrent requests (1-5) |
438
- | `continueOnError` | boolean | `true` | Continue processing if some URLs fail |
439
- | `customHeaders` | object | – | Custom HTTP headers for all requests |
440
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
441
- | `retries` | number | `3` | Number of retry attempts (1-10) |
442
-
443
- **Example Output:**
444
-
445
- ```json
446
- {
447
- "results": [
448
- {
449
- "url": "https://example.com",
450
- "success": true,
451
- "title": "Example",
452
- "content": "...",
453
- "cached": false
454
- },
455
- {
456
- "url": "https://example.org",
457
- "success": true,
458
- "title": "Example Org",
459
- "content": "...",
460
- "cached": false
461
- }
462
- ],
463
- "summary": {
464
- "total": 2,
465
- "successful": 2,
466
- "failed": 0,
467
- "cached": 0,
468
- "totalContentBlocks": 15
469
- },
470
- "fetchedAt": "2024-12-11T10:30:00.000Z"
471
- }
472
- ```
282
+ Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#http-mode-details)).
473
283
 
474
- ### Resources
475
-
476
- | URI | Description |
477
- | --------------------- | --------------------------------------------------- |
478
- | `superfetch://stats` | Server statistics and cache metrics |
479
- | `superfetch://health` | Real-time server health and dependency status |
480
- | Dynamic resources | Cached content available via resource subscriptions |
284
+ </details>
481
285
 
482
286
  ---
483
287
 
484
- ## Configuration
485
-
486
- ### Configuration Presets
487
-
488
- <details open>
489
- <summary><strong>Default (Recommended)</strong> — No configuration needed</summary>
490
-
491
- ```json
492
- {
493
- "servers": {
494
- "superFetch": {
495
- "command": "npx",
496
- "args": ["-y", "@j0hanz/superfetch@latest", "--stdio"]
497
- }
498
- }
499
- }
500
- ```
501
-
502
- </details>
288
+ ## Available Tools
503
289
 
504
- <details>
505
- <summary><strong>Debug Mode</strong> — Verbose logging and no cache</summary>
290
+ ### Tool Response Notes
506
291
 
507
- **VS Code** (`.vscode/mcp.json`):
292
+ Both tools return:
508
293
 
509
- ```json
510
- {
511
- "servers": {
512
- "superFetch": {
513
- "command": "npx",
514
- "args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
515
- "env": {
516
- "LOG_LEVEL": "debug",
517
- "CACHE_ENABLED": "false"
518
- }
519
- }
520
- }
521
- }
522
- ```
294
+ - `structuredContent` for machine-readable fields (includes `contentSize`, `cached`, and optional `resourceUri`/`resourceMimeType`/`truncated`)
295
+ - `content` blocks that include:
296
+ - a `text` block containing JSON of `structuredContent`
297
+ - in stdio mode, a `resource` block with a `file:///...` URI containing the full content
298
+ - in HTTP mode, a `resource` block when inline content is available; large payloads include a `resource_link` block when cache is enabled
523
299
 
524
- **Claude Desktop** (`claude_desktop_config.json`):
300
+ If content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is disabled, the server truncates output and appends `...[truncated]`.
525
301
 
526
- ```json
527
- {
528
- "mcpServers": {
529
- "superFetch": {
530
- "command": "npx",
531
- "args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
532
- "env": {
533
- "LOG_LEVEL": "debug",
534
- "CACHE_ENABLED": "false"
535
- }
536
- }
537
- }
538
- }
539
- ```
302
+ ---
540
303
 
541
- **Cursor** (MCP settings):
304
+ ### `fetch-url`
542
305
 
543
- ```json
544
- {
545
- "mcpServers": {
546
- "superFetch": {
547
- "command": "npx",
548
- "args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
549
- "env": {
550
- "LOG_LEVEL": "debug",
551
- "CACHE_ENABLED": "false"
552
- }
553
- }
554
- }
555
- }
556
- ```
306
+ Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. You can also request Markdown with `format: "markdown"`.
557
307
 
558
- </details>
308
+ | Parameter | Type | Default | Description |
309
+ | -------------------- | --------------------- | --------- | --------------------------------------------- |
310
+ | `url` | string | required | URL to fetch |
311
+ | `format` | "jsonl" \| "markdown" | `"jsonl"` | Output format |
312
+ | `extractMainContent` | boolean | `true` | Use Readability to extract main content |
313
+ | `includeMetadata` | boolean | `true` | Include page metadata |
314
+ | `maxContentLength` | number | - | Maximum content length in characters |
315
+ | `customHeaders` | object | - | Custom HTTP headers (sanitized) |
316
+ | `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
317
+ | `retries` | number | `3` | Number of retry attempts (1-10) |
559
318
 
560
- <details>
561
- <summary><strong>Performance Mode</strong> — Aggressive caching for speed</summary>
319
+ **Example `structuredContent`:**
562
320
 
563
321
  ```json
564
322
  {
565
- "servers": {
566
- "superFetch": {
567
- "command": "npx",
568
- "args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
569
- "env": {
570
- "CACHE_TTL": "7200",
571
- "CACHE_MAX_KEYS": "500",
572
- "LOG_LEVEL": "warn"
573
- }
574
- }
575
- }
323
+ "url": "https://example.com/article",
324
+ "title": "Example Article",
325
+ "contentBlocks": 42,
326
+ "fetchedAt": "2025-12-11T10:30:00.000Z",
327
+ "format": "jsonl",
328
+ "contentSize": 12345,
329
+ "cached": false,
330
+ "content": "{\"type\":\"metadata\",\"title\":\"Example Article\",\"url\":\"https://example.com/article\"}\n{\"type\":\"heading\",\"level\":1,\"text\":\"Introduction\"}"
576
331
  }
577
332
  ```
578
333
 
579
- </details>
334
+ ---
580
335
 
581
- <details>
582
- <summary><strong>Custom User Agent</strong> — For sites that block bots</summary>
336
+ ### `fetch-markdown`
583
337
 
584
- ```json
585
- {
586
- "servers": {
587
- "superFetch": {
588
- "command": "npx",
589
- "args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
590
- "env": {
591
- "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
592
- }
593
- }
594
- }
595
- }
596
- ```
338
+ Fetches a webpage and converts it to clean Markdown with optional frontmatter.
597
339
 
598
- </details>
340
+ | Parameter | Type | Default | Description |
341
+ | -------------------- | ------- | -------- | --------------------------------------------- |
342
+ | `url` | string | required | URL to fetch |
343
+ | `extractMainContent` | boolean | `true` | Extract main content only |
344
+ | `includeMetadata` | boolean | `true` | Include YAML frontmatter |
345
+ | `maxContentLength` | number | - | Maximum content length in characters |
346
+ | `customHeaders` | object | - | Custom HTTP headers (sanitized) |
347
+ | `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
348
+ | `retries` | number | `3` | Number of retry attempts (1-10) |
599
349
 
600
- <details>
601
- <summary><strong>Slow Networks / CI/CD</strong> — Extended timeouts</summary>
350
+ **Example `structuredContent`:**
602
351
 
603
352
  ```json
604
353
  {
605
- "servers": {
606
- "superFetch": {
607
- "command": "npx",
608
- "args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
609
- "env": {
610
- "FETCH_TIMEOUT": "60000",
611
- "CACHE_ENABLED": "false",
612
- "LOG_LEVEL": "warn"
613
- }
614
- }
354
+ "url": "https://example.com/docs",
355
+ "title": "Documentation",
356
+ "fetchedAt": "2025-12-11T10:30:00.000Z",
357
+ "markdown": "---\ntitle: Documentation\nsource: \"https://example.com/docs\"\n---\n\n# Getting Started\n\nWelcome...",
358
+ "contentSize": 9876,
359
+ "cached": false,
360
+ "truncated": false,
361
+ "file": {
362
+ "downloadUrl": "/mcp/downloads/markdown/abc123def456",
363
+ "fileName": "documentation.md",
364
+ "expiresAt": "2025-12-11T11:30:00.000Z"
615
365
  }
616
366
  }
617
367
  ```
618
368
 
619
- </details>
620
-
621
- ### Available Environment Variables
622
-
623
- Configure SuperFetch behavior by adding environment variables to your MCP client configuration's `env` property.
624
-
625
- #### 🌐 Fetcher Settings
369
+ `file` is included only in HTTP mode when content is cached and too large to inline.
626
370
 
627
- | Variable | Default | Valid Values | Description |
628
- | --------------- | -------------------- | -------------------- | --------------------------------------------------------------- |
629
- | `FETCH_TIMEOUT` | `30000` | `5000`-`120000` | Request timeout in milliseconds (5s-2min) |
630
- | `USER_AGENT` | `superFetch-MCP/1.0` | Any valid user agent | Custom user agent for requests (useful for sites blocking bots) |
371
+ ---
631
372
 
632
- #### 💾 Cache Settings
373
+ ### Large Content Handling
633
374
 
634
- | Variable | Default | Valid Values | Description |
635
- | ---------------- | ------- | ---------------- | -------------------------------------- |
636
- | `CACHE_ENABLED` | `true` | `true` / `false` | Enable response caching |
637
- | `CACHE_TTL` | `3600` | `60`-`86400` | Cache lifetime in seconds (1min-24hrs) |
638
- | `CACHE_MAX_KEYS` | `100` | `10`-`1000` | Maximum number of cached entries |
375
+ - Inline limit is configurable via `MAX_INLINE_CONTENT_CHARS` (see `CONFIGURATION.md`).
376
+ - If content exceeds the limit and cache is enabled, responses include `resourceUri` and a `resource_link` block.
377
+ - If content exceeds the limit and cache is disabled, content is truncated and `...[truncated]` is appended.
378
+ - Use `maxContentLength` per request to enforce a lower limit.
379
+ - Upstream fetch size is capped at 10 MB of HTML; larger responses fail.
639
380
 
640
- #### 📦 Output Settings
381
+ ---
641
382
 
642
- | Variable | Default | Valid Values | Description |
643
- | -------------------------- | ------- | --------------- | --------------------------------------------------------------- |
644
- | `MAX_INLINE_CONTENT_CHARS` | `20000` | `1000`-`200000` | Inline content limit before returning a `resource_link` instead |
383
+ ## Resources
645
384
 
646
- #### 📝 Logging Settings
385
+ | URI | Description |
386
+ | ------------------------------------------ | -------------------------------------------------------------------------- |
387
+ | `superfetch://health` | Real-time server health and memory checks |
388
+ | `superfetch://stats` | Server stats and cache metrics |
389
+ | `superfetch://cache/list` | List cached entries and their resource URIs |
390
+ | `superfetch://cache/{namespace}/{urlHash}` | Cached content entry (`namespace`: `url`, `markdown`; `links` is reserved) |
647
391
 
648
- | Variable | Default | Valid Values | Description |
649
- | ---------------- | ------- | ----------------------------------- | -------------------------- |
650
- | `LOG_LEVEL` | `info` | `debug` / `info` / `warn` / `error` | Logging verbosity level |
651
- | `ENABLE_LOGGING` | `true` | `true` / `false` | Enable/disable all logging |
392
+ Resource subscriptions notify clients when cache entries are updated.
652
393
 
653
- #### 🔍 Extraction Settings
394
+ ---
654
395
 
655
- | Variable | Default | Valid Values | Description |
656
- | ---------------------- | ------- | ---------------- | -------------------------------------------------- |
657
- | `EXTRACT_MAIN_CONTENT` | `true` | `true` / `false` | Use Mozilla Readability to extract main content |
658
- | `INCLUDE_METADATA` | `true` | `true` / `false` | Include page metadata (title, description, author) |
396
+ ## Download Endpoint (HTTP Mode)
659
397
 
660
- #### 🛡️ Security Settings
398
+ When running in HTTP mode, cached content can be downloaded directly. Downloads are available only when cache is enabled.
661
399
 
662
- | Variable | Default | Description |
663
- | -------------- | ------- | -------------------------------------------------------- |
664
- | `API_KEY` | - | API Key for HTTP authentication (required for HTTP mode) |
665
- | `ALLOW_REMOTE` | `false` | Allow binding to non-loopback interfaces |
400
+ ### Endpoint
666
401
 
667
- #### Rate Limiting
402
+ ```text
403
+ GET /mcp/downloads/:namespace/:hash
404
+ ```
668
405
 
669
- | Variable | Default | Valid Values | Description |
670
- | ----------------------- | ------- | ----------------- | ------------------------------------ |
671
- | `RATE_LIMIT_ENABLED` | `true` | `true` / `false` | Enable/disable HTTP rate limiting |
672
- | `RATE_LIMIT_MAX` | `100` | `1`-`10000` | Max requests per window per IP |
673
- | `RATE_LIMIT_WINDOW_MS` | `60000` | `1000`-`3600000` | Rate limit window in milliseconds |
674
- | `RATE_LIMIT_CLEANUP_MS` | `60000` | `10000`-`3600000` | Cleanup interval for limiter entries |
406
+ - `namespace`: `markdown` or `url`
407
+ - Auth required (`Authorization: Bearer <API_KEY>` or `X-API-Key: <API_KEY>`)
675
408
 
676
- ### HTTP Mode Configuration
409
+ ### Response Headers
677
410
 
678
- <details>
679
- <summary><strong>HTTP Mode</strong> (Advanced) For running as a standalone HTTP server</summary>
411
+ | Header | Value |
412
+ | --------------------- | ----------------------------------------------------------------------- |
413
+ | `Content-Type` | `text/markdown; charset=utf-8` or `application/x-ndjson; charset=utf-8` |
414
+ | `Content-Disposition` | `attachment; filename="<name>"` |
415
+ | `Cache-Control` | `private, max-age=<CACHE_TTL>` |
680
416
 
681
- SuperFetch can run as an HTTP server for custom integrations. HTTP mode requires additional configuration and an `API_KEY` for authenticated access (send `Authorization: Bearer <key>` or `X-API-Key: <key>`).
682
-
683
- #### Start HTTP Server
417
+ ### Example Usage
684
418
 
685
419
  ```bash
686
- npx -y @j0hanz/superfetch@latest
687
- # Server runs at http://127.0.0.1:3000
420
+ curl -H "Authorization: Bearer $API_KEY" \
421
+ http://localhost:3000/mcp/downloads/markdown/abc123.def456 \
422
+ -o article.md
688
423
  ```
689
424
 
690
- #### HTTP-Specific Environment Variables
425
+ ### Error Responses
691
426
 
692
- | Variable | Default | Description |
693
- | ------------------------- | ----------- | ------------------------------------------------ |
694
- | `PORT` | `3000` | HTTP server port |
695
- | `HOST` | `127.0.0.1` | HTTP server host (`0.0.0.0` for Docker/K8s) |
696
- | `ALLOWED_ORIGINS` | `[]` | Comma-separated CORS origins |
697
- | `CORS_ALLOW_ALL` | `false` | Allow all CORS origins (dev only, security risk) |
698
- | `SESSION_TTL_MS` | `1800000` | Session time-to-live in milliseconds (30 mins) |
699
- | `SESSION_INIT_TIMEOUT_MS` | `10000` | Time allowed for session initialization (ms) |
700
- | `MAX_SESSIONS` | `200` | Maximum number of active sessions |
427
+ | Status | Code | Description |
428
+ | ------ | --------------------- | -------------------------------- |
429
+ | 400 | `BAD_REQUEST` | Invalid namespace or hash format |
430
+ | 404 | `NOT_FOUND` | Content not found or expired |
431
+ | 503 | `SERVICE_UNAVAILABLE` | Download service disabled |
701
432
 
702
- #### VS Code HTTP Mode Setup
433
+ ---
703
434
 
704
- ```json
705
- {
706
- "servers": {
707
- "superFetch": {
708
- "type": "http",
709
- "url": "http://127.0.0.1:3000/mcp"
710
- }
711
- }
712
- }
713
- ```
435
+ ## Configuration
714
436
 
715
- #### Docker/Kubernetes Example
437
+ Configuration details live in `CONFIGURATION.md`, including all environment variables, defaults, ranges, presets, and dev-only flags.
716
438
 
717
- ```bash
718
- PORT=8080 HOST=0.0.0.0 ALLOWED_ORIGINS=https://myapp.com npx @j0hanz/superfetch@latest
719
- ```
439
+ ---
720
440
 
721
- </details>
441
+ ## HTTP Mode Details
442
+
443
+ HTTP mode uses the MCP Streamable HTTP transport. The workflow is:
722
444
 
723
- ### Configuration Cookbook
445
+ 1. `POST /mcp` with an `initialize` request and **no** `mcp-session-id` header.
446
+ 2. The server returns `mcp-session-id` in the response headers.
447
+ 3. Use that header for subsequent `POST /mcp`, `GET /mcp`, and `DELETE /mcp` requests.
724
448
 
725
- | Use Case | Configuration |
726
- | ---------------------------- | -------------------------------------------------------------- |
727
- | 🐛 **Debugging issues** | `LOG_LEVEL=debug`, `CACHE_ENABLED=false` |
728
- | 🚀 **Maximum performance** | `CACHE_TTL=7200`, `CACHE_MAX_KEYS=500`, `LOG_LEVEL=error` |
729
- | 🌐 **Slow target sites** | `FETCH_TIMEOUT=60000` |
730
- | 🤖 **Bypass bot detection** | `USER_AGENT="Mozilla/5.0 (compatible; MyBot/1.0)"` |
731
- | 🔄 **CI/CD (always fresh)** | `CACHE_ENABLED=false`, `FETCH_TIMEOUT=60000`, `LOG_LEVEL=warn` |
732
- | 📊 **Production monitoring** | `LOG_LEVEL=warn` or `error` |
449
+ If `MAX_SESSIONS` is reached, the server evicts the oldest session when possible; otherwise, it returns a 503 response.
733
450
 
734
451
  ---
735
452
 
@@ -737,15 +454,16 @@ PORT=8080 HOST=0.0.0.0 ALLOWED_ORIGINS=https://myapp.com npx @j0hanz/superfetch@
737
454
 
738
455
  JSONL output includes semantic content blocks:
739
456
 
740
- | Type | Description |
741
- | ----------- | ----------------------------------------------- |
742
- | `metadata` | Page title, description, author, URL, timestamp |
743
- | `heading` | Headings (h1-h6) with level indicator |
744
- | `paragraph` | Text paragraphs |
745
- | `list` | Ordered/unordered lists |
746
- | `code` | Code blocks with language |
747
- | `table` | Tables with headers and rows |
748
- | `image` | Images with src and alt text |
457
+ | Type | Description |
458
+ | ------------ | ---------------------------------------- |
459
+ | `metadata` | Minimal page metadata (type, title, url) |
460
+ | `heading` | Headings (h1-h6) with level indicator |
461
+ | `paragraph` | Text paragraphs |
462
+ | `list` | Ordered/unordered lists |
463
+ | `code` | Code blocks with optional language |
464
+ | `table` | Tables with headers and rows |
465
+ | `image` | Images with src and alt text |
466
+ | `blockquote` | Block quote text |
749
467
 
750
468
  ---
751
469
 
@@ -753,12 +471,19 @@ JSONL output includes semantic content blocks:
753
471
 
754
472
  ### SSRF Protection
755
473
 
756
- Blocked destinations:
474
+ Blocked destinations include:
757
475
 
758
476
  - Localhost and loopback addresses
759
477
  - Private IP ranges (`10.x.x.x`, `172.16-31.x.x`, `192.168.x.x`)
760
478
  - Cloud metadata endpoints (AWS, GCP, Azure)
761
479
  - IPv6 link-local and unique local addresses
480
+ - Internal suffixes such as `.local` and `.internal`
481
+
482
+ ### URL Validation
483
+
484
+ - Only `http` and `https` URLs
485
+ - No embedded credentials in URLs
486
+ - Max URL length: 2048 characters
762
487
 
763
488
  ### Header Sanitization
764
489
 
@@ -766,21 +491,7 @@ Blocked headers: `host`, `authorization`, `cookie`, `x-forwarded-for`, `x-real-i
766
491
 
767
492
  ### Rate Limiting
768
493
 
769
- Default: **100 requests/minute** per IP. Configure with `RATE_LIMIT_MAX` and
770
- `RATE_LIMIT_WINDOW_MS`.
771
-
772
- ### HTTP Mode Endpoints
773
-
774
- When running without `--stdio`, the following endpoints are available:
775
-
776
- | Endpoint | Method | Description |
777
- | --------- | ------ | --------------------------------------- |
778
- | `/health` | GET | Health check with uptime and version |
779
- | `/mcp` | POST | MCP request handling (requires session) |
780
- | `/mcp` | GET | SSE stream for notifications |
781
- | `/mcp` | DELETE | Close session |
782
-
783
- Sessions are managed via `mcp-session-id` header with 30-minute TTL.
494
+ Rate limiting thresholds are configurable via `RATE_LIMIT_MAX` and `RATE_LIMIT_WINDOW_MS` (see `CONFIGURATION.md`).
784
495
 
785
496
  ---
786
497
 
@@ -807,14 +518,13 @@ Sessions are managed via `mcp-session-id` header with 30-minute TTL.
807
518
 
808
519
  | Category | Technology |
809
520
  | ------------------ | --------------------------------- |
810
- | Runtime | Node.js 20.0.0 |
521
+ | Runtime | Node.js >=20.12 |
811
522
  | Language | TypeScript 5.9 |
812
523
  | MCP SDK | @modelcontextprotocol/sdk ^1.25.1 |
813
524
  | Content Extraction | @mozilla/readability ^0.6.0 |
814
525
  | HTML Parsing | Cheerio ^1.1.2, LinkeDOM ^0.18.12 |
815
526
  | Markdown | Turndown ^7.2.2 |
816
- | HTTP | Express ^5.2.1, Axios ^1.7.9 |
817
- | Caching | node-cache ^5.1.2 |
527
+ | HTTP | Express ^5.2.1, undici ^6.22.0 |
818
528
  | Validation | Zod ^3.24.1 |
819
529
 
820
530
  ---