@j0hanz/superfetch 1.1.9 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +258 -362
- package/dist/config/constants.d.ts +20 -0
- package/dist/config/constants.d.ts.map +1 -0
- package/dist/config/constants.js +25 -0
- package/dist/config/constants.js.map +1 -0
- package/dist/config/formatting.d.ts +0 -1
- package/dist/config/formatting.d.ts.map +1 -1
- package/dist/config/formatting.js +1 -1
- package/dist/config/formatting.js.map +1 -1
- package/dist/config/index.d.ts +8 -1
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +14 -5
- package/dist/config/index.js.map +1 -1
- package/dist/config/types/content.d.ts +1 -19
- package/dist/config/types/content.d.ts.map +1 -1
- package/dist/config/types/runtime.d.ts +7 -4
- package/dist/config/types/runtime.d.ts.map +1 -1
- package/dist/config/types/tools.d.ts +5 -49
- package/dist/config/types/tools.d.ts.map +1 -1
- package/dist/http/auth.d.ts.map +1 -1
- package/dist/http/auth.js +17 -12
- package/dist/http/auth.js.map +1 -1
- package/dist/http/cors.js +4 -0
- package/dist/http/cors.js.map +1 -1
- package/dist/http/download-routes.d.ts +15 -0
- package/dist/http/download-routes.d.ts.map +1 -0
- package/dist/http/download-routes.js +132 -0
- package/dist/http/download-routes.js.map +1 -0
- package/dist/http/mcp-routes.d.ts +1 -1
- package/dist/http/mcp-routes.d.ts.map +1 -1
- package/dist/http/mcp-routes.js +1 -1
- package/dist/http/mcp-routes.js.map +1 -1
- package/dist/http/mcp-session-helpers.d.ts +14 -0
- package/dist/http/mcp-session-helpers.d.ts.map +1 -0
- package/dist/http/mcp-session-helpers.js +65 -0
- package/dist/http/mcp-session-helpers.js.map +1 -0
- package/dist/http/mcp-session.d.ts +0 -1
- package/dist/http/mcp-session.d.ts.map +1 -1
- package/dist/http/mcp-session.js +7 -70
- package/dist/http/mcp-session.js.map +1 -1
- package/dist/http/server-middleware.d.ts +10 -0
- package/dist/http/server-middleware.d.ts.map +1 -0
- package/dist/http/server-middleware.js +56 -0
- package/dist/http/server-middleware.js.map +1 -0
- package/dist/http/server.d.ts.map +1 -1
- package/dist/http/server.js +20 -98
- package/dist/http/server.js.map +1 -1
- package/dist/http/session-cleanup.d.ts +3 -0
- package/dist/http/session-cleanup.d.ts.map +1 -0
- package/dist/http/session-cleanup.js +38 -0
- package/dist/http/session-cleanup.js.map +1 -0
- package/dist/index.js +13 -5
- package/dist/index.js.map +1 -1
- package/dist/resources/cached-content.d.ts.map +1 -1
- package/dist/resources/cached-content.js +76 -11
- package/dist/resources/cached-content.js.map +1 -1
- package/dist/services/cache.d.ts +6 -2
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +86 -25
- package/dist/services/cache.js.map +1 -1
- package/dist/services/context.d.ts +2 -1
- package/dist/services/context.d.ts.map +1 -1
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +45 -17
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher/agents.d.ts.map +1 -1
- package/dist/services/fetcher/agents.js +3 -6
- package/dist/services/fetcher/agents.js.map +1 -1
- package/dist/services/fetcher/headers.d.ts.map +1 -1
- package/dist/services/fetcher/headers.js +2 -24
- package/dist/services/fetcher/headers.js.map +1 -1
- package/dist/services/fetcher/interceptors.d.ts +2 -1
- package/dist/services/fetcher/interceptors.d.ts.map +1 -1
- package/dist/services/fetcher/interceptors.js +30 -20
- package/dist/services/fetcher/interceptors.js.map +1 -1
- package/dist/services/fetcher/redirects.d.ts +0 -1
- package/dist/services/fetcher/redirects.d.ts.map +1 -1
- package/dist/services/fetcher/redirects.js +19 -16
- package/dist/services/fetcher/redirects.js.map +1 -1
- package/dist/services/fetcher/retry-policy.d.ts +1 -27
- package/dist/services/fetcher/retry-policy.d.ts.map +1 -1
- package/dist/services/fetcher/retry-policy.js +119 -125
- package/dist/services/fetcher/retry-policy.js.map +1 -1
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +15 -9
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/parser.d.ts +0 -1
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +5 -38
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links/link-extractor.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links/link-extractor.js +15 -19
- package/dist/tools/handlers/fetch-links/link-extractor.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +0 -2
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +16 -17
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-single.shared.d.ts +11 -2
- package/dist/tools/handlers/fetch-single.shared.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-single.shared.js +61 -2
- package/dist/tools/handlers/fetch-single.shared.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +3 -14
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls/validation.d.ts +0 -1
- package/dist/tools/handlers/fetch-urls/validation.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-urls/validation.js +1 -1
- package/dist/tools/handlers/fetch-urls/validation.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +1 -19
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/schemas.d.ts +44 -236
- package/dist/tools/schemas.d.ts.map +1 -1
- package/dist/tools/schemas.js +38 -197
- package/dist/tools/schemas.js.map +1 -1
- package/dist/tools/utils/cache-vary.d.ts +0 -1
- package/dist/tools/utils/cache-vary.d.ts.map +1 -1
- package/dist/tools/utils/cache-vary.js +11 -25
- package/dist/tools/utils/cache-vary.js.map +1 -1
- package/dist/tools/utils/common.d.ts +1 -2
- package/dist/tools/utils/common.d.ts.map +1 -1
- package/dist/tools/utils/common.js.map +1 -1
- package/dist/tools/utils/content-transform.d.ts.map +1 -1
- package/dist/tools/utils/content-transform.js +28 -13
- package/dist/tools/utils/content-transform.js.map +1 -1
- package/dist/tools/utils/fetch-pipeline.js +14 -3
- package/dist/tools/utils/fetch-pipeline.js.map +1 -1
- package/dist/tools/utils/inline-content.d.ts +3 -2
- package/dist/tools/utils/inline-content.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +3 -6
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/utils/code-language.d.ts +3 -0
- package/dist/utils/code-language.d.ts.map +1 -0
- package/dist/utils/code-language.js +57 -0
- package/dist/utils/code-language.js.map +1 -0
- package/dist/utils/content-cleaner.d.ts +0 -1
- package/dist/utils/content-cleaner.d.ts.map +1 -1
- package/dist/utils/content-cleaner.js +0 -3
- package/dist/utils/content-cleaner.js.map +1 -1
- package/dist/utils/crypto.d.ts +3 -0
- package/dist/utils/crypto.d.ts.map +1 -0
- package/dist/utils/crypto.js +33 -0
- package/dist/utils/crypto.js.map +1 -0
- package/dist/utils/download-url.d.ts +9 -0
- package/dist/utils/download-url.d.ts.map +1 -0
- package/dist/utils/download-url.js +28 -0
- package/dist/utils/download-url.js.map +1 -0
- package/dist/utils/error-utils.d.ts +4 -0
- package/dist/utils/error-utils.d.ts.map +1 -0
- package/dist/utils/error-utils.js +14 -0
- package/dist/utils/error-utils.js.map +1 -0
- package/dist/utils/filename-generator.d.ts +2 -0
- package/dist/utils/filename-generator.d.ts.map +1 -0
- package/dist/utils/filename-generator.js +60 -0
- package/dist/utils/filename-generator.js.map +1 -0
- package/dist/utils/header-normalizer.d.ts +7 -3
- package/dist/utils/header-normalizer.d.ts.map +1 -1
- package/dist/utils/header-normalizer.js +23 -16
- package/dist/utils/header-normalizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts +0 -1
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +11 -5
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-sanitizer.d.ts +2 -0
- package/dist/utils/url-sanitizer.d.ts.map +1 -0
- package/dist/utils/url-sanitizer.js +12 -0
- package/dist/utils/url-sanitizer.js.map +1 -0
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +46 -44
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +4 -6
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
#
|
|
1
|
+
# superFetch MCP Server
|
|
2
2
|
|
|
3
3
|
<img src="docs/logo.png" alt="SuperFetch MCP Logo" width="200">
|
|
4
4
|
|
|
5
|
-
[](https://www.npmjs.com/package/@j0hanz/superfetch) [](https://www.npmjs.com/package/@j0hanz/superfetch) [](https://nodejs.org/) [](https://www.typescriptlang.org/)
|
|
6
6
|
|
|
7
7
|
## One-Click Install
|
|
8
8
|
|
|
@@ -10,71 +10,64 @@
|
|
|
10
10
|
|
|
11
11
|
[Install in Cursor](https://cursor.com/install-mcp?name=superfetch&config=eyJjb21tYW5kIjoibnB4IiwiYXJncyI6WyIteSIsIkBqMGhhbnovc3VwZXJmZXRjaEBsYXRlc3QiLCItLXN0ZGlvIl19)
|
|
12
12
|
|
|
13
|
-
A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches, extracts
|
|
13
|
+
A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches web pages, extracts readable content with Mozilla Readability, and returns AI-friendly JSONL or Markdown.
|
|
14
14
|
|
|
15
|
-
[Quick Start](#quick-start)
|
|
15
|
+
[Quick Start](#quick-start) | [How to Choose a Tool](#how-to-choose-a-tool) | [Tools](#available-tools) | [Resources](#resources) | [Configuration](#configuration) | [Security](#security) | [Development](#development)
|
|
16
16
|
|
|
17
|
-
>
|
|
17
|
+
> **Published to [MCP Registry](https://registry.modelcontextprotocol.io/)** - Search for `io.github.j0hanz/superfetch`
|
|
18
18
|
|
|
19
19
|
---
|
|
20
20
|
|
|
21
21
|
> [!CAUTION]
|
|
22
22
|
> This server can access URLs on behalf of AI assistants. Built-in SSRF protection blocks private IP ranges and cloud metadata endpoints, but exercise caution when deploying in sensitive environments.
|
|
23
23
|
|
|
24
|
-
##
|
|
24
|
+
## Features
|
|
25
25
|
|
|
26
|
-
| Feature
|
|
27
|
-
|
|
|
28
|
-
|
|
|
29
|
-
|
|
|
30
|
-
|
|
|
31
|
-
|
|
|
32
|
-
|
|
|
33
|
-
|
|
|
34
|
-
|
|
|
26
|
+
| Feature | Description |
|
|
27
|
+
| ------------------ | ------------------------------------------------------------------------- |
|
|
28
|
+
| Smart extraction | Mozilla Readability removes ads, navigation, and boilerplate when enabled |
|
|
29
|
+
| JSONL + Markdown | JSONL semantic blocks or clean Markdown with frontmatter |
|
|
30
|
+
| Structured blocks | Headings, paragraphs, lists, code, tables, images, blockquotes |
|
|
31
|
+
| Built-in caching | In-memory cache with TTL, max keys, and resource subscriptions |
|
|
32
|
+
| Resilient fetching | Redirect handling plus retry with exponential backoff + jitter |
|
|
33
|
+
| Security first | URL validation, SSRF/DNS/IP blocklists, header sanitization |
|
|
34
|
+
| HTTP mode | API key auth, session management, rate limiting, CORS |
|
|
35
35
|
|
|
36
36
|
---
|
|
37
37
|
|
|
38
|
-
##
|
|
38
|
+
## How to Choose a Tool
|
|
39
39
|
|
|
40
|
-
Use this guide to select the right tool for your web content extraction needs
|
|
40
|
+
Use this guide to select the right tool for your web content extraction needs.
|
|
41
41
|
|
|
42
42
|
### Decision Tree
|
|
43
43
|
|
|
44
44
|
```text
|
|
45
45
|
Need web content for AI?
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
│ └─ Need links only → fetch-links
|
|
50
|
-
└─ Multiple URLs?
|
|
51
|
-
└─ Use fetch-urls (batch processing)
|
|
46
|
+
- Want structured JSONL blocks -> fetch-url (format: jsonl)
|
|
47
|
+
- Want clean Markdown -> fetch-markdown
|
|
48
|
+
- Want Markdown but also need contentBlocks count -> fetch-url (format: markdown)
|
|
52
49
|
```
|
|
53
50
|
|
|
54
51
|
### Quick Reference Table
|
|
55
52
|
|
|
56
|
-
| Tool | Best For
|
|
57
|
-
| ---------------- |
|
|
58
|
-
| `fetch-url` | Single page
|
|
59
|
-
| `fetch-markdown` | Single page
|
|
60
|
-
| `fetch-links` | Link discovery & classification | URL array with types | Sitemap building, finding related pages |
|
|
61
|
-
| `fetch-urls` | Batch processing multiple pages | Multiple JSONL/Markdown | Comparing pages, bulk extraction |
|
|
53
|
+
| Tool | Best For | Output Format | Use When |
|
|
54
|
+
| ---------------- | ---------------------------------- | -------------------------------- | ----------------------------------------- |
|
|
55
|
+
| `fetch-url` | Single page with structured blocks | JSONL (or Markdown via `format`) | RAG pipelines, content parsing, analytics |
|
|
56
|
+
| `fetch-markdown` | Single page in readable format | Markdown + frontmatter | Documentation, summaries, human review |
|
|
62
57
|
|
|
63
58
|
### Common Use Cases
|
|
64
59
|
|
|
65
60
|
| Task | Recommended Tool | Why |
|
|
66
61
|
| ------------------------ | ---------------------------------------- | ---------------------------------------------------- |
|
|
67
62
|
| Parse a blog post for AI | `fetch-url` | Returns semantic blocks (headings, paragraphs, code) |
|
|
68
|
-
| Generate documentation | `fetch-markdown` | Clean markdown with
|
|
69
|
-
| Build a sitemap | `fetch-links` | Extracts and classifies all links |
|
|
70
|
-
| Compare multiple docs | `fetch-urls` | Parallel fetching with concurrency control |
|
|
63
|
+
| Generate documentation | `fetch-markdown` | Clean markdown with frontmatter |
|
|
71
64
|
| Extract article for RAG | `fetch-url` + `extractMainContent: true` | Removes ads/nav, keeps main content |
|
|
72
65
|
|
|
73
66
|
---
|
|
74
67
|
|
|
75
68
|
## Quick Start
|
|
76
69
|
|
|
77
|
-
Add superFetch to your MCP client configuration
|
|
70
|
+
Add superFetch to your MCP client configuration - no installation required.
|
|
78
71
|
|
|
79
72
|
### Claude Desktop
|
|
80
73
|
|
|
@@ -126,7 +119,7 @@ Configure SuperFetch behavior by adding environment variables to the `env` prope
|
|
|
126
119
|
}
|
|
127
120
|
```
|
|
128
121
|
|
|
129
|
-
See [Configuration](#configuration)
|
|
122
|
+
See [Configuration](#configuration) for all available options.
|
|
130
123
|
|
|
131
124
|
### Cursor
|
|
132
125
|
|
|
@@ -146,7 +139,7 @@ See [Configuration](#configuration) section below for all available options and
|
|
|
146
139
|
}
|
|
147
140
|
```
|
|
148
141
|
|
|
149
|
-
> **Tip
|
|
142
|
+
> **Tip (Windows):** If you encounter issues, try: `cmd /c "npx -y @j0hanz/superfetch@latest --stdio"`
|
|
150
143
|
|
|
151
144
|
<details>
|
|
152
145
|
<summary><strong>Codex IDE</strong></summary>
|
|
@@ -170,7 +163,7 @@ args = ["-y", "@j0hanz/superfetch@latest", "--stdio"]
|
|
|
170
163
|
env = { CACHE_TTL = "7200", LOG_LEVEL = "debug", FETCH_TIMEOUT = "60000" }
|
|
171
164
|
```
|
|
172
165
|
|
|
173
|
-
> **Access config file:** Click the gear icon
|
|
166
|
+
> **Access config file:** Click the gear icon -> "Codex Settings > Open config.toml"
|
|
174
167
|
>
|
|
175
168
|
> **Documentation:** [Codex MCP Guide](https://codex.com/docs/mcp)
|
|
176
169
|
|
|
@@ -261,7 +254,7 @@ npm install -g @j0hanz/superfetch
|
|
|
261
254
|
# Run in stdio mode
|
|
262
255
|
superfetch --stdio
|
|
263
256
|
|
|
264
|
-
# Run HTTP server
|
|
257
|
+
# Run HTTP server (requires API_KEY)
|
|
265
258
|
superfetch
|
|
266
259
|
```
|
|
267
260
|
|
|
@@ -277,216 +270,263 @@ npm run build
|
|
|
277
270
|
### Running the Server
|
|
278
271
|
|
|
279
272
|
<details>
|
|
280
|
-
<summary><strong>
|
|
273
|
+
<summary><strong>stdio Mode</strong> (direct MCP integration)</summary>
|
|
281
274
|
|
|
282
275
|
```bash
|
|
283
|
-
|
|
284
|
-
npm run dev
|
|
285
|
-
|
|
286
|
-
# Production
|
|
287
|
-
npm start
|
|
276
|
+
node dist/index.js --stdio
|
|
288
277
|
```
|
|
289
278
|
|
|
290
|
-
Server runs at `http://127.0.0.1:3000`:
|
|
291
|
-
|
|
292
|
-
- Health check: `GET /health`
|
|
293
|
-
- MCP endpoint: `POST /mcp`
|
|
294
|
-
|
|
295
279
|
</details>
|
|
296
280
|
|
|
297
281
|
<details>
|
|
298
|
-
<summary><strong>
|
|
282
|
+
<summary><strong>HTTP Mode</strong> (default)</summary>
|
|
283
|
+
|
|
284
|
+
HTTP mode requires `API_KEY` and only binds to loopback addresses unless `ALLOW_REMOTE=true`.
|
|
299
285
|
|
|
300
286
|
```bash
|
|
301
|
-
|
|
287
|
+
API_KEY=supersecret npx -y @j0hanz/superfetch@latest
|
|
288
|
+
# Server runs at http://127.0.0.1:3000
|
|
302
289
|
```
|
|
303
290
|
|
|
291
|
+
**Windows (PowerShell):**
|
|
292
|
+
|
|
293
|
+
```powershell
|
|
294
|
+
$env:API_KEY = "supersecret"
|
|
295
|
+
npx -y @j0hanz/superfetch@latest
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
Endpoints (all require `Authorization: Bearer <API_KEY>` or `X-API-Key: <API_KEY>`):
|
|
299
|
+
|
|
300
|
+
- `GET /health`
|
|
301
|
+
- `POST /mcp`
|
|
302
|
+
- `GET /mcp` (SSE stream)
|
|
303
|
+
- `DELETE /mcp`
|
|
304
|
+
- `GET /mcp/downloads/:namespace/:hash`
|
|
305
|
+
|
|
306
|
+
Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#http-mode-details)).
|
|
307
|
+
|
|
304
308
|
</details>
|
|
305
309
|
|
|
306
310
|
---
|
|
307
311
|
|
|
308
312
|
## Available Tools
|
|
309
313
|
|
|
310
|
-
|
|
314
|
+
### Tool Response Notes
|
|
315
|
+
|
|
316
|
+
Both tools return:
|
|
317
|
+
|
|
318
|
+
- `structuredContent` for machine-readable fields
|
|
319
|
+
- `content` blocks that include:
|
|
320
|
+
- a `text` block containing JSON of `structuredContent`
|
|
321
|
+
- a `resource` block with a `file:///...` URI containing the full content (stdio-friendly)
|
|
322
|
+
- a `resource_link` block when content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is enabled
|
|
323
|
+
|
|
324
|
+
If content is too large and cache is disabled, the server truncates output and appends `...[truncated]`.
|
|
325
|
+
|
|
326
|
+
---
|
|
311
327
|
|
|
312
328
|
### `fetch-url`
|
|
313
329
|
|
|
314
|
-
Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks.
|
|
330
|
+
Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. You can also request Markdown with `format: "markdown"`.
|
|
315
331
|
|
|
316
|
-
| Parameter | Type
|
|
317
|
-
| -------------------- |
|
|
318
|
-
| `url` | string
|
|
319
|
-
| `
|
|
320
|
-
| `
|
|
321
|
-
| `
|
|
322
|
-
| `
|
|
323
|
-
| `
|
|
324
|
-
| `
|
|
332
|
+
| Parameter | Type | Default | Description |
|
|
333
|
+
| -------------------- | --------------------- | --------- | --------------------------------------------- |
|
|
334
|
+
| `url` | string | required | URL to fetch |
|
|
335
|
+
| `format` | "jsonl" \| "markdown" | `"jsonl"` | Output format |
|
|
336
|
+
| `extractMainContent` | boolean | `true` | Use Readability to extract main content |
|
|
337
|
+
| `includeMetadata` | boolean | `true` | Include page metadata |
|
|
338
|
+
| `maxContentLength` | number | - | Maximum content length in characters |
|
|
339
|
+
| `customHeaders` | object | - | Custom HTTP headers (sanitized) |
|
|
340
|
+
| `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
|
|
341
|
+
| `retries` | number | `3` | Number of retry attempts (1-10) |
|
|
325
342
|
|
|
326
|
-
**Example
|
|
343
|
+
**Example `structuredContent`:**
|
|
327
344
|
|
|
328
345
|
```json
|
|
329
346
|
{
|
|
330
347
|
"url": "https://example.com/article",
|
|
331
348
|
"title": "Example Article",
|
|
349
|
+
"contentBlocks": 42,
|
|
332
350
|
"fetchedAt": "2025-12-11T10:30:00.000Z",
|
|
333
|
-
"
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
"description": "A sample article"
|
|
338
|
-
},
|
|
339
|
-
{ "type": "heading", "level": 1, "text": "Introduction" },
|
|
340
|
-
{
|
|
341
|
-
"type": "paragraph",
|
|
342
|
-
"text": "This is the main content of the article..."
|
|
343
|
-
},
|
|
344
|
-
{
|
|
345
|
-
"type": "code",
|
|
346
|
-
"language": "javascript",
|
|
347
|
-
"content": "console.log('Hello');"
|
|
348
|
-
}
|
|
349
|
-
],
|
|
350
|
-
"cached": false
|
|
351
|
+
"format": "jsonl",
|
|
352
|
+
"contentSize": 12345,
|
|
353
|
+
"cached": false,
|
|
354
|
+
"content": "{\"type\":\"metadata\",\"title\":\"Example Article\",\"url\":\"https://example.com/article\"}\n{\"type\":\"heading\",\"level\":1,\"text\":\"Introduction\"}"
|
|
351
355
|
}
|
|
352
356
|
```
|
|
353
357
|
|
|
354
|
-
|
|
358
|
+
---
|
|
359
|
+
|
|
360
|
+
### `fetch-markdown`
|
|
355
361
|
|
|
356
|
-
|
|
362
|
+
Fetches a webpage and converts it to clean Markdown with optional frontmatter.
|
|
357
363
|
|
|
358
|
-
| Parameter
|
|
359
|
-
|
|
|
360
|
-
| `url`
|
|
361
|
-
| `
|
|
362
|
-
| `
|
|
363
|
-
| `
|
|
364
|
-
| `
|
|
365
|
-
| `
|
|
366
|
-
| `
|
|
367
|
-
| `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
|
|
368
|
-
| `retries` | number | `3` | Number of retry attempts (1-10) |
|
|
364
|
+
| Parameter | Type | Default | Description |
|
|
365
|
+
| -------------------- | ------- | -------- | --------------------------------------------- |
|
|
366
|
+
| `url` | string | required | URL to fetch |
|
|
367
|
+
| `extractMainContent` | boolean | `true` | Extract main content only |
|
|
368
|
+
| `includeMetadata` | boolean | `true` | Include YAML frontmatter |
|
|
369
|
+
| `maxContentLength` | number | - | Maximum content length in characters |
|
|
370
|
+
| `customHeaders` | object | - | Custom HTTP headers (sanitized) |
|
|
371
|
+
| `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
|
|
372
|
+
| `retries` | number | `3` | Number of retry attempts (1-10) |
|
|
369
373
|
|
|
370
|
-
**Example
|
|
374
|
+
**Example `structuredContent`:**
|
|
371
375
|
|
|
372
376
|
```json
|
|
373
377
|
{
|
|
374
|
-
"url": "https://example.com/",
|
|
375
|
-
"
|
|
376
|
-
"
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
"text": "About Us",
|
|
380
|
-
"type": "internal"
|
|
381
|
-
},
|
|
382
|
-
{
|
|
383
|
-
"href": "https://github.com/example",
|
|
384
|
-
"text": "GitHub",
|
|
385
|
-
"type": "external"
|
|
386
|
-
},
|
|
387
|
-
{ "href": "https://example.com/logo.png", "text": "", "type": "image" }
|
|
388
|
-
],
|
|
378
|
+
"url": "https://example.com/docs",
|
|
379
|
+
"title": "Documentation",
|
|
380
|
+
"fetchedAt": "2025-12-11T10:30:00.000Z",
|
|
381
|
+
"markdown": "---\ntitle: Documentation\nsource: \"https://example.com/docs\"\n---\n\n# Getting Started\n\nWelcome...",
|
|
382
|
+
"contentSize": 9876,
|
|
389
383
|
"cached": false,
|
|
390
|
-
"truncated": false
|
|
384
|
+
"truncated": false,
|
|
385
|
+
"file": {
|
|
386
|
+
"downloadUrl": "/mcp/downloads/markdown/abc123def456",
|
|
387
|
+
"fileName": "documentation.md",
|
|
388
|
+
"expiresAt": "2025-12-11T11:30:00.000Z"
|
|
389
|
+
}
|
|
391
390
|
}
|
|
392
391
|
```
|
|
393
392
|
|
|
394
|
-
|
|
393
|
+
`file` is included only in HTTP mode when content is cached and too large to inline.
|
|
395
394
|
|
|
396
|
-
|
|
395
|
+
---
|
|
397
396
|
|
|
398
|
-
|
|
399
|
-
| -------------------- | ------- | ---------- | -------------------------------------------- |
|
|
400
|
-
| `url` | string | _required_ | URL to fetch |
|
|
401
|
-
| `extractMainContent` | boolean | `true` | Extract main content only |
|
|
402
|
-
| `includeMetadata` | boolean | `true` | Include YAML frontmatter |
|
|
403
|
-
| `maxContentLength` | number | – | Maximum content length in characters |
|
|
404
|
-
| `generateToc` | boolean | `false` | Generate table of contents from headings |
|
|
405
|
-
| `customHeaders` | object | – | Custom HTTP headers for the request |
|
|
406
|
-
| `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
|
|
407
|
-
| `retries` | number | `3` | Number of retry attempts (1-10) |
|
|
397
|
+
### Large Content Handling
|
|
408
398
|
|
|
409
|
-
|
|
399
|
+
- Inline limit: `MAX_INLINE_CONTENT_CHARS` (default `20000`).
|
|
400
|
+
- If content exceeds the limit and cache is enabled, responses include `resourceUri` and a `resource_link` block.
|
|
401
|
+
- If cache is disabled, content is truncated with `...[truncated]`.
|
|
402
|
+
- Use `maxContentLength` per request to enforce a lower limit.
|
|
410
403
|
|
|
411
|
-
|
|
412
|
-
{
|
|
413
|
-
"url": "https://example.com/docs",
|
|
414
|
-
"title": "Documentation",
|
|
415
|
-
"fetchedAt": "2025-12-11T10:30:00.000Z",
|
|
416
|
-
"markdown": "---\ntitle: Documentation\nsource: \"https://example.com/docs\"\n---\n\n# Getting Started\n\nWelcome to our documentation...\n\n## Installation\n\n```bash\nnpm install example\n```",
|
|
417
|
-
"toc": [
|
|
418
|
-
{ "level": 1, "text": "Getting Started", "slug": "getting-started" },
|
|
419
|
-
{ "level": 2, "text": "Installation", "slug": "installation" }
|
|
420
|
-
],
|
|
421
|
-
"cached": false,
|
|
422
|
-
"truncated": false
|
|
423
|
-
}
|
|
424
|
-
````
|
|
404
|
+
---
|
|
425
405
|
|
|
426
|
-
|
|
406
|
+
## Resources
|
|
427
407
|
|
|
428
|
-
|
|
408
|
+
| URI | Description |
|
|
409
|
+
| ------------------------------------------ | ----------------------------------------------------- |
|
|
410
|
+
| `superfetch://health` | Real-time server health and memory checks |
|
|
411
|
+
| `superfetch://stats` | Server stats and cache metrics |
|
|
412
|
+
| `superfetch://cache/list` | List cached entries and their resource URIs |
|
|
413
|
+
| `superfetch://cache/{namespace}/{urlHash}` | Cached content entry (`namespace`: `url`, `markdown`) |
|
|
429
414
|
|
|
430
|
-
|
|
431
|
-
| -------------------- | -------- | ---------- | -------------------------------------------- |
|
|
432
|
-
| `urls` | string[] | _required_ | Array of URLs to fetch (1-10 URLs) |
|
|
433
|
-
| `extractMainContent` | boolean | `true` | Use Readability to extract main content |
|
|
434
|
-
| `includeMetadata` | boolean | `true` | Include page metadata |
|
|
435
|
-
| `maxContentLength` | number | – | Maximum content length per URL in characters |
|
|
436
|
-
| `format` | string | `'jsonl'` | Output format: `'jsonl'` or `'markdown'` |
|
|
437
|
-
| `concurrency` | number | `3` | Maximum concurrent requests (1-5) |
|
|
438
|
-
| `continueOnError` | boolean | `true` | Continue processing if some URLs fail |
|
|
439
|
-
| `customHeaders` | object | – | Custom HTTP headers for all requests |
|
|
440
|
-
| `timeout` | number | `30000` | Request timeout in milliseconds (1000-60000) |
|
|
441
|
-
| `retries` | number | `3` | Number of retry attempts (1-10) |
|
|
415
|
+
Resource subscriptions notify clients when cache entries update.
|
|
442
416
|
|
|
443
|
-
|
|
417
|
+
---
|
|
444
418
|
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
"cached": false
|
|
454
|
-
},
|
|
455
|
-
{
|
|
456
|
-
"url": "https://example.org",
|
|
457
|
-
"success": true,
|
|
458
|
-
"title": "Example Org",
|
|
459
|
-
"content": "...",
|
|
460
|
-
"cached": false
|
|
461
|
-
}
|
|
462
|
-
],
|
|
463
|
-
"summary": {
|
|
464
|
-
"total": 2,
|
|
465
|
-
"successful": 2,
|
|
466
|
-
"failed": 0,
|
|
467
|
-
"cached": 0,
|
|
468
|
-
"totalContentBlocks": 15
|
|
469
|
-
},
|
|
470
|
-
"fetchedAt": "2024-12-11T10:30:00.000Z"
|
|
471
|
-
}
|
|
419
|
+
## Download Endpoint (HTTP Mode)
|
|
420
|
+
|
|
421
|
+
When running in HTTP mode, cached content can be downloaded directly.
|
|
422
|
+
|
|
423
|
+
### Endpoint
|
|
424
|
+
|
|
425
|
+
```text
|
|
426
|
+
GET /mcp/downloads/:namespace/:hash
|
|
472
427
|
```
|
|
473
428
|
|
|
474
|
-
|
|
429
|
+
- `namespace`: `markdown` or `url`
|
|
430
|
+
- Auth required (`Authorization: Bearer <API_KEY>` or `X-API-Key: <API_KEY>`)
|
|
431
|
+
|
|
432
|
+
### Response Headers
|
|
433
|
+
|
|
434
|
+
| Header | Value |
|
|
435
|
+
| --------------------- | ----------------------------------------------------------------------- |
|
|
436
|
+
| `Content-Type` | `text/markdown; charset=utf-8` or `application/x-ndjson; charset=utf-8` |
|
|
437
|
+
| `Content-Disposition` | `attachment; filename="<name>"` |
|
|
438
|
+
| `Cache-Control` | `private, max-age=<CACHE_TTL>` |
|
|
475
439
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
440
|
+
### Example Usage
|
|
441
|
+
|
|
442
|
+
```bash
|
|
443
|
+
curl -H "Authorization: Bearer $API_KEY" \
|
|
444
|
+
http://localhost:3000/mcp/downloads/markdown/abc123.def456 \
|
|
445
|
+
-o article.md
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
### Error Responses
|
|
449
|
+
|
|
450
|
+
| Status | Code | Description |
|
|
451
|
+
| ------ | --------------------- | -------------------------------- |
|
|
452
|
+
| 400 | `BAD_REQUEST` | Invalid namespace or hash format |
|
|
453
|
+
| 404 | `NOT_FOUND` | Content not found or expired |
|
|
454
|
+
| 503 | `SERVICE_UNAVAILABLE` | Download service disabled |
|
|
481
455
|
|
|
482
456
|
---
|
|
483
457
|
|
|
484
458
|
## Configuration
|
|
485
459
|
|
|
460
|
+
Configure superFetch behavior by adding environment variables to your MCP client configuration's `env` property.
|
|
461
|
+
|
|
462
|
+
### Fetcher Settings
|
|
463
|
+
|
|
464
|
+
| Variable | Default | Valid Values | Description |
|
|
465
|
+
| --------------- | -------------------- | -------------------- | ------------------------------- |
|
|
466
|
+
| `FETCH_TIMEOUT` | `30000` | `5000`-`120000` | Request timeout in milliseconds |
|
|
467
|
+
| `USER_AGENT` | `superFetch-MCP/1.0` | Any valid user agent | Custom user agent |
|
|
468
|
+
|
|
469
|
+
### Cache Settings
|
|
470
|
+
|
|
471
|
+
| Variable | Default | Valid Values | Description |
|
|
472
|
+
| ---------------- | ------- | ---------------- | ------------------------- |
|
|
473
|
+
| `CACHE_ENABLED` | `true` | `true` / `false` | Enable response caching |
|
|
474
|
+
| `CACHE_TTL` | `3600` | `60`-`86400` | Cache lifetime in seconds |
|
|
475
|
+
| `CACHE_MAX_KEYS` | `100` | `10`-`1000` | Maximum cached entries |
|
|
476
|
+
|
|
477
|
+
### Output Settings
|
|
478
|
+
|
|
479
|
+
| Variable | Default | Valid Values | Description |
|
|
480
|
+
| -------------------------- | ------- | --------------- | ----------------------------------------- |
|
|
481
|
+
| `MAX_INLINE_CONTENT_CHARS` | `20000` | `1000`-`200000` | Inline content limit before resource_link |
|
|
482
|
+
|
|
483
|
+
### Logging Settings
|
|
484
|
+
|
|
485
|
+
| Variable | Default | Valid Values | Description |
|
|
486
|
+
| ---------------- | ------- | ----------------------------------- | ---------------------- |
|
|
487
|
+
| `LOG_LEVEL` | `info` | `debug` / `info` / `warn` / `error` | Logging verbosity |
|
|
488
|
+
| `ENABLE_LOGGING` | `true` | `true` / `false` | Enable/disable logging |
|
|
489
|
+
|
|
490
|
+
### Extraction Settings
|
|
491
|
+
|
|
492
|
+
| Variable | Default | Valid Values | Description |
|
|
493
|
+
| ---------------------- | ------- | ---------------- | --------------------------------------- |
|
|
494
|
+
| `EXTRACT_MAIN_CONTENT` | `true` | `true` / `false` | Use Readability to extract main content |
|
|
495
|
+
| `INCLUDE_METADATA` | `true` | `true` / `false` | Include metadata/frontmatter |
|
|
496
|
+
|
|
497
|
+
### HTTP Server Settings
|
|
498
|
+
|
|
499
|
+
| Variable | Default | Description |
|
|
500
|
+
| ------------------------- | ----------- | -------------------------------------------- |
|
|
501
|
+
| `API_KEY` | - | **Required for HTTP mode** |
|
|
502
|
+
| `HOST` | `127.0.0.1` | HTTP server host |
|
|
503
|
+
| `PORT` | `3000` | HTTP server port |
|
|
504
|
+
| `ALLOW_REMOTE` | `false` | Allow binding to non-loopback interfaces |
|
|
505
|
+
| `TRUST_PROXY` | `false` | Trust proxy headers for client IP resolution |
|
|
506
|
+
| `SESSION_TTL_MS` | `1800000` | Session TTL in milliseconds (30 min) |
|
|
507
|
+
| `SESSION_INIT_TIMEOUT_MS` | `10000` | Time allowed for session initialization |
|
|
508
|
+
| `MAX_SESSIONS` | `200` | Maximum active sessions |
|
|
509
|
+
|
|
510
|
+
### CORS Settings
|
|
511
|
+
|
|
512
|
+
| Variable | Default | Description |
|
|
513
|
+
| ----------------- | ------- | --------------------------------------- |
|
|
514
|
+
| `ALLOWED_ORIGINS` | `[]` | Comma-separated list of allowed origins |
|
|
515
|
+
| `CORS_ALLOW_ALL` | `false` | Allow all origins (dev only) |
|
|
516
|
+
|
|
517
|
+
### Rate Limiting
|
|
518
|
+
|
|
519
|
+
| Variable | Default | Valid Values | Description |
|
|
520
|
+
| ----------------------- | ------- | ----------------- | ------------------------------------ |
|
|
521
|
+
| `RATE_LIMIT_ENABLED` | `true` | `true` / `false` | Enable/disable HTTP rate limiting |
|
|
522
|
+
| `RATE_LIMIT_MAX` | `100` | `1`-`10000` | Max requests per window per IP |
|
|
523
|
+
| `RATE_LIMIT_WINDOW_MS` | `60000` | `1000`-`3600000` | Rate limit window in milliseconds |
|
|
524
|
+
| `RATE_LIMIT_CLEANUP_MS` | `60000` | `10000`-`3600000` | Cleanup interval for limiter entries |
|
|
525
|
+
|
|
486
526
|
### Configuration Presets
|
|
487
527
|
|
|
488
528
|
<details open>
|
|
489
|
-
<summary><strong>Default (Recommended)</strong>
|
|
529
|
+
<summary><strong>Default (Recommended)</strong> - No configuration needed</summary>
|
|
490
530
|
|
|
491
531
|
```json
|
|
492
532
|
{
|
|
@@ -502,9 +542,7 @@ Fetches multiple URLs in parallel with concurrency control. Ideal for comparing
|
|
|
502
542
|
</details>
|
|
503
543
|
|
|
504
544
|
<details>
|
|
505
|
-
<summary><strong>Debug Mode</strong>
|
|
506
|
-
|
|
507
|
-
**VS Code** (`.vscode/mcp.json`):
|
|
545
|
+
<summary><strong>Debug Mode</strong> - Verbose logging and no cache</summary>
|
|
508
546
|
|
|
509
547
|
```json
|
|
510
548
|
{
|
|
@@ -521,44 +559,10 @@ Fetches multiple URLs in parallel with concurrency control. Ideal for comparing
|
|
|
521
559
|
}
|
|
522
560
|
```
|
|
523
561
|
|
|
524
|
-
**Claude Desktop** (`claude_desktop_config.json`):
|
|
525
|
-
|
|
526
|
-
```json
|
|
527
|
-
{
|
|
528
|
-
"mcpServers": {
|
|
529
|
-
"superFetch": {
|
|
530
|
-
"command": "npx",
|
|
531
|
-
"args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
|
|
532
|
-
"env": {
|
|
533
|
-
"LOG_LEVEL": "debug",
|
|
534
|
-
"CACHE_ENABLED": "false"
|
|
535
|
-
}
|
|
536
|
-
}
|
|
537
|
-
}
|
|
538
|
-
}
|
|
539
|
-
```
|
|
540
|
-
|
|
541
|
-
**Cursor** (MCP settings):
|
|
542
|
-
|
|
543
|
-
```json
|
|
544
|
-
{
|
|
545
|
-
"mcpServers": {
|
|
546
|
-
"superFetch": {
|
|
547
|
-
"command": "npx",
|
|
548
|
-
"args": ["-y", "@j0hanz/superfetch@latest", "--stdio"],
|
|
549
|
-
"env": {
|
|
550
|
-
"LOG_LEVEL": "debug",
|
|
551
|
-
"CACHE_ENABLED": "false"
|
|
552
|
-
}
|
|
553
|
-
}
|
|
554
|
-
}
|
|
555
|
-
}
|
|
556
|
-
```
|
|
557
|
-
|
|
558
562
|
</details>
|
|
559
563
|
|
|
560
564
|
<details>
|
|
561
|
-
<summary><strong>Performance Mode</strong>
|
|
565
|
+
<summary><strong>Performance Mode</strong> - Aggressive caching for speed</summary>
|
|
562
566
|
|
|
563
567
|
```json
|
|
564
568
|
{
|
|
@@ -579,7 +583,7 @@ Fetches multiple URLs in parallel with concurrency control. Ideal for comparing
|
|
|
579
583
|
</details>
|
|
580
584
|
|
|
581
585
|
<details>
|
|
582
|
-
<summary><strong>Custom User Agent</strong>
|
|
586
|
+
<summary><strong>Custom User Agent</strong> - For sites that block bots</summary>
|
|
583
587
|
|
|
584
588
|
```json
|
|
585
589
|
{
|
|
@@ -598,7 +602,7 @@ Fetches multiple URLs in parallel with concurrency control. Ideal for comparing
|
|
|
598
602
|
</details>
|
|
599
603
|
|
|
600
604
|
<details>
|
|
601
|
-
<summary><strong>Slow Networks / CI
|
|
605
|
+
<summary><strong>Slow Networks / CI</strong> - Extended timeouts</summary>
|
|
602
606
|
|
|
603
607
|
```json
|
|
604
608
|
{
|
|
@@ -618,118 +622,17 @@ Fetches multiple URLs in parallel with concurrency control. Ideal for comparing
|
|
|
618
622
|
|
|
619
623
|
</details>
|
|
620
624
|
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
Configure SuperFetch behavior by adding environment variables to your MCP client configuration's `env` property.
|
|
624
|
-
|
|
625
|
-
#### 🌐 Fetcher Settings
|
|
626
|
-
|
|
627
|
-
| Variable | Default | Valid Values | Description |
|
|
628
|
-
| --------------- | -------------------- | -------------------- | --------------------------------------------------------------- |
|
|
629
|
-
| `FETCH_TIMEOUT` | `30000` | `5000`-`120000` | Request timeout in milliseconds (5s-2min) |
|
|
630
|
-
| `USER_AGENT` | `superFetch-MCP/1.0` | Any valid user agent | Custom user agent for requests (useful for sites blocking bots) |
|
|
631
|
-
|
|
632
|
-
#### 💾 Cache Settings
|
|
633
|
-
|
|
634
|
-
| Variable | Default | Valid Values | Description |
|
|
635
|
-
| ---------------- | ------- | ---------------- | -------------------------------------- |
|
|
636
|
-
| `CACHE_ENABLED` | `true` | `true` / `false` | Enable response caching |
|
|
637
|
-
| `CACHE_TTL` | `3600` | `60`-`86400` | Cache lifetime in seconds (1min-24hrs) |
|
|
638
|
-
| `CACHE_MAX_KEYS` | `100` | `10`-`1000` | Maximum number of cached entries |
|
|
639
|
-
|
|
640
|
-
#### 📦 Output Settings
|
|
641
|
-
|
|
642
|
-
| Variable | Default | Valid Values | Description |
|
|
643
|
-
| -------------------------- | ------- | --------------- | --------------------------------------------------------------- |
|
|
644
|
-
| `MAX_INLINE_CONTENT_CHARS` | `20000` | `1000`-`200000` | Inline content limit before returning a `resource_link` instead |
|
|
645
|
-
|
|
646
|
-
#### 📝 Logging Settings
|
|
647
|
-
|
|
648
|
-
| Variable | Default | Valid Values | Description |
|
|
649
|
-
| ---------------- | ------- | ----------------------------------- | -------------------------- |
|
|
650
|
-
| `LOG_LEVEL` | `info` | `debug` / `info` / `warn` / `error` | Logging verbosity level |
|
|
651
|
-
| `ENABLE_LOGGING` | `true` | `true` / `false` | Enable/disable all logging |
|
|
652
|
-
|
|
653
|
-
#### 🔍 Extraction Settings
|
|
654
|
-
|
|
655
|
-
| Variable | Default | Valid Values | Description |
|
|
656
|
-
| ---------------------- | ------- | ---------------- | -------------------------------------------------- |
|
|
657
|
-
| `EXTRACT_MAIN_CONTENT` | `true` | `true` / `false` | Use Mozilla Readability to extract main content |
|
|
658
|
-
| `INCLUDE_METADATA` | `true` | `true` / `false` | Include page metadata (title, description, author) |
|
|
659
|
-
|
|
660
|
-
#### 🛡️ Security Settings
|
|
661
|
-
|
|
662
|
-
| Variable | Default | Description |
|
|
663
|
-
| -------------- | ------- | -------------------------------------------------------- |
|
|
664
|
-
| `API_KEY` | - | API Key for HTTP authentication (required for HTTP mode) |
|
|
665
|
-
| `ALLOW_REMOTE` | `false` | Allow binding to non-loopback interfaces |
|
|
666
|
-
|
|
667
|
-
#### Rate Limiting
|
|
668
|
-
|
|
669
|
-
| Variable | Default | Valid Values | Description |
|
|
670
|
-
| ----------------------- | ------- | ----------------- | ------------------------------------ |
|
|
671
|
-
| `RATE_LIMIT_ENABLED` | `true` | `true` / `false` | Enable/disable HTTP rate limiting |
|
|
672
|
-
| `RATE_LIMIT_MAX` | `100` | `1`-`10000` | Max requests per window per IP |
|
|
673
|
-
| `RATE_LIMIT_WINDOW_MS` | `60000` | `1000`-`3600000` | Rate limit window in milliseconds |
|
|
674
|
-
| `RATE_LIMIT_CLEANUP_MS` | `60000` | `10000`-`3600000` | Cleanup interval for limiter entries |
|
|
675
|
-
|
|
676
|
-
### HTTP Mode Configuration
|
|
677
|
-
|
|
678
|
-
<details>
|
|
679
|
-
<summary><strong>HTTP Mode</strong> (Advanced) — For running as a standalone HTTP server</summary>
|
|
680
|
-
|
|
681
|
-
SuperFetch can run as an HTTP server for custom integrations. HTTP mode requires additional configuration and an `API_KEY` for authenticated access (send `Authorization: Bearer <key>` or `X-API-Key: <key>`).
|
|
682
|
-
|
|
683
|
-
#### Start HTTP Server
|
|
684
|
-
|
|
685
|
-
```bash
|
|
686
|
-
npx -y @j0hanz/superfetch@latest
|
|
687
|
-
# Server runs at http://127.0.0.1:3000
|
|
688
|
-
```
|
|
689
|
-
|
|
690
|
-
#### HTTP-Specific Environment Variables
|
|
691
|
-
|
|
692
|
-
| Variable | Default | Description |
|
|
693
|
-
| ------------------------- | ----------- | ------------------------------------------------ |
|
|
694
|
-
| `PORT` | `3000` | HTTP server port |
|
|
695
|
-
| `HOST` | `127.0.0.1` | HTTP server host (`0.0.0.0` for Docker/K8s) |
|
|
696
|
-
| `ALLOWED_ORIGINS` | `[]` | Comma-separated CORS origins |
|
|
697
|
-
| `CORS_ALLOW_ALL` | `false` | Allow all CORS origins (dev only, security risk) |
|
|
698
|
-
| `SESSION_TTL_MS` | `1800000` | Session time-to-live in milliseconds (30 mins) |
|
|
699
|
-
| `SESSION_INIT_TIMEOUT_MS` | `10000` | Time allowed for session initialization (ms) |
|
|
700
|
-
| `MAX_SESSIONS` | `200` | Maximum number of active sessions |
|
|
701
|
-
|
|
702
|
-
#### VS Code HTTP Mode Setup
|
|
703
|
-
|
|
704
|
-
```json
|
|
705
|
-
{
|
|
706
|
-
"servers": {
|
|
707
|
-
"superFetch": {
|
|
708
|
-
"type": "http",
|
|
709
|
-
"url": "http://127.0.0.1:3000/mcp"
|
|
710
|
-
}
|
|
711
|
-
}
|
|
712
|
-
}
|
|
713
|
-
```
|
|
714
|
-
|
|
715
|
-
#### Docker/Kubernetes Example
|
|
625
|
+
---
|
|
716
626
|
|
|
717
|
-
|
|
718
|
-
PORT=8080 HOST=0.0.0.0 ALLOWED_ORIGINS=https://myapp.com npx @j0hanz/superfetch@latest
|
|
719
|
-
```
|
|
627
|
+
## HTTP Mode Details
|
|
720
628
|
|
|
721
|
-
|
|
629
|
+
HTTP mode uses the MCP Streamable HTTP transport. The workflow is:
|
|
722
630
|
|
|
723
|
-
|
|
631
|
+
1. `POST /mcp` with an `initialize` request and **no** `mcp-session-id` header.
|
|
632
|
+
2. The server returns `mcp-session-id` in the response headers.
|
|
633
|
+
3. Send that `mcp-session-id` header on subsequent `POST /mcp`, `GET /mcp`, and `DELETE /mcp` requests.
|
|
724
634
|
|
|
725
|
-
|
|
726
|
-
| ---------------------------- | -------------------------------------------------------------- |
|
|
727
|
-
| 🐛 **Debugging issues** | `LOG_LEVEL=debug`, `CACHE_ENABLED=false` |
|
|
728
|
-
| 🚀 **Maximum performance** | `CACHE_TTL=7200`, `CACHE_MAX_KEYS=500`, `LOG_LEVEL=error` |
|
|
729
|
-
| 🌐 **Slow target sites** | `FETCH_TIMEOUT=60000` |
|
|
730
|
-
| 🤖 **Bypass bot detection** | `USER_AGENT="Mozilla/5.0 (compatible; MyBot/1.0)"` |
|
|
731
|
-
| 🔄 **CI/CD (always fresh)** | `CACHE_ENABLED=false`, `FETCH_TIMEOUT=60000`, `LOG_LEVEL=warn` |
|
|
732
|
-
| 📊 **Production monitoring** | `LOG_LEVEL=warn` or `error` |
|
|
635
|
+
If `MAX_SESSIONS` is reached, the server evicts the oldest session when possible; otherwise, it returns a 503.
|
|
733
636
|
|
|
734
637
|
---
|
|
735
638
|
|
|
@@ -737,15 +640,16 @@ PORT=8080 HOST=0.0.0.0 ALLOWED_ORIGINS=https://myapp.com npx @j0hanz/superfetch@
|
|
|
737
640
|
|
|
738
641
|
JSONL output includes semantic content blocks:
|
|
739
642
|
|
|
740
|
-
| Type
|
|
741
|
-
|
|
|
742
|
-
| `metadata`
|
|
743
|
-
| `heading`
|
|
744
|
-
| `paragraph`
|
|
745
|
-
| `list`
|
|
746
|
-
| `code`
|
|
747
|
-
| `table`
|
|
748
|
-
| `image`
|
|
643
|
+
| Type | Description |
|
|
644
|
+
| ------------ | ---------------------------------------- |
|
|
645
|
+
| `metadata` | Minimal page metadata (type, title, url) |
|
|
646
|
+
| `heading` | Headings (h1-h6) with level indicator |
|
|
647
|
+
| `paragraph` | Text paragraphs |
|
|
648
|
+
| `list` | Ordered/unordered lists |
|
|
649
|
+
| `code` | Code blocks with optional language |
|
|
650
|
+
| `table` | Tables with headers and rows |
|
|
651
|
+
| `image` | Images with src and alt text |
|
|
652
|
+
| `blockquote` | Block quote text |
|
|
749
653
|
|
|
750
654
|
---
|
|
751
655
|
|
|
@@ -753,12 +657,19 @@ JSONL output includes semantic content blocks:
|
|
|
753
657
|
|
|
754
658
|
### SSRF Protection
|
|
755
659
|
|
|
756
|
-
Blocked destinations:
|
|
660
|
+
Blocked destinations include:
|
|
757
661
|
|
|
758
662
|
- Localhost and loopback addresses
|
|
759
663
|
- Private IP ranges (`10.x.x.x`, `172.16-31.x.x`, `192.168.x.x`)
|
|
760
664
|
- Cloud metadata endpoints (AWS, GCP, Azure)
|
|
761
665
|
- IPv6 link-local and unique local addresses
|
|
666
|
+
- Internal suffixes such as `.local` and `.internal`
|
|
667
|
+
|
|
668
|
+
### URL Validation
|
|
669
|
+
|
|
670
|
+
- Only `http` and `https` URLs
|
|
671
|
+
- No embedded credentials in URLs
|
|
672
|
+
- Max URL length: 2048 characters
|
|
762
673
|
|
|
763
674
|
### Header Sanitization
|
|
764
675
|
|
|
@@ -766,21 +677,7 @@ Blocked headers: `host`, `authorization`, `cookie`, `x-forwarded-for`, `x-real-i
|
|
|
766
677
|
|
|
767
678
|
### Rate Limiting
|
|
768
679
|
|
|
769
|
-
Default: **100 requests/minute** per IP. Configure with `RATE_LIMIT_MAX` and
|
|
770
|
-
`RATE_LIMIT_WINDOW_MS`.
|
|
771
|
-
|
|
772
|
-
### HTTP Mode Endpoints
|
|
773
|
-
|
|
774
|
-
When running without `--stdio`, the following endpoints are available:
|
|
775
|
-
|
|
776
|
-
| Endpoint | Method | Description |
|
|
777
|
-
| --------- | ------ | --------------------------------------- |
|
|
778
|
-
| `/health` | GET | Health check with uptime and version |
|
|
779
|
-
| `/mcp` | POST | MCP request handling (requires session) |
|
|
780
|
-
| `/mcp` | GET | SSE stream for notifications |
|
|
781
|
-
| `/mcp` | DELETE | Close session |
|
|
782
|
-
|
|
783
|
-
Sessions are managed via `mcp-session-id` header with 30-minute TTL.
|
|
680
|
+
Default: **100 requests/minute** per IP (HTTP mode only). Configure with `RATE_LIMIT_MAX` and `RATE_LIMIT_WINDOW_MS`.
|
|
784
681
|
|
|
785
682
|
---
|
|
786
683
|
|
|
@@ -807,14 +704,13 @@ Sessions are managed via `mcp-session-id` header with 30-minute TTL.
|
|
|
807
704
|
|
|
808
705
|
| Category | Technology |
|
|
809
706
|
| ------------------ | --------------------------------- |
|
|
810
|
-
| Runtime | Node.js
|
|
707
|
+
| Runtime | Node.js >=20.12 |
|
|
811
708
|
| Language | TypeScript 5.9 |
|
|
812
709
|
| MCP SDK | @modelcontextprotocol/sdk ^1.25.1 |
|
|
813
710
|
| Content Extraction | @mozilla/readability ^0.6.0 |
|
|
814
711
|
| HTML Parsing | Cheerio ^1.1.2, LinkeDOM ^0.18.12 |
|
|
815
712
|
| Markdown | Turndown ^7.2.2 |
|
|
816
|
-
| HTTP | Express ^5.2.1,
|
|
817
|
-
| Caching | node-cache ^5.1.2 |
|
|
713
|
+
| HTTP | Express ^5.2.1, undici ^6.22.0 |
|
|
818
714
|
| Validation | Zod ^3.24.1 |
|
|
819
715
|
|
|
820
716
|
---
|