@j0hanz/superfetch 1.2.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +116 -152
- package/dist/config/auth-config.d.ts +16 -0
- package/dist/config/auth-config.js +53 -0
- package/dist/config/constants.d.ts +11 -13
- package/dist/config/constants.js +1 -3
- package/dist/config/env-parsers.d.ts +7 -0
- package/dist/config/env-parsers.js +84 -0
- package/dist/config/formatting.d.ts +2 -2
- package/dist/config/index.d.ts +47 -53
- package/dist/config/index.js +25 -59
- package/dist/config/types/content.d.ts +1 -49
- package/dist/config/types/runtime.d.ts +8 -16
- package/dist/config/types/tools.d.ts +2 -28
- package/dist/http/accept-policy.d.ts +3 -0
- package/dist/http/accept-policy.js +45 -0
- package/dist/http/async-handler.d.ts +2 -0
- package/dist/http/async-handler.js +5 -0
- package/dist/http/auth-introspection.d.ts +2 -0
- package/dist/http/auth-introspection.js +141 -0
- package/dist/http/auth-static.d.ts +2 -0
- package/dist/http/auth-static.js +23 -0
- package/dist/http/auth.d.ts +3 -2
- package/dist/http/auth.js +98 -26
- package/dist/http/cors.d.ts +6 -6
- package/dist/http/cors.js +7 -42
- package/dist/http/download-routes.d.ts +0 -12
- package/dist/http/download-routes.js +21 -58
- package/dist/http/jsonrpc-http.d.ts +2 -0
- package/dist/http/jsonrpc-http.js +10 -0
- package/dist/http/mcp-routes.d.ts +0 -1
- package/dist/http/mcp-routes.js +43 -30
- package/dist/http/mcp-session-helpers.d.ts +0 -1
- package/dist/http/mcp-session-helpers.js +1 -1
- package/dist/http/mcp-session-transport.d.ts +7 -0
- package/dist/http/mcp-session-transport.js +57 -0
- package/dist/http/mcp-session.js +60 -73
- package/dist/http/mcp-validation.d.ts +1 -0
- package/dist/http/mcp-validation.js +11 -10
- package/dist/http/protocol-policy.d.ts +2 -0
- package/dist/http/protocol-policy.js +31 -0
- package/dist/http/rate-limit.js +5 -2
- package/dist/http/server-config.d.ts +1 -0
- package/dist/http/server-config.js +40 -0
- package/dist/http/server-middleware.d.ts +2 -9
- package/dist/http/server-middleware.js +96 -43
- package/dist/http/server-shutdown.d.ts +4 -0
- package/dist/http/server-shutdown.js +43 -0
- package/dist/http/server.js +52 -64
- package/dist/http/session-cleanup.js +1 -1
- package/dist/middleware/error-handler.js +1 -3
- package/dist/resources/cached-content.js +50 -108
- package/dist/resources/index.js +0 -82
- package/dist/server.js +51 -30
- package/dist/services/cache-keys.d.ts +7 -0
- package/dist/services/cache-keys.js +57 -0
- package/dist/services/cache.d.ts +1 -7
- package/dist/services/cache.js +53 -119
- package/dist/services/context.d.ts +0 -1
- package/dist/services/context.js +0 -7
- package/dist/services/extractor.js +10 -82
- package/dist/services/fetcher/agents.d.ts +2 -2
- package/dist/services/fetcher/agents.js +34 -95
- package/dist/services/fetcher/dns-selection.d.ts +2 -0
- package/dist/services/fetcher/dns-selection.js +72 -0
- package/dist/services/fetcher/interceptors.d.ts +0 -22
- package/dist/services/fetcher/interceptors.js +30 -13
- package/dist/services/fetcher/redirects.js +4 -3
- package/dist/services/fetcher/response.js +66 -31
- package/dist/services/fetcher.d.ts +1 -3
- package/dist/services/fetcher.js +14 -33
- package/dist/services/fifo-queue.d.ts +8 -0
- package/dist/services/fifo-queue.js +25 -0
- package/dist/services/logger.js +2 -2
- package/dist/services/metadata-collector.d.ts +1 -9
- package/dist/services/metadata-collector.js +71 -2
- package/dist/services/transform-worker-pool.d.ts +4 -14
- package/dist/services/transform-worker-pool.js +177 -129
- package/dist/services/transform-worker-types.d.ts +32 -0
- package/dist/services/transform-worker-types.js +14 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
- package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
- package/dist/tools/handlers/fetch-single.shared.d.ts +1 -20
- package/dist/tools/handlers/fetch-single.shared.js +44 -87
- package/dist/tools/handlers/fetch-url.tool.d.ts +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +46 -123
- package/dist/tools/index.js +21 -40
- package/dist/tools/schemas.d.ts +1 -51
- package/dist/tools/schemas.js +2 -108
- package/dist/tools/utils/cached-markdown.d.ts +5 -0
- package/dist/tools/utils/cached-markdown.js +46 -0
- package/dist/tools/utils/content-shaping.d.ts +4 -0
- package/dist/tools/utils/content-shaping.js +52 -0
- package/dist/tools/utils/content-transform.d.ts +2 -17
- package/dist/tools/utils/content-transform.js +120 -114
- package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
- package/dist/tools/utils/fetch-pipeline.js +65 -62
- package/dist/tools/utils/inline-content.d.ts +1 -2
- package/dist/tools/utils/inline-content.js +4 -7
- package/dist/transformers/markdown.transformer.js +109 -34
- package/dist/utils/cached-payload.d.ts +7 -0
- package/dist/utils/cached-payload.js +36 -0
- package/dist/utils/error-utils.js +1 -1
- package/dist/utils/filename-generator.js +21 -10
- package/dist/utils/guards.d.ts +1 -0
- package/dist/utils/guards.js +3 -0
- package/dist/utils/header-normalizer.d.ts +0 -3
- package/dist/utils/header-normalizer.js +3 -3
- package/dist/utils/tool-error-handler.d.ts +2 -2
- package/dist/utils/tool-error-handler.js +11 -38
- package/dist/utils/url-transformer.d.ts +7 -0
- package/dist/utils/url-transformer.js +147 -0
- package/dist/utils/url-validator.d.ts +1 -2
- package/dist/utils/url-validator.js +20 -93
- package/dist/workers/content-transform.worker.d.ts +1 -0
- package/dist/workers/content-transform.worker.js +40 -0
- package/package.json +13 -16
package/README.md
CHANGED
|
@@ -10,9 +10,9 @@
|
|
|
10
10
|
|
|
11
11
|
[](https://cursor.com/install-mcp?name=superfetch&config=eyJjb21tYW5kIjoibnB4IiwiYXJncyI6WyIteSIsIkBqMGhhbnovc3VwZXJmZXRjaEBsYXRlc3QiLCItLXN0ZGlvIl19)
|
|
12
12
|
|
|
13
|
-
A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches web pages, extracts readable content with Mozilla Readability, and returns AI-friendly
|
|
13
|
+
A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches web pages, extracts readable content with Mozilla Readability, and returns AI-friendly Markdown.
|
|
14
14
|
|
|
15
|
-
[Quick Start](#quick-start) | [
|
|
15
|
+
[Quick Start](#quick-start) | [Tool](#available-tools) | [Resources](#resources) | [Configuration](#configuration) | [Security](#security) | [Development](#development)
|
|
16
16
|
|
|
17
17
|
> **Published to [MCP Registry](https://registry.modelcontextprotocol.io/)** - Search for `io.github.j0hanz/superfetch`
|
|
18
18
|
|
|
@@ -23,45 +23,15 @@ A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that f
|
|
|
23
23
|
|
|
24
24
|
## Features
|
|
25
25
|
|
|
26
|
-
| Feature
|
|
27
|
-
|
|
|
28
|
-
| Smart extraction
|
|
29
|
-
|
|
|
30
|
-
|
|
|
31
|
-
| Built-in caching
|
|
32
|
-
| Resilient fetching
|
|
33
|
-
| Security first
|
|
34
|
-
| HTTP mode
|
|
35
|
-
|
|
36
|
-
---
|
|
37
|
-
|
|
38
|
-
## How to Choose a Tool
|
|
39
|
-
|
|
40
|
-
Use this guide to select the right tool for your web content extraction needs.
|
|
41
|
-
|
|
42
|
-
### Decision Tree
|
|
43
|
-
|
|
44
|
-
```text
|
|
45
|
-
Need web content for AI?
|
|
46
|
-
- Want structured JSONL blocks -> fetch-url (format: jsonl)
|
|
47
|
-
- Want clean Markdown -> fetch-markdown
|
|
48
|
-
- Want Markdown but also need contentBlocks count -> fetch-url (format: markdown)
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
### Quick Reference Table
|
|
52
|
-
|
|
53
|
-
| Tool | Best For | Output Format | Use When |
|
|
54
|
-
| ---------------- | ---------------------------------- | -------------------------------- | ----------------------------------------- |
|
|
55
|
-
| `fetch-url` | Single page with structured blocks | JSONL (or Markdown via `format`) | RAG pipelines, content parsing, analytics |
|
|
56
|
-
| `fetch-markdown` | Single page in readable format | Markdown + frontmatter | Documentation, summaries, human review |
|
|
57
|
-
|
|
58
|
-
### Common Use Cases
|
|
59
|
-
|
|
60
|
-
| Task | Recommended Tool | Why |
|
|
61
|
-
| ------------------------ | ---------------------------------------- | ---------------------------------------------------- |
|
|
62
|
-
| Parse a blog post for AI | `fetch-url` | Returns semantic blocks (headings, paragraphs, code) |
|
|
63
|
-
| Generate documentation | `fetch-markdown` | Clean markdown with frontmatter |
|
|
64
|
-
| Extract article for RAG | `fetch-url` + `extractMainContent: true` | Removes ads/nav, keeps main content |
|
|
26
|
+
| Feature | Description |
|
|
27
|
+
| -------------------- | ------------------------------------------------------------------------------------- |
|
|
28
|
+
| Smart extraction | Mozilla Readability with quality gates to strip boilerplate when it improves results |
|
|
29
|
+
| Clean Markdown | Markdown output with optional YAML frontmatter (title + source) |
|
|
30
|
+
| Raw content handling | Preserves raw markdown/text and rewrites GitHub/GitLab/Bitbucket blob URLs to raw |
|
|
31
|
+
| Built-in caching | In-memory cache with TTL, max keys, and resource subscriptions |
|
|
32
|
+
| Resilient fetching | Redirect handling with validation, timeouts, and response size limits |
|
|
33
|
+
| Security first | URL validation plus SSRF/DNS/IP blocklists |
|
|
34
|
+
| HTTP mode | Static token or OAuth auth, session management, rate limiting, host/origin validation |
|
|
65
35
|
|
|
66
36
|
---
|
|
67
37
|
|
|
@@ -230,7 +200,7 @@ npm install -g @j0hanz/superfetch
|
|
|
230
200
|
# Run in stdio mode
|
|
231
201
|
superfetch --stdio
|
|
232
202
|
|
|
233
|
-
# Run HTTP server (requires
|
|
203
|
+
# Run HTTP server (requires auth token)
|
|
234
204
|
superfetch
|
|
235
205
|
```
|
|
236
206
|
|
|
@@ -257,7 +227,7 @@ node dist/index.js --stdio
|
|
|
257
227
|
<details>
|
|
258
228
|
<summary><strong>HTTP Mode</strong> (default)</summary>
|
|
259
229
|
|
|
260
|
-
HTTP mode requires `
|
|
230
|
+
HTTP mode requires authentication. By default it binds to `127.0.0.1`. To listen on all interfaces, set `HOST=0.0.0.0` or `HOST=::` and configure OAuth (remote bindings require OAuth). Other non-loopback `HOST` values are rejected.
|
|
261
231
|
|
|
262
232
|
```bash
|
|
263
233
|
API_KEY=supersecret npx -y @j0hanz/superfetch@latest
|
|
@@ -271,7 +241,9 @@ $env:API_KEY = "supersecret"
|
|
|
271
241
|
npx -y @j0hanz/superfetch@latest
|
|
272
242
|
```
|
|
273
243
|
|
|
274
|
-
|
|
244
|
+
For multiple static tokens, set `ACCESS_TOKENS` (comma/space separated).
|
|
245
|
+
|
|
246
|
+
Endpoints (auth required via `Authorization: Bearer <token>`; in static token mode, `X-API-Key` is also accepted):
|
|
275
247
|
|
|
276
248
|
- `GET /health`
|
|
277
249
|
- `POST /mcp`
|
|
@@ -289,111 +261,61 @@ Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#h
|
|
|
289
261
|
|
|
290
262
|
### Tool Response Notes
|
|
291
263
|
|
|
292
|
-
|
|
264
|
+
The tool returns `structuredContent` with `url`, optional `title`, and `markdown` when inline content is available. On errors, `error` is included instead of content.
|
|
293
265
|
|
|
294
|
-
|
|
295
|
-
- `content` blocks that include:
|
|
296
|
-
- a `text` block containing JSON of `structuredContent`
|
|
297
|
-
- in stdio mode, a `resource` block with a `file:///...` URI embedding the full content
|
|
298
|
-
- in HTTP mode, a `resource` block when inline content is available
|
|
299
|
-
- when content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is enabled, a `resource_link` block points to `superfetch://cache/...` and `structuredContent.resourceUri` is set
|
|
266
|
+
The response includes:
|
|
300
267
|
|
|
301
|
-
|
|
268
|
+
- a `text` block containing JSON of `structuredContent`
|
|
269
|
+
- a `resource` block embedding markdown when inline content is available (always in stdio mode)
|
|
270
|
+
- when content exceeds the inline limit and cache is enabled, a `resource_link` block pointing to `superfetch://cache/...` (inline markdown may be omitted)
|
|
302
271
|
|
|
303
272
|
---
|
|
304
273
|
|
|
305
274
|
### `fetch-url`
|
|
306
275
|
|
|
307
|
-
Fetches a webpage and converts it to
|
|
308
|
-
|
|
309
|
-
| Parameter | Type | Default | Description |
|
|
310
|
-
| ---------------------- | --------------------- | ---------------------------------- | ------------------------------------------------------ |
|
|
311
|
-
| `url` | string | required | URL to fetch |
|
|
312
|
-
| `format` | "jsonl" \| "markdown" | `"jsonl"` | Output format |
|
|
313
|
-
| `includeContentBlocks` | boolean | `true` (jsonl), `false` (markdown) | Include content block counts when `format: "markdown"` |
|
|
314
|
-
| `extractMainContent` | boolean | `true` | Use Readability to extract main content |
|
|
315
|
-
| `includeMetadata` | boolean | `true` | Include page metadata |
|
|
316
|
-
| `maxContentLength` | number | - | Maximum content length in characters (max 5,242,880) |
|
|
317
|
-
| `customHeaders` | object | - | Custom HTTP headers (sanitized) |
|
|
318
|
-
| `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
|
|
319
|
-
| `retries` | number | `3` | Number of retry attempts (1-10) |
|
|
276
|
+
Fetches a webpage and converts it to clean Markdown format with optional frontmatter.
|
|
320
277
|
|
|
321
|
-
|
|
278
|
+
| Parameter | Type | Default | Description |
|
|
279
|
+
| --------- | ------ | -------- | ------------ |
|
|
280
|
+
| `url` | string | required | URL to fetch |
|
|
322
281
|
|
|
323
282
|
**Example `structuredContent`:**
|
|
324
283
|
|
|
325
284
|
```json
|
|
326
285
|
{
|
|
327
|
-
"url": "https://example.com/
|
|
328
|
-
"title": "
|
|
329
|
-
"
|
|
330
|
-
"fetchedAt": "2025-12-11T10:30:00.000Z",
|
|
331
|
-
"format": "jsonl",
|
|
332
|
-
"contentSize": 12345,
|
|
333
|
-
"cached": false,
|
|
334
|
-
"content": "{\"type\":\"metadata\",\"title\":\"Example Article\",\"url\":\"https://example.com/article\"}\n{\"type\":\"heading\",\"level\":1,\"text\":\"Introduction\"}"
|
|
286
|
+
"url": "https://example.com/docs",
|
|
287
|
+
"title": "Documentation",
|
|
288
|
+
"markdown": "---\ntitle: Documentation\n---\n\n# Getting Started\n\nWelcome..."
|
|
335
289
|
}
|
|
336
290
|
```
|
|
337
291
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
### `fetch-markdown`
|
|
341
|
-
|
|
342
|
-
Fetches a webpage and converts it to clean Markdown with optional frontmatter.
|
|
343
|
-
|
|
344
|
-
| Parameter | Type | Default | Description |
|
|
345
|
-
| -------------------- | ------- | -------- | ---------------------------------------------------- |
|
|
346
|
-
| `url` | string | required | URL to fetch |
|
|
347
|
-
| `extractMainContent` | boolean | `true` | Extract main content only |
|
|
348
|
-
| `includeMetadata` | boolean | `true` | Include YAML frontmatter |
|
|
349
|
-
| `maxContentLength` | number | - | Maximum content length in characters (max 5,242,880) |
|
|
350
|
-
| `customHeaders` | object | - | Custom HTTP headers (sanitized) |
|
|
351
|
-
| `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
|
|
352
|
-
| `retries` | number | `3` | Number of retry attempts (1-10) |
|
|
353
|
-
|
|
354
|
-
**Example `structuredContent`:**
|
|
292
|
+
**Error response:**
|
|
355
293
|
|
|
356
294
|
```json
|
|
357
295
|
{
|
|
358
|
-
"url": "https://example.com/
|
|
359
|
-
"
|
|
360
|
-
"fetchedAt": "2025-12-11T10:30:00.000Z",
|
|
361
|
-
"markdown": "---\ntitle: Documentation\nsource: \"https://example.com/docs\"\n---\n\n# Getting Started\n\nWelcome...",
|
|
362
|
-
"contentSize": 9876,
|
|
363
|
-
"cached": false,
|
|
364
|
-
"truncated": false,
|
|
365
|
-
"file": {
|
|
366
|
-
"downloadUrl": "/mcp/downloads/markdown/abc123def456",
|
|
367
|
-
"fileName": "documentation.md",
|
|
368
|
-
"expiresAt": "2025-12-11T11:30:00.000Z"
|
|
369
|
-
}
|
|
296
|
+
"url": "https://example.com/broken",
|
|
297
|
+
"error": "Failed to fetch: 404 Not Found"
|
|
370
298
|
}
|
|
371
299
|
```
|
|
372
300
|
|
|
373
|
-
`file` is included only in HTTP mode when content is cached and too large to inline.
|
|
374
|
-
|
|
375
301
|
---
|
|
376
302
|
|
|
377
303
|
### Large Content Handling
|
|
378
304
|
|
|
379
|
-
- Inline
|
|
380
|
-
-
|
|
381
|
-
- If cache is disabled,
|
|
382
|
-
- Use `maxContentLength` per request to enforce a lower limit (hard cap: 5,242,880 characters).
|
|
305
|
+
- Inline markdown is capped at 20,000 characters (`maxInlineContentChars`).
|
|
306
|
+
- **Stdio mode:** full markdown is embedded as a `resource` block.
|
|
307
|
+
- **HTTP mode:** if content exceeds the inline limit and cache is enabled, the response includes a `resource_link` to `superfetch://cache/...` (no embedded markdown). If cache is disabled, the inline markdown is truncated with `...[truncated]`.
|
|
383
308
|
- Upstream fetch size is capped at 10 MB of HTML; larger responses fail.
|
|
384
309
|
|
|
385
310
|
---
|
|
386
311
|
|
|
387
312
|
## Resources
|
|
388
313
|
|
|
389
|
-
| URI | Description
|
|
390
|
-
| ------------------------------------------ |
|
|
391
|
-
| `superfetch://
|
|
392
|
-
| `superfetch://stats` | Server stats and cache metrics |
|
|
393
|
-
| `superfetch://cache/list` | List cached entries and their resource URIs |
|
|
394
|
-
| `superfetch://cache/{namespace}/{urlHash}` | Cached content entry (`namespace`: `url`, `markdown`; `links` is reserved) |
|
|
314
|
+
| URI | Description |
|
|
315
|
+
| ------------------------------------------ | ---------------------------------------------- |
|
|
316
|
+
| `superfetch://cache/{namespace}/{urlHash}` | Cached content entry (`namespace`: `markdown`) |
|
|
395
317
|
|
|
396
|
-
Resource subscriptions notify clients when cache entries update.
|
|
318
|
+
Resource listings enumerate cached entries, and subscriptions notify clients when cache entries update.
|
|
397
319
|
|
|
398
320
|
---
|
|
399
321
|
|
|
@@ -407,21 +329,21 @@ When running in HTTP mode, cached content can be downloaded directly. Downloads
|
|
|
407
329
|
GET /mcp/downloads/:namespace/:hash
|
|
408
330
|
```
|
|
409
331
|
|
|
410
|
-
- `namespace`: `markdown`
|
|
411
|
-
- Auth required (`Authorization: Bearer <
|
|
332
|
+
- `namespace`: `markdown`
|
|
333
|
+
- Auth required (`Authorization: Bearer <token>`; in static token mode, `X-API-Key` is accepted)
|
|
412
334
|
|
|
413
335
|
### Response Headers
|
|
414
336
|
|
|
415
|
-
| Header | Value
|
|
416
|
-
| --------------------- |
|
|
417
|
-
| `Content-Type` | `text/markdown; charset=utf-8`
|
|
418
|
-
| `Content-Disposition` | `attachment; filename="<name>"`
|
|
419
|
-
| `Cache-Control` | `private, max-age=<CACHE_TTL>`
|
|
337
|
+
| Header | Value |
|
|
338
|
+
| --------------------- | ------------------------------- |
|
|
339
|
+
| `Content-Type` | `text/markdown; charset=utf-8` |
|
|
340
|
+
| `Content-Disposition` | `attachment; filename="<name>"` |
|
|
341
|
+
| `Cache-Control` | `private, max-age=<CACHE_TTL>` |
|
|
420
342
|
|
|
421
343
|
### Example Usage
|
|
422
344
|
|
|
423
345
|
```bash
|
|
424
|
-
curl -H "Authorization: Bearer $
|
|
346
|
+
curl -H "Authorization: Bearer $TOKEN" \
|
|
425
347
|
http://localhost:3000/mcp/downloads/markdown/abc123.def456 \
|
|
426
348
|
-o article.md
|
|
427
349
|
```
|
|
@@ -438,7 +360,65 @@ curl -H "Authorization: Bearer $API_KEY" \
|
|
|
438
360
|
|
|
439
361
|
## Configuration
|
|
440
362
|
|
|
441
|
-
|
|
363
|
+
Set environment variables in your MCP client `env` or in the shell before starting the server.
|
|
364
|
+
|
|
365
|
+
### Core Server Settings
|
|
366
|
+
|
|
367
|
+
| Variable | Default | Description |
|
|
368
|
+
| --------------- | -------------------- | ------------------------------------------------------------- |
|
|
369
|
+
| `HOST` | `127.0.0.1` | HTTP bind address |
|
|
370
|
+
| `PORT` | `3000` | HTTP server port (1024-65535) |
|
|
371
|
+
| `USER_AGENT` | `superFetch-MCP/2.0` | User-Agent header for outgoing requests |
|
|
372
|
+
| `CACHE_ENABLED` | `true` | Enable response caching |
|
|
373
|
+
| `CACHE_TTL` | `3600` | Cache TTL in seconds (60-86400) |
|
|
374
|
+
| `LOG_LEVEL` | `info` | `debug`, `info`, `warn`, `error` |
|
|
375
|
+
| `ALLOWED_HOSTS` | (empty) | Additional allowed Host/Origin values (comma/space separated) |
|
|
376
|
+
|
|
377
|
+
### Auth (HTTP Mode)
|
|
378
|
+
|
|
379
|
+
| Variable | Default | Description |
|
|
380
|
+
| --------------- | ------- | ------------------------------------------------------------ |
|
|
381
|
+
| `AUTH_MODE` | auto | `static` or `oauth`. Auto-selects OAuth if any OAUTH URL set |
|
|
382
|
+
| `ACCESS_TOKENS` | (empty) | Comma/space-separated static bearer tokens |
|
|
383
|
+
| `API_KEY` | (empty) | Adds a static bearer token and enables `X-API-Key` header |
|
|
384
|
+
|
|
385
|
+
Static mode requires at least one token (`ACCESS_TOKENS` or `API_KEY`).
|
|
386
|
+
|
|
387
|
+
### OAuth (HTTP Mode)
|
|
388
|
+
|
|
389
|
+
Required when `AUTH_MODE=oauth` (or auto-selected by presence of OAuth URLs):
|
|
390
|
+
|
|
391
|
+
| Variable | Default | Description |
|
|
392
|
+
| ------------------------- | ------- | ---------------------- |
|
|
393
|
+
| `OAUTH_ISSUER_URL` | - | OAuth issuer |
|
|
394
|
+
| `OAUTH_AUTHORIZATION_URL` | - | Authorization endpoint |
|
|
395
|
+
| `OAUTH_TOKEN_URL` | - | Token endpoint |
|
|
396
|
+
| `OAUTH_INTROSPECTION_URL` | - | Introspection endpoint |
|
|
397
|
+
|
|
398
|
+
Optional:
|
|
399
|
+
|
|
400
|
+
| Variable | Default | Description |
|
|
401
|
+
| -------------------------------- | -------------------------- | --------------------------------------- |
|
|
402
|
+
| `OAUTH_REVOCATION_URL` | - | Revocation endpoint |
|
|
403
|
+
| `OAUTH_REGISTRATION_URL` | - | Dynamic client registration endpoint |
|
|
404
|
+
| `OAUTH_RESOURCE_URL` | `http://<host>:<port>/mcp` | Protected resource URL |
|
|
405
|
+
| `OAUTH_REQUIRED_SCOPES` | (empty) | Required scopes (comma/space separated) |
|
|
406
|
+
| `OAUTH_CLIENT_ID` | - | Client ID for introspection |
|
|
407
|
+
| `OAUTH_CLIENT_SECRET` | - | Client secret for introspection |
|
|
408
|
+
| `OAUTH_INTROSPECTION_TIMEOUT_MS` | `5000` | Introspection timeout (1000-30000) |
|
|
409
|
+
|
|
410
|
+
### Fixed Limits (Not Configurable via env)
|
|
411
|
+
|
|
412
|
+
- Fetch timeout: 15 seconds
|
|
413
|
+
- Max redirects: 5
|
|
414
|
+
- Max HTML response size: 10 MB
|
|
415
|
+
- Inline markdown limit: 20,000 characters
|
|
416
|
+
- Cache max entries: 100
|
|
417
|
+
- Session TTL: 30 minutes
|
|
418
|
+
- Max sessions: 200
|
|
419
|
+
- Rate limit: 100 req/min per IP (60s window)
|
|
420
|
+
|
|
421
|
+
See `CONFIGURATION.md` for preset examples and quick-start snippets.
|
|
442
422
|
|
|
443
423
|
---
|
|
444
424
|
|
|
@@ -450,28 +430,13 @@ HTTP mode uses the MCP Streamable HTTP transport. The workflow is:
|
|
|
450
430
|
2. The server returns `mcp-session-id` in the response headers.
|
|
451
431
|
3. Use that header for subsequent `POST /mcp`, `GET /mcp`, and `DELETE /mcp` requests.
|
|
452
432
|
|
|
453
|
-
`
|
|
433
|
+
If the `mcp-protocol-version` header is missing, the server defaults it to `2025-03-26`. Supported versions are `2025-03-26` and `2025-11-25`.
|
|
454
434
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
Host header validation is always enforced in HTTP mode. When binding to `0.0.0.0` or `::`, set `ALLOWED_HOSTS` to the hostnames clients will send. If an `Origin` header is present, it must be allowed by `ALLOWED_ORIGINS` or `CORS_ALLOW_ALL`.
|
|
458
|
-
|
|
459
|
-
---
|
|
460
|
-
|
|
461
|
-
## Content Block Types
|
|
435
|
+
`GET /mcp` and `DELETE /mcp` require `mcp-session-id`. A `POST /mcp` without `mcp-session-id` must be an `initialize` request; otherwise the server returns 400.
|
|
462
436
|
|
|
463
|
-
|
|
437
|
+
If the server reaches its session cap (200), it evicts the oldest session when possible; otherwise it returns a 503.
|
|
464
438
|
|
|
465
|
-
|
|
466
|
-
| ------------ | ---------------------------------------- |
|
|
467
|
-
| `metadata` | Minimal page metadata (type, title, url) |
|
|
468
|
-
| `heading` | Headings (h1-h6) with level indicator |
|
|
469
|
-
| `paragraph` | Text paragraphs |
|
|
470
|
-
| `list` | Ordered/unordered lists |
|
|
471
|
-
| `code` | Code blocks with optional language |
|
|
472
|
-
| `table` | Tables with headers and rows |
|
|
473
|
-
| `image` | Images with src and alt text |
|
|
474
|
-
| `blockquote` | Block quote text |
|
|
439
|
+
Host and Origin headers are always validated. Allowed values include loopback hosts, the configured `HOST` (if not a wildcard), and any entries in `ALLOWED_HOSTS`. When binding to `0.0.0.0` or `::`, set `ALLOWED_HOSTS` to the hostnames clients will send.
|
|
475
440
|
|
|
476
441
|
---
|
|
477
442
|
|
|
@@ -498,13 +463,14 @@ DNS resolution is performed and blocked if any resolved IP matches a blocked ran
|
|
|
498
463
|
- Max URL length: 2048 characters
|
|
499
464
|
- Hostnames ending in `.local` or `.internal` are rejected
|
|
500
465
|
|
|
501
|
-
###
|
|
466
|
+
### Host/Origin Validation (HTTP Mode)
|
|
502
467
|
|
|
503
|
-
|
|
468
|
+
- Host header must match loopback, configured `HOST` (if not a wildcard), or `ALLOWED_HOSTS`
|
|
469
|
+
- Origin header (when present) is validated against the same allow-list
|
|
504
470
|
|
|
505
471
|
### Rate Limiting
|
|
506
472
|
|
|
507
|
-
Rate limiting applies to `/mcp` and `/mcp/downloads`
|
|
473
|
+
Rate limiting applies to `/mcp` and `/mcp/downloads` (100 req/min per IP, 60s window). OPTIONS requests are not rate-limited.
|
|
508
474
|
|
|
509
475
|
---
|
|
510
476
|
|
|
@@ -522,8 +488,6 @@ Rate limiting applies to `/mcp` and `/mcp/downloads` and is configurable via `RA
|
|
|
522
488
|
| `npm run format` | Format with Prettier |
|
|
523
489
|
| `npm test` | Run Node test runner (builds dist) |
|
|
524
490
|
| `npm run test:coverage` | Run tests with experimental coverage |
|
|
525
|
-
| `npm run bench` | Run minimal performance benchmark |
|
|
526
|
-
| `npm run release` | Create new release |
|
|
527
491
|
| `npm run knip` | Find unused exports/dependencies |
|
|
528
492
|
| `npm run knip:fix` | Auto-fix unused code |
|
|
529
493
|
|
|
@@ -537,10 +501,10 @@ Rate limiting applies to `/mcp` and `/mcp/downloads` and is configurable via `RA
|
|
|
537
501
|
| Language | TypeScript 5.9 |
|
|
538
502
|
| MCP SDK | @modelcontextprotocol/sdk ^1.25.1 |
|
|
539
503
|
| Content Extraction | @mozilla/readability ^0.6.0 |
|
|
540
|
-
| HTML Parsing |
|
|
504
|
+
| HTML Parsing | LinkeDOM ^0.18.12 |
|
|
541
505
|
| Markdown | Turndown ^7.2.2 |
|
|
542
|
-
| HTTP | Express ^5.2.1, undici ^6.
|
|
543
|
-
| Validation | Zod ^4.3.
|
|
506
|
+
| HTTP | Express ^5.2.1, undici ^6.23.0 |
|
|
507
|
+
| Validation | Zod ^4.3.5 |
|
|
544
508
|
|
|
545
509
|
---
|
|
546
510
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Resolved authentication settings for HTTP mode.
 *
 * OAuth endpoint fields are `undefined` when the matching environment
 * variable is not set.
 */
export interface AuthConfig {
    /** Active auth strategy: taken from `AUTH_MODE`, otherwise auto-selected. */
    mode: 'oauth' | 'static';
    /** `OAUTH_ISSUER_URL`. */
    issuerUrl: URL | undefined;
    /** `OAUTH_AUTHORIZATION_URL`. */
    authorizationUrl: URL | undefined;
    /** `OAUTH_TOKEN_URL`. */
    tokenUrl: URL | undefined;
    /** `OAUTH_REVOCATION_URL`. */
    revocationUrl: URL | undefined;
    /** `OAUTH_REGISTRATION_URL`. */
    registrationUrl: URL | undefined;
    /** `OAUTH_INTROSPECTION_URL`. */
    introspectionUrl: URL | undefined;
    /** `OAUTH_RESOURCE_URL`; falls back to `/mcp` resolved against the server base URL. */
    resourceUrl: URL;
    /** Entries of `OAUTH_REQUIRED_SCOPES`. */
    requiredScopes: string[];
    /** `OAUTH_CLIENT_ID`. */
    clientId: string | undefined;
    /** `OAUTH_CLIENT_SECRET`. */
    clientSecret: string | undefined;
    /** `OAUTH_INTROSPECTION_TIMEOUT_MS` (default 5000, accepted range 1000-30000). */
    introspectionTimeoutMs: number;
    /** De-duplicated tokens from `ACCESS_TOKENS`, plus `API_KEY` when set. */
    staticTokens: string[];
}
/**
 * Builds the auth configuration from environment variables.
 *
 * @param baseUrl Server base URL used to derive the default `resourceUrl`.
 * @returns The resolved {@link AuthConfig}.
 */
export declare function buildAuthConfig(baseUrl: URL): AuthConfig;
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { parseInteger, parseList, parseUrlEnv } from './env-parsers.js';
|
|
2
|
+
/**
 * Reads the issuer, authorization and token endpoints from the environment.
 *
 * @returns `issuerUrl`, `authorizationUrl` and `tokenUrl`, each being whatever
 *   `parseUrlEnv` yields for the corresponding `OAUTH_*_URL` variable.
 */
function readCoreOAuthUrls() {
    const { OAUTH_ISSUER_URL, OAUTH_AUTHORIZATION_URL, OAUTH_TOKEN_URL } = process.env;
    const issuerUrl = parseUrlEnv(OAUTH_ISSUER_URL, 'OAUTH_ISSUER_URL');
    const authorizationUrl = parseUrlEnv(OAUTH_AUTHORIZATION_URL, 'OAUTH_AUTHORIZATION_URL');
    const tokenUrl = parseUrlEnv(OAUTH_TOKEN_URL, 'OAUTH_TOKEN_URL');
    return { issuerUrl, authorizationUrl, tokenUrl };
}
|
|
9
|
+
/**
 * Reads the revocation, registration, introspection and resource URLs from
 * the environment.
 *
 * @param baseUrl Base used to resolve the default resource URL (`/mcp`) when
 *   `OAUTH_RESOURCE_URL` yields no value.
 * @returns `revocationUrl`, `registrationUrl`, `introspectionUrl` and
 *   `resourceUrl`.
 */
function readOptionalOAuthUrls(baseUrl) {
    const env = process.env;
    const revocationUrl = parseUrlEnv(env.OAUTH_REVOCATION_URL, 'OAUTH_REVOCATION_URL');
    const registrationUrl = parseUrlEnv(env.OAUTH_REGISTRATION_URL, 'OAUTH_REGISTRATION_URL');
    const introspectionUrl = parseUrlEnv(env.OAUTH_INTROSPECTION_URL, 'OAUTH_INTROSPECTION_URL');
    const configuredResourceUrl = parseUrlEnv(env.OAUTH_RESOURCE_URL, 'OAUTH_RESOURCE_URL');
    const resourceUrl = configuredResourceUrl ?? new URL('/mcp', baseUrl);
    return { revocationUrl, registrationUrl, introspectionUrl, resourceUrl };
}
|
|
18
|
+
/**
 * Combines the core and optional OAuth URL groups into one object.
 *
 * @param baseUrl Forwarded to `readOptionalOAuthUrls`.
 * @returns A single object holding every OAuth URL field.
 */
function readOAuthUrls(baseUrl) {
    const coreUrls = readCoreOAuthUrls();
    const optionalUrls = readOptionalOAuthUrls(baseUrl);
    return { ...coreUrls, ...optionalUrls };
}
|
|
21
|
+
/**
 * Chooses the auth mode.
 *
 * An explicit `'oauth'` or `'static'` is returned as-is. Any other value
 * (including `undefined`) triggers auto-selection: `'oauth'` when at least one
 * of the issuer, authorization, token or introspection URLs is defined,
 * otherwise `'static'`.
 *
 * @param authModeEnv Requested mode, already lower-cased by the caller.
 * @param urls Object carrying the OAuth URL fields.
 * @returns `'oauth'` or `'static'`.
 */
function resolveAuthMode(authModeEnv, urls) {
    switch (authModeEnv) {
        case 'oauth':
        case 'static':
            return authModeEnv;
        default:
            break;
    }
    const { issuerUrl, authorizationUrl, tokenUrl, introspectionUrl } = urls;
    const hasOAuthEndpoint = issuerUrl !== undefined ||
        authorizationUrl !== undefined ||
        tokenUrl !== undefined ||
        introspectionUrl !== undefined;
    if (hasOAuthEndpoint) {
        return 'oauth';
    }
    return 'static';
}
|
|
34
|
+
/**
 * Gathers the static bearer tokens: the entries of `ACCESS_TOKENS` followed
 * by `API_KEY` when it is set, with duplicates removed (first occurrence wins).
 *
 * @returns The unique tokens in insertion order.
 */
function collectStaticTokens() {
    const apiKey = process.env.API_KEY;
    const candidates = [
        ...parseList(process.env.ACCESS_TOKENS),
        ...(apiKey ? [apiKey] : []),
    ];
    return [...new Set(candidates)];
}
|
|
41
|
+
/**
 * Assembles the auth configuration from environment variables.
 *
 * @param baseUrl Server base URL; used for the default protected-resource URL.
 * @returns An object with the resolved `mode`, every OAuth URL field, the
 *   required scopes, client credentials, introspection timeout
 *   (default 5000, bounds 1000-30000) and the static tokens.
 */
export function buildAuthConfig(baseUrl) {
    const env = process.env;
    const urls = readOAuthUrls(baseUrl);
    const requestedMode = env.AUTH_MODE?.toLowerCase();
    const mode = resolveAuthMode(requestedMode, urls);
    const requiredScopes = parseList(env.OAUTH_REQUIRED_SCOPES);
    const introspectionTimeoutMs = parseInteger(env.OAUTH_INTROSPECTION_TIMEOUT_MS, 5000, 1000, 30000);
    const staticTokens = collectStaticTokens();
    return {
        mode,
        ...urls,
        requiredScopes,
        clientId: env.OAUTH_CLIENT_ID,
        clientSecret: env.OAUTH_CLIENT_SECRET,
        introspectionTimeoutMs,
        staticTokens,
    };
}
|
|
@@ -1,19 +1,17 @@
|
|
|
1
1
|
export declare const SIZE_LIMITS: {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
2
|
+
ONE_MB: number;
|
|
3
|
+
FIVE_MB: number;
|
|
4
|
+
TEN_MB: number;
|
|
5
|
+
FIFTY_MB: number;
|
|
6
|
+
HUNDRED_MB: number;
|
|
7
7
|
};
|
|
8
8
|
export declare const CACHE_HASH: {
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
URL_HASH_LENGTH: number;
|
|
10
|
+
VARY_HASH_LENGTH: number;
|
|
11
11
|
};
|
|
12
12
|
export declare const TIMEOUT: {
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
readonly DEFAULT_SESSION_TTL_MS: number;
|
|
18
|
-
readonly MAX_SESSION_TTL_MS: number;
|
|
13
|
+
DEFAULT_FETCH_TIMEOUT_MS: number;
|
|
14
|
+
MIN_SESSION_TTL_MS: number;
|
|
15
|
+
DEFAULT_SESSION_TTL_MS: number;
|
|
16
|
+
MAX_SESSION_TTL_MS: number;
|
|
19
17
|
};
|
package/dist/config/constants.js
CHANGED
|
@@ -15,9 +15,7 @@ export const CACHE_HASH = {
|
|
|
15
15
|
VARY_HASH_LENGTH: 12,
|
|
16
16
|
};
|
|
17
17
|
export const TIMEOUT = {
|
|
18
|
-
|
|
19
|
-
DEFAULT_FETCH_TIMEOUT_MS: 30000,
|
|
20
|
-
MAX_FETCH_TIMEOUT_MS: 120000,
|
|
18
|
+
DEFAULT_FETCH_TIMEOUT_MS: 15000,
|
|
21
19
|
MIN_SESSION_TTL_MS: 60 * 1000,
|
|
22
20
|
DEFAULT_SESSION_TTL_MS: 30 * 60 * 1000,
|
|
23
21
|
MAX_SESSION_TTL_MS: 24 * 60 * 60 * 1000,
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { LogLevel } from './types/runtime.js';
|
|
2
|
+
/**
 * Parses an integer setting.
 *
 * @returns `defaultValue` when the input is unset, empty, not numeric, or
 *   outside the optional `min`/`max` bounds; otherwise the parsed integer.
 */
export declare function parseInteger(envValue: string | undefined, defaultValue: number, min?: number, max?: number): number;
/**
 * Parses a boolean setting.
 *
 * @returns `defaultValue` when the input is unset or empty; `false` for the
 *   value `false`.
 */
export declare function parseBoolean(envValue: string | undefined, defaultValue: boolean): boolean;
/**
 * Splits a comma- and/or whitespace-separated setting into its non-empty entries.
 *
 * @returns The entries in order; an empty array when the input is unset or empty.
 */
export declare function parseList(envValue: string | undefined): string[];
/**
 * Parses a URL setting.
 *
 * @param name Variable name, used in the error message.
 * @returns The parsed URL, or `undefined` when the input is unset or empty.
 * @throws Error when the value is set but cannot be parsed as a URL.
 */
export declare function parseUrlEnv(value: string | undefined, name: string): URL | undefined;
/**
 * Parses a list of allowed hosts into a set of normalized host names.
 */
export declare function parseAllowedHosts(envValue: string | undefined): Set<string>;
/**
 * Parses a log level name case-insensitively.
 *
 * @returns The matching level, or `'info'` when the input is unset or not recognized.
 */
export declare function parseLogLevel(envValue: string | undefined): LogLevel;
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
 * Normalizes one host entry to a bare, lower-cased host name.
 *
 * Accepted forms:
 * - `host` or `host:port`             -> `host`
 * - `[ipv6]` or `[ipv6]:port`         -> `ipv6`
 * - unbracketed IPv6 such as `::1`    -> returned unchanged
 *
 * @param value Raw host entry; surrounding whitespace is ignored.
 * @returns The normalized host, or `null` when the entry is blank, has an
 *   unterminated `[`, or has no host part (e.g. `[]` or `:8080`).
 */
function normalizeHostValue(value) {
    const trimmed = value.trim().toLowerCase();
    if (!trimmed)
        return null;
    if (trimmed.startsWith('[')) {
        const end = trimmed.indexOf(']');
        if (end === -1)
            return null;
        return trimmed.slice(1, end) || null;
    }
    const firstColon = trimmed.indexOf(':');
    if (firstColon === -1)
        return trimmed;
    // Two or more colons cannot be `host:port`; this is an unbracketed IPv6
    // literal. Cutting it at the first colon would reduce `::1` to an empty
    // string and `2001:db8::1` to `2001`.
    if (firstColon !== trimmed.lastIndexOf(':'))
        return trimmed;
    return trimmed.slice(0, firstColon) || null;
}
|
|
17
|
+
/** Log level names accepted from configuration. */
const ALLOWED_LOG_LEVELS = new Set(['debug', 'info', 'warn', 'error']);
/**
 * Tells whether `value` is one of the accepted log level names.
 * The match is exact (case-sensitive).
 */
function isLogLevel(value) {
    const recognized = ALLOWED_LOG_LEVELS.has(value);
    return recognized;
}
|
|
26
|
+
/** Returns true when a lower bound is supplied and `value` is below it. */
function isBelowMin(value, min) {
    return min !== undefined && value < min;
}
/** Returns true when an upper bound is supplied and `value` is above it. */
function isAboveMax(value, max) {
    return max !== undefined && value > max;
}
/** An optionally signed run of decimal digits and nothing else. */
const INTEGER_PATTERN = /^[+-]?\d+$/;
/**
 * Parses an integer setting, falling back to `defaultValue` on any invalid input.
 *
 * The whole value (ignoring surrounding whitespace) must be a decimal integer.
 * `parseInt` on its own stops at the first non-digit, so values such as
 * `3000abc`, `1.5` or `1e3` would otherwise be silently accepted as 3000, 1
 * and 1; they are rejected here.
 *
 * @param envValue Raw value; `undefined` or empty yields `defaultValue`.
 * @param defaultValue Returned when the value is missing, malformed or out of range.
 * @param min Optional inclusive lower bound.
 * @param max Optional inclusive upper bound.
 * @returns The parsed integer, or `defaultValue`.
 */
export function parseInteger(envValue, defaultValue, min, max) {
    if (!envValue)
        return defaultValue;
    const text = envValue.trim();
    if (!INTEGER_PATTERN.test(text))
        return defaultValue;
    const parsed = Number.parseInt(text, 10);
    if (isBelowMin(parsed, min) || isAboveMax(parsed, max))
        return defaultValue;
    return parsed;
}
|
|
48
|
+
/**
 * Parses a boolean setting.
 *
 * The comparison ignores case and surrounding whitespace, so `FALSE`,
 * `False` and ` false ` all disable the flag; an exact, case-sensitive match
 * would treat them as enabled. Every other non-blank value (including `0`)
 * still yields `true`, as before.
 *
 * @param envValue Raw value; `undefined`, empty or blank yields `defaultValue`.
 * @param defaultValue Returned when no value is provided.
 * @returns `false` for the word `false`, `true` for any other non-blank value.
 */
export function parseBoolean(envValue, defaultValue) {
    if (!envValue)
        return defaultValue;
    const normalized = envValue.trim().toLowerCase();
    if (!normalized)
        return defaultValue;
    return normalized !== 'false';
}
|
|
53
|
+
/**
 * Splits a setting on runs of commas and/or whitespace.
 *
 * @param envValue Raw value; `undefined` or empty yields an empty array.
 * @returns The non-empty entries, in their original order.
 */
export function parseList(envValue) {
    const entries = [];
    if (envValue) {
        for (const piece of envValue.split(/[\s,]+/)) {
            const cleaned = piece.trim();
            if (cleaned.length > 0) {
                entries.push(cleaned);
            }
        }
    }
    return entries;
}
|
|
61
|
+
/**
 * Parses a URL setting.
 *
 * @param value Raw value; `undefined` or empty yields `undefined`.
 * @param name Variable name, included in the error message.
 * @returns The parsed `URL`, or `undefined` when no value is provided.
 * @throws {Error} When the value is present but `URL.canParse` rejects it.
 */
export function parseUrlEnv(value, name) {
    if (value) {
        if (URL.canParse(value)) {
            return new URL(value);
        }
        throw new Error(`Invalid ${name} value: ${value}`);
    }
    return undefined;
}
|
|
69
|
+
/**
 * Builds the set of allowed hosts from a list-style setting.
 *
 * Each entry is passed through `normalizeHostValue`; entries for which it
 * yields nothing usable are skipped.
 *
 * @param envValue Raw comma/whitespace-separated list.
 * @returns A set of normalized hosts (empty when nothing is configured).
 */
export function parseAllowedHosts(envValue) {
    const normalizedHosts = parseList(envValue)
        .map((entry) => normalizeHostValue(entry))
        .filter((host) => Boolean(host));
    return new Set(normalizedHosts);
}
|
|
79
|
+
/**
 * Resolves the log level from a setting, case-insensitively.
 *
 * @param envValue Raw value.
 * @returns The lower-cased level when `isLogLevel` accepts it, otherwise `'info'`.
 */
export function parseLogLevel(envValue) {
    const candidate = envValue?.toLowerCase();
    if (candidate && isLogLevel(candidate)) {
        return candidate;
    }
    return 'info';
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export declare const TRUNCATION_MARKER = "...[truncated]";
|
|
2
2
|
export declare const CODE_BLOCK: {
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
fence: string;
|
|
4
|
+
format: (code: string, language?: string) => string;
|
|
5
5
|
};
|
|
6
6
|
export declare const FRONTMATTER_DELIMITER = "---";
|
|
7
7
|
export declare const joinLines: (lines: readonly string[]) => string;
|