@j0hanz/superfetch 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/README.md +139 -46
  2. package/dist/cache.d.ts +42 -0
  3. package/dist/cache.js +565 -0
  4. package/dist/config/env-parsers.d.ts +1 -0
  5. package/dist/config/env-parsers.js +12 -0
  6. package/dist/config/index.d.ts +7 -0
  7. package/dist/config/index.js +20 -8
  8. package/dist/config/types/content.d.ts +1 -0
  9. package/dist/config.d.ts +77 -0
  10. package/dist/config.js +261 -0
  11. package/dist/crypto.d.ts +2 -0
  12. package/dist/crypto.js +32 -0
  13. package/dist/errors.d.ts +10 -0
  14. package/dist/errors.js +28 -0
  15. package/dist/fetch.d.ts +40 -0
  16. package/dist/fetch.js +910 -0
  17. package/dist/http/auth.js +161 -2
  18. package/dist/http/base-middleware.d.ts +7 -0
  19. package/dist/http/base-middleware.js +143 -0
  20. package/dist/http/cors.d.ts +0 -5
  21. package/dist/http/cors.js +0 -6
  22. package/dist/http/download-routes.js +6 -2
  23. package/dist/http/error-handler.d.ts +2 -0
  24. package/dist/http/error-handler.js +55 -0
  25. package/dist/http/host-allowlist.d.ts +3 -0
  26. package/dist/http/host-allowlist.js +117 -0
  27. package/dist/http/mcp-routes.d.ts +8 -2
  28. package/dist/http/mcp-routes.js +101 -8
  29. package/dist/http/mcp-session-eviction.d.ts +3 -0
  30. package/dist/http/mcp-session-eviction.js +24 -0
  31. package/dist/http/mcp-session-init.d.ts +7 -0
  32. package/dist/http/mcp-session-init.js +94 -0
  33. package/dist/http/mcp-session-slots.d.ts +17 -0
  34. package/dist/http/mcp-session-slots.js +55 -0
  35. package/dist/http/mcp-session-transport-init.d.ts +7 -0
  36. package/dist/http/mcp-session-transport-init.js +41 -0
  37. package/dist/http/mcp-session-types.d.ts +5 -0
  38. package/dist/http/mcp-session-types.js +1 -0
  39. package/dist/http/mcp-session.d.ts +9 -9
  40. package/dist/http/mcp-session.js +5 -114
  41. package/dist/http/mcp-sessions.d.ts +41 -0
  42. package/dist/http/mcp-sessions.js +392 -0
  43. package/dist/http/rate-limit.js +2 -2
  44. package/dist/http/server-middleware.d.ts +6 -1
  45. package/dist/http/server-middleware.js +3 -117
  46. package/dist/http/server-shutdown.js +1 -1
  47. package/dist/http/server-tuning.d.ts +9 -0
  48. package/dist/http/server-tuning.js +45 -0
  49. package/dist/http/server.js +206 -9
  50. package/dist/http/session-cleanup.js +8 -5
  51. package/dist/http.d.ts +78 -0
  52. package/dist/http.js +1437 -0
  53. package/dist/index.js +3 -3
  54. package/dist/mcp.d.ts +3 -0
  55. package/dist/mcp.js +94 -0
  56. package/dist/middleware/error-handler.d.ts +1 -1
  57. package/dist/middleware/error-handler.js +31 -30
  58. package/dist/observability.d.ts +16 -0
  59. package/dist/observability.js +78 -0
  60. package/dist/resources/cached-content-params.d.ts +5 -0
  61. package/dist/resources/cached-content-params.js +36 -0
  62. package/dist/resources/cached-content.js +33 -33
  63. package/dist/server.js +21 -6
  64. package/dist/services/cache-events.d.ts +8 -0
  65. package/dist/services/cache-events.js +19 -0
  66. package/dist/services/cache.d.ts +5 -4
  67. package/dist/services/cache.js +49 -45
  68. package/dist/services/context.d.ts +2 -0
  69. package/dist/services/context.js +3 -0
  70. package/dist/services/extractor.d.ts +1 -0
  71. package/dist/services/extractor.js +77 -40
  72. package/dist/services/fetcher/agents.js +1 -1
  73. package/dist/services/fetcher/dns-selection.js +1 -1
  74. package/dist/services/fetcher/interceptors.js +29 -60
  75. package/dist/services/fetcher/redirects.js +12 -4
  76. package/dist/services/fetcher/response.js +18 -8
  77. package/dist/services/fetcher.d.ts +23 -0
  78. package/dist/services/fetcher.js +553 -13
  79. package/dist/services/logger.js +4 -1
  80. package/dist/services/telemetry.d.ts +19 -0
  81. package/dist/services/telemetry.js +43 -0
  82. package/dist/services/transform-worker-pool.d.ts +10 -3
  83. package/dist/services/transform-worker-pool.js +213 -184
  84. package/dist/tools/handlers/fetch-single.shared.d.ts +11 -3
  85. package/dist/tools/handlers/fetch-single.shared.js +131 -2
  86. package/dist/tools/handlers/fetch-url.tool.d.ts +6 -0
  87. package/dist/tools/handlers/fetch-url.tool.js +56 -12
  88. package/dist/tools/index.d.ts +1 -0
  89. package/dist/tools/index.js +13 -1
  90. package/dist/tools/schemas.d.ts +2 -0
  91. package/dist/tools/schemas.js +8 -0
  92. package/dist/tools/utils/content-shaping.js +19 -4
  93. package/dist/tools/utils/content-transform-core.d.ts +5 -0
  94. package/dist/tools/utils/content-transform-core.js +180 -0
  95. package/dist/tools/utils/content-transform-workers.d.ts +1 -0
  96. package/dist/tools/utils/content-transform-workers.js +1 -0
  97. package/dist/tools/utils/content-transform.d.ts +2 -1
  98. package/dist/tools/utils/content-transform.js +37 -136
  99. package/dist/tools/utils/fetch-pipeline.js +47 -56
  100. package/dist/tools/utils/frontmatter.d.ts +3 -0
  101. package/dist/tools/utils/frontmatter.js +73 -0
  102. package/dist/tools/utils/markdown-heuristics.d.ts +1 -0
  103. package/dist/tools/utils/markdown-heuristics.js +19 -0
  104. package/dist/tools/utils/markdown-signals.d.ts +1 -0
  105. package/dist/tools/utils/markdown-signals.js +19 -0
  106. package/dist/tools/utils/raw-markdown-frontmatter.d.ts +3 -0
  107. package/dist/tools/utils/raw-markdown-frontmatter.js +73 -0
  108. package/dist/tools/utils/raw-markdown.d.ts +6 -0
  109. package/dist/tools/utils/raw-markdown.js +149 -0
  110. package/dist/tools.d.ts +104 -0
  111. package/dist/tools.js +421 -0
  112. package/dist/transform.d.ts +69 -0
  113. package/dist/transform.js +1509 -0
  114. package/dist/transformers/markdown/fenced-code-rule.d.ts +2 -0
  115. package/dist/transformers/markdown/fenced-code-rule.js +38 -0
  116. package/dist/transformers/markdown/frontmatter.d.ts +2 -0
  117. package/dist/transformers/markdown/frontmatter.js +45 -0
  118. package/dist/transformers/markdown/noise-rule.d.ts +2 -0
  119. package/dist/transformers/markdown/noise-rule.js +80 -0
  120. package/dist/transformers/markdown/turndown-instance.d.ts +2 -0
  121. package/dist/transformers/markdown/turndown-instance.js +19 -0
  122. package/dist/transformers/markdown.d.ts +5 -0
  123. package/dist/transformers/markdown.js +314 -0
  124. package/dist/transformers/markdown.transformer.js +2 -189
  125. package/dist/utils/cancellation.d.ts +1 -0
  126. package/dist/utils/cancellation.js +18 -0
  127. package/dist/utils/code-language-bash.d.ts +1 -0
  128. package/dist/utils/code-language-bash.js +48 -0
  129. package/dist/utils/code-language-core.d.ts +2 -0
  130. package/dist/utils/code-language-core.js +13 -0
  131. package/dist/utils/code-language-detectors.d.ts +5 -0
  132. package/dist/utils/code-language-detectors.js +142 -0
  133. package/dist/utils/code-language-helpers.d.ts +5 -0
  134. package/dist/utils/code-language-helpers.js +62 -0
  135. package/dist/utils/code-language-parsing.d.ts +5 -0
  136. package/dist/utils/code-language-parsing.js +62 -0
  137. package/dist/utils/code-language.js +250 -46
  138. package/dist/utils/error-details.d.ts +3 -0
  139. package/dist/utils/error-details.js +12 -0
  140. package/dist/utils/filename-generator.js +14 -3
  141. package/dist/utils/host-normalizer.d.ts +1 -0
  142. package/dist/utils/host-normalizer.js +37 -0
  143. package/dist/utils/ip-address.d.ts +4 -0
  144. package/dist/utils/ip-address.js +6 -0
  145. package/dist/utils/tool-error-handler.js +12 -17
  146. package/dist/utils/url-redactor.d.ts +1 -0
  147. package/dist/utils/url-redactor.js +13 -0
  148. package/dist/utils/url-validator.js +35 -20
  149. package/dist/workers/transform-worker.js +82 -38
  150. package/package.json +13 -10
package/README.md CHANGED
@@ -1,8 +1,10 @@
1
1
  # superFetch MCP Server
2
2
 
3
+ <!-- markdownlint-disable MD033 -->
4
+
3
5
  <img src="docs/logo.png" alt="SuperFetch MCP Logo" width="200">
4
6
 
5
- [![npm version](https://img.shields.io/npm/v/@j0hanz/superfetch.svg)](https://www.npmjs.com/package/@j0hanz/superfetch) [![Node.js](https://img.shields.io/badge/Node.js-%3E=20.12-339933?logo=nodedotjs&logoColor=white)](https://nodejs.org/) [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
7
+ [![npm version](https://img.shields.io/npm/v/@j0hanz/superfetch.svg)](https://www.npmjs.com/package/@j0hanz/superfetch) [![Node.js](https://img.shields.io/badge/Node.js-%3E=20.18.1-339933?logo=nodedotjs&logoColor=white)](https://nodejs.org/) [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
6
8
 
7
9
  ## One-Click Install
8
10
 
@@ -12,6 +14,17 @@
12
14
 
13
15
  A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches web pages, extracts readable content with Mozilla Readability, and returns AI-friendly Markdown.
14
16
 
17
+ Built for AI workflows that need _clean text_, _stable metadata_, and _safe-by-default fetching_.
18
+
19
+ **You get, in one tool call:**
20
+
21
+ - **Readable Markdown** (HTML → Readability → Markdown)
22
+ - **Metadata frontmatter** for HTML (title, source, author, description, fetchedAt)
23
+ - **Raw markdown passthrough** (GitHub/GitLab/Bitbucket/Gist URLs auto-rewritten to raw when possible)
24
+ - **Cache + resources** for large pages (MCP `superfetch://cache/...` resources and optional download endpoint in HTTP mode)
25
+
26
+ **Great for:** docs ingestion, RAG pipelines, research agents, changelog/news summarization, and “fetch this URL and cite it” workflows.
27
+
15
28
  [Quick Start](#quick-start) | [Tool](#available-tools) | [Resources](#resources) | [Configuration](#configuration) | [Security](#security) | [Development](#development)
16
29
 
17
30
  > **Published to [MCP Registry](https://registry.modelcontextprotocol.io/)** - Search for `io.github.j0hanz/superfetch`
@@ -23,21 +36,39 @@ A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that f
23
36
 
24
37
  ## Features
25
38
 
26
- | Feature | Description |
27
- | -------------------- | ------------------------------------------------------------------------------------- |
28
- | Smart extraction | Mozilla Readability with quality gates to strip boilerplate when it improves results |
29
- | Clean Markdown | Markdown output with optional YAML frontmatter (title + source) |
30
- | Raw content handling | Preserves raw markdown/text and rewrites GitHub/GitLab/Bitbucket blob URLs to raw |
31
- | Built-in caching | In-memory cache with TTL, max keys, and resource subscriptions |
32
- | Resilient fetching | Redirect handling with validation, timeouts, and response size limits |
33
- | Security first | URL validation plus SSRF/DNS/IP blocklists |
34
- | HTTP mode | Static token or OAuth auth, session management, rate limiting, host/origin validation |
39
+ - **Cleaner outputs for LLMs**: Readability extraction with a quality gate to avoid making pages worse
40
+ - **Markdown that’s easy to consume**: YAML frontmatter for HTML + consistent Markdown formatting
41
+ - **Handles “raw content” sources**: preserves markdown/text; rewrites GitHub/GitLab/Bitbucket/Gist URLs to raw
42
+ - **Works for both local and hosted setups**:
43
+ - **Stdio mode**: best for MCP clients (VS Code / Claude Desktop / Cursor)
44
+ - **HTTP mode**: best for self-hosting (auth, sessions, rate limiting, Host/Origin validation)
45
+ - **Fast and resilient**: redirect validation, timeouts, and response size limits
46
+ - **Security-first defaults**: URL validation + SSRF/DNS/IP blocklists (blocks private ranges and cloud metadata endpoints)
47
+
48
+ If you’re comparing “just call `fetch()`” vs superFetch: superFetch focuses on _readability_, _consistent structure_, and _safe URL access_ for agent environments.
49
+
50
+ ## What it is (and isn’t)
51
+
52
+ - **It is** a “URL → clean Markdown” tool designed for agent/RAG workflows.
53
+ - **It isn’t** a headless browser: pages that require heavy client-side rendering may not extract well.
54
+ - **It isn’t** a crawler: it fetches a single URL per call (your agent can decide what to fetch next).
55
+ - **It’s opinionated on safety**: blocked private IP ranges and metadata endpoints are a feature, not a bug.
35
56
 
36
57
  ---
37
58
 
38
59
  ## Quick Start
39
60
 
40
- Add superFetch to your MCP client configuration - no installation required.
61
+ Recommended: use **stdio mode** with your MCP client (no HTTP server).
62
+
63
+ ### Try it in 60 seconds
64
+
65
+ 1. Add the MCP server config (below)
66
+ 2. Restart your MCP client
67
+ 3. Call the `fetch-url` tool with any public URL
68
+
69
+ ### What the tool returns
70
+
71
+ You’ll get `structuredContent` with `url`, `inputUrl`, `resolvedUrl`, optional `title`, and `markdown` (plus a `superfetch://cache/...` resource link when cache is enabled and content is large).
41
72
 
42
73
  ### Claude Desktop
43
74
 
@@ -74,6 +105,23 @@ Add to `.vscode/mcp.json` in your workspace:
74
105
  Add environment variables in your MCP client config under `env`.
75
106
  See [Configuration](#configuration) or `CONFIGURATION.md` for all available options and presets.
76
107
 
108
+ ### Example output (trimmed)
109
+
110
+ ```json
111
+ {
112
+ "url": "https://example.com/docs",
113
+ "inputUrl": "https://example.com/docs",
114
+ "resolvedUrl": "https://example.com/docs",
115
+ "title": "Documentation",
116
+ "markdown": "---\ntitle: Documentation\nsource: https://example.com/docs\nfetchedAt: 2026-01-12T12:00:00.000Z\n---\n\n# Getting Started\n\n..."
117
+ }
118
+ ```
119
+
120
+ > **Tip (Windows):** If you encounter issues, try: `cmd /c "npx -y @j0hanz/superfetch@latest --stdio"`
121
+
122
+ <details>
123
+ <summary><strong>Other clients (Cursor, Cline, Windsurf, Codex)</strong></summary>
124
+
77
125
  ### Cursor
78
126
 
79
127
  1. Open Cursor Settings
@@ -92,8 +140,6 @@ See [Configuration](#configuration) or `CONFIGURATION.md` for all available opti
92
140
  }
93
141
  ```
94
142
 
95
- > **Tip (Windows):** If you encounter issues, try: `cmd /c "npx -y @j0hanz/superfetch@latest --stdio"`
96
-
97
143
  <details>
98
144
  <summary><strong>Codex IDE</strong></summary>
99
145
 
@@ -188,6 +234,35 @@ code %APPDATA%\Claude\claude_desktop_config.json
188
234
 
189
235
  </details>
190
236
 
237
+ </details>
238
+
239
+ ---
240
+
241
+ ## Use cases
242
+
243
+ ### 1) Turn a docs page into “LLM-ready” Markdown
244
+
245
+ - Call `fetch-url` with the docs URL
246
+ - Feed the returned `markdown` into your summarizer / chunker
247
+ - Use the YAML frontmatter fields (especially `source`) for citations
248
+
249
+ ### 2) Fetch a GitHub/GitLab/Bitbucket file as raw markdown
250
+
251
+ - Pass the normal “web UI” URL to `fetch-url`
252
+ - superFetch will rewrite it to the raw content URL when possible
253
+ - This avoids navigation UI and reduces boilerplate
254
+
255
+ ### 3) Large pages: keep responses stable with cache resources
256
+
257
+ - When content is large, the tool can include a `superfetch://cache/...` resource link
258
+ - In MCP clients that support resources, you can read the full content via the resource URI
259
+ - In HTTP mode, you can also download cached content via `/mcp/downloads/:namespace/:hash` when cache is enabled
260
+
261
+ ### 4) Safe-by-default web access for agents
262
+
263
+ - superFetch blocks private IP ranges and common cloud metadata endpoints
264
+ - If your agent needs internal access, this is intentionally not supported by default (see Security)
265
+
191
266
  ---
192
267
 
193
268
  ## Installation (Alternative)
@@ -227,7 +302,7 @@ node dist/index.js --stdio
227
302
  <details>
228
303
  <summary><strong>HTTP Mode</strong> (default)</summary>
229
304
 
230
- HTTP mode requires authentication. By default it binds to `127.0.0.1`. To listen on all interfaces, set `HOST=0.0.0.0` or `HOST=::` and configure OAuth (remote bindings require OAuth). Other non-loopback `HOST` values are rejected.
305
+ HTTP mode requires authentication. By default it binds to `127.0.0.1`. Non-loopback `HOST` values require `ALLOW_REMOTE=true`. To listen on all interfaces, set `HOST=0.0.0.0` or `HOST=::`, set `ALLOW_REMOTE=true`, and configure OAuth (remote bindings require OAuth).
231
306
 
232
307
  ```bash
233
308
  API_KEY=supersecret npx -y @j0hanz/superfetch@latest
@@ -243,13 +318,15 @@ npx -y @j0hanz/superfetch@latest
243
318
 
244
319
  For multiple static tokens, set `ACCESS_TOKENS` (comma/space separated).
245
320
 
246
- Endpoints (auth required via `Authorization: Bearer <token>`; in static token mode, `X-API-Key` is also accepted):
321
+ Auth is required for `/mcp` and `/mcp/downloads` via `Authorization: Bearer <token>` (static mode also accepts `X-API-Key`).
247
322
 
248
- - `GET /health`
249
- - `POST /mcp`
250
- - `GET /mcp` (SSE stream)
251
- - `DELETE /mcp`
252
- - `GET /mcp/downloads/:namespace/:hash`
323
+ Endpoints:
324
+
325
+ - `GET /health` (no auth; returns status, name, version, uptime)
326
+ - `POST /mcp` (auth required)
327
+ - `GET /mcp` (auth required; SSE stream; requires `Accept: text/event-stream`)
328
+ - `DELETE /mcp` (auth required)
329
+ - `GET /mcp/downloads/:namespace/:hash` (auth required)
253
330
 
254
331
  Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#http-mode-details)).
255
332
 
@@ -261,19 +338,20 @@ Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#h
261
338
 
262
339
  ### Tool Response Notes
263
340
 
264
- The tool returns `structuredContent` with `url`, optional `title`, and `markdown` when inline content is available. On errors, `error` is included instead of content.
341
+ The tool returns `structuredContent` with `url`, `inputUrl`, `resolvedUrl`, optional `title`, and `markdown` when inline content is available. `resolvedUrl` may differ from `inputUrl` when the URL is rewritten to raw content (GitHub/GitLab/Bitbucket/Gist). On errors, `error` is included instead of content.
265
342
 
266
343
  The response includes:
267
344
 
268
345
  - a `text` block containing JSON of `structuredContent`
269
- - a `resource` block embedding markdown when inline content is available (always in stdio mode)
270
- - when content exceeds the inline limit and cache is enabled, a `resource_link` block pointing to `superfetch://cache/...` (inline markdown may be omitted)
346
+ - a `resource` block embedding markdown when inline content is available (always for successful stdio responses)
347
+ - when content exceeds the inline limit and cache is enabled, a `resource_link` block pointing to `superfetch://cache/...` (stdio mode still embeds full markdown; HTTP mode omits embedded markdown)
348
+ - error responses set `isError: true` and return `structuredContent` with `error` and `url`
271
349
 
272
350
  ---
273
351
 
274
352
  ### `fetch-url`
275
353
 
276
- Fetches a webpage and converts it to clean Markdown format with optional frontmatter.
354
+ Fetches a webpage and converts it to clean Markdown format with YAML frontmatter for HTML (raw markdown is preserved).
277
355
 
278
356
  | Parameter | Type | Default | Description |
279
357
  | --------- | ------ | -------- | ------------ |
@@ -284,6 +362,8 @@ Fetches a webpage and converts it to clean Markdown format with optional frontma
284
362
  ```json
285
363
  {
286
364
  "url": "https://example.com/docs",
365
+ "inputUrl": "https://example.com/docs",
366
+ "resolvedUrl": "https://example.com/docs",
287
367
  "title": "Documentation",
288
368
  "markdown": "---\ntitle: Documentation\n---\n\n# Getting Started\n\nWelcome..."
289
369
  }
@@ -303,8 +383,8 @@ Fetches a webpage and converts it to clean Markdown format with optional frontma
303
383
  ### Large Content Handling
304
384
 
305
385
  - Inline markdown is capped at 20,000 characters (`maxInlineContentChars`).
306
- - **Stdio mode:** full markdown is embedded as a `resource` block.
307
- - **HTTP mode:** if content exceeds the inline limit and cache is enabled, the response includes a `resource_link` to `superfetch://cache/...` (no embedded markdown). If cache is disabled, the inline markdown is truncated with `...[truncated]`.
386
+ - **Stdio mode:** full markdown is embedded as a `resource` block; if cache is enabled and content exceeds the inline limit, a `resource_link` is still included.
387
+ - **HTTP mode:** if content exceeds the inline limit and cache is enabled, the response includes a `resource_link` to `superfetch://cache/...` and omits embedded markdown. If cache is disabled, the inline markdown is truncated with `...[truncated]`.
308
388
  - Upstream fetch size is capped at 10 MB of HTML; larger responses fail.
309
389
 
310
390
  ---
@@ -364,23 +444,26 @@ Set environment variables in your MCP client `env` or in the shell before starti
364
444
 
365
445
  ### Core Server Settings
366
446
 
367
- | Variable | Default | Description |
368
- | --------------- | -------------------- | ------------------------------------------------------------- |
369
- | `HOST` | `127.0.0.1` | HTTP bind address |
370
- | `PORT` | `3000` | HTTP server port (1024-65535) |
371
- | `USER_AGENT` | `superFetch-MCP/2.0` | User-Agent header for outgoing requests |
372
- | `CACHE_ENABLED` | `true` | Enable response caching |
373
- | `CACHE_TTL` | `3600` | Cache TTL in seconds (60-86400) |
374
- | `LOG_LEVEL` | `info` | `debug`, `info`, `warn`, `error` |
375
- | `ALLOWED_HOSTS` | (empty) | Additional allowed Host/Origin values (comma/space separated) |
447
+ | Variable | Default | Description |
448
+ | --------------- | -------------------- | --------------------------------------------------------------------------------- |
449
+ | `HOST` | `127.0.0.1` | HTTP bind address |
450
+ | `PORT` | `3000` | HTTP server port (1024-65535) |
451
+ | `USER_AGENT` | `superFetch-MCP/2.0` | User-Agent header for outgoing requests |
452
+ | `CACHE_ENABLED` | `true` | Enable response caching |
453
+ | `CACHE_TTL` | `3600` | Cache TTL in seconds (60-86400) |
454
+ | `LOG_LEVEL` | `info` | Logging level. Only `debug` enables verbose logs; other values behave like `info` |
455
+ | `ALLOW_REMOTE` | `false` | Allow binding to non-loopback hosts (OAuth required) |
456
+ | `ALLOWED_HOSTS` | (empty) | Additional allowed Host/Origin values (comma/space separated) |
457
+
458
+ For HTTP server tuning (`SERVER_HEADERS_TIMEOUT_MS`, `SERVER_REQUEST_TIMEOUT_MS`, `SERVER_KEEP_ALIVE_TIMEOUT_MS`, `SERVER_SHUTDOWN_CLOSE_IDLE`, `SERVER_SHUTDOWN_CLOSE_ALL`), see `CONFIGURATION.md`.
376
459
 
377
460
  ### Auth (HTTP Mode)
378
461
 
379
- | Variable | Default | Description |
380
- | --------------- | ------- | ------------------------------------------------------------ |
381
- | `AUTH_MODE` | auto | `static` or `oauth`. Auto-selects OAuth if any OAUTH URL set |
382
- | `ACCESS_TOKENS` | (empty) | Comma/space-separated static bearer tokens |
383
- | `API_KEY` | (empty) | Adds a static bearer token and enables `X-API-Key` header |
462
+ | Variable | Default | Description |
463
+ | --------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
464
+ | `AUTH_MODE` | auto | `static` or `oauth`. Auto-selects OAuth if OAUTH_ISSUER_URL, OAUTH_AUTHORIZATION_URL, OAUTH_TOKEN_URL, or OAUTH_INTROSPECTION_URL is set |
465
+ | `ACCESS_TOKENS` | (empty) | Comma/space-separated static bearer tokens |
466
+ | `API_KEY` | (empty) | Adds a static bearer token and enables `X-API-Key` header |
384
467
 
385
468
  Static mode requires at least one token (`ACCESS_TOKENS` or `API_KEY`).
386
469
 
@@ -415,6 +498,7 @@ Optional:
415
498
  - Inline markdown limit: 20,000 characters
416
499
  - Cache max entries: 100
417
500
  - Session TTL: 30 minutes
501
+ - Session init timeout: 10 seconds
418
502
  - Max sessions: 200
419
503
  - Rate limit: 100 req/min per IP (60s window)
420
504
 
@@ -430,10 +514,15 @@ HTTP mode uses the MCP Streamable HTTP transport. The workflow is:
430
514
  2. The server returns `mcp-session-id` in the response headers.
431
515
  3. Use that header for subsequent `POST /mcp`, `GET /mcp`, and `DELETE /mcp` requests.
432
516
 
433
- If the `mcp-protocol-version` header is missing, the server defaults it to `2025-03-26`. Supported versions are `2025-03-26` and `2025-11-25`.
517
+ If the `mcp-protocol-version` header is missing, the server assumes `2025-03-26` and rejects the request because only `2025-11-25` is supported. Clients must send `mcp-protocol-version: 2025-11-25`.
434
518
 
435
519
  `GET /mcp` and `DELETE /mcp` require `mcp-session-id`. `POST /mcp` without an `initialize` request will return 400.
436
520
 
521
+ Additional HTTP transport notes:
522
+
523
+ - `GET /mcp` requires `Accept: text/event-stream` (otherwise 406).
524
+ - JSON-RPC batch requests are not supported (400).
525
+
437
526
  If the server reaches its session cap (200), it evicts the oldest session when possible; otherwise it returns a 503.
438
527
 
439
528
  Host and Origin headers are always validated. Allowed values include loopback hosts, the configured `HOST` (if not a wildcard), and any entries in `ALLOWED_HOSTS`. When binding to `0.0.0.0` or `::`, set `ALLOWED_HOSTS` to the hostnames clients will send.
@@ -484,12 +573,14 @@ Rate limiting applies to `/mcp` and `/mcp/downloads` (100 req/min per IP, 60s wi
484
573
  | `npm run build` | Compile TypeScript |
485
574
  | `npm start` | Production server |
486
575
  | `npm run lint` | Run ESLint |
576
+ | `npm run lint:fix` | Auto-fix lint issues |
487
577
  | `npm run type-check` | TypeScript type checking |
488
578
  | `npm run format` | Format with Prettier |
489
579
  | `npm test` | Run Node test runner (builds dist) |
490
580
  | `npm run test:coverage` | Run tests with experimental coverage |
491
581
  | `npm run knip` | Find unused exports/dependencies |
492
582
  | `npm run knip:fix` | Auto-fix unused code |
583
+ | `npm run inspector` | Launch MCP Inspector |
493
584
 
494
585
  > **Note:** Tests run via `node --test` with `--experimental-transform-types` to execute `.ts` test files. Node will emit an experimental warning.
495
586
 
@@ -497,13 +588,13 @@ Rate limiting applies to `/mcp` and `/mcp/downloads` (100 req/min per IP, 60s wi
497
588
 
498
589
  | Category | Technology |
499
590
  | ------------------ | --------------------------------- |
500
- | Runtime | Node.js >=20.12 |
591
+ | Runtime | Node.js >=20.18.1 |
501
592
  | Language | TypeScript 5.9 |
502
- | MCP SDK | @modelcontextprotocol/sdk ^1.25.1 |
593
+ | MCP SDK | @modelcontextprotocol/sdk ^1.25.2 |
503
594
  | Content Extraction | @mozilla/readability ^0.6.0 |
504
- | HTML Parsing | LinkeDOM ^0.18.12 |
505
- | Markdown | Turndown ^7.2.2 |
506
- | HTTP | Express ^5.2.1, undici ^6.23.0 |
595
+ | HTML Parsing | linkedom ^0.18.12 |
596
+ | Markdown | node-html-markdown ^2.0.0 |
597
+ | HTTP | Express ^5.2.1, undici ^7.18.2 |
507
598
  | Validation | Zod ^4.3.5 |
508
599
 
509
600
  ---
@@ -519,3 +610,5 @@ Rate limiting applies to `/mcp` and `/mcp/downloads` (100 req/min per IP, 60s wi
519
610
  7. Open a Pull Request
520
611
 
521
612
  For examples of other MCP servers, see: [github.com/modelcontextprotocol/servers](https://github.com/modelcontextprotocol/servers)
613
+
614
+ <!-- markdownlint-enable MD033 -->
@@ -0,0 +1,42 @@
1
+ import type { Express } from 'express';
2
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
3
+ export interface CacheEntry {
4
+ url: string;
5
+ title?: string;
6
+ content: string;
7
+ fetchedAt: string;
8
+ expiresAt: string;
9
+ }
10
+ export interface CachedPayload {
11
+ content?: string;
12
+ markdown?: string;
13
+ title?: string;
14
+ }
15
+ export declare function parseCachedPayload(raw: string): CachedPayload | null;
16
+ export declare function resolveCachedPayloadContent(payload: CachedPayload): string | null;
17
+ export interface CacheKeyParts {
18
+ namespace: string;
19
+ urlHash: string;
20
+ }
21
+ export declare function createCacheKey(namespace: string, url: string, vary?: Record<string, unknown> | string): string | null;
22
+ export declare function parseCacheKey(cacheKey: string): CacheKeyParts | null;
23
+ export declare function toResourceUri(cacheKey: string): string | null;
24
+ interface CacheUpdateEvent {
25
+ cacheKey: string;
26
+ namespace: string;
27
+ urlHash: string;
28
+ }
29
+ type CacheUpdateListener = (event: CacheUpdateEvent) => void;
30
+ export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
31
+ interface CacheEntryMetadata {
32
+ url: string;
33
+ title?: string;
34
+ }
35
+ export declare function get(cacheKey: string | null): CacheEntry | undefined;
36
+ export declare function set(cacheKey: string | null, content: string, metadata: CacheEntryMetadata): void;
37
+ export declare function keys(): readonly string[];
38
+ export declare function isEnabled(): boolean;
39
+ export declare function registerCachedContentResource(server: McpServer): void;
40
+ export declare function generateSafeFilename(url: string, title?: string, hashFallback?: string, extension?: string): string;
41
+ export declare function registerDownloadRoutes(app: Express): void;
42
+ export {};