npm - @j0hanz/superfetch - Versions diffs - 1.2.5 → 2.0.0 - Mend

@j0hanz/superfetch 1.2.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116)

package/README.md +116 -152
package/dist/config/auth-config.d.ts +16 -0
package/dist/config/auth-config.js +53 -0
package/dist/config/constants.d.ts +11 -13
package/dist/config/constants.js +1 -3
package/dist/config/env-parsers.d.ts +7 -0
package/dist/config/env-parsers.js +84 -0
package/dist/config/formatting.d.ts +2 -2
package/dist/config/index.d.ts +47 -53
package/dist/config/index.js +25 -59
package/dist/config/types/content.d.ts +1 -49
package/dist/config/types/runtime.d.ts +8 -16
package/dist/config/types/tools.d.ts +2 -28
package/dist/http/accept-policy.d.ts +3 -0
package/dist/http/accept-policy.js +45 -0
package/dist/http/async-handler.d.ts +2 -0
package/dist/http/async-handler.js +5 -0
package/dist/http/auth-introspection.d.ts +2 -0
package/dist/http/auth-introspection.js +141 -0
package/dist/http/auth-static.d.ts +2 -0
package/dist/http/auth-static.js +23 -0
package/dist/http/auth.d.ts +3 -2
package/dist/http/auth.js +98 -26
package/dist/http/cors.d.ts +6 -6
package/dist/http/cors.js +7 -42
package/dist/http/download-routes.d.ts +0 -12
package/dist/http/download-routes.js +21 -58
package/dist/http/jsonrpc-http.d.ts +2 -0
package/dist/http/jsonrpc-http.js +10 -0
package/dist/http/mcp-routes.d.ts +0 -1
package/dist/http/mcp-routes.js +43 -30
package/dist/http/mcp-session-helpers.d.ts +0 -1
package/dist/http/mcp-session-helpers.js +1 -1
package/dist/http/mcp-session-transport.d.ts +7 -0
package/dist/http/mcp-session-transport.js +57 -0
package/dist/http/mcp-session.js +60 -73
package/dist/http/mcp-validation.d.ts +1 -0
package/dist/http/mcp-validation.js +11 -10
package/dist/http/protocol-policy.d.ts +2 -0
package/dist/http/protocol-policy.js +31 -0
package/dist/http/rate-limit.js +5 -2
package/dist/http/server-config.d.ts +1 -0
package/dist/http/server-config.js +40 -0
package/dist/http/server-middleware.d.ts +2 -9
package/dist/http/server-middleware.js +96 -43
package/dist/http/server-shutdown.d.ts +4 -0
package/dist/http/server-shutdown.js +43 -0
package/dist/http/server.js +52 -64
package/dist/http/session-cleanup.js +1 -1
package/dist/middleware/error-handler.js +1 -3
package/dist/resources/cached-content.js +50 -108
package/dist/resources/index.js +0 -82
package/dist/server.js +51 -30
package/dist/services/cache-keys.d.ts +7 -0
package/dist/services/cache-keys.js +57 -0
package/dist/services/cache.d.ts +1 -7
package/dist/services/cache.js +53 -119
package/dist/services/context.d.ts +0 -1
package/dist/services/context.js +0 -7
package/dist/services/extractor.js +10 -82
package/dist/services/fetcher/agents.d.ts +2 -2
package/dist/services/fetcher/agents.js +34 -95
package/dist/services/fetcher/dns-selection.d.ts +2 -0
package/dist/services/fetcher/dns-selection.js +72 -0
package/dist/services/fetcher/interceptors.d.ts +0 -22
package/dist/services/fetcher/interceptors.js +30 -13
package/dist/services/fetcher/redirects.js +4 -3
package/dist/services/fetcher/response.js +66 -31
package/dist/services/fetcher.d.ts +1 -3
package/dist/services/fetcher.js +14 -33
package/dist/services/fifo-queue.d.ts +8 -0
package/dist/services/fifo-queue.js +25 -0
package/dist/services/logger.js +2 -2
package/dist/services/metadata-collector.d.ts +1 -9
package/dist/services/metadata-collector.js +71 -2
package/dist/services/transform-worker-pool.d.ts +4 -14
package/dist/services/transform-worker-pool.js +177 -129
package/dist/services/transform-worker-types.d.ts +32 -0
package/dist/services/transform-worker-types.js +14 -0
package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
package/dist/tools/handlers/fetch-single.shared.d.ts +1 -20
package/dist/tools/handlers/fetch-single.shared.js +44 -87
package/dist/tools/handlers/fetch-url.tool.d.ts +1 -1
package/dist/tools/handlers/fetch-url.tool.js +46 -123
package/dist/tools/index.js +21 -40
package/dist/tools/schemas.d.ts +1 -51
package/dist/tools/schemas.js +1 -107
package/dist/tools/utils/cached-markdown.d.ts +5 -0
package/dist/tools/utils/cached-markdown.js +46 -0
package/dist/tools/utils/content-shaping.d.ts +4 -0
package/dist/tools/utils/content-shaping.js +52 -0
package/dist/tools/utils/content-transform.d.ts +2 -17
package/dist/tools/utils/content-transform.js +120 -114
package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
package/dist/tools/utils/fetch-pipeline.js +65 -62
package/dist/tools/utils/inline-content.d.ts +1 -2
package/dist/tools/utils/inline-content.js +4 -7
package/dist/transformers/markdown.transformer.js +109 -34
package/dist/utils/cached-payload.d.ts +7 -0
package/dist/utils/cached-payload.js +36 -0
package/dist/utils/error-utils.js +1 -1
package/dist/utils/filename-generator.js +21 -10
package/dist/utils/guards.d.ts +1 -0
package/dist/utils/guards.js +3 -0
package/dist/utils/header-normalizer.d.ts +0 -3
package/dist/utils/header-normalizer.js +3 -3
package/dist/utils/tool-error-handler.d.ts +2 -2
package/dist/utils/tool-error-handler.js +11 -38
package/dist/utils/url-transformer.d.ts +7 -0
package/dist/utils/url-transformer.js +147 -0
package/dist/utils/url-validator.d.ts +1 -2
package/dist/utils/url-validator.js +20 -93
package/dist/workers/content-transform.worker.d.ts +1 -0
package/dist/workers/content-transform.worker.js +40 -0
package/package.json +13 -16

package/README.md CHANGED Viewed

@@ -10,9 +10,9 @@
 [![Install in Cursor](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/install-mcp?name=superfetch&config=eyJjb21tYW5kIjoibnB4IiwiYXJncyI6WyIteSIsIkBqMGhhbnovc3VwZXJmZXRjaEBsYXRlc3QiLCItLXN0ZGlvIl19)
-A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches web pages, extracts readable content with Mozilla Readability, and returns AI-friendly JSONL or Markdown.
+A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that fetches web pages, extracts readable content with Mozilla Readability, and returns AI-friendly Markdown.
-[Quick Start](#quick-start) | [How to Choose a Tool](#how-to-choose-a-tool) | [Tools](#available-tools) | [Resources](#resources) | [Configuration](#configuration) | [Security](#security) | [Development](#development)
+[Quick Start](#quick-start) | [Tool](#available-tools) | [Resources](#resources) | [Configuration](#configuration) | [Security](#security) | [Development](#development)
 > **Published to [MCP Registry](https://registry.modelcontextprotocol.io/)** - Search for `io.github.j0hanz/superfetch`
@@ -23,45 +23,15 @@ A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that f
 ## Features
-| Feature            | Description                                                               |
-| ------------------ | ------------------------------------------------------------------------- |
-| Smart extraction   | Mozilla Readability removes ads, navigation, and boilerplate when enabled |
-| JSONL + Markdown   | JSONL semantic blocks or clean Markdown with frontmatter                  |
-| Structured blocks  | Headings, paragraphs, lists, code, tables, images, blockquotes            |
-| Built-in caching   | In-memory cache with TTL, max keys, and resource subscriptions            |
-| Resilient fetching | Redirect handling plus retry with exponential backoff + jitter            |
-| Security first     | URL validation, SSRF/DNS/IP blocklists, header sanitization               |
-| HTTP mode          | API key auth, session management, rate limiting, CORS                     |
----
-## How to Choose a Tool
-Use this guide to select the right tool for your web content extraction needs.
-### Decision Tree
-```text
-Need web content for AI?
-- Want structured JSONL blocks -> fetch-url (format: jsonl)
-- Want clean Markdown -> fetch-markdown
-- Want Markdown but also need contentBlocks count -> fetch-url (format: markdown)
-```
-### Quick Reference Table
-| Tool             | Best For                           | Output Format                    | Use When                                  |
-| ---------------- | ---------------------------------- | -------------------------------- | ----------------------------------------- |
-| `fetch-url`      | Single page with structured blocks | JSONL (or Markdown via `format`) | RAG pipelines, content parsing, analytics |
-| `fetch-markdown` | Single page in readable format     | Markdown + frontmatter           | Documentation, summaries, human review    |
-### Common Use Cases
-| Task                     | Recommended Tool                         | Why                                                  |
-| ------------------------ | ---------------------------------------- | ---------------------------------------------------- |
-| Parse a blog post for AI | `fetch-url`                              | Returns semantic blocks (headings, paragraphs, code) |
-| Generate documentation   | `fetch-markdown`                         | Clean markdown with frontmatter                      |
-| Extract article for RAG  | `fetch-url` + `extractMainContent: true` | Removes ads/nav, keeps main content                  |
+| Feature              | Description                                                                           |
+| -------------------- | ------------------------------------------------------------------------------------- |
+| Smart extraction     | Mozilla Readability with quality gates to strip boilerplate when it improves results  |
+| Clean Markdown       | Markdown output with optional YAML frontmatter (title + source)                       |
+| Raw content handling | Preserves raw markdown/text and rewrites GitHub/GitLab/Bitbucket blob URLs to raw     |
+| Built-in caching     | In-memory cache with TTL, max keys, and resource subscriptions                        |
+| Resilient fetching   | Redirect handling with validation, timeouts, and response size limits                 |
+| Security first       | URL validation plus SSRF/DNS/IP blocklists                                            |
+| HTTP mode            | Static token or OAuth auth, session management, rate limiting, host/origin validation |
 ---
@@ -230,7 +200,7 @@ npm install -g @j0hanz/superfetch
 # Run in stdio mode
 superfetch --stdio
-# Run HTTP server (requires API_KEY)
+# Run HTTP server (requires auth token)
 superfetch
 ```
@@ -257,7 +227,7 @@ node dist/index.js --stdio
 <details>
 <summary><strong>HTTP Mode</strong> (default)</summary>
-HTTP mode requires `API_KEY` and only binds to loopback addresses unless `ALLOW_REMOTE=true`.
+HTTP mode requires authentication. By default it binds to `127.0.0.1`. To listen on all interfaces, set `HOST=0.0.0.0` or `HOST=::` and configure OAuth (remote bindings require OAuth). Other non-loopback `HOST` values are rejected.
 ```bash
 API_KEY=supersecret npx -y @j0hanz/superfetch@latest
@@ -271,7 +241,9 @@ $env:API_KEY = "supersecret"
 npx -y @j0hanz/superfetch@latest
 ```
-Endpoints (all require `Authorization: Bearer <API_KEY>` or `X-API-Key: <API_KEY>`):
+For multiple static tokens, set `ACCESS_TOKENS` (comma/space separated).
+Endpoints (auth required via `Authorization: Bearer <token>`; in static token mode, `X-API-Key` is also accepted):
 - `GET /health`
 - `POST /mcp`
@@ -289,111 +261,61 @@ Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#h
 ### Tool Response Notes
-Both tools return:
+The tool returns `structuredContent` with `url`, optional `title`, and `markdown` when inline content is available. On errors, `error` is included instead of content.
-- `structuredContent` for machine-readable fields (includes `contentSize`, `cached`, and optional `resourceUri`/`resourceMimeType`/`truncated`; Markdown responses may also include `file`)
-- `content` blocks that include:
-  - a `text` block containing JSON of `structuredContent`
-  - in stdio mode, a `resource` block with a `file:///...` URI embedding the full content
-  - in HTTP mode, a `resource` block when inline content is available
-  - when content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is enabled, a `resource_link` block points to `superfetch://cache/...` and `structuredContent.resourceUri` is set
+The response includes:
-If content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is disabled, the server truncates output, appends `...[truncated]`, and sets `truncated: true`.
+- a `text` block containing JSON of `structuredContent`
+- a `resource` block embedding markdown when inline content is available (always in stdio mode)
+- when content exceeds the inline limit and cache is enabled, a `resource_link` block pointing to `superfetch://cache/...` (inline markdown may be omitted)
 ---
 ### `fetch-url`
-Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. You can also request Markdown with `format: "markdown"`.
-| Parameter              | Type                  | Default                            | Description                                            |
-| ---------------------- | --------------------- | ---------------------------------- | ------------------------------------------------------ |
-| `url`                  | string                | required                           | URL to fetch                                           |
-| `format`               | "jsonl" \| "markdown" | `"jsonl"`                          | Output format                                          |
-| `includeContentBlocks` | boolean               | `true` (jsonl), `false` (markdown) | Include content block counts when `format: "markdown"` |
-| `extractMainContent`   | boolean               | `true`                             | Use Readability to extract main content                |
-| `includeMetadata`      | boolean               | `true`                             | Include page metadata                                  |
-| `maxContentLength`     | number                | -                                  | Maximum content length in characters (max 5,242,880)   |
-| `customHeaders`        | object                | -                                  | Custom HTTP headers (sanitized)                        |
-| `timeout`              | number                | `30000`                            | Request timeout in milliseconds (1000-120000)          |
-| `retries`              | number                | `3`                                | Number of retry attempts (1-10)                        |
+Fetches a webpage and converts it to clean Markdown format with optional frontmatter.
-When `format: "markdown"` and `includeContentBlocks` is `false`, `contentBlocks` will be `0`.
+| Parameter | Type   | Default  | Description  |
+| --------- | ------ | -------- | ------------ |
+| `url`     | string | required | URL to fetch |
 **Example `structuredContent`:**
 ```json
 {
-  "url": "https://example.com/article",
-  "title": "Example Article",
-  "contentBlocks": 42,
-  "fetchedAt": "2025-12-11T10:30:00.000Z",
-  "format": "jsonl",
-  "contentSize": 12345,
-  "cached": false,
-  "content": "{\"type\":\"metadata\",\"title\":\"Example Article\",\"url\":\"https://example.com/article\"}\n{\"type\":\"heading\",\"level\":1,\"text\":\"Introduction\"}"
+  "url": "https://example.com/docs",
+  "title": "Documentation",
+  "markdown": "---\ntitle: Documentation\n---\n\n# Getting Started\n\nWelcome..."
 }
 ```
----
-### `fetch-markdown`
-Fetches a webpage and converts it to clean Markdown with optional frontmatter.
-| Parameter            | Type    | Default  | Description                                          |
-| -------------------- | ------- | -------- | ---------------------------------------------------- |
-| `url`                | string  | required | URL to fetch                                         |
-| `extractMainContent` | boolean | `true`   | Extract main content only                            |
-| `includeMetadata`    | boolean | `true`   | Include YAML frontmatter                             |
-| `maxContentLength`   | number  | -        | Maximum content length in characters (max 5,242,880) |
-| `customHeaders`      | object  | -        | Custom HTTP headers (sanitized)                      |
-| `timeout`            | number  | `30000`  | Request timeout in milliseconds (1000-120000)        |
-| `retries`            | number  | `3`      | Number of retry attempts (1-10)                      |
-**Example `structuredContent`:**
+**Error response:**
 ```json
 {
-  "url": "https://example.com/docs",
-  "title": "Documentation",
-  "fetchedAt": "2025-12-11T10:30:00.000Z",
-  "markdown": "---\ntitle: Documentation\nsource: \"https://example.com/docs\"\n---\n\n# Getting Started\n\nWelcome...",
-  "contentSize": 9876,
-  "cached": false,
-  "truncated": false,
-  "file": {
-    "downloadUrl": "/mcp/downloads/markdown/abc123def456",
-    "fileName": "documentation.md",
-    "expiresAt": "2025-12-11T11:30:00.000Z"
-  }
+  "url": "https://example.com/broken",
+  "error": "Failed to fetch: 404 Not Found"
 }
 ```
-`file` is included only in HTTP mode when content is cached and too large to inline.
 ---
 ### Large Content Handling
-- Inline limit is configurable via `MAX_INLINE_CONTENT_CHARS` (see `CONFIGURATION.md`).
-- If content exceeds the limit and cache is enabled, responses include `resourceUri`/`resourceMimeType` and a `resource_link` block.
-- If cache is disabled, content is truncated with `...[truncated]` and `truncated: true`.
-- Use `maxContentLength` per request to enforce a lower limit (hard cap: 5,242,880 characters).
+- Inline markdown is capped at 20,000 characters (`maxInlineContentChars`).
+- **Stdio mode:** full markdown is embedded as a `resource` block.
+- **HTTP mode:** if content exceeds the inline limit and cache is enabled, the response includes a `resource_link` to `superfetch://cache/...` (no embedded markdown). If cache is disabled, the inline markdown is truncated with `...[truncated]`.
 - Upstream fetch size is capped at 10 MB of HTML; larger responses fail.
 ---
 ## Resources
-| URI                                        | Description                                                                |
-| ------------------------------------------ | -------------------------------------------------------------------------- |
-| `superfetch://health`                      | Real-time server health and memory checks                                  |
-| `superfetch://stats`                       | Server stats and cache metrics                                             |
-| `superfetch://cache/list`                  | List cached entries and their resource URIs                                |
-| `superfetch://cache/{namespace}/{urlHash}` | Cached content entry (`namespace`: `url`, `markdown`; `links` is reserved) |
+| URI                                        | Description                                    |
+| ------------------------------------------ | ---------------------------------------------- |
+| `superfetch://cache/{namespace}/{urlHash}` | Cached content entry (`namespace`: `markdown`) |
-Resource subscriptions notify clients when cache entries update.
+Resource listings enumerate cached entries, and subscriptions notify clients when cache entries update.
 ---
@@ -407,21 +329,21 @@ When running in HTTP mode, cached content can be downloaded directly. Downloads
 GET /mcp/downloads/:namespace/:hash
 ```
-- `namespace`: `markdown` or `url`
-- Auth required (`Authorization: Bearer <API_KEY>` or `X-API-Key: <API_KEY>`)
+- `namespace`: `markdown`
+- Auth required (`Authorization: Bearer <token>`; in static token mode, `X-API-Key` is accepted)
 ### Response Headers
-| Header                | Value                                                                   |
-| --------------------- | ----------------------------------------------------------------------- |
-| `Content-Type`        | `text/markdown; charset=utf-8` or `application/x-ndjson; charset=utf-8` |
-| `Content-Disposition` | `attachment; filename="<name>"`                                         |
-| `Cache-Control`       | `private, max-age=<CACHE_TTL>`                                          |
+| Header                | Value                           |
+| --------------------- | ------------------------------- |
+| `Content-Type`        | `text/markdown; charset=utf-8`  |
+| `Content-Disposition` | `attachment; filename="<name>"` |
+| `Cache-Control`       | `private, max-age=<CACHE_TTL>`  |
 ### Example Usage
 ```bash
-curl -H "Authorization: Bearer $API_KEY" \
+curl -H "Authorization: Bearer $TOKEN" \
   http://localhost:3000/mcp/downloads/markdown/abc123.def456 \
   -o article.md
 ```
@@ -438,7 +360,65 @@ curl -H "Authorization: Bearer $API_KEY" \
 ## Configuration
-Configuration details live in `CONFIGURATION.md`, including all environment variables, defaults, ranges, presets, and dev-only flags.
+Set environment variables in your MCP client `env` or in the shell before starting the server.
+### Core Server Settings
+| Variable        | Default              | Description                                                   |
+| --------------- | -------------------- | ------------------------------------------------------------- |
+| `HOST`          | `127.0.0.1`          | HTTP bind address                                             |
+| `PORT`          | `3000`               | HTTP server port (1024-65535)                                 |
+| `USER_AGENT`    | `superFetch-MCP/2.0` | User-Agent header for outgoing requests                       |
+| `CACHE_ENABLED` | `true`               | Enable response caching                                       |
+| `CACHE_TTL`     | `3600`               | Cache TTL in seconds (60-86400)                               |
+| `LOG_LEVEL`     | `info`               | `debug`, `info`, `warn`, `error`                              |
+| `ALLOWED_HOSTS` | (empty)              | Additional allowed Host/Origin values (comma/space separated) |
+### Auth (HTTP Mode)
+| Variable        | Default | Description                                                  |
+| --------------- | ------- | ------------------------------------------------------------ |
+| `AUTH_MODE`     | auto    | `static` or `oauth`. Auto-selects OAuth if any OAUTH URL set |
+| `ACCESS_TOKENS` | (empty) | Comma/space-separated static bearer tokens                   |
+| `API_KEY`       | (empty) | Adds a static bearer token and enables `X-API-Key` header    |
+Static mode requires at least one token (`ACCESS_TOKENS` or `API_KEY`).
+### OAuth (HTTP Mode)
+Required when `AUTH_MODE=oauth` (or auto-selected by presence of OAuth URLs):
+| Variable                  | Default | Description            |
+| ------------------------- | ------- | ---------------------- |
+| `OAUTH_ISSUER_URL`        | -       | OAuth issuer           |
+| `OAUTH_AUTHORIZATION_URL` | -       | Authorization endpoint |
+| `OAUTH_TOKEN_URL`         | -       | Token endpoint         |
+| `OAUTH_INTROSPECTION_URL` | -       | Introspection endpoint |
+Optional:
+| Variable                         | Default                    | Description                             |
+| -------------------------------- | -------------------------- | --------------------------------------- |
+| `OAUTH_REVOCATION_URL`           | -                          | Revocation endpoint                     |
+| `OAUTH_REGISTRATION_URL`         | -                          | Dynamic client registration endpoint    |
+| `OAUTH_RESOURCE_URL`             | `http://<host>:<port>/mcp` | Protected resource URL                  |
+| `OAUTH_REQUIRED_SCOPES`          | (empty)                    | Required scopes (comma/space separated) |
+| `OAUTH_CLIENT_ID`                | -                          | Client ID for introspection             |
+| `OAUTH_CLIENT_SECRET`            | -                          | Client secret for introspection         |
+| `OAUTH_INTROSPECTION_TIMEOUT_MS` | `5000`                     | Introspection timeout (1000-30000)      |
+### Fixed Limits (Not Configurable via env)
+- Fetch timeout: 15 seconds
+- Max redirects: 5
+- Max HTML response size: 10 MB
+- Inline markdown limit: 20,000 characters
+- Cache max entries: 100
+- Session TTL: 30 minutes
+- Max sessions: 200
+- Rate limit: 100 req/min per IP (60s window)
+See `CONFIGURATION.md` for preset examples and quick-start snippets.
 ---
@@ -450,28 +430,13 @@ HTTP mode uses the MCP Streamable HTTP transport. The workflow is:
 2. The server returns `mcp-session-id` in the response headers.
 3. Use that header for subsequent `POST /mcp`, `GET /mcp`, and `DELETE /mcp` requests.
-`GET /mcp` and `DELETE /mcp` require `mcp-session-id`. `POST /mcp` without an `initialize` request will return 400.
+If the `mcp-protocol-version` header is missing, the server defaults it to `2025-03-26`. Supported versions are `2025-03-26` and `2025-11-25`.
-If `MAX_SESSIONS` is reached, the server evicts the oldest session when possible, otherwise returns a 503.
-Host header validation is always enforced in HTTP mode. When binding to `0.0.0.0` or `::`, set `ALLOWED_HOSTS` to the hostnames clients will send. If an `Origin` header is present, it must be allowed by `ALLOWED_ORIGINS` or `CORS_ALLOW_ALL`.
----
-## Content Block Types
+`GET /mcp` and `DELETE /mcp` require `mcp-session-id`. `POST /mcp` without an `initialize` request will return 400.
-JSONL output includes semantic content blocks:
+If the server reaches its session cap (200), it evicts the oldest session when possible; otherwise it returns a 503.
-| Type         | Description                              |
-| ------------ | ---------------------------------------- |
-| `metadata`   | Minimal page metadata (type, title, url) |
-| `heading`    | Headings (h1-h6) with level indicator    |
-| `paragraph`  | Text paragraphs                          |
-| `list`       | Ordered/unordered lists                  |
-| `code`       | Code blocks with optional language       |
-| `table`      | Tables with headers and rows             |
-| `image`      | Images with src and alt text             |
-| `blockquote` | Block quote text                         |
+Host and Origin headers are always validated. Allowed values include loopback hosts, the configured `HOST` (if not a wildcard), and any entries in `ALLOWED_HOSTS`. When binding to `0.0.0.0` or `::`, set `ALLOWED_HOSTS` to the hostnames clients will send.
 ---
@@ -498,13 +463,14 @@ DNS resolution is performed and blocked if any resolved IP matches a blocked ran
 - Max URL length: 2048 characters
 - Hostnames ending in `.local` or `.internal` are rejected
-### Header Sanitization
+### Host/Origin Validation (HTTP Mode)
-Blocked headers: `host`, `authorization`, `cookie`, `x-forwarded-for`, `x-real-ip`, `proxy-authorization`
+- Host header must match loopback, configured `HOST` (if not a wildcard), or `ALLOWED_HOSTS`
+- Origin header (when present) is validated against the same allow-list
 ### Rate Limiting
-Rate limiting applies to `/mcp` and `/mcp/downloads` and is configurable via `RATE_LIMIT_ENABLED`, `RATE_LIMIT_MAX`, `RATE_LIMIT_WINDOW_MS`, and `RATE_LIMIT_CLEANUP_MS` (see `CONFIGURATION.md`).
+Rate limiting applies to `/mcp` and `/mcp/downloads` (100 req/min per IP, 60s window). OPTIONS requests are not rate-limited.
 ---
@@ -522,8 +488,6 @@ Rate limiting applies to `/mcp` and `/mcp/downloads` and is configurable via `RA
 | `npm run format`        | Format with Prettier                 |
 | `npm test`              | Run Node test runner (builds dist)   |
 | `npm run test:coverage` | Run tests with experimental coverage |
-| `npm run bench`         | Run minimal performance benchmark    |
-| `npm run release`       | Create new release                   |
 | `npm run knip`          | Find unused exports/dependencies     |
 | `npm run knip:fix`      | Auto-fix unused code                 |
@@ -537,10 +501,10 @@ Rate limiting applies to `/mcp` and `/mcp/downloads` and is configurable via `RA
 | Language           | TypeScript 5.9                    |
 | MCP SDK            | @modelcontextprotocol/sdk ^1.25.1 |
 | Content Extraction | @mozilla/readability ^0.6.0       |
-| HTML Parsing       | Cheerio ^1.1.2, LinkeDOM ^0.18.12 |
+| HTML Parsing       | LinkeDOM ^0.18.12                 |
 | Markdown           | Turndown ^7.2.2                   |
-| HTTP               | Express ^5.2.1, undici ^6.22.0    |
-| Validation         | Zod ^4.3.4                        |
+| HTTP               | Express ^5.2.1, undici ^6.23.0    |
+| Validation         | Zod ^4.3.5                        |
 ---

package/dist/config/auth-config.d.ts ADDED Viewed

@@ -0,0 +1,16 @@
+export interface AuthConfig {
+    mode: 'oauth' | 'static';
+    issuerUrl: URL | undefined;
+    authorizationUrl: URL | undefined;
+    tokenUrl: URL | undefined;
+    revocationUrl: URL | undefined;
+    registrationUrl: URL | undefined;
+    introspectionUrl: URL | undefined;
+    resourceUrl: URL;
+    requiredScopes: string[];
+    clientId: string | undefined;
+    clientSecret: string | undefined;
+    introspectionTimeoutMs: number;
+    staticTokens: string[];
+}
+export declare function buildAuthConfig(baseUrl: URL): AuthConfig;

package/dist/config/auth-config.js ADDED Viewed

@@ -0,0 +1,53 @@
+import { parseInteger, parseList, parseUrlEnv } from './env-parsers.js';
+function readCoreOAuthUrls() {
+    return {
+        issuerUrl: parseUrlEnv(process.env.OAUTH_ISSUER_URL, 'OAUTH_ISSUER_URL'),
+        authorizationUrl: parseUrlEnv(process.env.OAUTH_AUTHORIZATION_URL, 'OAUTH_AUTHORIZATION_URL'),
+        tokenUrl: parseUrlEnv(process.env.OAUTH_TOKEN_URL, 'OAUTH_TOKEN_URL'),
+    };
+}
+function readOptionalOAuthUrls(baseUrl) {
+    return {
+        revocationUrl: parseUrlEnv(process.env.OAUTH_REVOCATION_URL, 'OAUTH_REVOCATION_URL'),
+        registrationUrl: parseUrlEnv(process.env.OAUTH_REGISTRATION_URL, 'OAUTH_REGISTRATION_URL'),
+        introspectionUrl: parseUrlEnv(process.env.OAUTH_INTROSPECTION_URL, 'OAUTH_INTROSPECTION_URL'),
+        resourceUrl: parseUrlEnv(process.env.OAUTH_RESOURCE_URL, 'OAUTH_RESOURCE_URL') ??
+            new URL('/mcp', baseUrl),
+    };
+}
+function readOAuthUrls(baseUrl) {
+    return { ...readCoreOAuthUrls(), ...readOptionalOAuthUrls(baseUrl) };
+}
+function resolveAuthMode(authModeEnv, urls) {
+    if (authModeEnv === 'oauth')
+        return 'oauth';
+    if (authModeEnv === 'static')
+        return 'static';
+    const oauthConfigured = [
+        urls.issuerUrl,
+        urls.authorizationUrl,
+        urls.tokenUrl,
+        urls.introspectionUrl,
+    ].some((value) => value !== undefined);
+    return oauthConfigured ? 'oauth' : 'static';
+}
+function collectStaticTokens() {
+    const staticTokens = new Set(parseList(process.env.ACCESS_TOKENS));
+    if (process.env.API_KEY) {
+        staticTokens.add(process.env.API_KEY);
+    }
+    return Array.from(staticTokens);
+}
+export function buildAuthConfig(baseUrl) {
+    const urls = readOAuthUrls(baseUrl);
+    const mode = resolveAuthMode(process.env.AUTH_MODE?.toLowerCase(), urls);
+    return {
+        mode,
+        ...urls,
+        requiredScopes: parseList(process.env.OAUTH_REQUIRED_SCOPES),
+        clientId: process.env.OAUTH_CLIENT_ID,
+        clientSecret: process.env.OAUTH_CLIENT_SECRET,
+        introspectionTimeoutMs: parseInteger(process.env.OAUTH_INTROSPECTION_TIMEOUT_MS, 5000, 1000, 30000),
+        staticTokens: collectStaticTokens(),
+    };
+}

package/dist/config/constants.d.ts CHANGED Viewed

@@ -1,19 +1,17 @@
 export declare const SIZE_LIMITS: {
-    readonly ONE_MB: number;
-    readonly FIVE_MB: number;
-    readonly TEN_MB: number;
-    readonly FIFTY_MB: number;
-    readonly HUNDRED_MB: number;
+    ONE_MB: number;
+    FIVE_MB: number;
+    TEN_MB: number;
+    FIFTY_MB: number;
+    HUNDRED_MB: number;
 };
 export declare const CACHE_HASH: {
-    readonly URL_HASH_LENGTH: 16;
-    readonly VARY_HASH_LENGTH: 12;
+    URL_HASH_LENGTH: number;
+    VARY_HASH_LENGTH: number;
 };
 export declare const TIMEOUT: {
-    readonly MIN_FETCH_TIMEOUT_MS: 5000;
-    readonly DEFAULT_FETCH_TIMEOUT_MS: 30000;
-    readonly MAX_FETCH_TIMEOUT_MS: 120000;
-    readonly MIN_SESSION_TTL_MS: number;
-    readonly DEFAULT_SESSION_TTL_MS: number;
-    readonly MAX_SESSION_TTL_MS: number;
+    DEFAULT_FETCH_TIMEOUT_MS: number;
+    MIN_SESSION_TTL_MS: number;
+    DEFAULT_SESSION_TTL_MS: number;
+    MAX_SESSION_TTL_MS: number;
 };

package/dist/config/constants.js CHANGED Viewed

@@ -15,9 +15,7 @@ export const CACHE_HASH = {
     VARY_HASH_LENGTH: 12,
 };
 export const TIMEOUT = {
-    MIN_FETCH_TIMEOUT_MS: 5000,
-    DEFAULT_FETCH_TIMEOUT_MS: 30000,
-    MAX_FETCH_TIMEOUT_MS: 120000,
+    DEFAULT_FETCH_TIMEOUT_MS: 15000,
     MIN_SESSION_TTL_MS: 60 * 1000,
     DEFAULT_SESSION_TTL_MS: 30 * 60 * 1000,
     MAX_SESSION_TTL_MS: 24 * 60 * 60 * 1000,

package/dist/config/env-parsers.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import type { LogLevel } from './types/runtime.js';
+export declare function parseInteger(envValue: string | undefined, defaultValue: number, min?: number, max?: number): number;
+export declare function parseBoolean(envValue: string | undefined, defaultValue: boolean): boolean;
+export declare function parseList(envValue: string | undefined): string[];
+export declare function parseUrlEnv(value: string | undefined, name: string): URL | undefined;
+export declare function parseAllowedHosts(envValue: string | undefined): Set<string>;
+export declare function parseLogLevel(envValue: string | undefined): LogLevel;

package/dist/config/env-parsers.js ADDED Viewed

@@ -0,0 +1,84 @@
+function normalizeHostValue(value) {
+    const trimmed = value.trim().toLowerCase();
+    if (!trimmed)
+        return null;
+    if (trimmed.startsWith('[')) {
+        const end = trimmed.indexOf(']');
+        if (end === -1)
+            return null;
+        return trimmed.slice(1, end);
+    }
+    const colonIndex = trimmed.indexOf(':');
+    if (colonIndex !== -1) {
+        return trimmed.slice(0, colonIndex);
+    }
+    return trimmed;
+}
+const ALLOWED_LOG_LEVELS = new Set([
+    'debug',
+    'info',
+    'warn',
+    'error',
+]);
+function isLogLevel(value) {
+    return ALLOWED_LOG_LEVELS.has(value);
+}
+function isBelowMin(value, min) {
+    if (min === undefined)
+        return false;
+    return value < min;
+}
+function isAboveMax(value, max) {
+    if (max === undefined)
+        return false;
+    return value > max;
+}
+export function parseInteger(envValue, defaultValue, min, max) {
+    if (!envValue)
+        return defaultValue;
+    const parsed = parseInt(envValue, 10);
+    if (Number.isNaN(parsed))
+        return defaultValue;
+    if (isBelowMin(parsed, min))
+        return defaultValue;
+    if (isAboveMax(parsed, max))
+        return defaultValue;
+    return parsed;
+}
+export function parseBoolean(envValue, defaultValue) {
+    if (!envValue)
+        return defaultValue;
+    return envValue !== 'false';
+}
+export function parseList(envValue) {
+    if (!envValue)
+        return [];
+    return envValue
+        .split(/[\s,]+/)
+        .map((entry) => entry.trim())
+        .filter((entry) => entry.length > 0);
+}
+export function parseUrlEnv(value, name) {
+    if (!value)
+        return undefined;
+    if (!URL.canParse(value)) {
+        throw new Error(`Invalid ${name} value: ${value}`);
+    }
+    return new URL(value);
+}
+export function parseAllowedHosts(envValue) {
+    const hosts = new Set();
+    for (const entry of parseList(envValue)) {
+        const normalized = normalizeHostValue(entry);
+        if (normalized) {
+            hosts.add(normalized);
+        }
+    }
+    return hosts;
+}
+export function parseLogLevel(envValue) {
+    const level = envValue?.toLowerCase();
+    if (!level)
+        return 'info';
+    return isLogLevel(level) ? level : 'info';
+}

package/dist/config/formatting.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 export declare const TRUNCATION_MARKER = "...[truncated]";
 export declare const CODE_BLOCK: {
-    readonly fence: "```";
-    readonly format: (code: string, language?: string) => string;
+    fence: string;
+    format: (code: string, language?: string) => string;
 };
 export declare const FRONTMATTER_DELIMITER = "---";
 export declare const joinLines: (lines: readonly string[]) => string;