@fouradata/mcp 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -39
- package/dist/auth.js +3 -3
- package/dist/http.js +16 -16
- package/dist/http.js.map +1 -1
- package/dist/prompts.d.ts +2 -2
- package/dist/prompts.js +9 -9
- package/dist/resources.d.ts +3 -3
- package/dist/resources.js +5 -5
- package/dist/safe-target.js +3 -3
- package/dist/server.js +1 -1
- package/dist/tools/auto.d.ts +1 -1
- package/dist/tools/auto.js +29 -29
- package/dist/tools/auto.js.map +1 -1
- package/dist/tools/browser.js +22 -22
- package/dist/tools/browser.js.map +1 -1
- package/dist/tools/proxy.d.ts +1 -1
- package/dist/tools/proxy.js +33 -33
- package/dist/tools/proxy.js.map +1 -1
- package/dist/tools/single.d.ts +1 -1
- package/dist/tools/single.js +39 -39
- package/dist/tools/single.js.map +1 -1
- package/package.json +12 -5
package/README.md
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# @fouradata/mcp
|
|
2
2
|
|
|
3
|
-
[FourA Web Scraping API](https://foura.ai/) as
|
|
3
|
+
[FourA Web Scraping API](https://foura.ai/) as four [Model Context Protocol](https://modelcontextprotocol.io) tools plus six built-in workflow prompts. Plug it into Claude Desktop, Claude Code, Cursor, Windsurf, or any other MCP client and fetch arbitrary public web pages, bypass anti-bot challenges, and render JavaScript-heavy sites - without writing a line of integration code.
|
|
4
4
|
|
|
5
5
|
Four tools, six prompts, one API key.
|
|
6
6
|
|
|
7
|
-
## Quick Start
|
|
7
|
+
## Quick Start - local stdio (recommended for Claude Desktop)
|
|
8
8
|
|
|
9
9
|
Grab a key at [foura.ai/dashboard#api-keys](https://foura.ai/dashboard#api-keys) (one click, shown once on creation, format `pk_live_...`). Then drop this into your MCP client's config:
|
|
10
10
|
|
|
@@ -24,7 +24,7 @@ Grab a key at [foura.ai/dashboard#api-keys](https://foura.ai/dashboard#api-keys)
|
|
|
24
24
|
|
|
25
25
|
> **Claude Desktop gotcha:** fully quit Claude Desktop (`Cmd+Q` on macOS) **before** editing the config file. If the app is still running, it will overwrite your edits with its in-memory config on exit.
|
|
26
26
|
|
|
27
|
-
The npx command downloads the package on first launch (~10s) and runs it as a subprocess of your MCP client. No global install needed. Same JSON works in every major client
|
|
27
|
+
The npx command downloads the package on first launch (~10s) and runs it as a subprocess of your MCP client. No global install needed. Same JSON works in every major client - just point it at the right file:
|
|
28
28
|
|
|
29
29
|
| Client | Where the config lives |
|
|
30
30
|
|---|---|
|
|
@@ -37,7 +37,7 @@ The npx command downloads the package on first launch (~10s) and runs it as a su
|
|
|
37
37
|
|
|
38
38
|
Restart the client and `foura_auto`, `foura_single`, `foura_proxy`, `foura_browser` show up in your tool list, plus six prompts under `/prompts`.
|
|
39
39
|
|
|
40
|
-
## Quick Start
|
|
40
|
+
## Quick Start - hosted (Streamable HTTP)
|
|
41
41
|
|
|
42
42
|
For clients that support the Streamable HTTP transport (Cursor, Windsurf, VS Code, Claude Code with `--transport http`), point them at the hosted endpoint instead of running a local subprocess:
|
|
43
43
|
|
|
@@ -54,7 +54,7 @@ For clients that support the Streamable HTTP transport (Cursor, Windsurf, VS Cod
|
|
|
54
54
|
}
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
Current Claude Desktop builds reject the bare `url` form — use the stdio config above for Claude Desktop, or bridge through `mcp-remote`:
|
|
57
|
+
Current Claude Desktop builds reject the bare `url` form - use the stdio config above for Claude Desktop, or bridge through `mcp-remote`:
|
|
58
58
|
|
|
59
59
|
```json
|
|
60
60
|
{
|
|
@@ -69,15 +69,15 @@ Current Claude Desktop builds reject the bare `url` form — use the stdio confi
|
|
|
69
69
|
|
|
70
70
|
## The Tools
|
|
71
71
|
|
|
72
|
-
`foura_auto` is the **default**
|
|
72
|
+
`foura_auto` is the **default** - give it a URL and it returns the content, picking the fetch method for you. The other three are the lower-level primitives it orchestrates; reach for them when you want explicit control.
|
|
73
73
|
|
|
74
|
-
All four are marked `readOnlyHint: true` and `openWorldHint: true` per the [MCP spec](https://modelcontextprotocol.io/specification/2025-06-18/server/tools)
|
|
74
|
+
All four are marked `readOnlyHint: true` and `openWorldHint: true` per the [MCP spec](https://modelcontextprotocol.io/specification/2025-06-18/server/tools) - clients that auto-approve trusted read-only tools (Claude Desktop, Cursor in 2026) call them without a per-request confirmation modal.
|
|
75
75
|
|
|
76
76
|
Every response carries both human-readable text (`content`) and a typed `structuredContent` JSON object validated against the tool's `outputSchema`. Clients pass `structuredContent` to your LLM natively, skipping the re-tokenization tax on stringified JSON.
|
|
77
77
|
|
|
78
|
-
### `foura_auto`
|
|
78
|
+
### `foura_auto` - smart fetch (the default)
|
|
79
79
|
|
|
80
|
-
Give a URL, get the content back. Use this first when you just want the page and don't want to choose a method. Internally it walks a cost-aware ladder
|
|
80
|
+
Give a URL, get the content back. Use this first when you just want the page and don't want to choose a method. Internally it walks a cost-aware ladder - a fast direct request, then a rotating proxy, then a full browser session - escalating only as far as the target forces it, solving common bot challenges on the way, and cheaply replaying a warm session on repeat calls to the same host. It learns the right settings per host, so there are no `maxTries` / pool / retry knobs to tune.
|
|
81
81
|
|
|
82
82
|
```jsonc
|
|
83
83
|
{
|
|
@@ -88,13 +88,13 @@ Give a URL, get the content back. Use this first when you just want the page and
|
|
|
88
88
|
}
|
|
89
89
|
```
|
|
90
90
|
|
|
91
|
-
The client surface is intentionally minimal: `url` (required), plus optional `method`, `headers`, `data`, `validate`, `returnSession` (default `true`), `forceProxy` (default `true`), `timeout_ms` (5000
|
|
91
|
+
The client surface is intentionally minimal: `url` (required), plus optional `method`, `headers`, `data`, `validate`, `returnSession` (default `true`), `forceProxy` (default `true`), `timeout_ms` (5000-180000, default 120000), `ignoreProxies`.
|
|
92
92
|
|
|
93
|
-
`structuredContent` shape: `{status, headers, data, meta, session}`. `meta` is always present
|
|
93
|
+
`structuredContent` shape: `{status, headers, data, meta, session}`. `meta` is always present - `{rung, solved, attempts, credits}` - the trace of which rung delivered and what it cost. `session` (`{proxy, cookies, userAgent}`) is returned by default so you can replay the same session through `foura_single` / `foura_proxy` afterwards (pass `session.proxy` into their `proxy` field). Send `returnSession: false` to omit it. There is no `total_time` field on auto.
|
|
94
94
|
|
|
95
|
-
### `foura_single`
|
|
95
|
+
### `foura_single` - fast HTTP
|
|
96
96
|
|
|
97
|
-
One HTTP request, response back. Typically 200ms
|
|
97
|
+
One HTTP request, response back. Typically 200ms-2s. Use it for static pages, JSON APIs, server-rendered HTML - the bread and butter of scraping. Set `unblocker: true` if the target is picky about wire-level signals.
|
|
98
98
|
|
|
99
99
|
```jsonc
|
|
100
100
|
{
|
|
@@ -104,11 +104,11 @@ One HTTP request, response back. Typically 200ms–2s. Use it for static pages,
|
|
|
104
104
|
}
|
|
105
105
|
```
|
|
106
106
|
|
|
107
|
-
Supports custom headers, a body, per-stage timeouts, redirect controls, JSON auto-parse, a binary-buffer mode, and built-in response validation (`validate.status.accept`, `validate.data.fail`, and so on). If `foura_single` comes back blocked
|
|
107
|
+
Supports custom headers, a body, per-stage timeouts, redirect controls, JSON auto-parse, a binary-buffer mode, and built-in response validation (`validate.status.accept`, `validate.data.fail`, and so on). If `foura_single` comes back blocked - status 403/429, captcha page, OR response headers `x-vercel-mitigated: challenge` / `cf-mitigated: challenge`, OR body title matches `Vercel Security Checkpoint` / `Just a moment` / `Attention Required` - escalate to `foura_proxy` with `maxTries: 25-30` for these tier-1 WAFs. If the page also needs JavaScript to render, chain `foura_proxy`'s returned `proxy` ID into `foura_browser.proxy`.
|
|
108
108
|
|
|
109
109
|
`structuredContent` shape: `{status, headers, data, total_time, ...}`.
|
|
110
110
|
|
|
111
|
-
### `foura_proxy`
|
|
111
|
+
### `foura_proxy` - rotating proxies with retry
|
|
112
112
|
|
|
113
113
|
Same target shape as `foura_single`, but routed through a pool of proxies with automatic retry on failure. Per-host scoring picks the proxies most likely to succeed against this particular target, so you're not burning attempts on known-bad routes.
|
|
114
114
|
|
|
@@ -123,9 +123,9 @@ Same target shape as `foura_single`, but routed through a pool of proxies with a
|
|
|
123
123
|
}
|
|
124
124
|
```
|
|
125
125
|
|
|
126
|
-
Typical latency 1
|
|
126
|
+
Typical latency 1-5s. `structuredContent` adds `proxy` (the encoded ID of the proxy that succeeded - pass it to `ignoreProxies` next time if it later goes bad) and `total` (outer timing including selection + retries). For tier-1 WAF challenges (Vercel Security Checkpoint, Cloudflare 'Just a moment', Akamai Bot Manager) use `maxTries: 25-30` - the default 5 is sized for lightly-blocked sites. If still blocked after 30 attempts the gate is likely country / ASN allowlist (not solvable by rotation) - pivot strategy. If the target needs JavaScript render, chain the returned `proxy` ID into `foura_browser.proxy` - the browser then exits through the IP that already cleared the challenge for this target.
|
|
127
127
|
|
|
128
|
-
### `foura_browser`
|
|
128
|
+
### `foura_browser` - full browser session
|
|
129
129
|
|
|
130
130
|
A real browser session. JavaScript runs, the DOM finishes rendering, cookies come back with the response. Use it when the page is a single-page app, when content lazy-loads after first paint, or when there's an anti-bot challenge that needs a real browser to clear.
|
|
131
131
|
|
|
@@ -137,13 +137,13 @@ A real browser session. JavaScript runs, the DOM finishes rendering, cookies com
|
|
|
137
137
|
}
|
|
138
138
|
```
|
|
139
139
|
|
|
140
|
-
Slowest of the
|
|
140
|
+
Slowest of the lower-level tools (2-10s) but the only tool that handles JavaScript end-to-end. `checkText` is a one-shot post-render validator (substring search on the rendered HTML AFTER navigation completes - not a waiter, does not poll): if the substring is missing, the call fails with an error envelope. Useful when a page returns 200 but the actual content is missing. `unblocker` defaults to `true` - the session actively solves an anti-bot / captcha challenge (Cloudflare Turnstile and similar) it meets along the way; set `unblocker: false` to render and return the page exactly as it loads, challenge page included.
|
|
141
141
|
|
|
142
142
|
`structuredContent` shape is intentionally different from single/proxy: `{status, headers (object, not array), body (not data), cookies (full browser cookie shape), userAgent}`.
|
|
143
143
|
|
|
144
144
|
## Built-in Prompts
|
|
145
145
|
|
|
146
|
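-
Five workflow templates surfaced under `/prompts` in your MCP client. They orchestrate one or more tools without you spelling out the steps.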
|
|
146
|
+
Six workflow templates surfaced under `/prompts` in your MCP client. They orchestrate one or more tools without you spelling out the steps.
|
|
147
147
|
|
|
148
148
|
| Prompt | Arguments | What it does |
|
|
149
149
|
|---|---|---|
|
|
@@ -154,7 +154,7 @@ Five workflow templates surfaced under `/prompts` in your MCP client. They orche
|
|
|
154
154
|
| `check_endpoint_health` | `url, expected_text?` | Single with strict validation → reachable/status/timing report |
|
|
155
155
|
| `bulk_fetch_urls` | `urls` (comma-separated) | Parallel single → auto-fallback to proxy per URL → metadata only |
|
|
156
156
|
|
|
157
|
-
Each prompt arrives as a templated user message your LLM executes with the right tools. They cost zero tokens at idle
|
|
157
|
+
Each prompt arrives as a templated user message your LLM executes with the right tools. They cost zero tokens at idle - only invoked prompts enter the context window.
|
|
158
158
|
|
|
159
159
|
Full recipe text + manual fallback prompts: [foura.ai/docs/mcp/recipes](https://foura.ai/docs/mcp/recipes). For the full error code list, see [foura.ai/docs/mcp/errors](https://foura.ai/docs/mcp/errors).
|
|
160
160
|
|
|
@@ -162,9 +162,9 @@ Full recipe text + manual fallback prompts: [foura.ai/docs/mcp/recipes](https://
|
|
|
162
162
|
|
|
163
163
|
Your `Bearer` token (or the `FOURA_API_KEY` env var in stdio mode) forwards to the FourA API as `X-API-Key`. One key, all four tools.
|
|
164
164
|
|
|
165
|
-
Keys are managed in the [dashboard](https://foura.ai/dashboard#api-keys)
|
|
165
|
+
Keys are managed in the [dashboard](https://foura.ai/dashboard#api-keys) - shown once on creation, rotate or deactivate any time. See [foura.ai/docs/getting-started/authentication](https://foura.ai/docs/getting-started/authentication) for the full key-management walkthrough.
|
|
166
166
|
|
|
167
|
-
## Error envelope
|
|
167
|
+
## Error envelope - typed contract for agent retries
|
|
168
168
|
|
|
169
169
|
Every error (`isError: true`) carries a `structuredContent` envelope with at minimum these three fields:
|
|
170
170
|
|
|
@@ -180,27 +180,27 @@ Where the upstream returned a status, you also get `status` (HTTP code) and on r
|
|
|
180
180
|
|
|
181
181
|
| `code` | When | Retry safe? |
|
|
182
182
|
|---|---|---|
|
|
183
|
-
| `ssrf_blocked` | Target IP in a private / reserved range (RFC 5735+6598+IPv6 reserved) | No
|
|
184
|
-
| `upstream_non_json` | Upstream returned malformed body | Maybe
|
|
185
|
-
| `bad_request` (400) | Input shape rejected by FourA | No
|
|
186
|
-
| `auth_failed` (401) | Key missing, invalid, or deactivated | No
|
|
183
|
+
| `ssrf_blocked` | Target IP in a private / reserved range (RFC 5735+6598+IPv6 reserved) | No - change the URL |
|
|
184
|
+
| `upstream_non_json` | Upstream returned malformed body | Maybe - investigate |
|
|
185
|
+
| `bad_request` (400) | Input shape rejected by FourA | No - fix arguments |
|
|
186
|
+
| `auth_failed` (401) | Key missing, invalid, or deactivated | No - fix the key |
|
|
187
187
|
| `forbidden` (403) | Authenticated but not allowed | No |
|
|
188
188
|
| `not_found` (404) | Target / endpoint doesn't exist | No |
|
|
189
|
-
| `rate_limited` (429) | RPM cap hit | Yes
|
|
190
|
-
| `at_capacity` (503) | Concurrency cap hit | Yes
|
|
191
|
-
| `service_disabled` (503) | Maintenance window | Yes
|
|
192
|
-
| `service_unavailable` (503) | Generic 503 | Yes
|
|
193
|
-
| `upstream_error` (≥500) | Upstream 5xx | Yes
|
|
189
|
+
| `rate_limited` (429) | RPM cap hit | Yes - wait `retryAfter` |
|
|
190
|
+
| `at_capacity` (503) | Concurrency cap hit | Yes - wait `retryAfter` |
|
|
191
|
+
| `service_disabled` (503) | Maintenance window | Yes - wait `retryAfter` |
|
|
192
|
+
| `service_unavailable` (503) | Generic 503 | Yes - short backoff |
|
|
193
|
+
| `upstream_error` (≥500) | Upstream 5xx | Yes - exponential backoff |
|
|
194
194
|
| `upstream_client_error` (4xx) | Other 4xx | Usually no |
|
|
195
195
|
|
|
196
196
|
LLM agents can read `code` directly for retry logic without parsing prose. Spec reference: [foura.ai/docs/api/errors](https://foura.ai/docs/api/errors).
|
|
197
197
|
|
|
198
|
-
## Combining the tools
|
|
198
|
+
## Combining the tools - sticky exit IPs
|
|
199
199
|
|
|
200
|
-
The
|
|
200
|
+
The lower-level tools compose. `foura_proxy` returns the base36 ID of the exit it used. Pass that ID back into `foura_single.proxy` or `foura_browser.proxy` and the next call exits through the **same IP** - same session, same fingerprint, same geo.
|
|
201
201
|
|
|
202
202
|
```jsonc
|
|
203
|
-
// 1. Find a working exit for the target
|
|
203
|
+
// 1. Find a working exit for the target - use maxTries:25-30 for tier-1 WAFs
|
|
204
204
|
const r = await foura_proxy({
|
|
205
205
|
maxTries: 30,
|
|
206
206
|
request: { method: "GET", url: "https://probe.example.com", unblocker: true }
|
|
@@ -210,17 +210,17 @@ const r = await foura_proxy({
|
|
|
210
210
|
// 2. Reuse it for follow-up HTTP (cookies, multi-step flows)
|
|
211
211
|
await foura_single({ method: "GET", url: "https://target/api", proxy: r.proxy });
|
|
212
212
|
|
|
213
|
-
// 3. Or render JS through the same egress
|
|
213
|
+
// 3. Or render JS through the same egress - exits through the IP that already
|
|
214
214
|
// cleared the challenge for this target, so the snapshot captures the real
|
|
215
215
|
// post-challenge content instead of a challenge page.
|
|
216
216
|
await foura_browser({ url: "https://target/spa", proxy: r.proxy });
|
|
217
217
|
```
|
|
218
218
|
|
|
219
|
-
This chain is the canonical pattern for **tier-1 WAF + JavaScript-rendered targets** (Vercel Security Checkpoint, Cloudflare 'Just a moment', Akamai Bot Manager protecting SPAs). Calling `foura_browser` directly against a WAF target usually captures the challenge page
|
|
219
|
+
This chain is the canonical pattern for **tier-1 WAF + JavaScript-rendered targets** (Vercel Security Checkpoint, Cloudflare 'Just a moment', Akamai Bot Manager protecting SPAs). Calling `foura_browser` directly against a WAF target usually captures the challenge page - the snapshot fires before the challenge's deferred reload completes. Solve via `foura_proxy` first, then chain.
|
|
220
220
|
|
|
221
221
|
To rotate AWAY from a known-bad proxy on the next `foura_proxy` call, pass it as `ignoreProxies: ["4DZ3VE"]`. The `proxy` field on `foura_single` and `foura_browser` also accepts raw URLs (`http://host:port`, `socks5://...`) if you have your own list.
|
|
222
222
|
|
|
223
|
-
## Large responses
|
|
223
|
+
## Large responses - `offload_large` (default: inline)
|
|
224
224
|
|
|
225
225
|
By default (since v0.2.0), full response bodies are returned inline in `structuredContent` regardless of size. This works in every MCP client.
|
|
226
226
|
|
|
@@ -236,7 +236,7 @@ If your client supports MCP `resources/read` (and you want to save tokens on big
|
|
|
236
236
|
|
|
237
237
|
| Client | `offload_large: true` |
|
|
238
238
|
|---|---|
|
|
239
|
-
| Claude Desktop | not yet
|
|
239
|
+
| Claude Desktop | not yet - leave default `false` |
|
|
240
240
|
| Claude Code, Cursor, Windsurf | supported |
|
|
241
241
|
| VS Code MCP extension | supported |
|
|
242
242
|
|
|
@@ -251,7 +251,7 @@ Tenant-isolated: only the API key that stored a payload can read it back.
|
|
|
251
251
|
|
|
252
252
|
## Self-Hosting
|
|
253
253
|
|
|
254
|
-
The MCP server runs in one container, statelessly
|
|
254
|
+
The MCP server runs in one container, statelessly - each request brings its own key, so there's no session state, no sticky load balancing, nothing to coordinate. Scale horizontally behind any load balancer.
|
|
255
255
|
|
|
256
256
|
Configurable environment:
|
|
257
257
|
|
package/dist/auth.js
CHANGED
|
@@ -2,11 +2,11 @@ import { AsyncLocalStorage } from "node:async_hooks";
|
|
|
2
2
|
/**
|
|
3
3
|
* Shared FourA auth.
|
|
4
4
|
*
|
|
5
|
-
* The API key authenticates the CALLER, not the endpoint
|
|
5
|
+
* The API key authenticates the CALLER, not the endpoint - one key opens
|
|
6
6
|
* /single/, /proxy/, and /browser/. So this lives in one place and is
|
|
7
7
|
* imported by every tool. (Schemas, paths, and per-endpoint behavior remain
|
|
8
|
-
* fully duplicated across tool files
|
|
9
|
-
*
|
|
8
|
+
* fully duplicated across tool files - see
|
|
9
|
+
* .)
|
|
10
10
|
*
|
|
11
11
|
* Dual-mode:
|
|
12
12
|
* - stdio: the user sets FOURA_API_KEY in env (e.g. via claude_desktop_config).
|
package/dist/http.js
CHANGED
|
@@ -4,20 +4,20 @@ import { SUPPORTED_PROTOCOL_VERSIONS } from "@modelcontextprotocol/sdk/types.js"
|
|
|
4
4
|
import { createServer } from "./server.js";
|
|
5
5
|
import { withApiKey } from "./auth.js";
|
|
6
6
|
const PORT = Number(process.env.PORT ?? 3076);
|
|
7
|
-
const SERVER_VERSION = "0.3.
|
|
7
|
+
const SERVER_VERSION = "0.3.2";
|
|
8
8
|
// Spec MUSTs covered in this file:
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
14
|
-
//
|
|
9
|
+
// Origin + Host validation (CVE-2025-66414 DNS rebinding)
|
|
10
|
+
// WWW-Authenticate on 401
|
|
11
|
+
// MCP-Protocol-Version validation (delegated to SDK's list)
|
|
12
|
+
// body size cap (256 KB)
|
|
13
|
+
// server + request timeout
|
|
14
|
+
// SIGTERM graceful shutdown
|
|
15
15
|
// MCP-Protocol-Version allowlist is DERIVED from the SDK at runtime, not
|
|
16
16
|
// hardcoded here. Reason: hardcoding froze us at 2025-06-18 / 2025-03-26
|
|
17
17
|
// and broke every newer client (Claude Code 2.1.141 sends 2025-11-25)
|
|
18
18
|
// until we shipped a release. By reading the SDK's exported authoritative
|
|
19
19
|
// list, every `npm update @modelcontextprotocol/sdk` automatically widens
|
|
20
|
-
// our supported set
|
|
20
|
+
// our supported set - no source-code change, no release coupling.
|
|
21
21
|
//
|
|
22
22
|
// SUPPORTED_PROTOCOL_VERSIONS for @modelcontextprotocol/sdk@1.29.0:
|
|
23
23
|
// ['2025-11-25', '2025-06-18', '2025-03-26', '2024-11-05', '2024-10-07']
|
|
@@ -40,10 +40,10 @@ const ALLOWED_ORIGINS = new Set(parseList(process.env.FOURA_MCP_ALLOWED_ORIGINS,
|
|
|
40
40
|
"https://app.cursor.com",
|
|
41
41
|
]));
|
|
42
42
|
const app = express();
|
|
43
|
-
//
|
|
43
|
+
// cap body size at 256 KB. Real MCP request payloads are <4 KB.
|
|
44
44
|
// Helps mitigate slow-body DoS + memory-exhaustion attacks.
|
|
45
45
|
app.use(express.json({ limit: "256kb" }));
|
|
46
|
-
//
|
|
46
|
+
// Origin + Host validation BEFORE the body is parsed for the MCP
|
|
47
47
|
// path. /healthz stays open so probes can hit it from any source.
|
|
48
48
|
function jsonRpcError(res, status, code, message, extraHeaders) {
|
|
49
49
|
if (extraHeaders)
|
|
@@ -56,7 +56,7 @@ function jsonRpcError(res, status, code, message, extraHeaders) {
|
|
|
56
56
|
});
|
|
57
57
|
}
|
|
58
58
|
function validateOriginAndHost(req, res, next) {
|
|
59
|
-
// Host header
|
|
59
|
+
// Host header - defends against DNS-rebinding (attacker's DNS resolves
|
|
60
60
|
// their hostname to a loopback IP, but Host header carries their hostname).
|
|
61
61
|
const hostHeader = (req.headers.host ?? "").toString();
|
|
62
62
|
if (!hostHeader) {
|
|
@@ -77,7 +77,7 @@ function validateOriginAndHost(req, res, next) {
|
|
|
77
77
|
jsonRpcError(res, 403, -32000, `Host ${hostname} is not in the allowlist`);
|
|
78
78
|
return;
|
|
79
79
|
}
|
|
80
|
-
// Origin
|
|
80
|
+
// Origin - browser-only; server-to-server callers (curl, MCP clients in
|
|
81
81
|
// stdio bridge mode) omit it, which is per-spec acceptable. When PRESENT,
|
|
82
82
|
// it MUST match the allowlist (prevents cross-origin JS from a malicious
|
|
83
83
|
// page driving an authenticated MCP session).
|
|
@@ -90,7 +90,7 @@ function validateOriginAndHost(req, res, next) {
|
|
|
90
90
|
}
|
|
91
91
|
next();
|
|
92
92
|
}
|
|
93
|
-
//
|
|
93
|
+
// MCP-Protocol-Version header validation. Allowlist comes from
|
|
94
94
|
// the SDK's authoritative `SUPPORTED_PROTOCOL_VERSIONS` export so we track
|
|
95
95
|
// upstream automatically. Per spec: when the header is absent, accept
|
|
96
96
|
// (backwards-compat). When present and unknown → 400 with the supported list
|
|
@@ -119,7 +119,7 @@ function extractBearer(req) {
|
|
|
119
119
|
return xKey.trim();
|
|
120
120
|
return null;
|
|
121
121
|
}
|
|
122
|
-
//
|
|
122
|
+
// emit WWW-Authenticate on 401 so clients can negotiate auth.
|
|
123
123
|
const WWW_AUTHENTICATE = `Bearer realm="foura-mcp", resource_metadata="${RESOURCE_METADATA_URL}"`;
|
|
124
124
|
app.post("/mcp", validateOriginAndHost, validateProtocolVersion, async (req, res) => {
|
|
125
125
|
const apiKey = extractBearer(req);
|
|
@@ -155,12 +155,12 @@ app.delete("/mcp", methodNotAllowed);
|
|
|
155
155
|
const server = app.listen(PORT, "0.0.0.0", () => {
|
|
156
156
|
console.error(`[foura-mcp] HTTP listening on :${PORT}`);
|
|
157
157
|
});
|
|
158
|
-
//
|
|
158
|
+
// bound how long an incoming HTTP request can hold a socket open.
|
|
159
159
|
// Defends against slowloris-style attacks (open POST that never finishes
|
|
160
160
|
// sending the body).
|
|
161
161
|
server.setTimeout(60_000);
|
|
162
162
|
server.requestTimeout = 30_000;
|
|
163
|
-
//
|
|
163
|
+
// graceful shutdown. On SIGTERM, stop accepting new connections,
|
|
164
164
|
// let in-flight requests finish (up to 30s), then exit. docker-compose's
|
|
165
165
|
// stop_grace_period must be >= this hard cap.
|
|
166
166
|
function shutdown(signal) {
|
package/dist/http.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"http.js","sourceRoot":"","sources":["../src/http.ts"],"names":[],"mappings":"AAAA,OAAO,OAA2D,MAAM,SAAS,CAAC;AAClF,OAAO,EAAE,6BAA6B,EAAE,MAAM,oDAAoD,CAAC;AACnG,OAAO,EAAE,2BAA2B,EAAE,MAAM,oCAAoC,CAAC;AACjF,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,MAAM,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,IAAI,CAAC,CAAC;AAC9C,MAAM,cAAc,GAAG,OAAO,CAAC;AAE/B,mCAAmC;AACnC,
|
|
1
|
+
{"version":3,"file":"http.js","sourceRoot":"","sources":["../src/http.ts"],"names":[],"mappings":"AAAA,OAAO,OAA2D,MAAM,SAAS,CAAC;AAClF,OAAO,EAAE,6BAA6B,EAAE,MAAM,oDAAoD,CAAC;AACnG,OAAO,EAAE,2BAA2B,EAAE,MAAM,oCAAoC,CAAC;AACjF,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,MAAM,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,IAAI,CAAC,CAAC;AAC9C,MAAM,cAAc,GAAG,OAAO,CAAC;AAE/B,mCAAmC;AACnC,4DAA4D;AAC5D,4BAA4B;AAC5B,8DAA8D;AAC9D,2BAA2B;AAC3B,6BAA6B;AAC7B,8BAA8B;AAE9B,yEAAyE;AACzE,yEAAyE;AACzE,sEAAsE;AACtE,0EAA0E;AAC1E,0EAA0E;AAC1E,kEAAkE;AAClE,EAAE;AACF,oEAAoE;AACpE,2EAA2E;AAE3E,MAAM,qBAAqB,GACzB,OAAO,CAAC,GAAG,CAAC,+BAA+B;IAC3C,oCAAoC,CAAC;AAEvC,SAAS,SAAS,CAAC,GAAuB,EAAE,QAAkB;IAC5D,MAAM,GAAG,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,CAAC,GAAG;QAAE,OAAO,QAAQ,CAAC;IAC1B,OAAO,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AAC7D,CAAC;AAED,mEAAmE;AACnE,0EAA0E;AAC1E,wCAAwC;AACxC,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,SAAS,CACrC,OAAO,CAAC,GAAG,CAAC,uBAAuB,EACnC,CAAC,cAAc,EAAE,WAAW,EAAE,WAAW,EAAE,OAAO,CAAC,CACpD,CAAC,CAAC;AACH,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,SAAS,CACvC,OAAO,CAAC,GAAG,CAAC,yBAAyB,EACrC;IACE,sBAAsB;IACtB,mBAAmB;IACnB,uBAAuB;IACvB,wBAAwB;CACzB,CACF,CAAC,CAAC;AAEH,MAAM,GAAG,GAAG,OAAO,EAAE,CAAC;AAEtB,gEAAgE;AAChE,4DAA4D;AAC5D,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC;AAE1C,iEAAiE;AACjE,kEAAkE;AAClE,SAAS,YAAY,CAAC,GAAa,EAAE,MAAc,EAAE,IAAY,EAAE,OAAe,EAAE,YAAqC;IACvH,IAAI,YAAY;QAAE,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC;YAAE,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACzF,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC;QACtB,OAAO,EAAE,KAAK;QACd,KAAK,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE;QACxB,EAAE,EAAE,IAAI;KACT,CAAC,CAAC;AACL,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAY,EAAE,GAAa,EAAE,IAAkB;IAC5E,uEAAuE;IACvE,4EAA4E;IAC5E,MAAM,UAAU,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CA
AC;IACvD,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,KAAK,EAAE,qBAAqB,CAAC,CAAC;QACtD,OAAO;IACT,CAAC;IACD,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,IAAI,GAAG,CAAC,UAAU,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC;IACtD,CAAC;IAAC,MAAM,CAAC;QACP,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,KAAK,EAAE,wBAAwB,UAAU,EAAE,CAAC,CAAC;QACrE,OAAO;IACT,CAAC;IACD,4EAA4E;IAC5E,MAAM,cAAc,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,QAAQ,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC;IAC3E,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC;QACvE,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,KAAK,EAAE,QAAQ,QAAQ,0BAA0B,CAAC,CAAC;QAC3E,OAAO;IACT,CAAC;IAED,wEAAwE;IACxE,0EAA0E;IAC1E,yEAAyE;IACzE,8CAA8C;IAC9C,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC;IAClC,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpD,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACjC,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,KAAK,EAAE,UAAU,MAAM,0BAA0B,CAAC,CAAC;YAC3E,OAAO;QACT,CAAC;IACH,CAAC;IAED,IAAI,EAAE,CAAC;AACT,CAAC;AAED,+DAA+D;AAC/D,2EAA2E;AAC3E,sEAAsE;AACtE,6EAA6E;AAC7E,oEAAoE;AACpE,SAAS,uBAAuB,CAAC,GAAY,EAAE,GAAa,EAAE,IAAkB;IAC9E,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,sBAAsB,CAAC,CAAC;IAC/C,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,IAAI,EAAE,CAAC;QACP,OAAO;IACT,CAAC;IACD,IAAI,CAAC,2BAA2B,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC/C,YAAY,CACV,GAAG,EACH,GAAG,EACH,CAAC,KAAK,EACN,qCAAqC,GAAG,iDAAiD,2BAA2B,CAAC,IAAI,CAAC,IAAI,CAAC,0CAA0C,CAC1K,CAAC;QACF,OAAO;IACT,CAAC;IACD,IAAI,EAAE,CAAC;AACT,CAAC;AAED,GAAG,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE;IAChC,GAAG,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,cAAc,EAAE,CAAC,CAAC;AACrE,CAAC,CAAC,CAAC;AAEH,SAAS,aAAa,CAAC,GAAY;IACjC,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IACzC,IAAI,IAAI,EAAE,WAAW,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC3E,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;IACrC,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAC7B,OAAO
,IAAI,CAAC;AACd,CAAC;AAED,8DAA8D;AAC9D,MAAM,gBAAgB,GAAG,gDAAgD,qBAAqB,GAAG,CAAC;AAElG,GAAG,CAAC,IAAI,CACN,MAAM,EACN,qBAAqB,EACrB,uBAAuB,EACvB,KAAK,EAAE,GAAY,EAAE,GAAa,EAAE,EAAE;IACpC,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,YAAY,CACV,GAAG,EACH,GAAG,EACH,CAAC,KAAK,EACN,+EAA+E;YAC7E,kDAAkD,EACpD,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,CACzC,CAAC;QACF,OAAO;IACT,CAAC;IAED,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,YAAY,EAAE,CAAC;QAC3B,MAAM,SAAS,GAAG,IAAI,6BAA6B,CAAC;YAClD,kBAAkB,EAAE,SAAS;SAC9B,CAAC,CAAC;QAEH,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACnB,SAAS,CAAC,KAAK,EAAE,CAAC;YAClB,GAAG,CAAC,KAAK,EAAE,CAAC;QACd,CAAC,CAAC,CAAC;QAEH,MAAM,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAC7B,MAAM,UAAU,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,aAAa,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;IAC9E,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,iCAAiC,EAAE,GAAG,CAAC,CAAC;QACtD,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;YACrB,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,KAAK,EAAE,uBAAuB,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;AACH,CAAC,CACF,CAAC;AAEF,MAAM,gBAAgB,GAAG,CAAC,IAAa,EAAE,GAAa,EAAE,EAAE;IACxD,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,KAAK,EAAE,sDAAsD,CAAC,CAAC;AACzF,CAAC,CAAC;AACF,GAAG,CAAC,GAAG,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;AAClC,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;AAErC,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE;IAC9C,OAAO,CAAC,KAAK,CAAC,kCAAkC,IAAI,EAAE,CAAC,CAAC;AAC1D,CAAC,CAAC,CAAC;AAEH,kEAAkE;AAClE,yEAAyE;AACzE,qBAAqB;AACrB,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;AAC1B,MAAM,CAAC,cAAc,GAAG,MAAM,CAAC;AAE/B,iEAAiE;AACjE,yEAAyE;AACzE,8CAA8C;AAC9C,SAAS,QAAQ,CAAC,MAAc;IAC9B,OAAO,CAAC,KAAK,CAAC,wBAAwB,MAAM,eAAe,CAAC,CAAC;IAC7D,MAAM,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;QACnB,IAAI,GAAG,EAAE,CAAC;YACR,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,GAAG,CAAC,CAAC;YACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;IACH,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,CAAC,KAAK,CAAC,0DAA0D,CAAC,CAAC;QAC1E,OAAO,CAAC,IAAI,CAAC,CAAC,CAA
C,CAAC;IAClB,CAAC,EAAE,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC;AACrB,CAAC;AACD,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;AACjD,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC"}
|
package/dist/prompts.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
2
|
/**
|
|
3
|
-
* MCP Prompts
|
|
3
|
+
* MCP Prompts - pre-written workflow templates the user can invoke from the
|
|
4
4
|
* MCP client UI (Claude Desktop / Cursor / etc) instead of figuring out the
|
|
5
5
|
* tool orchestration themselves.
|
|
6
6
|
*
|
|
7
|
-
* Prompts are LAZY context
|
|
7
|
+
* Prompts are LAZY context - they only enter the LLM's window when invoked,
|
|
8
8
|
* unlike tool descriptions which are loaded on every turn. So we can be more
|
|
9
9
|
* verbose here.
|
|
10
10
|
*/
|
package/dist/prompts.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
/**
|
|
3
|
-
* MCP Prompts
|
|
3
|
+
* MCP Prompts - pre-written workflow templates the user can invoke from the
|
|
4
4
|
* MCP client UI (Claude Desktop / Cursor / etc) instead of figuring out the
|
|
5
5
|
* tool orchestration themselves.
|
|
6
6
|
*
|
|
7
|
-
* Prompts are LAZY context
|
|
7
|
+
* Prompts are LAZY context - they only enter the LLM's window when invoked,
|
|
8
8
|
* unlike tool descriptions which are loaded on every turn. So we can be more
|
|
9
9
|
* verbose here.
|
|
10
10
|
*/
|
|
@@ -21,7 +21,7 @@ export function registerPrompts(server) {
|
|
|
21
21
|
role: "user",
|
|
22
22
|
content: {
|
|
23
23
|
type: "text",
|
|
24
|
-
text: `Fetch the product page at ${url} using the foura_browser tool
|
|
24
|
+
text: `Fetch the product page at ${url} using the foura_browser tool - most product pages are single-page apps and need JavaScript to render.\n\n` +
|
|
25
25
|
`From the response body extract:\n` +
|
|
26
26
|
`- product title\n` +
|
|
27
27
|
`- price (with currency)\n` +
|
|
@@ -48,7 +48,7 @@ export function registerPrompts(server) {
|
|
|
48
48
|
content: {
|
|
49
49
|
type: "text",
|
|
50
50
|
text: `Fetch ${url} using the foura_single tool with unblocker:true. Most news and blog sites are server-rendered, so HTTP is fastest (200ms-2s).\n\n` +
|
|
51
|
-
`If foura_single returns a 403, captcha page, or empty content, retry the same URL with foura_proxy (maxTries:3)
|
|
51
|
+
`If foura_single returns a 403, captcha page, or empty content, retry the same URL with foura_proxy (maxTries:3) - it routes through a rotating proxy pool.\n\n` +
|
|
52
52
|
`From the response, extract:\n` +
|
|
53
53
|
`- headline (the main H1, not the page title bar)\n` +
|
|
54
54
|
`- author byline (may be inside .author / [rel=author] / itemprop)\n` +
|
|
@@ -137,23 +137,23 @@ export function registerPrompts(server) {
|
|
|
137
137
|
type: "text",
|
|
138
138
|
text: `Parse the following comma-separated URLs and fetch each one concurrently using foura_single (unblocker:true).\n\n` +
|
|
139
139
|
`URLs: ${urls}\n\n` +
|
|
140
|
-
`For any URL that returns 403, captcha page, or empty body
|
|
140
|
+
`For any URL that returns 403, captcha page, or empty body - retry that single URL with foura_proxy (maxTries:3).\n\n` +
|
|
141
141
|
`Return a JSON array, one entry per URL in input order:\n` +
|
|
142
142
|
`[{"url": "...", "status": 200, "success": true, "body_size_bytes": 0, "via": "single|proxy", "error": null}, ...]\n\n` +
|
|
143
|
-
`Do NOT inline full response bodies in the output
|
|
143
|
+
`Do NOT inline full response bodies in the output - only metadata. If the caller needs body content, they should call foura_single individually.`,
|
|
144
144
|
},
|
|
145
145
|
},
|
|
146
146
|
],
|
|
147
147
|
}));
|
|
148
148
|
server.registerPrompt("smart_fetch", {
|
|
149
149
|
title: "Fetch a URL the smart way (auto)",
|
|
150
|
-
description: "Fetch any URL with foura_auto
|
|
150
|
+
description: "Fetch any URL with foura_auto - one call that picks the method (direct / proxy / browser), gets past common bot protection, and returns the content. Use when you just want the page and don't want to choose a tool.",
|
|
151
151
|
argsSchema: {
|
|
152
152
|
url: z.string().describe("URL to fetch"),
|
|
153
153
|
must_contain: z
|
|
154
154
|
.string()
|
|
155
155
|
.optional()
|
|
156
|
-
.describe("Optional substring the real page must contain
|
|
156
|
+
.describe("Optional substring the real page must contain - lets auto tell a real page from a challenge page on protected targets."),
|
|
157
157
|
extract: z
|
|
158
158
|
.string()
|
|
159
159
|
.optional()
|
|
@@ -165,7 +165,7 @@ export function registerPrompts(server) {
|
|
|
165
165
|
role: "user",
|
|
166
166
|
content: {
|
|
167
167
|
type: "text",
|
|
168
|
-
text: `Fetch ${url} using the foura_auto tool. It picks the fetch method for you (direct request, rotating proxy, or full browser) and gets past common bot protection automatically
|
|
168
|
+
text: `Fetch ${url} using the foura_auto tool. It picks the fetch method for you (direct request, rotating proxy, or full browser) and gets past common bot protection automatically - you do not need to choose between foura_single / foura_proxy / foura_browser.\n\n` +
|
|
169
169
|
(must_contain
|
|
170
170
|
? `Pass validate.data.accept:["${must_contain}"] so auto keeps escalating until the real page (containing "${must_contain}") comes back, not a challenge page.\n\n`
|
|
171
171
|
: `If the first response looks like a challenge / block page rather than real content, re-call with validate.data.accept:["<a string the real page must contain>"] so auto knows what success looks like.\n\n`) +
|
package/dist/resources.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
2
|
/**
|
|
3
|
-
* Resources
|
|
3
|
+
* Resources - offload large response bodies (>= THRESHOLD bytes) onto host
|
|
4
4
|
* disk and return an MCP resource_link instead of inlining megabytes into the
|
|
5
5
|
* LLM context.
|
|
6
6
|
*
|
|
@@ -9,10 +9,10 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
|
9
9
|
* which response field is the "large payload" (single/proxy use `data`,
|
|
10
10
|
* browser uses `body`).
|
|
11
11
|
*
|
|
12
|
-
*
|
|
12
|
+
* tenant isolation. Payloads are stored under
|
|
13
13
|
* `<PAYLOADS_DIR>/<keyhash>/<uuid>.{bin,meta.json}`, where `keyhash` is the
|
|
14
14
|
* first 16 hex chars of sha256(apiKey). The resource handler validates the
|
|
15
|
-
* caller's keyhash matches the storage path before serving
|
|
15
|
+
* caller's keyhash matches the storage path before serving - any other
|
|
16
16
|
* tenant gets a `resource not found` (no leaking whether the UUID exists).
|
|
17
17
|
*/
|
|
18
18
|
export declare const THRESHOLD_BYTES = 50000;
|
package/dist/resources.js
CHANGED
|
@@ -5,7 +5,7 @@ import { tmpdir } from "node:os";
|
|
|
5
5
|
import { ResourceTemplate, } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
6
6
|
import { getApiKey } from "./auth.js";
|
|
7
7
|
/**
|
|
8
|
-
* Resources
|
|
8
|
+
* Resources - offload large response bodies (>= THRESHOLD bytes) onto host
|
|
9
9
|
* disk and return an MCP resource_link instead of inlining megabytes into the
|
|
10
10
|
* LLM context.
|
|
11
11
|
*
|
|
@@ -14,16 +14,16 @@ import { getApiKey } from "./auth.js";
|
|
|
14
14
|
* which response field is the "large payload" (single/proxy use `data`,
|
|
15
15
|
* browser uses `body`).
|
|
16
16
|
*
|
|
17
|
-
*
|
|
17
|
+
* tenant isolation. Payloads are stored under
|
|
18
18
|
* `<PAYLOADS_DIR>/<keyhash>/<uuid>.{bin,meta.json}`, where `keyhash` is the
|
|
19
19
|
* first 16 hex chars of sha256(apiKey). The resource handler validates the
|
|
20
|
-
* caller's keyhash matches the storage path before serving
|
|
20
|
+
* caller's keyhash matches the storage path before serving - any other
|
|
21
21
|
* tenant gets a `resource not found` (no leaking whether the UUID exists).
|
|
22
22
|
*/
|
|
23
23
|
export const THRESHOLD_BYTES = 50_000;
|
|
24
24
|
export const PAYLOADS_DIR = process.env.FOURA_MCP_PAYLOADS_DIR ?? path.join(tmpdir(), "foura-mcp-payloads");
|
|
25
25
|
const URI_PREFIX = "foura-mcp://payload/";
|
|
26
|
-
// sha256(apiKey).hex().slice(0, 16)
|
|
26
|
+
// sha256(apiKey).hex().slice(0, 16) - gives 64 bits of namespacing entropy,
|
|
27
27
|
// short enough for a filesystem path component and unguessable for an
|
|
28
28
|
// attacker who doesn't have the corresponding API key.
|
|
29
29
|
export function hashApiKey(apiKey) {
|
|
@@ -100,7 +100,7 @@ export function registerResourceHandler(server) {
|
|
|
100
100
|
throw new Error(`Payload not found: ${uuidStr}`);
|
|
101
101
|
}
|
|
102
102
|
const meta = JSON.parse(metaRaw);
|
|
103
|
-
// Defense in depth
|
|
103
|
+
// Defense in depth - even if filesystem permissions ever got bypassed,
|
|
104
104
|
// the meta sidecar carries the keyhash; reject on mismatch.
|
|
105
105
|
if (meta.keyhash !== keyhash) {
|
|
106
106
|
throw new Error(`Payload not found: ${uuidStr}`);
|
package/dist/safe-target.js
CHANGED
|
@@ -52,16 +52,16 @@ function isReservedV6(addr) {
|
|
|
52
52
|
if (a === "::" || a === "::1")
|
|
53
53
|
return true;
|
|
54
54
|
const firstGroup = a.split(":")[0] ?? "";
|
|
55
|
-
// ULA fc00::/7
|
|
55
|
+
// ULA fc00::/7 - first hex of first group is f, second is c or d
|
|
56
56
|
if (/^f[cd][0-9a-f]{0,2}$/.test(firstGroup))
|
|
57
57
|
return true;
|
|
58
|
-
// link-local fe80::/10
|
|
58
|
+
// link-local fe80::/10 - first group starts fe8, fe9, fea, feb
|
|
59
59
|
if (/^fe[89ab][0-9a-f]{0,1}$/.test(firstGroup))
|
|
60
60
|
return true;
|
|
61
61
|
// documentation 2001:db8::/32
|
|
62
62
|
if (/^2001:0?db8(:|$)/.test(a))
|
|
63
63
|
return true;
|
|
64
|
-
// IPv4-mapped: ::ffff:x.x.x.x (dotted-quad form)
|
|
64
|
+
// IPv4-mapped: ::ffff:x.x.x.x (dotted-quad form) - check embedded v4
|
|
65
65
|
const v4mappedDotted = /^::ffff:([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/.exec(a);
|
|
66
66
|
if (v4mappedDotted?.[1])
|
|
67
67
|
return isReservedV4(v4mappedDotted[1]);
|
package/dist/server.js
CHANGED
package/dist/tools/auto.d.ts
CHANGED
|
@@ -5,7 +5,7 @@ declare function guardHandler(service: "auto", outputSchema: z.ZodObject<any>, f
|
|
|
5
5
|
export declare function registerAutoTool(server: McpServer): void;
|
|
6
6
|
export declare const __test: {
|
|
7
7
|
deriveCode: typeof deriveCode;
|
|
8
|
-
|
|
8
|
+
ResponseHeadersSchema: z.ZodObject<{
|
|
9
9
|
result: z.ZodOptional<z.ZodObject<{
|
|
10
10
|
version: z.ZodOptional<z.ZodString>;
|
|
11
11
|
code: z.ZodOptional<z.ZodNumber>;
|