@j0hanz/fetch-url-mcp 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.d.ts +9 -3
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +44 -110
- package/dist/cache.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +9 -4
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +2 -3
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +18 -25
- package/dist/config.js.map +1 -0
- package/dist/crypto.d.ts +1 -0
- package/dist/crypto.d.ts.map +1 -0
- package/dist/crypto.js +1 -0
- package/dist/crypto.js.map +1 -0
- package/dist/dom-noise-removal.d.ts +2 -1
- package/dist/dom-noise-removal.d.ts.map +1 -0
- package/dist/dom-noise-removal.js +8 -4
- package/dist/dom-noise-removal.js.map +1 -0
- package/dist/download.d.ts +4 -0
- package/dist/download.d.ts.map +1 -0
- package/dist/download.js +106 -0
- package/dist/download.js.map +1 -0
- package/dist/errors.d.ts +1 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +1 -0
- package/dist/errors.js.map +1 -0
- package/dist/examples/mcp-fetch-url-client.js +19 -3
- package/dist/examples/mcp-fetch-url-client.js.map +1 -1
- package/dist/fetch-content.d.ts +1 -0
- package/dist/fetch-content.d.ts.map +1 -0
- package/dist/fetch-content.js +14 -14
- package/dist/fetch-content.js.map +1 -0
- package/dist/fetch-stream.d.ts +1 -0
- package/dist/fetch-stream.d.ts.map +1 -0
- package/dist/fetch-stream.js +6 -3
- package/dist/fetch-stream.js.map +1 -0
- package/dist/fetch.d.ts +1 -0
- package/dist/fetch.d.ts.map +1 -0
- package/dist/fetch.js +120 -51
- package/dist/fetch.js.map +1 -0
- package/dist/host-normalization.d.ts +1 -0
- package/dist/host-normalization.d.ts.map +1 -0
- package/dist/host-normalization.js +19 -6
- package/dist/host-normalization.js.map +1 -0
- package/dist/http/auth.d.ts +35 -0
- package/dist/http/auth.d.ts.map +1 -0
- package/dist/http/auth.js +283 -0
- package/dist/http/auth.js.map +1 -0
- package/dist/http/health.d.ts +7 -0
- package/dist/http/health.d.ts.map +1 -0
- package/dist/http/health.js +166 -0
- package/dist/http/health.js.map +1 -0
- package/dist/http/helpers.d.ts +58 -0
- package/dist/http/helpers.d.ts.map +1 -0
- package/dist/http/helpers.js +372 -0
- package/dist/http/helpers.js.map +1 -0
- package/dist/{http-native.d.ts → http/native.d.ts} +1 -0
- package/dist/http/native.d.ts.map +1 -0
- package/dist/http/native.js +529 -0
- package/dist/http/native.js.map +1 -0
- package/dist/http/rate-limit.d.ts +13 -0
- package/dist/http/rate-limit.d.ts.map +1 -0
- package/dist/http/rate-limit.js +81 -0
- package/dist/http/rate-limit.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -0
- package/dist/instructions.d.ts +2 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +108 -0
- package/dist/instructions.js.map +1 -0
- package/dist/ip-blocklist.d.ts +1 -0
- package/dist/ip-blocklist.d.ts.map +1 -0
- package/dist/ip-blocklist.js +2 -0
- package/dist/ip-blocklist.js.map +1 -0
- package/dist/json.d.ts +2 -1
- package/dist/json.d.ts.map +1 -0
- package/dist/json.js +19 -6
- package/dist/json.js.map +1 -0
- package/dist/language-detection.d.ts +1 -0
- package/dist/language-detection.d.ts.map +1 -0
- package/dist/language-detection.js +1 -0
- package/dist/language-detection.js.map +1 -0
- package/dist/markdown-cleanup.d.ts +2 -1
- package/dist/markdown-cleanup.d.ts.map +1 -0
- package/dist/markdown-cleanup.js +51 -52
- package/dist/markdown-cleanup.js.map +1 -0
- package/dist/mcp-validator.d.ts +1 -0
- package/dist/mcp-validator.d.ts.map +1 -0
- package/dist/mcp-validator.js +16 -8
- package/dist/mcp-validator.js.map +1 -0
- package/dist/mcp.d.ts +2 -2
- package/dist/mcp.d.ts.map +1 -0
- package/dist/mcp.js +17 -333
- package/dist/mcp.js.map +1 -0
- package/dist/observability.d.ts +2 -0
- package/dist/observability.d.ts.map +1 -0
- package/dist/observability.js +30 -5
- package/dist/observability.js.map +1 -0
- package/dist/prompts.d.ts +1 -0
- package/dist/prompts.d.ts.map +1 -0
- package/dist/prompts.js +15 -3
- package/dist/prompts.js.map +1 -0
- package/dist/resources.d.ts +1 -0
- package/dist/resources.d.ts.map +1 -0
- package/dist/resources.js +30 -23
- package/dist/resources.js.map +1 -0
- package/dist/server-tuning.d.ts +1 -0
- package/dist/server-tuning.d.ts.map +1 -0
- package/dist/server-tuning.js +11 -15
- package/dist/server-tuning.js.map +1 -0
- package/dist/server.d.ts +1 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +23 -23
- package/dist/server.js.map +1 -0
- package/dist/session.d.ts +1 -0
- package/dist/session.d.ts.map +1 -0
- package/dist/session.js +55 -28
- package/dist/session.js.map +1 -0
- package/dist/tasks/execution.d.ts +42 -0
- package/dist/tasks/execution.d.ts.map +1 -0
- package/dist/tasks/execution.js +232 -0
- package/dist/tasks/execution.js.map +1 -0
- package/dist/{tasks.d.ts → tasks/manager.d.ts} +6 -0
- package/dist/tasks/manager.d.ts.map +1 -0
- package/dist/{tasks.js → tasks/manager.js} +86 -37
- package/dist/tasks/manager.js.map +1 -0
- package/dist/tasks/owner.d.ts +33 -0
- package/dist/tasks/owner.d.ts.map +1 -0
- package/dist/tasks/owner.js +99 -0
- package/dist/tasks/owner.js.map +1 -0
- package/dist/timer-utils.d.ts +1 -0
- package/dist/timer-utils.d.ts.map +1 -0
- package/dist/timer-utils.js +12 -5
- package/dist/timer-utils.js.map +1 -0
- package/dist/tool-errors.d.ts +12 -0
- package/dist/tool-errors.d.ts.map +1 -0
- package/dist/tool-errors.js +52 -0
- package/dist/tool-errors.js.map +1 -0
- package/dist/tool-pipeline.d.ts +72 -0
- package/dist/tool-pipeline.d.ts.map +1 -0
- package/dist/tool-pipeline.js +407 -0
- package/dist/tool-pipeline.js.map +1 -0
- package/dist/tool-progress.d.ts +32 -0
- package/dist/tool-progress.d.ts.map +1 -0
- package/dist/tool-progress.js +123 -0
- package/dist/tool-progress.js.map +1 -0
- package/dist/tools.d.ts +35 -111
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +93 -566
- package/dist/tools.js.map +1 -0
- package/dist/{transform.d.ts → transform/transform.d.ts} +2 -1
- package/dist/transform/transform.d.ts.map +1 -0
- package/dist/{transform.js → transform/transform.js} +73 -769
- package/dist/transform/transform.js.map +1 -0
- package/dist/{transform-types.d.ts → transform/types.d.ts} +1 -0
- package/dist/transform/types.d.ts.map +1 -0
- package/dist/{transform-types.js → transform/types.js} +1 -0
- package/dist/transform/types.js.map +1 -0
- package/dist/transform/worker-pool.d.ts +93 -0
- package/dist/transform/worker-pool.d.ts.map +1 -0
- package/dist/transform/worker-pool.js +759 -0
- package/dist/transform/worker-pool.js.map +1 -0
- package/dist/transform/workers/transform-child.d.ts +2 -0
- package/dist/transform/workers/transform-child.d.ts.map +1 -0
- package/dist/{workers → transform/workers}/transform-child.js +3 -1
- package/dist/transform/workers/transform-child.js.map +1 -0
- package/dist/transform/workers/transform-worker.d.ts +2 -0
- package/dist/transform/workers/transform-worker.d.ts.map +1 -0
- package/dist/{workers → transform/workers}/transform-worker.js +2 -1
- package/dist/transform/workers/transform-worker.js.map +1 -0
- package/dist/type-guards.d.ts +1 -0
- package/dist/type-guards.d.ts.map +1 -0
- package/dist/type-guards.js +1 -0
- package/dist/type-guards.js.map +1 -0
- package/package.json +6 -7
- package/dist/AGENTS.md +0 -152
- package/dist/http-native.js +0 -1320
- package/dist/instructions.md +0 -113
- package/dist/workers/transform-child.d.ts +0 -1
- package/dist/workers/transform-worker.d.ts +0 -1
package/dist/instructions.md
DELETED
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
# FETCH-URL INSTRUCTIONS
|
|
2
|
-
|
|
3
|
-
Available as resource (`internal://instructions`) or prompt (`get-help`). Load when unsure about tool usage.
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## CORE CAPABILITY
|
|
8
|
-
|
|
9
|
-
- Domain: Fetch public web pages and convert HTML to clean, LLM-readable Markdown.
|
|
10
|
-
- Primary Resources: Markdown content, cached snapshots (`internal://cache/{namespace}/{hash}`).
|
|
11
|
-
- Tools: `fetch-url` (READ-ONLY; no write tools exist).
|
|
12
|
-
|
|
13
|
-
---
|
|
14
|
-
|
|
15
|
-
## PROMPTS
|
|
16
|
-
|
|
17
|
-
- `get-help`: Returns these instructions for quick recall.
|
|
18
|
-
|
|
19
|
-
---
|
|
20
|
-
|
|
21
|
-
## RESOURCES & RESOURCE LINKS
|
|
22
|
-
|
|
23
|
-
- `internal://instructions`: This document.
|
|
24
|
-
- `internal://cache/{namespace}/{hash}`: Immutable cached Markdown snapshots from previous `fetch-url` calls. Ephemeral — lost when the server process restarts.
|
|
25
|
-
- `fetch-url` responses include a `resource_link` content block when cache is enabled; use that URI directly with `resources/read`/`resources/subscribe`.
|
|
26
|
-
- If inline Markdown is truncated (ends with `...[truncated]`), the full content may be available via the cache resource. Use `resources/read` with the cache URI to retrieve it.
|
|
27
|
-
- Clients can subscribe to cache resource URIs via `resources/subscribe` and receive `notifications/resources/updated` when that specific cache entry changes.
|
|
28
|
-
|
|
29
|
-
---
|
|
30
|
-
|
|
31
|
-
## PROGRESS & TASKS
|
|
32
|
-
|
|
33
|
-
- Include `_meta.progressToken` in requests to receive `notifications/progress` updates during fetch.
|
|
34
|
-
- Task-augmented tool calls are supported for `fetch-url`:
|
|
35
|
-
- This tool declares `execution.taskSupport: "optional"` — invoke it normally or as a task.
|
|
36
|
-
- Send `tools/call` with `task` to get a task id.
|
|
37
|
-
- Poll `tasks/get` and fetch results via `tasks/result`.
|
|
38
|
-
- Use `tasks/cancel` to abort.
|
|
39
|
-
- Task data is stored in memory and cleared on restart.
|
|
40
|
-
|
|
41
|
-
---
|
|
42
|
-
|
|
43
|
-
## THE "GOLDEN PATH" WORKFLOWS (CRITICAL)
|
|
44
|
-
|
|
45
|
-
### WORKFLOW A: STANDARD FETCH
|
|
46
|
-
|
|
47
|
-
1. Call `fetch-url` with `{ "url": "https://..." }`.
|
|
48
|
-
2. Read the `markdown` field from `structuredContent`.
|
|
49
|
-
3. If `truncated` is `true`: use `cacheResourceUri` from `structuredContent` with `resources/read` to get full content.
|
|
50
|
-
NOTE: Never guess URIs; always use values returned in responses.
|
|
51
|
-
|
|
52
|
-
### WORKFLOW B: FRESH CONTENT (BYPASS CACHE)
|
|
53
|
-
|
|
54
|
-
1. Call `fetch-url` with `{ "url": "https://...", "forceRefresh": true }`.
|
|
55
|
-
2. Read the `markdown` field.
|
|
56
|
-
NOTE: Use `forceRefresh` only when stale content is suspected. Cached responses are faster.
|
|
57
|
-
|
|
58
|
-
### WORKFLOW C: FULL-FIDELITY FETCH (PRESERVE NOISE)
|
|
59
|
-
|
|
60
|
-
1. Call `fetch-url` with `{ "url": "https://...", "skipNoiseRemoval": true }`.
|
|
61
|
-
2. Read the `markdown` field — navigation, footers, and sidebars are preserved.
|
|
62
|
-
NOTE: Use this when page structure (nav, footer) is relevant to the task.
|
|
63
|
-
|
|
64
|
-
### WORKFLOW D: ASYNC EXECUTION (LARGE SITES / TIMEOUTS)
|
|
65
|
-
|
|
66
|
-
1. Call `tools/call` with `task: { ttl: ... }` to start a background fetch.
|
|
67
|
-
2. Poll `tasks/get` until status is `completed` or `failed`.
|
|
68
|
-
3. Retrieve result via `tasks/result`.
|
|
69
|
-
|
|
70
|
-
---
|
|
71
|
-
|
|
72
|
-
## TOOL NUANCES & GOTCHAS
|
|
73
|
-
|
|
74
|
-
`fetch-url`
|
|
75
|
-
|
|
76
|
-
- Purpose: Fetch a URL and return Markdown.
|
|
77
|
-
- Input: `{ url, skipNoiseRemoval?, forceRefresh?, maxInlineChars? }`
|
|
78
|
-
- `url` (required): Must be `http://` or `https://`. Max 2048 chars.
|
|
79
|
-
- `skipNoiseRemoval` (bool): Keeps navigation, footers, and other elements normally filtered.
|
|
80
|
-
- `forceRefresh` (bool): Bypasses the cache and fetches live.
|
|
81
|
-
- `maxInlineChars` (int, 0–10485760): Per-call inline limit. `0` means unlimited. If a global limit is configured, the lower value wins.
|
|
82
|
-
- Output: `{ url, inputUrl, resolvedUrl, finalUrl, cacheResourceUri, title, metadata, markdown, fromCache, fetchedAt, contentSize, truncated, error, statusCode, details }`
|
|
83
|
-
- `metadata`: Extracted page metadata — `title`, `description`, `author`, `image`, `favicon`, `publishedAt`, `modifiedAt`.
|
|
84
|
-
- `markdown`: The extracted content. May be absent on error.
|
|
85
|
-
- `truncated`: `true` when inline content was cut. Full content stored in cache.
|
|
86
|
-
- `resolvedUrl`: The normalized/raw-transformed URL actually fetched (GitHub/GitLab/Bitbucket URLs auto-convert to raw content URLs).
|
|
87
|
-
- `finalUrl`: The URL after following redirects.
|
|
88
|
-
- Side effects: No external side effects (read-only, idempotent). The only internal effect is that the in-memory cache is populated automatically.
|
|
89
|
-
- `cacheResourceUri`: Present when cache key generation succeeds; use with `resources/read` for full content retrieval.
|
|
90
|
-
- Gotcha: Inline Markdown may be truncated when `MAX_INLINE_CONTENT_CHARS` is configured or a per-call `maxInlineChars` limit is passed (the lower value wins). Check the `truncated` field and use the cache resource for full content.
|
|
91
|
-
- Gotcha: GitHub, GitLab, and Bitbucket URLs are auto-transformed to raw content endpoints. Check `resolvedUrl` to see the actual fetched URL.
|
|
92
|
-
- Gotcha: Does not execute client-side JavaScript. Content requiring JS rendering may be incomplete.
|
|
93
|
-
- Limits: HTML capped at 10 MB (`MAX_HTML_BYTES`). Inline content unlimited by default; set `MAX_INLINE_CONTENT_CHARS` env var to cap.
|
|
94
|
-
|
|
95
|
-
---
|
|
96
|
-
|
|
97
|
-
## CONSTRAINTS & LIMITATIONS
|
|
98
|
-
|
|
99
|
-
- **Blocked URLs:** localhost, private IPs (`10.x`, `172.16–31.x`, `192.168.x`), cloud metadata endpoints (`169.254.169.254`, `metadata.google.internal`, etc.), `.local`/`.internal` suffixes.
|
|
100
|
-
- **Max HTML size:** 10 MB per fetch.
|
|
101
|
-
- **Cache:** In-memory LRU — max 100 entries, 50 MB total, 24-hour TTL. Lost on process restart.
|
|
102
|
-
- **No JavaScript execution:** Pages relying on client-side rendering may yield incomplete Markdown.
|
|
103
|
-
- **Binary files:** Not supported — only HTML content is processed.
|
|
104
|
-
- **Redirects:** Max 5 redirects followed automatically.
|
|
105
|
-
|
|
106
|
-
---
|
|
107
|
-
|
|
108
|
-
## ERROR HANDLING STRATEGY
|
|
109
|
-
|
|
110
|
-
- `VALIDATION_ERROR`: URL invalid or blocked (private IP, metadata endpoint). Do not retry — fix the URL.
|
|
111
|
-
- `FETCH_ERROR`: Network/upstream failure (DNS, connection refused, timeout). Retry once with backoff.
|
|
112
|
-
- `HTTP_{status}` (e.g. `HTTP_404`, `HTTP_500`): Upstream returned an HTTP error. Check `statusCode` and `details` fields. Retry only for 5xx errors.
|
|
113
|
-
- `queue_full`: Worker pool busy (concurrent transforms). Wait briefly, then retry or use the Task interface.
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|