auggy 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/LICENSE +201 -0
- package/README.md +161 -0
- package/package.json +76 -0
- package/src/agent-card.ts +39 -0
- package/src/agent.ts +283 -0
- package/src/agentmail-client.ts +138 -0
- package/src/augments/bash/index.ts +463 -0
- package/src/augments/bash/skill/SKILL.md +156 -0
- package/src/augments/budgets/budget-store.ts +513 -0
- package/src/augments/budgets/index.ts +134 -0
- package/src/augments/budgets/preamble.ts +93 -0
- package/src/augments/budgets/types.ts +89 -0
- package/src/augments/file-memory/index.ts +71 -0
- package/src/augments/filesystem/index.ts +533 -0
- package/src/augments/filesystem/skill/SKILL.md +142 -0
- package/src/augments/filesystem/skill/references/mount-permissions.md +81 -0
- package/src/augments/layered-memory/extractor/buffer.ts +56 -0
- package/src/augments/layered-memory/extractor/frequency.ts +79 -0
- package/src/augments/layered-memory/extractor/inject-handler.ts +103 -0
- package/src/augments/layered-memory/extractor/parse.ts +75 -0
- package/src/augments/layered-memory/extractor/prompt.md +26 -0
- package/src/augments/layered-memory/index.ts +757 -0
- package/src/augments/layered-memory/skill/SKILL.md +153 -0
- package/src/augments/layered-memory/storage/migrations/README.md +16 -0
- package/src/augments/layered-memory/storage/migrations/supabase-add-fact-fields.sql +9 -0
- package/src/augments/layered-memory/storage/sqlite-store.ts +352 -0
- package/src/augments/layered-memory/storage/supabase-store.ts +263 -0
- package/src/augments/layered-memory/storage/types.ts +98 -0
- package/src/augments/link/index.ts +489 -0
- package/src/augments/link/translate.ts +261 -0
- package/src/augments/notify/adapters/agentmail.ts +70 -0
- package/src/augments/notify/adapters/telegram.ts +60 -0
- package/src/augments/notify/adapters/webhook.ts +55 -0
- package/src/augments/notify/index.ts +284 -0
- package/src/augments/notify/skill/SKILL.md +150 -0
- package/src/augments/org-context/index.ts +721 -0
- package/src/augments/org-context/skill/SKILL.md +96 -0
- package/src/augments/skills/index.ts +103 -0
- package/src/augments/supabase-memory/index.ts +151 -0
- package/src/augments/telegram-transport/index.ts +312 -0
- package/src/augments/telegram-transport/polling.ts +55 -0
- package/src/augments/telegram-transport/webhook.ts +56 -0
- package/src/augments/turn-control/index.ts +61 -0
- package/src/augments/turn-control/skill/SKILL.md +155 -0
- package/src/augments/visitor-auth/email-validation.ts +66 -0
- package/src/augments/visitor-auth/index.ts +779 -0
- package/src/augments/visitor-auth/rate-limiter.ts +90 -0
- package/src/augments/visitor-auth/skill/SKILL.md +55 -0
- package/src/augments/visitor-auth/storage/sqlite-store.ts +398 -0
- package/src/augments/visitor-auth/storage/types.ts +164 -0
- package/src/augments/visitor-auth/types.ts +123 -0
- package/src/augments/visitor-auth/verify-page.ts +179 -0
- package/src/augments/web-fetch/index.ts +331 -0
- package/src/augments/web-fetch/skill/SKILL.md +100 -0
- package/src/cli/agent-index.ts +289 -0
- package/src/cli/augment-catalog.ts +320 -0
- package/src/cli/augment-resolver.ts +597 -0
- package/src/cli/commands/add-skill.ts +194 -0
- package/src/cli/commands/add.ts +87 -0
- package/src/cli/commands/chat.ts +207 -0
- package/src/cli/commands/create.ts +462 -0
- package/src/cli/commands/dev.ts +139 -0
- package/src/cli/commands/eval.ts +180 -0
- package/src/cli/commands/ls.ts +66 -0
- package/src/cli/commands/remove.ts +95 -0
- package/src/cli/commands/restart.ts +40 -0
- package/src/cli/commands/start.ts +123 -0
- package/src/cli/commands/status.ts +104 -0
- package/src/cli/commands/stop.ts +84 -0
- package/src/cli/commands/visitors-revoke.ts +155 -0
- package/src/cli/commands/visitors.ts +101 -0
- package/src/cli/config-parser.ts +1034 -0
- package/src/cli/engine-resolver.ts +68 -0
- package/src/cli/index.ts +178 -0
- package/src/cli/model-picker.ts +89 -0
- package/src/cli/pid-registry.ts +146 -0
- package/src/cli/plist-generator.ts +117 -0
- package/src/cli/resolve-config.ts +56 -0
- package/src/cli/scaffold-skills.ts +158 -0
- package/src/cli/scaffold.ts +291 -0
- package/src/cli/skill-frontmatter.ts +51 -0
- package/src/cli/skill-validator.ts +151 -0
- package/src/cli/types.ts +228 -0
- package/src/cli/yaml-helpers.ts +66 -0
- package/src/engines/_shared/cost.ts +55 -0
- package/src/engines/_shared/schema-normalize.ts +75 -0
- package/src/engines/anthropic/pricing.ts +117 -0
- package/src/engines/anthropic.ts +483 -0
- package/src/engines/openai/pricing.ts +67 -0
- package/src/engines/openai.ts +446 -0
- package/src/engines/openrouter/pricing.ts +83 -0
- package/src/engines/openrouter.ts +185 -0
- package/src/helpers.ts +24 -0
- package/src/http.ts +387 -0
- package/src/index.ts +165 -0
- package/src/kernel/capability-table.ts +172 -0
- package/src/kernel/context-allocator.ts +161 -0
- package/src/kernel/history-manager.ts +198 -0
- package/src/kernel/lifecycle-manager.ts +106 -0
- package/src/kernel/output-validator.ts +35 -0
- package/src/kernel/preamble.ts +23 -0
- package/src/kernel/route-collector.ts +97 -0
- package/src/kernel/timeout.ts +21 -0
- package/src/kernel/tool-selector.ts +47 -0
- package/src/kernel/trace-emitter.ts +66 -0
- package/src/kernel/transport-queue.ts +147 -0
- package/src/kernel/turn-loop.ts +1148 -0
- package/src/memory/context-synthesis.ts +83 -0
- package/src/memory/memory-bus.ts +61 -0
- package/src/memory/registry.ts +80 -0
- package/src/memory/tools.ts +320 -0
- package/src/memory/types.ts +8 -0
- package/src/parts.ts +30 -0
- package/src/scaffold-templates/identity.md +31 -0
- package/src/telegram-client.ts +145 -0
- package/src/tokenizer.ts +14 -0
- package/src/transports/ag-ui-events.ts +253 -0
- package/src/transports/visitor-token.ts +82 -0
- package/src/transports/web-transport.ts +948 -0
- package/src/types.ts +1009 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import type { Augment } from "../../types";
|
|
3
|
+
import { defineTool } from "../../helpers";
|
|
4
|
+
import { createHttpClient } from "../../http";
|
|
5
|
+
import type { HttpClient, HttpClientOptions, HttpResponse } from "../../http";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* webFetch augment — fetch a URL, strip HTML to text, and render a
|
|
9
|
+
* prompt-aware summary back to the model.
|
|
10
|
+
*
|
|
11
|
+
* Ported from the Rust implementation in soongenwong/claudecode
|
|
12
|
+
* (rust/crates/tools/src/lib.rs, execute_web_fetch).
|
|
13
|
+
*
|
|
14
|
+
* Design choices carried over:
|
|
15
|
+
* - http→https upgrade for any non-localhost URL (normalizeFetchUrl).
|
|
16
|
+
* - 20s default timeout, 10-redirect cap, custom user-agent.
|
|
17
|
+
* - HTML→text via a small state machine (no DOM parser dependency),
|
|
18
|
+
* then collapse whitespace and decode a small set of entities.
|
|
19
|
+
* - Prompt-aware summarization: "title" / "summary|summarize" /
|
|
20
|
+
* default-preview modes, with a 600- or 900-char ceiling.
|
|
21
|
+
* - Output carries the final (post-redirect) URL, raw byte count, and
|
|
22
|
+
* round-trip duration so the model can reason about cost.
|
|
23
|
+
*
|
|
24
|
+
* Structural SSRF defense:
|
|
25
|
+
* - Pre-fetch URL filter rejects loopback, RFC 1918, link-local,
|
|
26
|
+
* cloud metadata endpoints, and non-http(s) schemes.
|
|
27
|
+
* - Redirect targets are filtered at each hop (via HttpClient's
|
|
28
|
+
* `rejectUnsafeUrls: true`), so a 3xx → internal-IP attack fails.
|
|
29
|
+
* - Note: DNS-rebinding is NOT defended at this layer — a public-looking
|
|
30
|
+
* hostname that resolves to a private IP at fetch time is out of scope.
|
|
31
|
+
*
|
|
32
|
+
* Not carried over:
|
|
33
|
+
* - No caching. Every call hits the network.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
// =========================================================================
|
|
37
|
+
// URL normalization
|
|
38
|
+
// =========================================================================
|
|
39
|
+
|
|
40
|
+
/**
 * Upgrade http://… to https://… for any host that is not loopback.
 * Matches normalize_fetch_url in the Rust original.
 *
 * Hosts left on plain http: `localhost`, `127.0.0.1`, and the IPv6
 * loopback `::1`. Every other http URL has its scheme switched to https;
 * URLs with any other scheme are returned unchanged (after WHATWG
 * serialization).
 *
 * @param url - Absolute URL string to normalize.
 * @returns The serialized, possibly upgraded, URL.
 * @throws TypeError if `url` cannot be parsed by the `URL` constructor.
 */
export function normalizeFetchUrl(url: string): string {
  const parsed = new URL(url);
  if (parsed.protocol !== "http:") {
    return parsed.toString();
  }

  // `URL.hostname` serializes IPv6 literals WITH their square brackets, so
  // the loopback address shows up as "[::1]". The bare "::1" form is kept
  // in the list for safety, but the bracketed form is the one that matches.
  const loopbackHosts = ["localhost", "127.0.0.1", "::1", "[::1]"];
  if (!loopbackHosts.includes(parsed.hostname)) {
    parsed.protocol = "https:";
  }
  return parsed.toString();
}
|
|
54
|
+
|
|
55
|
+
// =========================================================================
|
|
56
|
+
// HTML → text
|
|
57
|
+
// =========================================================================
|
|
58
|
+
|
|
59
|
+
/** Elements whose entire content is dropped from the extracted text. */
const SKIP_CONTENT_TAGS = new Set(["script", "style"]);

/**
 * Reduce an HTML string to plain text using a single pass over its
 * characters — no DOM parser, so the augment stays dependency-free.
 *
 * Behavior:
 * - Everything between `<` and `>` is treated as a tag and emits nothing.
 * - After an opening <script> or <style> tag, all text is suppressed until
 *   the matching closing tag is seen.
 * - Runs of whitespace collapse to a single space.
 * - A `>` that appears outside a tag is dropped, and no separator is
 *   inserted where a tag was removed.
 * - The accumulated text is entity-decoded and whitespace-collapsed once
 *   more before being returned.
 *
 * @param html - Raw HTML source.
 * @returns The extracted text.
 */
function htmlToText(html: string): string {
  const pieces: string[] = [];
  let insideTag = false;
  let currentTag = ""; // characters seen since the most recent "<"
  let suppressedBy = ""; // name of the open skip-content tag, or "" if none
  let lastWasSpace = false;

  for (const ch of html) {
    if (ch === "<") {
      insideTag = true;
      currentTag = "";
    } else if (ch === ">") {
      insideTag = false;
      if (suppressedBy === "") {
        // Not suppressing: an opening <script>/<style> starts suppression.
        const opened = (currentTag.split(/[\s/]/)[0] ?? "").toLowerCase();
        if (SKIP_CONTENT_TAGS.has(opened)) {
          suppressedBy = opened;
        }
      } else if (currentTag.startsWith("/")) {
        // Suppressing: only the matching closing tag ends it.
        const closed = (currentTag.slice(1).split(/[\s/]/)[0] ?? "").toLowerCase();
        if (closed === suppressedBy) {
          suppressedBy = "";
        }
      }
    } else if (insideTag) {
      currentTag += ch;
    } else if (suppressedBy !== "") {
      // Inside <script>/<style> content: emit nothing.
    } else if (/\s/.test(ch)) {
      if (!lastWasSpace) {
        pieces.push(" ");
        lastWasSpace = true;
      }
    } else {
      pieces.push(ch);
      lastWasSpace = false;
    }
  }

  return collapseWhitespace(decodeHtmlEntities(pieces.join("")));
}
|
|
125
|
+
|
|
126
|
+
/**
 * Decode common HTML entities. The order matters: &amp; must be decoded
 * LAST to avoid double-decoding (e.g. &amp;lt; → &lt; → < is wrong;
 * the correct result for &amp;lt; is the literal text "&lt;").
 *
 * Recognized entities: &lt; &gt; &quot; &#39; &nbsp; &amp;.
 *
 * Numeric entities in general (e.g. &#60;, &#x3C;) are intentionally not
 * handled — only the literal &#39; is recognized. They are uncommon in
 * typical web pages and adding a regex pass for them would complicate this
 * deliberately simple decoder. Matches the Rust original.
 *
 * @param input - Text that may contain HTML entities.
 * @returns The text with the recognized entities replaced.
 */
function decodeHtmlEntities(input: string): string {
  return input
    .replaceAll("&lt;", "<")
    .replaceAll("&gt;", ">")
    .replaceAll("&quot;", '"')
    .replaceAll("&#39;", "'")
    .replaceAll("&nbsp;", " ")
    .replaceAll("&amp;", "&");
}
|
|
144
|
+
|
|
145
|
+
/**
 * Join every whitespace-separated word of `input` with a single space.
 * Leading and trailing whitespace disappears; an empty or all-whitespace
 * input yields the empty string.
 */
function collapseWhitespace(input: string): string {
  const words = input.match(/\S+/g) ?? [];
  return words.join(" ");
}
|
|
151
|
+
|
|
152
|
+
/**
 * Cap `input` at `maxChars` Unicode code points.
 *
 * Length is measured by iterating the string (code points, not UTF-16
 * units), mirroring how Rust's chars() iterator counts in preview_text.
 * Input within the limit is returned untouched; otherwise the kept prefix
 * has trailing whitespace removed and "…" appended.
 */
function previewText(input: string, maxChars: number): string {
  const codePoints = [...input];
  if (codePoints.length > maxChars) {
    const head = codePoints.slice(0, maxChars).join("");
    return head.trimEnd() + "…";
  }
  return input;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Report whether a Content-Type value denotes JSON: either
 * `application/json` or any structured-syntax `+json` type.
 *
 * Media types are case-insensitive, so the value is lowercased before
 * matching.
 *
 * @param contentType - Content-Type header value (parameters allowed).
 * @returns true when the value looks like a JSON media type.
 */
function isJsonContentType(contentType: string): boolean {
  const mediaType = contentType.toLowerCase();
  return mediaType.includes("application/json") || mediaType.includes("+json");
}
|
|
167
|
+
|
|
168
|
+
/**
 * Turn a raw response body into the text handed to the summarizer.
 *
 * - JSON bodies are returned trimmed, otherwise untouched.
 * - HTML bodies are stripped to text via htmlToText.
 * - Anything else is returned trimmed.
 *
 * @param body - Response body as text.
 * @param contentType - Content-Type header value of the response.
 * @returns The normalized text.
 */
function normalizeFetchedContent(body: string, contentType: string): string {
  // Media types are case-insensitive, so compare against a lowercased copy.
  const mediaType = contentType.toLowerCase();
  // JSON takes precedence over the "html" substring check.
  if (!isJsonContentType(mediaType) && mediaType.includes("html")) {
    return htmlToText(body);
  }
  return body.trim();
}
|
|
173
|
+
|
|
174
|
+
// =========================================================================
|
|
175
|
+
// Prompt-aware summarization
|
|
176
|
+
// =========================================================================
|
|
177
|
+
|
|
178
|
+
/**
 * Find a title for fetched content.
 *
 * For HTML responses, the text of the first <title>…</title> element in
 * the raw body is used, entity-decoded and whitespace-collapsed. When
 * there is no such element, or its text is empty, the first non-empty
 * line of the normalized content is used instead.
 *
 * @param content - Normalized (text) content of the response.
 * @param rawBody - Unmodified response body.
 * @param contentType - Content-Type header value of the response.
 * @returns The title, or null when the content has no non-empty line.
 */
function extractTitle(content: string, rawBody: string, contentType: string): string | null {
  if (contentType.toLowerCase().includes("html")) {
    // Match case-insensitively on the raw body itself. Searching a
    // lowercased copy and slicing the original by those offsets is unsafe:
    // toLowerCase() can change the string's length (e.g. U+0130), which
    // shifts every later index. Attributes on <title> are tolerated.
    const match = /<title(?:\s[^>]*)?>([\s\S]*?)<\/title>/i.exec(rawBody);
    if (match) {
      const title = collapseWhitespace(decodeHtmlEntities(match[1] ?? ""));
      if (title.length > 0) return title;
    }
  }

  // Fallback: first non-empty line of the normalized content.
  for (const line of content.split("\n")) {
    const trimmed = line.trim();
    if (trimmed.length > 0) return trimmed;
  }
  return null;
}
|
|
200
|
+
|
|
201
|
+
/** Character ceiling for JSON bodies — far above the 600/900-char text previews. */
const JSON_MAX_CHARS = 20_000;

/**
 * Build the `result` text returned to the model for a fetched page.
 *
 * Every variant starts with a "Fetched <url>" line, followed by:
 * - JSON responses: the content itself, capped at JSON_MAX_CHARS. The
 *   prompt is ignored so structured data reaches the model intact.
 * - Prompt mentions "title": "Title: …", or a 600-char preview when no
 *   title can be found.
 * - Prompt mentions "summary" or "summarize": a 900-char preview.
 * - Otherwise: the prompt echoed back, then a 900-char preview.
 *
 * Prompt keywords are matched case-insensitively as substrings.
 */
function summarizeWebFetch(args: {
  url: string;
  prompt: string;
  content: string;
  rawBody: string;
  contentType: string;
}): string {
  const { url, prompt, content, rawBody, contentType } = args;
  const withHeader = (detail: string): string => `Fetched ${url}\n${detail}`;

  if (isJsonContentType(contentType)) {
    return withHeader(previewText(content, JSON_MAX_CHARS));
  }

  const compact = collapseWhitespace(content);
  const wanted = prompt.toLowerCase();

  if (wanted.includes("title")) {
    const title = extractTitle(content, rawBody, contentType);
    return withHeader(title === null ? previewText(compact, 600) : `Title: ${title}`);
  }

  if (wanted.includes("summary") || wanted.includes("summarize")) {
    return withHeader(previewText(compact, 900));
  }

  return withHeader(`Prompt: ${prompt}\nContent preview:\n${previewText(compact, 900)}`);
}
|
|
234
|
+
|
|
235
|
+
// =========================================================================
|
|
236
|
+
// Augment
|
|
237
|
+
// =========================================================================
|
|
238
|
+
|
|
239
|
+
/**
 * Options accepted by `webFetch`: every `HttpClientOptions` field plus an
 * optional ready-made client.
 */
export interface WebFetchOptions extends HttpClientOptions {
  /**
   * Pre-built HTTP client to use for all requests — handy for sharing one
   * client between augments or for injecting a mock in tests. When it is
   * not supplied, `webFetch` builds its own client from the remaining
   * options.
   */
  client?: HttpClient;
}
|
|
247
|
+
|
|
248
|
+
/** Envelope serialized to JSON and returned by a successful `web_fetch` call. */
export interface WebFetchResult {
  /** Final URL reported by the HTTP client for the response. */
  url: string;
  /** HTTP status code of the response. */
  code: number;
  /** HTTP status text of the response. */
  codeText: string;
  /** Size of the response body in bytes when encoded as UTF-8. */
  bytes: number;
  /** Elapsed time for the whole tool call, rounded to whole milliseconds. */
  durationMs: number;
  /** Pre-formatted, prompt-aware text block produced from the body. */
  result: string;
}
|
|
256
|
+
|
|
257
|
+
/**
 * Augment that exposes a single `web_fetch` tool. The tool fetches a URL,
 * normalizes HTML to text, and renders a prompt-aware summary.
 *
 * The output string the model sees is JSON containing:
 *   { url, code, codeText, bytes, durationMs, result }
 *
 * Where `result` is a pre-formatted human-readable block the model can
 * reason about directly. Matches the Rust WebFetchOutput shape.
 *
 * Failures never throw out of the tool. They are returned as JSON with an
 * `error` field instead: `{ error }` when the URL cannot be parsed, or
 * `{ url, error, durationMs }` when the request itself fails.
 *
 * @param opts - HTTP client options, or a pre-built client via `opts.client`.
 * @returns An augment named "web-fetch" carrying the `web_fetch` tool.
 */
export function webFetch(opts: WebFetchOptions = {}): Augment {
  // SSRF guard is on by default — web_fetch ingests model-supplied URLs.
  // Because `opts` is spread after the default, an explicit
  // `rejectUnsafeUrls` in `opts` wins; passing a pre-built client bypasses
  // this construction entirely.
  const client = opts.client ?? createHttpClient({ rejectUnsafeUrls: true, ...opts });

  // JavaScript allows throwing any value. Reading `.message` off a non-Error
  // yields undefined, which JSON.stringify would silently drop — leaving the
  // model with an envelope that has no `error` field at all.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  const webFetchTool = defineTool({
    name: "web_fetch",
    description: "Fetch a URL, convert it into readable text, and answer a prompt about it.",
    category: "search",
    input: z.object({
      url: z.string().url(),
      prompt: z.string(),
    }),
    execute: async ({ url, prompt }) => {
      const startedAt = performance.now();

      let requestUrl: string;
      try {
        requestUrl = normalizeFetchUrl(url);
      } catch (error) {
        return JSON.stringify({
          error: `invalid URL: ${describeError(error)}`,
        });
      }

      // SSRF guard is enforced by the underlying HttpClient — rejected URLs
      // throw and fall into the catch below, surfaced as a structured error.
      let response: HttpResponse;
      try {
        response = await client.get(requestUrl);
      } catch (error) {
        return JSON.stringify({
          url: requestUrl,
          error: describeError(error),
          durationMs: Math.round(performance.now() - startedAt),
        });
      }

      const normalized = normalizeFetchedContent(response.body, response.contentType);
      const result = summarizeWebFetch({
        url: response.finalUrl,
        prompt,
        content: normalized,
        rawBody: response.body,
        contentType: response.contentType,
      });

      const output: WebFetchResult = {
        url: response.finalUrl,
        code: response.status,
        codeText: response.statusText,
        // Byte count of the body text re-encoded as UTF-8.
        bytes: new TextEncoder().encode(response.body).byteLength,
        durationMs: Math.round(performance.now() - startedAt),
        result,
      };
      return JSON.stringify(output);
    },
  });

  return {
    name: "web-fetch",
    capabilities: ["tools"],
    tools: [webFetchTool],
  };
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: web-fetch
|
|
3
|
+
description: When and how to use the web_fetch tool to retrieve a URL, read a web page, or call an HTTP API. Read this before fetching anything from the network.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Web fetch
|
|
7
|
+
|
|
8
|
+
You have a single tool, `web_fetch`, that retrieves a URL over the network and gives you the body back as readable text (for HTML) or as raw JSON (for APIs). Use it whenever the conversation needs information that lives at a public URL.
|
|
9
|
+
|
|
10
|
+
## When to use it
|
|
11
|
+
|
|
12
|
+
| Situation | Action |
|
|
13
|
+
|-----------|--------|
|
|
14
|
+
| The peer shares a URL | Fetch it and summarize what's there |
|
|
15
|
+
| You need to check the current state of a web page | Fetch the URL |
|
|
16
|
+
| You need to call a public HTTP API | Fetch the endpoint |
|
|
17
|
+
| The peer asks "what's at this link?" | Fetch and answer |
|
|
18
|
+
| You need to verify a fact that may have changed since training | Fetch a current source |
|
|
19
|
+
|
|
20
|
+
## How to call it
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
web_fetch({ url: "https://example.com", prompt: "summarize this page" })
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
**Parameters:**
|
|
27
|
+
- `url` — the full URL. Plain `http://` URLs to non-loopback hosts are auto-upgraded to `https://`.
|
|
28
|
+
- `prompt` — what you want from the content. The tool uses the prompt to shape the returned slice (title, summary, or default preview).
|
|
29
|
+
|
|
30
|
+
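The `prompt` is required. It steers what the tool returns. Keywords are matched case-insensitively anywhere in the prompt, and the first matching rule below wins: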
|
|
31
|
+
|
|
32
|
+
- Prompt contains "title" → returns the page title
|
|
33
|
+
- Prompt contains "summary" or "summarize" → returns up to ~900 chars of cleaned text
|
|
34
|
+
- Anything else → returns up to ~900 chars of cleaned text prefixed with your prompt for reference
|
|
35
|
+
|
|
36
|
+
For JSON APIs, the prompt is informational only — the tool returns the raw JSON body (up to ~20,000 chars) regardless.
|
|
37
|
+
|
|
38
|
+
## What it returns
|
|
39
|
+
|
|
40
|
+
A JSON envelope with these fields:
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
{
|
|
44
|
+
"url": "https://example.com", // final URL after redirects
|
|
45
|
+
"code": 200, // HTTP status code
|
|
46
|
+
"codeText": "OK", // status text
|
|
47
|
+
"bytes": 1234, // raw body byte count
|
|
48
|
+
"durationMs": 320, // round-trip duration
|
|
49
|
+
"result": "Fetched ... \n..." // the human-readable slice
|
|
50
|
+
}
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
For HTML, `result` is HTML stripped to text (script and style content removed, whitespace collapsed, common entities decoded), then truncated to the prompt-aware length. For JSON, `result` is the raw body up to ~20K chars.
|
|
54
|
+
|
|
55
|
+
## What you cannot fetch
|
|
56
|
+
|
|
57
|
+
The tool refuses URLs that point at:
|
|
58
|
+
|
|
59
|
+
- Loopback addresses (`localhost`, `127.0.0.1`, `::1`)
|
|
60
|
+
- Private network ranges (`10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`)
|
|
61
|
+
- Link-local addresses (`169.254.0.0/16`, `fe80::/10`)
|
|
62
|
+
- Cloud metadata endpoints
|
|
63
|
+
- Non-`http(s)` schemes (`file:`, `data:`, `ftp:`, etc.)
|
|
64
|
+
|
|
65
|
+
Redirects are checked at each hop, so a `3xx` redirect to a private address fails the same way a direct request would. If you get an SSRF rejection, don't rewrite the URL to try to bypass it — the rejection is structural.
|
|
66
|
+
|
|
67
|
+
## Common mistakes
|
|
68
|
+
|
|
69
|
+
| Wrong | Correct |
|
|
70
|
+
|-------|---------|
|
|
71
|
+
| Telling the peer "I can't access URLs" | Use `web_fetch` — you can |
|
|
72
|
+
| Calling `web_fetch` without a `prompt` argument | Always include a prompt; it's required and it shapes the response |
|
|
73
|
+
| Fetching the same URL multiple times in one turn | The tool does not cache; fetch once and reuse the result |
|
|
74
|
+
| Trusting fetched content as authoritative | Treat fetched content as a source to reason about, not as ground truth — pages can be wrong, biased, or adversarial |
|
|
75
|
+
| Pasting fetched content verbatim into your reply | Read it, summarize the relevant parts, cite the URL |
|
|
76
|
+
| Fetching internal/private URLs to "test" something | The SSRF guard will refuse; that's by design |
|
|
77
|
+
|
|
78
|
+
## Workflows
|
|
79
|
+
|
|
80
|
+
### Peer shares a link
|
|
81
|
+
|
|
82
|
+
1. `web_fetch({ url: <link>, prompt: "summarize this page" })`
|
|
83
|
+
2. Read the `result` field
|
|
84
|
+
3. Summarize back to the peer in your own words; mention the URL and the fact that you fetched it
|
|
85
|
+
|
|
86
|
+
### Calling a public JSON API
|
|
87
|
+
|
|
88
|
+
1. `web_fetch({ url: <api-endpoint>, prompt: "what does this return" })`
|
|
89
|
+
2. The `result` field will hold the raw JSON
|
|
90
|
+
3. Parse the structure mentally and answer the peer's question; don't dump the full JSON unless they asked for it
|
|
91
|
+
|
|
92
|
+
### The fetch fails
|
|
93
|
+
|
|
94
|
+
The result envelope will contain an `error` field instead of a `result`. Common causes:
|
|
95
|
+
|
|
96
|
+
- The URL is unreachable or returned non-200 — surface the status to the peer so they know
|
|
97
|
+
- The host failed the SSRF guard — explain you can't fetch internal addresses
|
|
98
|
+
- The request timed out — the tool uses a ~20s default; the URL may be slow or down
|
|
99
|
+
|
|
100
|
+
Don't retry blindly. If the peer wants you to try a different URL, ask them.
|