@pugi/cli 0.1.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +172 -0
- package/bin/run.js +2 -0
- package/dist/commands/jobs.js +245 -0
- package/dist/core/agents/loader.js +104 -0
- package/dist/core/agents/registry.js +69 -0
- package/dist/core/auto-open-browser.js +128 -0
- package/dist/core/bash-classifier.js +1001 -0
- package/dist/core/clipboard.js +70 -0
- package/dist/core/context/builder.js +114 -0
- package/dist/core/context/compaction-events.js +99 -0
- package/dist/core/context/compaction.js +602 -0
- package/dist/core/context/invariants.js +250 -0
- package/dist/core/context/markdown-loader.js +270 -0
- package/dist/core/credentials.js +355 -0
- package/dist/core/engine/adapter-runner.js +8 -0
- package/dist/core/engine/anvil-client.js +156 -0
- package/dist/core/engine/compaction-hook.js +154 -0
- package/dist/core/engine/index.js +12 -0
- package/dist/core/engine/native-pugi.js +369 -0
- package/dist/core/engine/noop.js +27 -0
- package/dist/core/engine/prompts.js +118 -0
- package/dist/core/engine/tool-bridge.js +313 -0
- package/dist/core/file-cache.js +29 -0
- package/dist/core/hooks.js +415 -0
- package/dist/core/index-store.js +260 -0
- package/dist/core/jobs/registry.js +462 -0
- package/dist/core/mcp/client.js +316 -0
- package/dist/core/mcp/registry.js +171 -0
- package/dist/core/mcp/trust.js +91 -0
- package/dist/core/path-security.js +63 -0
- package/dist/core/permission.js +309 -0
- package/dist/core/repl/cap-warning.js +91 -0
- package/dist/core/repl/clipboard-read.js +174 -0
- package/dist/core/repl/history-search.js +175 -0
- package/dist/core/repl/history.js +172 -0
- package/dist/core/repl/kill-ring.js +138 -0
- package/dist/core/repl/session.js +618 -0
- package/dist/core/repl/slash-commands.js +227 -0
- package/dist/core/repl/workspace-context.js +113 -0
- package/dist/core/session.js +258 -0
- package/dist/core/settings.js +59 -0
- package/dist/core/skills/loader.js +454 -0
- package/dist/core/skills/sources.js +480 -0
- package/dist/core/skills/trust.js +172 -0
- package/dist/core/subagents/dispatcher.js +258 -0
- package/dist/core/subagents/index.js +26 -0
- package/dist/core/subagents/spawn.js +86 -0
- package/dist/core/trust.js +109 -0
- package/dist/index.js +8 -0
- package/dist/runtime/cli.js +3405 -0
- package/dist/runtime/commands/agents.js +385 -0
- package/dist/runtime/commands/budget.js +192 -0
- package/dist/runtime/commands/config.js +231 -0
- package/dist/runtime/commands/privacy.js +107 -0
- package/dist/runtime/commands/skills.js +401 -0
- package/dist/runtime/commands/undo.js +329 -0
- package/dist/runtime/update-check.js +294 -0
- package/dist/tools/bash.js +660 -0
- package/dist/tools/file-tools.js +346 -0
- package/dist/tools/registry.js +25 -0
- package/dist/tools/web-fetch.js +535 -0
- package/dist/tui/agent-tree.js +66 -0
- package/dist/tui/conversation-pane.js +45 -0
- package/dist/tui/device-flow.js +142 -0
- package/dist/tui/input-box.js +474 -0
- package/dist/tui/login-picker.js +69 -0
- package/dist/tui/render.js +125 -0
- package/dist/tui/repl-render.js +240 -0
- package/dist/tui/repl-splash-art.js +64 -0
- package/dist/tui/repl-splash.js +111 -0
- package/dist/tui/repl.js +214 -0
- package/dist/tui/slash-palette.js +106 -0
- package/dist/tui/splash-data.js +61 -0
- package/dist/tui/splash.js +31 -0
- package/dist/tui/status-bar.js +71 -0
- package/dist/tui/update-banner.js +8 -0
- package/dist/tui/workspace-context.js +105 -0
- package/package.json +71 -0
|
@@ -0,0 +1,535 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* web_fetch tool — Sprint α6.15 Phase 1 quick-win.
|
|
3
|
+
*
|
|
4
|
+
* One-shot HTTP GET against an operator-supplied URL. The response is
|
|
5
|
+
* parsed with Readability over a linkedom DOM, converted to Markdown
|
|
6
|
+
* with Turndown, and returned wrapped in an `<untrusted-content-NONCE>`
|
|
7
|
+
* sentinel that downstream prompts must treat as data, never as
|
|
8
|
+
* instructions.
|
|
9
|
+
*
|
|
10
|
+
* Sentinel pattern (P0 from `docs/specs/pugi-browser-integration-2026-05-24.md`
|
|
11
|
+
* §10 risk 3): fetched bytes can carry prompt injection. The Mira system
|
|
12
|
+
* prompt is expected to honor the `<untrusted-content-*>` wrapping the
|
|
13
|
+
* way Anthropic Computer Use and Codex `skills/chrome/SKILL.md` do —
|
|
14
|
+
* treat the block as fact, refuse to follow instructions inside.
|
|
15
|
+
* The tag carries a per-call random nonce so a page literal of
|
|
16
|
+
* `</untrusted-content>` cannot break out of the boundary, and the
|
|
17
|
+
* source URL lives inside the body (escaped) instead of as an opening
|
|
18
|
+
* attribute so quote-injection cannot break the tag.
|
|
19
|
+
*
|
|
20
|
+
* Gate: the tool refuses unless either the caller flips
|
|
21
|
+
* `web.fetch.enabled = true` in `.pugi/settings.json` or the CLI
|
|
22
|
+
* runtime sets `allowFetch: true` (mapped from `--allow-fetch`). The
|
|
23
|
+
* default-off posture mirrors the «no auto-fetch from chat» rule in
|
|
24
|
+
* §8 anti-pattern 1 of the spec.
|
|
25
|
+
*
|
|
26
|
+
* SSRF guard: every URL we are about to fetch (initial + every redirect
|
|
27
|
+
* hop) is resolved via `dns.lookup` and rejected if any answer maps to
|
|
28
|
+
* loopback, link-local, RFC 1918, CGNAT, IPv6 ULA/link-local, or the
|
|
29
|
+
* 0.0.0.0/8 wildcard. There is a microsecond TOCTOU window between
|
|
30
|
+
* lookup and the kernel's connect(); we accept it for v1 because
|
|
31
|
+
* exploiting it requires DNS control over the target host plus a
|
|
32
|
+
* timing race. Tracked as P3 follow-up.
|
|
33
|
+
*
|
|
34
|
+
* Brand voice: brief / dispatch / ship / sentinel only. The
|
|
35
|
+
* brandbook §08 forbidden-word list applies — see CLAUDE.md.
|
|
36
|
+
*/
|
|
37
|
+
import { request } from 'undici';
|
|
38
|
+
import { Readability } from '@mozilla/readability';
|
|
39
|
+
import { parseHTML } from 'linkedom';
|
|
40
|
+
import TurndownService from 'turndown';
|
|
41
|
+
import { randomBytes } from 'node:crypto';
|
|
42
|
+
import { lookup as dnsLookup } from 'node:dns/promises';
|
|
43
|
+
import { isIPv4, isIPv6 } from 'node:net';
|
|
44
|
+
/**
 * Default resolver: asks `dns.lookup` for every address of `hostname`
 * (`all: true`) and keeps the resolver's own ordering (`verbatim: true`).
 */
const lookupAllAddresses = async (hostname) => dnsLookup(hostname, { all: true, verbatim: true });

/** Resolver the SSRF guard currently calls; reassigned only by `_setLookupForTests`. */
let activeLookup = lookupAllAddresses;

/**
 * Replace the resolver used by this module.
 *
 * @param {Function|null|undefined} fn - Replacement resolver; a nullish
 *   value restores the default `dns.lookup`-backed resolver.
 */
export function _setLookupForTests(fn) {
    activeLookup = fn ?? lookupAllAddresses;
}
|
|
48
|
+
// Timeout handed to undici for both the headers and the body phase (10 s).
const FETCH_TIMEOUT_MS = 10 * 1000;
// Hard ceiling on the number of response bytes read (5 MiB).
const MAX_RESPONSE_BYTES = 5 * 1024 * 1024;
// Maximum number of redirect hops followed before giving up.
const MAX_REDIRECTS = 5;
// Value sent in the `user-agent` request header.
const USER_AGENT = 'pugi-cli/0.1 (+https://pugi.dev)';
// Response MIME types that are accepted for conversion.
const ALLOWED_CONTENT_TYPES = [
    'text/html',
    'application/xhtml+xml',
    'text/plain',
];
|
|
53
|
+
/**
 * Report whether the web_fetch gate is open for this invocation.
 *
 * @param {object} ctx - Runtime context; `ctx.allowFetch` is checked first
 *   and `ctx.settings.web.fetch.enabled` is consulted only when it is not `true`.
 * @returns {boolean} `true` only when one of the two switches is strictly `true`.
 */
export function isWebFetchEnabled(ctx) {
    const enabledByFlag = ctx.allowFetch === true;
    // `||` short-circuits, so settings are not touched when the flag is set.
    return enabledByFlag || ctx.settings.web?.fetch?.enabled === true;
}
|
|
58
|
+
/* ----------------------- SSRF guard helpers ---------------------- */
|
|
59
|
+
/**
 * Convert a dotted-quad IPv4 string into an unsigned 32-bit integer.
 *
 * @param {string} ip - Candidate address, e.g. `"192.168.0.1"`.
 * @returns {number|null} The numeric address, or `null` when `ip` is not
 *   exactly four groups of one to three decimal digits, each at most 255.
 */
function ipv4ToInt(ip) {
    const octets = ip.split('.');
    if (octets.length !== 4) {
        return null;
    }
    let value = 0;
    for (let i = 0; i < octets.length; i += 1) {
        const text = octets[i];
        if (!/^\d{1,3}$/.test(text)) {
            return null;
        }
        const octet = Number(text);
        // The digit-only pattern rules out negatives; only the upper bound can fail.
        if (octet > 255) {
            return null;
        }
        // Plain arithmetic stays exact here (max 2^32 - 1), avoiding signed shifts.
        value = value * 256 + octet;
    }
    return value >>> 0;
}
|
|
80
|
+
/**
 * Test whether an IPv4 address falls inside a CIDR range.
 *
 * @param {string} ip - Dotted-quad address to test.
 * @param {string} cidr - Dotted-quad base address of the range.
 * @param {number} prefix - Number of leading bits that must match.
 * @returns {boolean} `true` when the leading `prefix` bits of both addresses
 *   agree; `false` when either string fails to parse.
 */
function ipv4InCidr(ip, cidr, prefix) {
    const candidate = ipv4ToInt(ip);
    const network = ipv4ToInt(cidr);
    if (candidate === null || network === null) {
        return false;
    }
    // A zero-length prefix covers the entire address space.
    if (prefix === 0) {
        return true;
    }
    const mask = prefix === 32 ? 0xffffffff : (0xffffffff << (32 - prefix)) >>> 0;
    // Any differing bit under the mask means the address is outside the range.
    return ((candidate ^ network) & mask) === 0;
}
|
|
93
|
+
/**
 * IPv4 ranges the fetcher must never connect to, stored as
 * `[baseAddress, prefixLength]` pairs. Written as CIDR strings for
 * readability and split into pairs once at module load.
 */
const IPV4_BLOCKED_RANGES = [
    '0.0.0.0/8', // "this network" wildcard
    '10.0.0.0/8', // RFC 1918
    '100.64.0.0/10', // RFC 6598 CGNAT
    '127.0.0.0/8', // loopback
    '169.254.0.0/16', // link-local + AWS/GCP metadata
    '172.16.0.0/12', // RFC 1918
    '192.0.0.0/24', // IETF protocol assignments
    '192.168.0.0/16', // RFC 1918
    '198.18.0.0/15', // benchmarking
    '224.0.0.0/4', // multicast
    '240.0.0.0/4', // reserved (includes 255.255.255.255 broadcast)
].map((cidr) => {
    const [base, bits] = cidr.split('/');
    return [base, Number(bits)];
});
|
|
111
|
+
/**
 * Normalise an IPv6 address into its eight 16-bit groups.
 *
 * Accepts a zone suffix (`%eth0`), `::` compression, and a trailing
 * dotted-quad (`::ffff:1.2.3.4`).
 *
 * @param {string} ip - Address text to expand.
 * @returns {string[]|null} Eight lowercase, zero-padded 4-digit hex words,
 *   or `null` when the text cannot be read as IPv6.
 */
function expandIPv6(ip) {
    // Anything after `%` is a zone id, not part of the address.
    const [withoutZone = ip] = ip.split('%');
    let text = withoutZone;
    // Rewrite a trailing dotted quad as two hex words so the rest of the
    // parser only ever sees colon-separated groups.
    const tailStart = text.lastIndexOf(':') + 1;
    const tail = text.slice(tailStart);
    if (tailStart > 0 && tail.includes('.')) {
        const packed = ipv4ToInt(tail);
        if (packed === null) {
            return null;
        }
        const upper = ((packed >>> 16) & 0xffff).toString(16);
        const lower = (packed & 0xffff).toString(16);
        text = `${text.slice(0, tailStart - 1)}:${upper}:${lower}`;
    }
    if (!text.includes(':')) {
        return null;
    }
    const halves = text.split('::');
    if (halves.length > 2) {
        return null;
    }
    const toWords = (segment) => (segment ? segment.split(':') : []);
    const head = toWords(halves[0]);
    const trail = toWords(halves[1]);
    const given = head.length + trail.length;
    const compressed = halves.length === 2;
    // Without `::` all eight groups must be spelled out.
    if (given > 8 || (!compressed && given !== 8)) {
        return null;
    }
    const words = [...head, ...new Array(8 - given).fill('0'), ...trail];
    if (!words.every((word) => /^[0-9a-fA-F]{1,4}$/.test(word))) {
        return null;
    }
    return words.map((word) => word.toLowerCase().padStart(4, '0'));
}
|
|
154
|
+
/**
|
|
155
|
+
* Reject the IPv6 ranges the SSRF guard never wants to reach.
|
|
156
|
+
* Covers loopback (::1), unspecified (::), link-local (fe80::/10),
|
|
157
|
+
* unique local (fc00::/7), discard (100::/64), and IPv4-mapped
|
|
158
|
+
* (::ffff:0:0/96 — must also block since the embedded IPv4 still
|
|
159
|
+
* routes locally on some stacks).
|
|
160
|
+
*/
|
|
161
|
+
/**
 * Render the final two 16-bit words of an expanded IPv6 address as a
 * dotted IPv4 string. Used by the embedded-IPv4 checks in the IPv6 guard.
 *
 * @param {string[]} words - Hex words as produced by `expandIPv6`; a
 *   missing word at index 6 or 7 is treated as `0`.
 * @returns {string} Dotted quad built from the high and low byte of each word.
 */
function embeddedIPv4FromTrailingWords(words) {
    const octets = [words[6], words[7]].flatMap((word) => {
        const value = parseInt(word ?? '0', 16);
        return [value >>> 8, value & 0xff];
    });
    return octets.join('.');
}
|
|
171
|
+
/**
 * Decide whether an IPv6 address must be refused by the SSRF guard.
 *
 * Blocked: loopback (`::1`), unspecified (`::`), the discard prefix
 * (`100::/64`), unique-local (`fc00::/7`), link-local (`fe80::/10`),
 * multicast (`ff00::/8`), and the IPv4-mapped, IPv4-translated and
 * NAT64 well-known forms whenever the embedded IPv4 address is itself
 * blocked.
 *
 * @param {string} ip - IPv6 address text (a zone id is tolerated).
 * @returns {boolean} `true` when the address must not be fetched. Input
 *   that cannot be parsed is also reported as blocked.
 */
function ipv6IsBlocked(ip) {
    const words = expandIPv6(ip);
    // Fail closed: an address this guard cannot parse cannot be vetted, so
    // it is refused rather than waved through.
    if (!words) {
        return true;
    }
    const allZero = (from, to) => words.slice(from, to).every((w) => w === '0000');
    const embeddedIsBlocked = () => ipv4IsBlocked(embeddedIPv4FromTrailingWords(words));
    // `::` (unspecified / wildcard) and `::1` (loopback).
    if (allZero(0, 7) && (words[7] === '0000' || words[7] === '0001')) {
        return true;
    }
    // ::ffff:0:0/96 IPv4-mapped, e.g. ::ffff:127.0.0.1.
    if (allZero(0, 5) && words[5] === 'ffff' && embeddedIsBlocked()) {
        return true;
    }
    // ::ffff:0:0:0/96 IPv4-translated (SIIT), e.g. ::ffff:0:a9fe:a9fe.
    if (allZero(0, 4) && words[4] === 'ffff' && words[5] === '0000' && embeddedIsBlocked()) {
        return true;
    }
    // 100::/64 discard prefix.
    if (words[0] === '0100' && allZero(1, 4)) {
        return true;
    }
    // 64:ff9b::/96 well-known NAT64 prefix; the last 32 bits are an IPv4 address.
    if (words[0] === '0064' && words[1] === 'ff9b' && allZero(2, 6) && embeddedIsBlocked()) {
        return true;
    }
    const firstByte = parseInt(words[0]?.slice(0, 2) ?? '00', 16);
    const firstTenBits = parseInt(words[0] ?? '0000', 16) & 0xffc0;
    // fc00::/7 unique local (top seven bits are 1111110).
    if ((firstByte & 0xfe) === 0xfc) {
        return true;
    }
    // fe80::/10 link-local.
    if (firstTenBits === 0xfe80) {
        return true;
    }
    // ff00::/8 multicast.
    return firstByte === 0xff;
}
|
|
227
|
+
/**
 * Check an IPv4 address against every entry of `IPV4_BLOCKED_RANGES`.
 *
 * @param {string} ip - Dotted-quad address.
 * @returns {boolean} `true` as soon as one blocked range contains `ip`.
 */
function ipv4IsBlocked(ip) {
    return IPV4_BLOCKED_RANGES.some(([base, prefix]) => ipv4InCidr(ip, base, prefix));
}
|
|
234
|
+
/**
 * SSRF gate for a single hostname.
 *
 * Order of checks: empty input, the literal name `localhost`, literal
 * IPv4, literal IPv6, and finally a DNS lookup through the module's
 * active resolver in which every returned address is vetted.
 *
 * @param {string} hostname - Host to vet. An IPv6 literal must be passed
 *   without square brackets for `isIPv6` to recognise it.
 * @returns {Promise<string|null>} `null` when the host may be fetched,
 *   otherwise a human-readable reason for the refusal.
 */
export async function validateHostnameForFetch(hostname) {
    if (!hostname) {
        return 'empty hostname';
    }
    // Refuse `localhost` by name, regardless of what it would resolve to.
    if (hostname.toLowerCase() === 'localhost') {
        return 'localhost is blocked (SSRF guard)';
    }
    // Literal addresses are judged directly; no DNS round trip.
    if (isIPv4(hostname)) {
        return ipv4IsBlocked(hostname)
            ? `IP ${hostname} is in a blocked range (SSRF guard)`
            : null;
    }
    if (isIPv6(hostname)) {
        return ipv6IsBlocked(hostname)
            ? `IPv6 ${hostname} is in a blocked range (SSRF guard)`
            : null;
    }
    let records;
    try {
        records = await activeLookup(hostname);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return `DNS lookup failed for ${hostname}: ${reason}`;
    }
    if (records.length === 0) {
        return `DNS returned no answers for ${hostname}`;
    }
    // A single blocked answer is enough to refuse the whole hostname.
    const offender = records.find((record) => (record.family === 4 && ipv4IsBlocked(record.address)) ||
        (record.family === 6 && ipv6IsBlocked(record.address)));
    if (offender !== undefined) {
        return `${hostname} resolves to ${offender.address} which is in a blocked range (SSRF guard)`;
    }
    return null;
}
|
|
288
|
+
/* ----------------------- sentinel helpers ---------------------- */
|
|
289
|
+
/**
 * HTML-escape the five characters that could break out of an element
 * body or an attribute value: `&`, `<`, `>`, `"` and `'`.
 *
 * The source URL is placed inside the sentinel body, so escaping `<`
 * and `>` is what prevents a literal closing sentinel tag from being
 * smuggled in through it.
 *
 * @param {string} input - Text to neutralise.
 * @returns {string} `input` with each special character replaced by its
 *   HTML entity.
 */
export function escapeForSentinelBody(input) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    // One pass over the input, so an `&` produced by an earlier
    // replacement can never be escaped a second time.
    return input.replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
|
307
|
+
/**
 * Remove every opening or closing `untrusted-content` tag — with the
 * per-call nonce, with any other suffix, or bare — from fetched text.
 *
 * Stripping is repeated until the text stops changing. A single pass
 * is not enough: removing an inner tag from
 * `</untrusted-<untrusted-content>content>` leaves a fresh
 * `</untrusted-content>` behind.
 *
 * @param {string} input - Converted page content.
 * @param {string} nonce - Nonce used in this call's sentinel tag.
 * @returns {string} `input` with no sentinel-shaped tags left in it.
 */
function scrubSentinelEscapes(input, nonce) {
    // The nonce is interpolated into a pattern; escape it so it can only
    // ever match literally.
    const literalNonce = String(nonce).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const nonceTag = new RegExp(`</?untrusted-content-${literalNonce}>`, 'gi');
    const bareTag = /<\/?untrusted-content[^>]*>/gi;
    let current = input;
    let previous;
    // Each changing pass shortens the string, so this always terminates.
    do {
        previous = current;
        current = current.replace(nonceTag, '').replace(bareTag, '');
    } while (current !== previous);
    return current;
}
|
|
318
|
+
/* ----------------------- response read ---------------------- */
|
|
319
|
+
/**
 * Collect a response body into one Buffer, refusing to hold more than
 * `MAX_RESPONSE_BYTES`.
 *
 * @param {AsyncIterable<Buffer|string|Uint8Array>} body - Response stream.
 *   If it has a `destroy()` method, that is called when the cap trips.
 * @param {AbortController} controller - Aborted when the cap trips.
 * @returns {Promise<{ok: true, buffer: Buffer} | {ok: false, error: string}>}
 *   The concatenated bytes, or an error result when the cap is exceeded or
 *   reading throws.
 */
async function readBodyWithCap(body, controller) {
    const pieces = [];
    let received = 0;
    try {
        for await (const piece of body) {
            const bytes = Buffer.isBuffer(piece) ? piece : Buffer.from(piece);
            received += bytes.length;
            if (received <= MAX_RESPONSE_BYTES) {
                pieces.push(bytes);
                continue;
            }
            // Over the cap: cancel the request and tear the stream down
            // right away instead of leaving it to be collected later.
            controller.abort();
            try {
                if (typeof body.destroy === 'function') {
                    body.destroy();
                }
            }
            catch {
                /* ignore — abort already fired */
            }
            return { ok: false, error: `Response exceeded ${MAX_RESPONSE_BYTES} byte cap.` };
        }
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return { ok: false, error: `Body read failed: ${reason}` };
    }
    return { ok: true, buffer: Buffer.concat(pieces) };
}
|
|
357
|
+
/**
 * Release a response whose body will not be consumed.
 * Tries `body.dump()` first and falls back to `body.destroy()` if that throws.
 */
async function discardResponseBody(response) {
    try {
        await response.body.dump();
    }
    catch {
        try {
            response.body.destroy();
        }
        catch {
            /* socket already closed — nothing to do */
        }
    }
}
/**
 * Fetch one URL with GET and return its readable content as Markdown
 * wrapped in a per-call `<untrusted-content-NONCE>` sentinel.
 *
 * Steps: gate check, URL and scheme validation, SSRF guard on the
 * initial host and on every redirect target, manual redirect loop
 * (at most `MAX_REDIRECTS` hops), status / content-length /
 * content-type checks, capped body read, Readability extraction,
 * Turndown conversion, sentinel wrapping.
 *
 * No retries are attempted; every failure is returned as an error result.
 * Every refusal made after a response has arrived also releases that
 * response's body, so its socket is not left half-read.
 *
 * @param {{url: string}} input - Tool input carrying the URL to fetch.
 * @param {object} ctx - Runtime context passed to `isWebFetchEnabled`.
 * @returns {Promise<object>} On success `{ ok: true, url, title,
 *   content_md, fetched_at }`; otherwise `{ ok: false, error }`.
 */
export async function webFetchTool(input, ctx) {
    if (!isWebFetchEnabled(ctx)) {
        return {
            ok: false,
            error: 'web_fetch disabled. Enable with --allow-fetch or set web.fetch.enabled=true in .pugi/settings.json.',
        };
    }
    let parsedUrl;
    try {
        parsedUrl = new URL(input.url);
    }
    catch {
        return { ok: false, error: `Invalid URL: ${input.url}` };
    }
    if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
        return { ok: false, error: `Unsupported scheme ${parsedUrl.protocol} — only http/https.` };
    }
    // URL.hostname keeps the square brackets on IPv6 literals; the guard
    // and the net helpers expect the bare address.
    const initialHost = parsedUrl.hostname.replace(/^\[|\]$/g, '');
    const initialGuard = await validateHostnameForFetch(initialHost);
    if (initialGuard) {
        return { ok: false, error: `SSRF refused: ${initialGuard}` };
    }
    // Redirects are followed by hand so that every hop can be re-vetted:
    // a public origin can answer `302 Location: http://169.254.169.254/...`.
    let response = null;
    let currentUrl = parsedUrl;
    let hops = 0;
    const controller = new AbortController();
    try {
        while (true) {
            response = await request(currentUrl.toString(), {
                method: 'GET',
                headers: {
                    'user-agent': USER_AGENT,
                    accept: 'text/html,application/xhtml+xml',
                    // Ask for an uncompressed body so the streaming cap
                    // counts the bytes that actually arrive.
                    'accept-encoding': 'identity',
                },
                bodyTimeout: FETCH_TIMEOUT_MS,
                headersTimeout: FETCH_TIMEOUT_MS,
                signal: controller.signal,
            });
            if (response.statusCode >= 300 && response.statusCode < 400) {
                const loc = response.headers['location'];
                const locStr = Array.isArray(loc) ? loc[0] : loc;
                // A 3xx without a usable Location is handled below as a non-2xx.
                if (typeof locStr !== 'string' || locStr.length === 0)
                    break;
                hops += 1;
                if (hops > MAX_REDIRECTS) {
                    await discardResponseBody(response);
                    return { ok: false, error: `Exceeded ${MAX_REDIRECTS} redirect hops.` };
                }
                // Drain prior body so the socket can be reused.
                await response.body.dump();
                let nextUrl;
                try {
                    nextUrl = new URL(locStr, currentUrl);
                }
                catch {
                    return { ok: false, error: `Invalid redirect target: ${locStr}` };
                }
                if (nextUrl.protocol !== 'http:' && nextUrl.protocol !== 'https:') {
                    return {
                        ok: false,
                        error: `Refusing redirect to unsupported scheme ${nextUrl.protocol}.`,
                    };
                }
                const nextHost = nextUrl.hostname.replace(/^\[|\]$/g, '');
                const guard = await validateHostnameForFetch(nextHost);
                if (guard) {
                    return { ok: false, error: `SSRF refused on redirect: ${guard}` };
                }
                currentUrl = nextUrl;
                continue;
            }
            break;
        }
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        return { ok: false, error: `Fetch failed: ${message}` };
    }
    if (!response) {
        return { ok: false, error: 'No response received.' };
    }
    if (response.statusCode < 200 || response.statusCode >= 300) {
        // The body is never read on this path; release it before returning.
        await discardResponseBody(response);
        return { ok: false, error: `HTTP ${response.statusCode} from ${currentUrl.toString()}` };
    }
    // content-length is advisory: use it only to reject obviously oversized
    // payloads early. The streaming cap below remains the real limit.
    const declaredLengthRaw = response.headers['content-length'];
    const declaredLength = Array.isArray(declaredLengthRaw) ? declaredLengthRaw[0] : declaredLengthRaw;
    if (typeof declaredLength === 'string' && /^\d+$/.test(declaredLength)) {
        const n = Number(declaredLength);
        if (n > MAX_RESPONSE_BYTES) {
            controller.abort();
            try {
                response.body.destroy();
            }
            catch {
                /* ignore */
            }
            return {
                ok: false,
                error: `Declared content-length ${n} exceeds ${MAX_RESPONSE_BYTES} byte cap.`,
            };
        }
    }
    const contentTypeRaw = response.headers['content-type'];
    const contentType = Array.isArray(contentTypeRaw) ? contentTypeRaw[0] : contentTypeRaw;
    const mime = typeof contentType === 'string' ? contentType.split(';')[0]?.trim().toLowerCase() ?? '' : '';
    if (!ALLOWED_CONTENT_TYPES.includes(mime)) {
        // Same as the non-2xx path: release the unread body.
        await discardResponseBody(response);
        return { ok: false, error: `Disallowed content-type ${mime || '(none)'}; only HTML/XHTML/text.` };
    }
    const bodyResult = await readBodyWithCap(response.body, controller);
    if (!bodyResult.ok)
        return bodyResult;
    const html = bodyResult.buffer.toString('utf8');
    // Readability needs a DOM; linkedom supplies it.
    const { document } = parseHTML(html);
    const article = new Readability(document).parse();
    const title = article?.title?.trim() || currentUrl.hostname;
    // Fall back to the raw payload when Readability finds no article.
    const articleHtml = article?.content ?? html;
    const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
    const markdown = turndown.turndown(articleHtml).trim();
    // The nonce is generated only after the page has been fetched, so the
    // page cannot know which closing tag it would need to forge. The
    // source URL goes inside the body, escaped, rather than in an attribute.
    const nonce = randomBytes(8).toString('hex');
    const scrubbedMarkdown = scrubSentinelEscapes(markdown, nonce);
    const safeSource = escapeForSentinelBody(currentUrl.toString());
    const wrapped = `<untrusted-content-${nonce}>\n` +
        `Source: ${safeSource}\n\n` +
        `${scrubbedMarkdown}\n` +
        `</untrusted-content-${nonce}>`;
    return {
        ok: true,
        url: currentUrl.toString(),
        title,
        content_md: wrapped,
        fetched_at: new Date().toISOString(),
    };
}
|
|
535
|
+
//# sourceMappingURL=web-fetch.js.map
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
/**
 * Render the list of agents as a column of rows, or a dimmed hint when
 * there are none.
 *
 * @param {object} props
 * @param {Array<object>} props.agents - Agents to show; each row is keyed by `taskId`.
 * @param {number} [props.nowEpochMs] - Reference time; defaults to `Date.now()`.
 */
export function AgentTree(props) {
    const { agents } = props;
    if (agents.length === 0) {
        const hint = _jsx(Text, { dimColor: true, children: "No agents on watch. Type a brief to dispatch one." });
        return _jsx(Box, { children: hint });
    }
    const now = props.nowEpochMs ?? Date.now();
    const lastIndex = agents.length - 1;
    // The final row is flagged so it can draw the closing branch glyph.
    const rows = agents.map((agent, index) => _jsx(AgentRow, { agent: agent, last: index === lastIndex, nowEpochMs: now }, agent.taskId));
    return _jsx(Box, { flexDirection: "column", children: rows });
}
|
|
10
|
+
/**
 * One line of the agent tree: branch glyph, persona name, role, status
 * glyph, detail text, and elapsed time with an optional token count.
 *
 * @param {object} props
 * @param {object} props.agent - Agent record (status, timing, tokens, names, detail).
 * @param {boolean} props.last - Whether this is the final row of the tree.
 * @param {number} props.nowEpochMs - Reference time for the elapsed display.
 */
function AgentRow({ agent, last, nowEpochMs, }) {
    const branch = last ? '└' : '├';
    const glyph = statusGlyph(agent.status);
    const glyphColor = statusColor(agent.status);
    const elapsed = formatElapsed(agent.startedAtEpochMs, nowEpochMs);
    const tokens = formatTokens(agent.tokensIn + agent.tokensOut);
    // Fixed-width columns keep consecutive rows aligned.
    const name = agent.personaName.padEnd(8, ' ');
    const role = agent.role.padEnd(10, ' ');
    let detail = agent.detail;
    if (detail.length > 60) {
        detail = `${detail.slice(0, 57)}…`;
    }
    const tokenSuffix = tokens ? ` · ↓ ${tokens}` : '';
    const cells = [
        _jsx(Text, { dimColor: true, children: ` ${branch} ` }),
        _jsx(Text, { bold: true, children: `${name}` }),
        _jsx(Text, { dimColor: true, children: ` ${role} ` }),
        _jsx(Text, { color: glyphColor, children: glyph }),
        _jsx(Text, { children: ` ${detail}` }),
        _jsx(Text, { dimColor: true, children: ` (${elapsed}${tokenSuffix})` }),
    ];
    return _jsxs(Box, { children: cells });
}
|
|
21
|
+
/** Glyph shown for each known agent status. */
const STATUS_GLYPHS = new Map([
    ['queued', '□'],
    ['thinking', '⏳'],
    ['shipped', '✓'],
    ['blocked', '✗'],
    ['failed', '✗'],
]);
/**
 * Look up the glyph for an agent status.
 *
 * @param {string} status - Agent status.
 * @returns {string|undefined} The glyph, or `undefined` for an unknown status.
 */
function statusGlyph(status) {
    return STATUS_GLYPHS.get(status);
}
|
|
35
|
+
/** Colour used for the status glyph; `queued` deliberately has none. */
const STATUS_COLORS = new Map([
    ['queued', undefined],
    ['thinking', 'cyan'],
    ['shipped', 'green'],
    ['blocked', 'yellow'],
    ['failed', 'red'],
]);
/**
 * Look up the glyph colour for an agent status.
 *
 * @param {string} status - Agent status.
 * @returns {string|undefined} Colour name, or `undefined` for `queued`
 *   and for any unknown status.
 */
function statusColor(status) {
    return STATUS_COLORS.get(status);
}
|
|
49
|
+
/**
 * Format the time between two epoch-millisecond stamps as `Ns` (under a
 * minute) or `Mm SSs`.
 *
 * @param {number} startedAtEpochMs - Start time.
 * @param {number} nowEpochMs - Current time; a value earlier than the
 *   start is clamped to zero elapsed.
 * @returns {string} Human-readable duration.
 */
function formatElapsed(startedAtEpochMs, nowEpochMs) {
    const MINUTE_MS = 60_000;
    const elapsedMs = Math.max(0, nowEpochMs - startedAtEpochMs);
    if (elapsedMs < MINUTE_MS) {
        return `${Math.floor(elapsedMs / 1000)}s`;
    }
    const minutes = Math.floor(elapsedMs / MINUTE_MS);
    const seconds = Math.floor((elapsedMs % MINUTE_MS) / 1000);
    return `${minutes}m ${String(seconds).padStart(2, '0')}s`;
}
|
|
57
|
+
/**
 * Format a token count compactly: the raw number below 1 000, one
 * decimal with a `k` suffix below one million, and `m` above that.
 *
 * @param {number} total - Token count.
 * @returns {string} Formatted count, or `''` when `total` is zero or negative.
 */
function formatTokens(total) {
    if (total <= 0) {
        return '';
    }
    if (total < 1_000) {
        return total.toString();
    }
    const thousands = (total / 1_000).toFixed(1);
    // Values just under one million round up to "1000.0"; show those as
    // millions instead of printing "1000.0k".
    if (total < 1_000_000 && Number(thousands) < 1_000) {
        return `${thousands}k`;
    }
    return `${(total / 1_000_000).toFixed(1)}m`;
}
|
|
66
|
+
//# sourceMappingURL=agent-tree.js.map
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
/**
 * Text colour for each persona slug. The trailing notes name the persona
 * behind each slug, as recorded in the original source comments.
 */
const HUE_COLOR_BY_SLUG = {
    main: 'cyan', // Mira (Pug) - coordinator
    pm: 'yellow', // Olivia (Honeybee) - release
    architect: 'magenta', // Marcus (Owl)
    dev: 'blueBright', // Hiroshi (Wolf) - lead dev
    frontend: 'magentaBright', // Mia (Hummingbird) - frontend
    qa: 'red', // Vera (Fox) - QA
    devops: 'cyan', // Diego (Octopus) - devops
    designer: 'green', // Sofia (Stag) - designer
    researcher: 'gray', // Anika (Raven) - researcher
    analyst: 'gray', // Liam (Spider) - analyst
};
|
|
25
|
+
/**
 * Render the conversation as a column of rows, or a dimmed prompt when
 * nothing has been said yet.
 *
 * @param {object} props
 * @param {Array<object>} props.rows - Conversation rows; each is keyed by `id`.
 * @param {Map<string, string>} [props.personaNames] - Passed through to every row.
 */
export function ConversationPane(props) {
    const { rows, personaNames } = props;
    if (rows.length === 0) {
        const hint = _jsx(Text, { dimColor: true, children: "Brief the workforce to begin. Try a short sentence or /help." });
        return _jsx(Box, { children: hint });
    }
    const lines = rows.map((row) => _jsx(ConversationRow, { row: row, personaNames: personaNames }, row.id));
    return _jsx(Box, { flexDirection: "column", children: lines });
}
|
|
31
|
+
/**
 * Render one conversation row according to its source.
 *
 * - `operator`: bold cyan `› ` marker followed by the text.
 * - `system`: dimmed `· ` marker and dimmed text.
 * - `persona`: bold `▸ <name> ` in the persona's colour, then the text.
 *
 * @param {object} props
 * @param {object} props.row - Row with `source`, `text` and, for persona
 *   rows, an optional `personaSlug`.
 * @param {Map<string, string>} [props.personaNames] - Slug to display-name
 *   lookup; the slug itself is shown when there is no entry.
 * @returns The rendered row; nothing is returned for an unrecognised `source`.
 */
function ConversationRow({ row, personaNames, }) {
    switch (row.source) {
        case 'operator':
            return (_jsxs(Box, { children: [_jsx(Text, { bold: true, color: "cyan", children: '› ' }), _jsx(Text, { children: row.text })] }));
        case 'system':
            return (_jsxs(Box, { children: [_jsx(Text, { dimColor: true, children: '· ' }), _jsx(Text, { dimColor: true, children: row.text })] }));
        case 'persona': {
            const slug = row.personaSlug ?? '';
            // Own-property lookup only: a slug such as "toString" or
            // "constructor" must not pick up a member inherited from
            // Object.prototype and be passed on as a colour.
            const known = Object.prototype.hasOwnProperty.call(HUE_COLOR_BY_SLUG, slug);
            const color = (known ? HUE_COLOR_BY_SLUG[slug] : undefined) ?? 'white';
            const displayName = personaNames?.get(slug) ?? slug;
            return (_jsxs(Box, { children: [_jsx(Text, { color: color, bold: true, children: `▸ ${displayName} ` }), _jsx(Text, { children: row.text })] }));
        }
    }
}
|
|
45
|
+
//# sourceMappingURL=conversation-pane.js.map
|