@pentoshi/clai 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -17
- package/dist/agent/context-manager.d.ts +27 -0
- package/dist/agent/context-manager.js +75 -0
- package/dist/agent/context-manager.js.map +1 -0
- package/dist/agent/runner.d.ts +21 -1
- package/dist/agent/runner.js +176 -73
- package/dist/agent/runner.js.map +1 -1
- package/dist/commands/doctor.js +20 -2
- package/dist/commands/doctor.js.map +1 -1
- package/dist/commands/update.js +11 -2
- package/dist/commands/update.js.map +1 -1
- package/dist/index.js +156 -5
- package/dist/index.js.map +1 -1
- package/dist/llm/anthropic.js +29 -38
- package/dist/llm/anthropic.js.map +1 -1
- package/dist/llm/gemini.js +31 -40
- package/dist/llm/gemini.js.map +1 -1
- package/dist/llm/http.d.ts +21 -0
- package/dist/llm/http.js +140 -1
- package/dist/llm/http.js.map +1 -1
- package/dist/llm/ollama.js +18 -27
- package/dist/llm/ollama.js.map +1 -1
- package/dist/llm/router.d.ts +7 -0
- package/dist/llm/router.js +14 -23
- package/dist/llm/router.js.map +1 -1
- package/dist/modes/agent.d.ts +4 -2
- package/dist/modes/agent.js +2 -2
- package/dist/modes/agent.js.map +1 -1
- package/dist/modes/ask.js +3 -4
- package/dist/modes/ask.js.map +1 -1
- package/dist/os/pkgmgr.d.ts +7 -1
- package/dist/os/pkgmgr.js +97 -18
- package/dist/os/pkgmgr.js.map +1 -1
- package/dist/prompts/index.d.ts +7 -0
- package/dist/prompts/index.js +12 -4
- package/dist/prompts/index.js.map +1 -1
- package/dist/repl.d.ts +1 -0
- package/dist/repl.js +283 -43
- package/dist/repl.js.map +1 -1
- package/dist/safety/classifier.d.ts +5 -1
- package/dist/safety/classifier.js +244 -88
- package/dist/safety/classifier.js.map +1 -1
- package/dist/safety/patterns.d.ts +48 -1
- package/dist/safety/patterns.js +140 -7
- package/dist/safety/patterns.js.map +1 -1
- package/dist/store/config.d.ts +21 -3
- package/dist/store/config.js +28 -9
- package/dist/store/config.js.map +1 -1
- package/dist/store/history.d.ts +9 -0
- package/dist/store/history.js +58 -1
- package/dist/store/history.js.map +1 -1
- package/dist/store/keys.d.ts +2 -1
- package/dist/store/keys.js +7 -3
- package/dist/store/keys.js.map +1 -1
- package/dist/store/logs.d.ts +7 -0
- package/dist/store/logs.js +39 -1
- package/dist/store/logs.js.map +1 -1
- package/dist/store/project.d.ts +1 -0
- package/dist/store/project.js +34 -9
- package/dist/store/project.js.map +1 -1
- package/dist/store/scope.d.ts +29 -0
- package/dist/store/scope.js +113 -0
- package/dist/store/scope.js.map +1 -0
- package/dist/tools/fs.d.ts +6 -2
- package/dist/tools/fs.js +99 -87
- package/dist/tools/fs.js.map +1 -1
- package/dist/tools/http.d.ts +5 -3
- package/dist/tools/http.js +170 -31
- package/dist/tools/http.js.map +1 -1
- package/dist/tools/policies/output-policy.d.ts +13 -0
- package/dist/tools/policies/output-policy.js +56 -0
- package/dist/tools/policies/output-policy.js.map +1 -0
- package/dist/tools/reducers/ffuf.d.ts +6 -0
- package/dist/tools/reducers/ffuf.js +74 -0
- package/dist/tools/reducers/ffuf.js.map +1 -0
- package/dist/tools/reducers/generic.d.ts +2 -0
- package/dist/tools/reducers/generic.js +60 -0
- package/dist/tools/reducers/generic.js.map +1 -0
- package/dist/tools/reducers/gobuster.d.ts +2 -0
- package/dist/tools/reducers/gobuster.js +36 -0
- package/dist/tools/reducers/gobuster.js.map +1 -0
- package/dist/tools/reducers/httpx.d.ts +2 -0
- package/dist/tools/reducers/httpx.js +38 -0
- package/dist/tools/reducers/httpx.js.map +1 -0
- package/dist/tools/reducers/nmap.d.ts +7 -0
- package/dist/tools/reducers/nmap.js +82 -0
- package/dist/tools/reducers/nmap.js.map +1 -0
- package/dist/tools/reducers/nuclei.d.ts +2 -0
- package/dist/tools/reducers/nuclei.js +51 -0
- package/dist/tools/reducers/nuclei.js.map +1 -0
- package/dist/tools/reducers/sqlmap.d.ts +2 -0
- package/dist/tools/reducers/sqlmap.js +39 -0
- package/dist/tools/reducers/sqlmap.js.map +1 -0
- package/dist/tools/reducers/subdomains.d.ts +6 -0
- package/dist/tools/reducers/subdomains.js +31 -0
- package/dist/tools/reducers/subdomains.js.map +1 -0
- package/dist/tools/reducers/types.d.ts +14 -0
- package/dist/tools/reducers/types.js +2 -0
- package/dist/tools/reducers/types.js.map +1 -0
- package/dist/tools/registry.d.ts +1 -1
- package/dist/tools/registry.js +223 -79
- package/dist/tools/registry.js.map +1 -1
- package/dist/tools/shell.d.ts +45 -4
- package/dist/tools/shell.js +419 -88
- package/dist/tools/shell.js.map +1 -1
- package/dist/tools/validate.d.ts +37 -0
- package/dist/tools/validate.js +144 -0
- package/dist/tools/validate.js.map +1 -0
- package/dist/types.d.ts +7 -15
- package/dist/ui/keys.d.ts +21 -0
- package/dist/ui/keys.js +13 -0
- package/dist/ui/keys.js.map +1 -0
- package/dist/ui/output-pane.d.ts +31 -0
- package/dist/ui/output-pane.js +81 -0
- package/dist/ui/output-pane.js.map +1 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -69,11 +69,10 @@ clai -y "list the 10 largest files in my home directory"
|
|
|
69
69
|
- **`/agent` mode** — Agentic. AI plans, then executes shell commands, edits files, installs missing tools, parses output, and continues until the goal is met.
|
|
70
70
|
- **7 LLM providers** — Groq, Google Gemini, OpenRouter, OpenAI, Anthropic, NVIDIA NIM, and Ollama (local). All with streaming.
|
|
71
71
|
- **10 built-in tools** — `shell.exec`, `fs.read`, `fs.write`, `fs.list`, `fs.search`, `pkg.install`, `net.scan`, `http.fetch`, `sysinfo`, `pentest.recon`.
|
|
72
|
-
- **Smart safety gate** —
|
|
73
|
-
- **Bounded tool output** — Long scan output is streamed lightly while running, saved to artifacts when needed, and reduced before it reaches the model.
|
|
72
|
+
- **Smart safety gate** — Read-only commands auto-execute; mutating commands require confirmation; destructive patterns are blocked.
|
|
74
73
|
- **Cross-platform** — macOS, Linux, and Windows. Detects OS-native package managers (brew, apt, dnf, pacman, winget, choco).
|
|
75
74
|
- **Pentest-aware** — nmap, nikto, sqlmap, gobuster, ffuf, hydra, masscan, whois, dig, netcat, tshark.
|
|
76
|
-
- **
|
|
75
|
+
- **Auto-update** — Checks for new versions on startup; run `/update` or `clai update` to upgrade.
|
|
77
76
|
- **Persistent history** — Session history with automatic key redaction in logs.
|
|
78
77
|
|
|
79
78
|
## Provider Setup
|
|
@@ -90,8 +89,6 @@ clai supports 7 LLM providers with free tiers:
|
|
|
90
89
|
| NVIDIA NIM | `meta/llama-3.3-70b-instruct` | ✓ | `nvapi-` |
|
|
91
90
|
| Ollama | `llama3.1:8b` | ✓ | (local URL) |
|
|
92
91
|
|
|
93
|
-
`freeOnly` mode is enabled by default. Paid providers are excluded from fallback unless you explicitly opt in by disabling `freeOnly` in config or setting `CLAI_ALLOW_PAID=1`.
|
|
94
|
-
|
|
95
92
|
```sh
|
|
96
93
|
# Store an API key
|
|
97
94
|
clai set groq gsk_xxxxxxxxxxxxxxxx
|
|
@@ -151,7 +148,6 @@ export OLLAMA_HOST=http://localhost:11434
|
|
|
151
148
|
| `/save <name>` | Save current session |
|
|
152
149
|
| `/cwd <path>` | Change working directory |
|
|
153
150
|
| `/allow <tool>` | Whitelist a tool for the session |
|
|
154
|
-
| `/output [last]` | Toggle full output from the last tool |
|
|
155
151
|
| `/update` | Check for updates |
|
|
156
152
|
| `/exit` | Quit |
|
|
157
153
|
| `/help` | List commands |
|
|
@@ -161,25 +157,25 @@ export OLLAMA_HOST=http://localhost:11434
|
|
|
161
157
|
|
|
162
158
|
| Tool | Description | Risk Level |
|
|
163
159
|
|------------------|--------------------------------------------------------------------|------------|
|
|
164
|
-
| `shell.exec` | Run shell commands
|
|
160
|
+
| `shell.exec` | Run shell commands via execa (120s timeout, streams output) | smart* |
|
|
165
161
|
| `fs.read` | Read files (sandboxed to approved roots) | safe |
|
|
166
162
|
| `fs.write` | Write files (sandboxed) | confirm |
|
|
167
163
|
| `fs.list` | List directory contents | safe |
|
|
168
164
|
| `fs.search` | Search files with ripgrep (falls back to grep) | safe |
|
|
169
165
|
| `pkg.install` | Install packages via detected OS package manager | confirm |
|
|
170
166
|
| `net.scan` | Nmap wrapper for port scanning | confirm |
|
|
171
|
-
| `http.fetch` | HTTP GET/
|
|
167
|
+
| `http.fetch` | HTTP GET/POST with response size limits | safe |
|
|
172
168
|
| `sysinfo` | OS, architecture, shell, and working directory info | safe |
|
|
173
169
|
| `pentest.recon` | Composite: whois + dig + nmap top-100 ports | confirm |
|
|
174
170
|
|
|
175
|
-
> \* **smart** = only
|
|
171
|
+
> \* **smart** = read-only commands (`curl`, `ls`, `whoami`, `gobuster`, `dirb`, etc.) auto-execute; mutating commands require confirmation.
|
|
176
172
|
|
|
177
173
|
## Safety Gate
|
|
178
174
|
|
|
179
175
|
Every tool call passes through a 3-tier classifier:
|
|
180
176
|
|
|
181
|
-
- **`safe`** — Auto-run:
|
|
182
|
-
- **`confirm`** — User prompt: mutating shell commands, fs.write, pkg.install, net.scan
|
|
177
|
+
- **`safe`** — Auto-run: read-only fs, sysinfo, http.fetch, read-only shell commands (`curl`, `ls`, `whoami`, `ifconfig`, `gobuster`, `dirb`, `ffuf`, `nikto`, etc.)
|
|
178
|
+
- **`confirm`** — User prompt: mutating shell commands, fs.write, pkg.install, net.scan
|
|
183
179
|
- **`block`** — Refuse with explanation: `rm -rf /`, fork bombs, public IP scans without authorization, exfiltration patterns
|
|
184
180
|
|
|
185
181
|
### Pentest Authorization
|
|
@@ -190,15 +186,11 @@ Security tools require a one-time acknowledgment:
|
|
|
190
186
|
clai authorize-pentest AGREE
|
|
191
187
|
```
|
|
192
188
|
|
|
193
|
-
Public
|
|
194
|
-
|
|
195
|
-
### Tool Output
|
|
196
|
-
|
|
197
|
-
During long tool runs, clai shows live output in dim text so you can see progress. After the AI summarizes the result, raw output is collapsed. Press `Ctrl+O` on macOS, Linux, or Windows to toggle full output for the last tool. In non-interactive terminals, use `/output last` or open the saved artifact path.
|
|
189
|
+
Public IP scanning is blocked unless the target is private (RFC 1918) or the user explicitly confirms ownership.
|
|
198
190
|
|
|
199
191
|
## Updates
|
|
200
192
|
|
|
201
|
-
clai
|
|
193
|
+
clai checks for updates automatically on startup (every 4 hours, non-blocking). You can also check manually:
|
|
202
194
|
|
|
203
195
|
```sh
|
|
204
196
|
# CLI command
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { ChatMessage } from "../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Crude per-char token estimator. Production-grade tokenization differs by
|
|
4
|
+
* provider, but for budgeting an order-of-magnitude heuristic ("chars / 4")
|
|
5
|
+
* is enough to decide when to compact. We deliberately err on the side of
|
|
6
|
+
* over-estimating — better to compact one turn too early than to lose state
|
|
7
|
+
* to a provider context-window error.
|
|
8
|
+
*/
|
|
9
|
+
export declare function estimateTokens(text: string): number;
|
|
10
|
+
export declare function estimateMessagesTokens(messages: ChatMessage[]): number;
|
|
11
|
+
export interface CompactOptions {
|
|
12
|
+
/** Soft budget in estimated tokens (defaults to 24,000). Compaction only happens when the estimate exceeds this. */
|
|
13
|
+
budgetTokens?: number | undefined;
|
|
14
|
+
/** Number of trailing messages kept verbatim (defaults to 8, minimum 2). A leading system prompt is always kept in addition. */
|
|
15
|
+
keepRecent?: number | undefined;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Replace older messages with a single condensed "memory" message while
|
|
19
|
+
* preserving the system prompt and the most recent N messages.
|
|
20
|
+
*
|
|
21
|
+
* We do not call the LLM here — that's a future enhancement. The current
|
|
22
|
+
* compaction is mechanical: keep the system prompt; replace the prefix of
|
|
23
|
+
* older turns with a bullet list of the assistant's last lines and the
|
|
24
|
+
* tool calls that produced output. This is conservative and reversible
|
|
25
|
+
* (the artifact files still hold the raw outputs).
|
|
26
|
+
*/
|
|
27
|
+
export declare function compactMessages(messages: ChatMessage[], options?: CompactOptions): ChatMessage[];
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
 * Crude per-character token estimator ("chars / 4", rounded up).
 *
 * Real tokenizers differ per provider; this figure is only used to decide
 * when a conversation should be compacted, so an order-of-magnitude
 * heuristic is sufficient.
 *
 * @param {string | null | undefined} text - Text to measure. Nullish values
 *   count as empty; any other non-string value is measured via `String(text)`.
 * @returns {number} Estimated token count (0 for empty or nullish input).
 */
export function estimateTokens(text) {
    // A message without textual content must not make budgeting throw.
    if (text == null)
        return 0;
    const value = typeof text === "string" ? text : String(text);
    return Math.ceil(value.length / 4);
}
|
|
11
|
+
/**
 * Estimate the token footprint of a list of chat messages.
 *
 * Each message contributes the estimate for its `content` plus a flat
 * 4-token allowance for the role overhead.
 *
 * @param {Array<{ content: string }>} messages - Messages to measure.
 * @returns {number} Sum of the per-message estimates (0 for an empty list).
 */
export function estimateMessagesTokens(messages) {
    return messages.reduce((total, message) => total + estimateTokens(message.content) + 4, // role overhead
    0);
}
|
|
18
|
+
const DEFAULT_BUDGET_TOKENS = 24_000;
const DEFAULT_KEEP_RECENT = 8;
/** Preamble that identifies a memo message produced by a previous compaction. */
const MEMO_PREAMBLE = `Earlier turns in this session, summarized to fit the context budget. Full artifacts (when produced) are saved on disk and can be expanded with /output.\n\n`;
/** Cap on bullets inherited from an earlier memo so repeated compaction cannot grow the memo without limit. */
const MAX_CARRIED_BULLETS = 40;
/**
 * Replace older messages with a single condensed "memory" message while
 * preserving the system prompt and the most recent N messages.
 *
 * The compaction is mechanical (no LLM call): the leading system prompt is
 * kept, the most recent `keepRecent` messages are kept verbatim, and every
 * message in between becomes one bullet (user / assistant / tool), each
 * collapsed to a single line of at most 200 characters.
 *
 * A memo produced by an earlier compaction is itself a "system" message that
 * ends up in the compacted range on the next pass. Its bullets are carried
 * forward (most recent {@link MAX_CARRIED_BULLETS}) instead of being dropped,
 * so repeated compaction does not erase the summary of the oldest turns.
 *
 * @param {Array<{ role: string, content: string }>} messages - Conversation so far.
 * @param {{ budgetTokens?: number, keepRecent?: number }} [options] - Soft
 *   token budget (default 24,000) and number of trailing messages to keep
 *   (default 8, minimum 2).
 * @returns {Array<{ role: string, content: string }>} The same array instance
 *   when no compaction is needed, otherwise a new array
 *   `[systemPrompt?, memo, ...recent]`.
 */
export function compactMessages(messages, options = {}) {
    const budget = options.budgetTokens ?? DEFAULT_BUDGET_TOKENS;
    const keepRecent = Math.max(2, options.keepRecent ?? DEFAULT_KEEP_RECENT);
    if (messages.length <= keepRecent + 1)
        return messages;
    if (estimateMessagesTokens(messages) <= budget)
        return messages;
    // Always keep the system prompt (index 0 if it's a system message).
    const head = [];
    let start = 0;
    if (messages[0]?.role === "system") {
        head.push(messages[0]);
        start = 1;
    }
    const tail = messages.slice(Math.max(start, messages.length - keepRecent));
    const middle = messages.slice(start, messages.length - tail.length);
    if (middle.length === 0)
        return messages;
    const bullets = [];
    for (const msg of middle) {
        if (msg.role === "system") {
            // Only memos from a previous compaction are carried over; their
            // bullets are already one-line summaries, so reuse them verbatim.
            if (typeof msg.content === "string" && msg.content.startsWith(MEMO_PREAMBLE)) {
                const carried = msg.content
                    .slice(MEMO_PREAMBLE.length)
                    .split("\n")
                    .filter((line) => line.trim() !== "");
                bullets.push(...carried.slice(-MAX_CARRIED_BULLETS));
            }
        }
        else if (msg.role === "user") {
            bullets.push(`- user asked: ${oneLine(msg.content, 200)}`);
        }
        else if (msg.role === "assistant") {
            const line = oneLine(msg.content, 200);
            // Assistant turns that were pure whitespace add no information.
            if (line)
                bullets.push(`- assistant: ${line}`);
        }
        else if (msg.role === "tool") {
            bullets.push(`- tool result: ${oneLine(msg.content, 200)}`);
        }
    }
    const memo = {
        role: "system",
        content: MEMO_PREAMBLE + bullets.join("\n"),
    };
    return [...head, memo, ...tail];
}
|
|
69
|
+
/**
 * Collapse every whitespace run to a single space, trim the ends, and cap
 * the result at `maxChars` UTF-16 code units. Truncated text ends with "…".
 *
 * @param {string} text - Text to flatten.
 * @param {number} maxChars - Maximum length of the result, ellipsis included.
 *   For values below 1, truncated text is returned as the ellipsis alone.
 * @returns {string} The flattened, possibly truncated text.
 */
function oneLine(text, maxChars) {
    const cleaned = text.replace(/\s+/g, " ").trim();
    if (cleaned.length <= maxChars)
        return cleaned;
    // Reserve one unit for the ellipsis; never let the cut index go negative.
    let end = Math.max(0, maxChars - 1);
    // Do not cut between the halves of a surrogate pair: a dangling high
    // surrogate would put an invalid code point into the summary.
    if (end > 0) {
        const last = cleaned.charCodeAt(end - 1);
        if (last >= 0xd800 && last <= 0xdbff)
            end -= 1;
    }
    return `${cleaned.slice(0, end)}…`;
}
|
|
75
|
+
//# sourceMappingURL=context-manager.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"context-manager.js","sourceRoot":"","sources":["../../src/agent/context-manager.ts"],"names":[],"mappings":"AAEA;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,QAAuB;IAC5D,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,GAAG,IAAI,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,gBAAgB;IAC9D,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AASD,MAAM,qBAAqB,GAAG,MAAM,CAAC;AACrC,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAE9B;;;;;;;;;GASG;AACH,MAAM,UAAU,eAAe,CAC7B,QAAuB,EACvB,UAA0B,EAAE;IAE5B,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,IAAI,qBAAqB,CAAC;IAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB,CAAC,CAAC;IAC1E,IAAI,QAAQ,CAAC,MAAM,IAAI,UAAU,GAAG,CAAC;QAAE,OAAO,QAAQ,CAAC;IACvD,IAAI,sBAAsB,CAAC,QAAQ,CAAC,IAAI,MAAM;QAAE,OAAO,QAAQ,CAAC;IAEhE,oEAAoE;IACpE,MAAM,IAAI,GAAkB,EAAE,CAAC;IAC/B,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,KAAK,QAAQ,EAAE,CAAC;QACnC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACvB,KAAK,GAAG,CAAC,CAAC;IACZ,CAAC;IAED,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC;IAC3E,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;IACpE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,QAAQ,CAAC;IAEzC,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YACxB,OAAO,CAAC,IAAI,CAAC,iBAAiB,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QAC7D,CAAC;aAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YACvC,IAAI,IAAI;gBAAE,OAAO,CAAC,IAAI,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC;QACjD,CAAC;aAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,kBAAkB,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAgB;QACxB,IAAI,EAAE,QAAQ;QACd,OAAO,EACL,6JAA6J;YAC7J,OAAO,C
AAC,IAAI,CAAC,IAAI,CAAC;KACrB,CAAC;IAEF,OAAO,CAAC,GAAG,IAAI,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC;AAClC,CAAC;AAED,SAAS,OAAO,CAAC,IAAY,EAAE,QAAgB;IAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACjD,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,OAAO,CAAC;IAC/C,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC;AAC9C,CAAC"}
|
package/dist/agent/runner.d.ts
CHANGED
|
@@ -1,4 +1,13 @@
|
|
|
1
1
|
import type { ChatMessage, ProviderId, ToolCall, ToolResult } from "../types.js";
|
|
2
|
+
export interface SessionPolicy {
|
|
3
|
+
/** Tools the user authorized once during this REPL session. Not persisted. */
|
|
4
|
+
allow: Set<string>;
|
|
5
|
+
/** Mutable flag so the runner can flip pentest auth for this session only. */
|
|
6
|
+
pentestAuthorized: {
|
|
7
|
+
value: boolean;
|
|
8
|
+
};
|
|
9
|
+
}
|
|
10
|
+
export declare function createSessionPolicy(): SessionPolicy;
|
|
2
11
|
export interface AgentRunOptions {
|
|
3
12
|
provider?: ProviderId | undefined;
|
|
4
13
|
model?: string | undefined;
|
|
@@ -8,6 +17,17 @@ export interface AgentRunOptions {
|
|
|
8
17
|
signal?: AbortSignal | undefined;
|
|
9
18
|
onToolStart?: ((call: ToolCall) => void) | undefined;
|
|
10
19
|
onToolResult?: ((call: ToolCall, result: ToolResult) => void) | undefined;
|
|
20
|
+
session?: SessionPolicy | undefined;
|
|
21
|
+
}
|
|
22
|
+
export interface ParseToolCallOptions {
|
|
23
|
+
/**
|
|
24
|
+
* When true, only formats that are explicitly tool-call delimited are
|
|
25
|
+
* accepted: ```tool fenced JSON, <tool_call> XML, and the Kimi sentinel
|
|
26
|
+
* token format. Loose formats (any fenced block, heading-prefix, trailing
|
|
27
|
+
* JSON) are dropped — useful when models routinely emit JSON examples in
|
|
28
|
+
* prose. Default is `false` so existing free-tier models keep working.
|
|
29
|
+
*/
|
|
30
|
+
strict?: boolean | undefined;
|
|
11
31
|
}
|
|
12
|
-
export declare function parseToolCall(text: string): ToolCall | undefined;
|
|
32
|
+
export declare function parseToolCall(text: string, options?: ParseToolCallOptions): ToolCall | undefined;
|
|
13
33
|
export declare function runAgentLoop(prompt: string, options?: AgentRunOptions): Promise<string>;
|
package/dist/agent/runner.js
CHANGED
|
@@ -1,19 +1,26 @@
|
|
|
1
1
|
import { confirm } from "@inquirer/prompts";
|
|
2
2
|
import chalk from "chalk";
|
|
3
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
4
|
+
import { homedir } from "node:os";
|
|
5
|
+
import { join } from "node:path";
|
|
3
6
|
import { streamWithProvider } from "../llm/router.js";
|
|
4
7
|
import { renderAgentSystemPrompt } from "../prompts/index.js";
|
|
5
|
-
import { getConfig
|
|
8
|
+
import { getConfig } from "../store/config.js";
|
|
6
9
|
import { classifyToolCall, isPentestToolCall } from "../safety/classifier.js";
|
|
7
10
|
import { availableToolNames, runToolCall } from "../tools/registry.js";
|
|
11
|
+
import { reduceToolOutput } from "../tools/policies/output-policy.js";
|
|
12
|
+
import { formatViewportHint, registerViewport } from "../ui/output-pane.js";
|
|
13
|
+
import { compactMessages, estimateMessagesTokens } from "./context-manager.js";
|
|
8
14
|
import { auditLog } from "../store/logs.js";
|
|
9
15
|
import { loadProjectContext } from "../store/project.js";
|
|
16
|
+
import { loadScope, isScopeActive } from "../store/scope.js";
|
|
10
17
|
import { ensureProviderConfigured } from "../commands/providers.js";
|
|
11
|
-
import { rememberThinkingFromText, renderThinkingSummary } from "../ui/thinking.js";
|
|
18
|
+
import { rememberThinkingFromText, renderThinkingSummary, } from "../ui/thinking.js";
|
|
12
19
|
import { renderMarkdown } from "../ui/markdown.js";
|
|
13
20
|
import { startThinkingSpinner } from "../ui/spinner.js";
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
21
|
+
/**
 * Build a fresh session policy: an empty set of tools allowed for this
 * session and a mutable pentest-authorization flag that starts out false.
 *
 * @returns {{ allow: Set<string>, pentestAuthorized: { value: boolean } }}
 *   A new policy object; nothing is shared between calls.
 */
export function createSessionPolicy() {
    const allow = new Set();
    const pentestAuthorized = { value: false };
    return { allow, pentestAuthorized };
}
|
|
17
24
|
function tryParseCall(raw) {
|
|
18
25
|
try {
|
|
19
26
|
const parsed = JSON.parse(raw.trim());
|
|
@@ -72,7 +79,7 @@ function stripSentinelTokens(text) {
|
|
|
72
79
|
.replace(/<\|tool_[a-z_]*\|>/gi, "")
|
|
73
80
|
.trim();
|
|
74
81
|
}
|
|
75
|
-
export function parseToolCall(text) {
|
|
82
|
+
export function parseToolCall(text, options = {}) {
|
|
76
83
|
// 1. ```tool ... ``` (standard format)
|
|
77
84
|
const fenced = text.match(/```tool\s*\n?([\s\S]*?)```/i);
|
|
78
85
|
if (fenced?.[1]) {
|
|
@@ -80,12 +87,50 @@ export function parseToolCall(text) {
|
|
|
80
87
|
if (call)
|
|
81
88
|
return call;
|
|
82
89
|
}
|
|
83
|
-
// 2.
|
|
84
|
-
|
|
85
|
-
|
|
90
|
+
// 2. <tool_call>...</tool_call>
|
|
91
|
+
const xml = text.match(/<tool_call>([\s\S]*?)<\/tool_call>/i);
|
|
92
|
+
if (xml?.[1]) {
|
|
93
|
+
const call = tryParseCall(xml[1]);
|
|
94
|
+
if (call)
|
|
95
|
+
return call;
|
|
96
|
+
}
|
|
97
|
+
// 3. Kimi/Moonshot sentinel format (used by kimi-k2 family on NIM).
|
|
86
98
|
const kimi = parseKimiToolCall(text);
|
|
87
99
|
if (kimi)
|
|
88
100
|
return kimi;
|
|
101
|
+
// In strict mode, stop here. Headings, generic fenced blocks, and trailing
|
|
102
|
+
// JSON are too easy to accidentally trigger when the model is showing a
|
|
103
|
+
// worked example.
|
|
104
|
+
if (options.strict)
|
|
105
|
+
return undefined;
|
|
106
|
+
// 4. ### tool / ## tool / # tool heading + JSON
|
|
107
|
+
const heading = text.match(/#{1,3}\s*tool\s*\n\s*(\{[\s\S]*\})/i);
|
|
108
|
+
if (heading?.[1]) {
|
|
109
|
+
const call = tryParseCall(heading[1]);
|
|
110
|
+
if (call)
|
|
111
|
+
return call;
|
|
112
|
+
}
|
|
113
|
+
// 5. **tool** heading + JSON
|
|
114
|
+
const bold = text.match(/\*\*tool\*\*\s*\n\s*(\{[\s\S]*\})/i);
|
|
115
|
+
if (bold?.[1]) {
|
|
116
|
+
const call = tryParseCall(bold[1]);
|
|
117
|
+
if (call)
|
|
118
|
+
return call;
|
|
119
|
+
}
|
|
120
|
+
// 6. Any fenced block (```json, ```, etc.) containing name+args
|
|
121
|
+
const anyFenced = text.match(/```\w*\s*\n?([\s\S]*?)```/);
|
|
122
|
+
if (anyFenced?.[1]) {
|
|
123
|
+
const call = tryParseCall(anyFenced[1]);
|
|
124
|
+
if (call)
|
|
125
|
+
return call;
|
|
126
|
+
}
|
|
127
|
+
// 7. Trailing JSON object with "name" and "args"
|
|
128
|
+
const trailingJson = text.match(/(\{"name"\s*:\s*"[^"]+"\s*,\s*"args"\s*:\s*\{[\s\S]*?\}\s*\})\s*$/);
|
|
129
|
+
if (trailingJson?.[1]) {
|
|
130
|
+
const call = tryParseCall(trailingJson[1]);
|
|
131
|
+
if (call)
|
|
132
|
+
return call;
|
|
133
|
+
}
|
|
89
134
|
return undefined;
|
|
90
135
|
}
|
|
91
136
|
/** Extract the text before the tool call block for display purposes */
|
|
@@ -130,32 +175,31 @@ function formatToolArgs(call) {
|
|
|
130
175
|
return JSON.stringify(call.args);
|
|
131
176
|
}
|
|
132
177
|
/**
 * Tell whether a failure is the result of a cancellation.
 *
 * @param {unknown} error - The value that was thrown.
 * @param {{ aborted?: boolean } | undefined} signal - Optional abort signal.
 * @returns {boolean} True when the signal is already aborted, or when
 *   `error` is an Error instance named "AbortError".
 */
function isAbortError(error, signal) {
    if (signal?.aborted)
        return true;
    return error instanceof Error && error.name === "AbortError";
}
|
|
181
|
+
/**
 * Turn a tool name into a fragment that is safe to embed in a file name.
 *
 * Every run of characters other than letters, digits, "_", "." and "-" is
 * replaced with a single "-", and leading/trailing dashes are stripped.
 *
 * @param {string} name - Raw tool name.
 * @returns {string} The sanitized name, or "tool-output" when nothing is left.
 */
function safeArtifactName(name) {
    const dashed = name.replace(/[^a-z0-9_.-]+/gi, "-");
    const trimmed = dashed.replace(/^-+|-+$/g, "");
    return trimmed === "" ? "tool-output" : trimmed;
}
|
|
135
185
|
/**
 * Persist a tool's raw output as a text artifact under `~/.clai/outputs`.
 *
 * The file is named `<ISO timestamp with ":" and "." replaced by "-">-<tool
 * name>.txt`, and the output is written with a trailing newline.
 *
 * @param {{ name: string }} call - The tool call that produced the output.
 * @param {string} output - Raw tool output.
 * @returns {Promise<string | undefined>} Path of the written file, or
 *   `undefined` when the output is empty or whitespace-only (nothing written).
 */
async function saveToolOutput(call, output) {
    const hasContent = output.trim().length > 0;
    if (!hasContent)
        return undefined;
    const outputsDir = join(homedir(), ".clai", "outputs");
    await mkdir(outputsDir, { recursive: true });
    // ":" and "." are swapped for "-" so the timestamp is a valid file name
    // on every platform.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const fileName = `${timestamp}-${safeArtifactName(call.name)}.txt`;
    const target = join(outputsDir, fileName);
    await writeFile(target, `${output}\n`, "utf8");
    return target;
}
|
|
140
195
|
function summarizeOutput(output, maxChars = 8_000) {
|
|
141
196
|
if (output.length <= maxChars)
|
|
142
197
|
return { text: output, truncated: false };
|
|
143
198
|
const lines = output.split(/\r?\n/);
|
|
144
|
-
const signalLines = lines.filter((line) => /\b(open|vulnerable|critical|high|medium|found|success|injectable|CVE-\d{4}-\d+|200|201|204|301|302|307|308|401|403|500|error|failed)\b/i.test(line));
|
|
145
199
|
const head = [];
|
|
146
200
|
const tail = [];
|
|
147
201
|
let used = 0;
|
|
148
|
-
const
|
|
149
|
-
const half = Math.floor((maxChars - signalBudget) / 2);
|
|
150
|
-
const signals = [];
|
|
151
|
-
for (const line of signalLines) {
|
|
152
|
-
const cost = line.length + 1;
|
|
153
|
-
if (used + cost > signalBudget)
|
|
154
|
-
break;
|
|
155
|
-
signals.push(line);
|
|
156
|
-
used += cost;
|
|
157
|
-
}
|
|
158
|
-
used = 0;
|
|
202
|
+
const half = Math.floor(maxChars / 2);
|
|
159
203
|
for (const line of lines) {
|
|
160
204
|
const cost = line.length + 1;
|
|
161
205
|
if (used + cost > half)
|
|
@@ -175,40 +219,71 @@ function summarizeOutput(output, maxChars = 8_000) {
|
|
|
175
219
|
return {
|
|
176
220
|
text: [
|
|
177
221
|
...head,
|
|
178
|
-
...(signals.length > 0
|
|
179
|
-
? [`... high-signal lines from omitted output ...`, ...signals]
|
|
180
|
-
: []),
|
|
181
222
|
`... (${lines.length.toLocaleString()} output lines truncated) ...`,
|
|
182
223
|
...tail,
|
|
183
224
|
].join("\n"),
|
|
184
225
|
truncated: true,
|
|
185
226
|
};
|
|
186
227
|
}
|
|
187
|
-
function formatToolContext(result) {
|
|
188
|
-
const output =
|
|
189
|
-
|
|
190
|
-
|
|
228
|
+
/**
 * Build the text fed back to the model for a finished tool call.
 *
 * The trimmed output is first passed through `reduceToolOutput`; if that
 * throws or yields an empty summary, the raw output is used instead. The
 * chosen text is then capped through `summarizeOutput` (8,000 characters),
 * and a pointer to the saved artifact is appended when `result.outputPath`
 * is set.
 *
 * @param {{ name: string, args: Record<string, unknown> }} call - Executed tool call.
 * @param {{ output: string, outputPath?: string }} result - Result of that call.
 * @returns {string} Context text, or "" when the tool produced no output.
 */
function formatToolContext(call, result) {
    const raw = result.output.trim();
    if (raw.length === 0)
        return "";
    let condensed;
    try {
        // For shell.exec the command line is handed to the reducer; every
        // other tool passes its own name in the `command` field.
        const isShell = call.name === "shell.exec";
        const command = isShell ? String(call.args.command ?? "") : call.name;
        condensed = reduceToolOutput(raw, { toolName: call.name, command }).summary.trim();
    }
    catch {
        // Best effort: a failing reducer falls back to the raw output below.
        condensed = undefined;
    }
    // Hard cap even on reduced text so a misbehaving reducer cannot flood
    // the model context.
    const source = condensed ? condensed : raw;
    const { text } = summarizeOutput(source, 8_000);
    const savedNote = result.outputPath
        ? `\nFull output saved to: ${result.outputPath}`
        : "";
    return `${text}${savedNote}`.trim();
}
|
|
193
|
-
async function ensurePentestAuthorization(call, autoConfirm) {
|
|
194
|
-
|
|
195
|
-
|
|
253
|
+
/**
 * Gate pentest tool calls behind an authorization acknowledgment.
 *
 * Checks run in this order: calls that are not pentest calls pass; a
 * persistent authorization in the config passes; an authorization already
 * granted in this session passes. Otherwise `autoConfirm` grants it without
 * prompting, or the user is asked to confirm. A grant is recorded on the
 * session object only; the persistent config is never written here.
 *
 * @param {object} call - Tool call about to run.
 * @param {boolean | undefined} autoConfirm - True when running with `-y`.
 * @param {{ pentestAuthorized: { value: boolean } }} session - Session policy
 *   whose flag is flipped on a grant.
 * @returns {Promise<boolean>} True when the call may proceed.
 */
async function ensurePentestAuthorization(call, autoConfirm, session) {
    // Evaluated left to right with short-circuiting: non-pentest call,
    // persistent authorization, then session authorization.
    const alreadyCleared = !isPentestToolCall(call) ||
        getConfig().pentestAuthorized ||
        session.pentestAuthorized.value;
    if (alreadyCleared)
        return true;
    if (!autoConfirm) {
        const accepted = await confirm({
            message: chalk.red("clai only assists with security testing on systems you own or have written permission to test. Confirm for this session?"),
            default: false,
        });
        if (!accepted)
            return false;
    }
    // Session-scoped only, so a one-shot `-y` cannot authorize later
    // interactive runs.
    session.pentestAuthorized.value = true;
    return true;
}
|
|
209
|
-
async function confirmToolExecution(call, autoConfirm) {
|
|
277
|
+
async function confirmToolExecution(call, autoConfirm, session) {
|
|
210
278
|
const config = getConfig();
|
|
211
|
-
if (autoConfirm
|
|
279
|
+
if (autoConfirm)
|
|
280
|
+
return true;
|
|
281
|
+
if (session.allow.has(call.name))
|
|
282
|
+
return true;
|
|
283
|
+
// Persistent allowlist kept for backwards compat with users who set it
|
|
284
|
+
// through `clai config` directly, but `/allow` only mutates the session
|
|
285
|
+
// set so authorizations never leak across processes.
|
|
286
|
+
if (config.allowAlwaysTools.includes(call.name))
|
|
212
287
|
return true;
|
|
213
288
|
return confirm({
|
|
214
289
|
message: chalk.yellow(` run ${call.name}: ${formatToolArgs(call)}?`),
|
|
@@ -221,7 +296,7 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
221
296
|
const projectContext = await loadProjectContext();
|
|
222
297
|
const systemPrompt = renderAgentSystemPrompt(availableToolNames().join(", "));
|
|
223
298
|
const fullSystemPrompt = projectContext
|
|
224
|
-
? `${systemPrompt}\n\
|
|
299
|
+
? `${systemPrompt}\n\nProject context from .clai/context.md:\n${projectContext}`
|
|
225
300
|
: systemPrompt;
|
|
226
301
|
const messages = [
|
|
227
302
|
{ role: "system", content: fullSystemPrompt },
|
|
@@ -232,6 +307,7 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
232
307
|
await ensureProviderConfigured(provider);
|
|
233
308
|
let model = options.model ?? config.defaultModel;
|
|
234
309
|
let lastAnswer = "";
|
|
310
|
+
const session = options.session ?? createSessionPolicy();
|
|
235
311
|
for (let step = 0; step < maxSteps; step += 1) {
|
|
236
312
|
options.signal?.throwIfAborted();
|
|
237
313
|
// Buffer LLM output so tool JSON and hidden thinking are not printed raw.
|
|
@@ -245,7 +321,7 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
245
321
|
completion = await streamWithProvider({
|
|
246
322
|
provider,
|
|
247
323
|
model,
|
|
248
|
-
messages
|
|
324
|
+
messages,
|
|
249
325
|
temperature: 0.2,
|
|
250
326
|
// Reasoning models can spend a lot on hidden thinking; give
|
|
251
327
|
// them headroom so the visible answer / tool call isn't
|
|
@@ -283,7 +359,9 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
283
359
|
provider = completion.provider;
|
|
284
360
|
model = completion.model;
|
|
285
361
|
const assistantText = rememberThinkingFromText(completion.text);
|
|
286
|
-
const call = parseToolCall(assistantText.visible
|
|
362
|
+
const call = parseToolCall(assistantText.visible, {
|
|
363
|
+
strict: getConfig().parserStrict,
|
|
364
|
+
});
|
|
287
365
|
if (!call) {
|
|
288
366
|
// Detect the case where the model emitted sentinel-style tool-call
|
|
289
367
|
// markers but the body was malformed or truncated. Printing those
|
|
@@ -309,10 +387,6 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
309
387
|
if (!cleaned.endsWith("\n"))
|
|
310
388
|
process.stdout.write("\n");
|
|
311
389
|
}
|
|
312
|
-
updateLastToolSummary(cleaned);
|
|
313
|
-
if (hasToolOutputSnapshot()) {
|
|
314
|
-
process.stdout.write(`${renderToolOutputHint()}\n`);
|
|
315
|
-
}
|
|
316
390
|
if (assistantText.hasThinking) {
|
|
317
391
|
process.stdout.write(`${renderThinkingSummary(assistantText.thinkContent)}\n`);
|
|
318
392
|
}
|
|
@@ -329,10 +403,17 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
329
403
|
process.stdout.write(`${renderThinkingSummary(assistantText.thinkContent)}\n`);
|
|
330
404
|
}
|
|
331
405
|
messages.push({ role: "assistant", content: assistantText.visible });
|
|
332
|
-
const
|
|
333
|
-
|
|
406
|
+
const scope = await loadScope();
|
|
407
|
+
const decision = classifyToolCall(call, { scope });
|
|
408
|
+
await auditLog("tool.classified", {
|
|
409
|
+
call,
|
|
410
|
+
decision,
|
|
411
|
+
scope: isScopeActive(scope) ? scope.name ?? "(unnamed)" : "(none)",
|
|
412
|
+
});
|
|
334
413
|
// Show tool call
|
|
335
|
-
process.stdout.write(chalk.cyan(` ▶ ${call.name}`) +
|
|
414
|
+
process.stdout.write(chalk.cyan(` ▶ ${call.name}`) +
|
|
415
|
+
chalk.gray(` ${formatToolArgs(call)}`) +
|
|
416
|
+
"\n");
|
|
336
417
|
if (decision.level === "block") {
|
|
337
418
|
process.stdout.write(chalk.red(` ✗ blocked: ${decision.reason}`) + "\n");
|
|
338
419
|
lastAnswer = `Blocked: ${call.name} — ${decision.reason}`;
|
|
@@ -340,8 +421,10 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
340
421
|
}
|
|
341
422
|
// Pentest authorization — if user confirms this, skip the per-tool confirm
|
|
342
423
|
let pentestJustConfirmed = false;
|
|
343
|
-
const needsPentestAuth = isPentestToolCall(call) &&
|
|
344
|
-
|
|
424
|
+
const needsPentestAuth = isPentestToolCall(call) &&
|
|
425
|
+
!getConfig().pentestAuthorized &&
|
|
426
|
+
!session.pentestAuthorized.value;
|
|
427
|
+
const authorized = await ensurePentestAuthorization(call, Boolean(options.autoConfirm), session);
|
|
345
428
|
if (!authorized) {
|
|
346
429
|
lastAnswer = "Pentest authorization not confirmed.";
|
|
347
430
|
process.stdout.write(chalk.red(` ✗ ${lastAnswer}`) + "\n");
|
|
@@ -352,7 +435,7 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
352
435
|
}
|
|
353
436
|
// Confirm if needed (safe tools auto-execute, pentest-auth'd tools skip)
|
|
354
437
|
if (decision.level === "confirm" && !pentestJustConfirmed) {
|
|
355
|
-
const ok = await confirmToolExecution(call, Boolean(options.autoConfirm));
|
|
438
|
+
const ok = await confirmToolExecution(call, Boolean(options.autoConfirm), session);
|
|
356
439
|
if (!ok) {
|
|
357
440
|
lastAnswer = "Cancelled.";
|
|
358
441
|
process.stdout.write(chalk.yellow(` ✗ cancelled`) + "\n");
|
|
@@ -366,12 +449,13 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
366
449
|
let liveBytes = 0;
|
|
367
450
|
const liveCap = 16_000; // Stop streaming after this many bytes to avoid flooding the terminal.
|
|
368
451
|
let liveTruncatedNotified = false;
|
|
369
|
-
const livePane = createToolLivePane(formatToolArgs(call));
|
|
370
452
|
const printLive = (chunk) => {
|
|
371
453
|
// Suppress live preview for fs.read / fs.list — those are read-only
|
|
372
454
|
// and the final summary is already concise. Stream shell-style tools
|
|
373
455
|
// (shell.exec, net.scan, pentest.recon, pkg.install).
|
|
374
|
-
if (call.name === "fs.read" ||
|
|
456
|
+
if (call.name === "fs.read" ||
|
|
457
|
+
call.name === "fs.list" ||
|
|
458
|
+
call.name === "fs.search")
|
|
375
459
|
return;
|
|
376
460
|
if (liveBytes >= liveCap) {
|
|
377
461
|
if (!liveTruncatedNotified) {
|
|
@@ -383,7 +467,11 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
383
467
|
const remaining = liveCap - liveBytes;
|
|
384
468
|
const slice = chunk.length > remaining ? chunk.slice(0, remaining) : chunk;
|
|
385
469
|
liveBytes += slice.length;
|
|
386
|
-
|
|
470
|
+
// Indent each line so live output lines up under the tool call.
|
|
471
|
+
const indented = slice.replace(/\r/g, "").replace(/\n(?!$)/g, "\n ");
|
|
472
|
+
process.stdout.write(chalk.dim(indented.startsWith("\n")
|
|
473
|
+
? indented
|
|
474
|
+
: ` ${indented}`.replace(/^ /, " ")));
|
|
387
475
|
};
|
|
388
476
|
try {
|
|
389
477
|
result = await runToolCall(call, {
|
|
@@ -395,10 +483,10 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
395
483
|
},
|
|
396
484
|
});
|
|
397
485
|
// Newline separator if live output didn't already end with one.
|
|
398
|
-
|
|
486
|
+
if (liveBytes > 0)
|
|
487
|
+
process.stdout.write("\n");
|
|
399
488
|
}
|
|
400
489
|
catch (toolError) {
|
|
401
|
-
livePane.finish();
|
|
402
490
|
if (isAbortError(toolError, options.signal)) {
|
|
403
491
|
lastAnswer = "Aborted.";
|
|
404
492
|
process.stdout.write(chalk.yellow(" ⏹ Aborted.\n"));
|
|
@@ -409,26 +497,18 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
409
497
|
}
|
|
410
498
|
const output = result.output.trim();
|
|
411
499
|
const displayMax = 6_000;
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
500
|
+
// If the tool already produced an artifact (shell.exec now streams to one
|
|
501
|
+
// as it runs), respect that path. Otherwise, fall back to the post-hoc
|
|
502
|
+
// save for tools that return their full output in memory.
|
|
503
|
+
const savedOutputPath = result.outputPath ??
|
|
504
|
+
(output.length > displayMax
|
|
505
|
+
? await saveToolOutput(call, output)
|
|
506
|
+
: undefined);
|
|
415
507
|
const resultWithArtifact = {
|
|
416
508
|
...result,
|
|
417
509
|
outputPath: savedOutputPath,
|
|
418
|
-
truncated: result.truncated
|
|
419
|
-
artifacts: result.artifacts ?? (savedOutputPath
|
|
420
|
-
? [{ path: savedOutputPath, kind: "raw", redacted: true }]
|
|
421
|
-
: undefined),
|
|
510
|
+
truncated: result.truncated ?? Boolean(savedOutputPath),
|
|
422
511
|
};
|
|
423
|
-
if (output || savedOutputPath) {
|
|
424
|
-
rememberToolOutput({
|
|
425
|
-
id: `${Date.now()}-${step}`,
|
|
426
|
-
label: `${call.name} ${formatToolArgs(call)}`.trim(),
|
|
427
|
-
artifactPath: savedOutputPath,
|
|
428
|
-
fullText: savedOutputPath ? undefined : output,
|
|
429
|
-
summary: result.summary ?? result.modelContext,
|
|
430
|
-
});
|
|
431
|
-
}
|
|
432
512
|
options.onToolResult?.(call, resultWithArtifact);
|
|
433
513
|
await auditLog("tool.result", {
|
|
434
514
|
call,
|
|
@@ -446,7 +526,7 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
446
526
|
: displaySummary.text;
|
|
447
527
|
// If we already streamed live output for this call, skip re-printing
|
|
448
528
|
// the same bytes. Just note where the full output lives if it was saved.
|
|
449
|
-
if (liveBytes > 0
|
|
529
|
+
if (liveBytes > 0) {
|
|
450
530
|
if (savedOutputPath) {
|
|
451
531
|
process.stdout.write(chalk.dim(` full output saved to ${savedOutputPath}\n`));
|
|
452
532
|
}
|
|
@@ -460,11 +540,34 @@ export async function runAgentLoop(prompt, options = {}) {
|
|
|
460
540
|
process.stdout.write(chalk.yellow(" ⏹ Aborted.\n"));
|
|
461
541
|
return lastAnswer;
|
|
462
542
|
}
|
|
463
|
-
const contextOutput = formatToolContext(resultWithArtifact);
|
|
543
|
+
const contextOutput = formatToolContext(call, resultWithArtifact);
|
|
544
|
+
// Register a collapse/expand viewport so the user can pull the full raw
|
|
545
|
+
// output back with Ctrl+O or `/output last` after the AI summary lands.
|
|
546
|
+
if (output) {
|
|
547
|
+
const viewport = registerViewport({
|
|
548
|
+
toolName: call.name,
|
|
549
|
+
argsDisplay: formatToolArgs(call),
|
|
550
|
+
artifactPath: savedOutputPath,
|
|
551
|
+
summary: contextOutput,
|
|
552
|
+
});
|
|
553
|
+
process.stdout.write(`${formatViewportHint(viewport)}\n`);
|
|
554
|
+
}
|
|
464
555
|
messages.push({
|
|
465
556
|
role: "tool",
|
|
466
557
|
content: `Tool ${call.name} result (exit=${result.exitCode ?? 0}, ok=${result.ok}):\n${contextOutput}`,
|
|
467
558
|
});
|
|
559
|
+
// Compact older messages when the running estimate exceeds budget so
|
|
560
|
+
// free-tier context windows are not blown by long pentest sessions.
|
|
561
|
+
if (estimateMessagesTokens(messages) > 24_000) {
|
|
562
|
+
const compacted = compactMessages(messages);
|
|
563
|
+
if (compacted.length < messages.length) {
|
|
564
|
+
messages.splice(0, messages.length, ...compacted);
|
|
565
|
+
await auditLog("agent.compact", {
|
|
566
|
+
newLength: messages.length,
|
|
567
|
+
estimatedTokens: estimateMessagesTokens(messages),
|
|
568
|
+
});
|
|
569
|
+
}
|
|
570
|
+
}
|
|
468
571
|
}
|
|
469
572
|
lastAnswer = `Stopped after ${maxSteps} steps.`;
|
|
470
573
|
process.stdout.write(chalk.yellow(lastAnswer) + "\n");
|