@pentoshi/clai 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/README.md +9 -17
  2. package/dist/agent/context-manager.d.ts +27 -0
  3. package/dist/agent/context-manager.js +75 -0
  4. package/dist/agent/context-manager.js.map +1 -0
  5. package/dist/agent/runner.d.ts +21 -1
  6. package/dist/agent/runner.js +176 -73
  7. package/dist/agent/runner.js.map +1 -1
  8. package/dist/commands/doctor.js +20 -2
  9. package/dist/commands/doctor.js.map +1 -1
  10. package/dist/commands/update.js +11 -2
  11. package/dist/commands/update.js.map +1 -1
  12. package/dist/index.js +156 -5
  13. package/dist/index.js.map +1 -1
  14. package/dist/llm/anthropic.js +29 -38
  15. package/dist/llm/anthropic.js.map +1 -1
  16. package/dist/llm/gemini.js +31 -40
  17. package/dist/llm/gemini.js.map +1 -1
  18. package/dist/llm/http.d.ts +21 -0
  19. package/dist/llm/http.js +140 -1
  20. package/dist/llm/http.js.map +1 -1
  21. package/dist/llm/ollama.js +18 -27
  22. package/dist/llm/ollama.js.map +1 -1
  23. package/dist/llm/router.d.ts +7 -0
  24. package/dist/llm/router.js +14 -23
  25. package/dist/llm/router.js.map +1 -1
  26. package/dist/modes/agent.d.ts +4 -2
  27. package/dist/modes/agent.js +2 -2
  28. package/dist/modes/agent.js.map +1 -1
  29. package/dist/modes/ask.js +3 -4
  30. package/dist/modes/ask.js.map +1 -1
  31. package/dist/os/pkgmgr.d.ts +7 -1
  32. package/dist/os/pkgmgr.js +97 -18
  33. package/dist/os/pkgmgr.js.map +1 -1
  34. package/dist/prompts/index.d.ts +7 -0
  35. package/dist/prompts/index.js +12 -4
  36. package/dist/prompts/index.js.map +1 -1
  37. package/dist/repl.d.ts +1 -0
  38. package/dist/repl.js +283 -43
  39. package/dist/repl.js.map +1 -1
  40. package/dist/safety/classifier.d.ts +5 -1
  41. package/dist/safety/classifier.js +244 -88
  42. package/dist/safety/classifier.js.map +1 -1
  43. package/dist/safety/patterns.d.ts +48 -1
  44. package/dist/safety/patterns.js +140 -7
  45. package/dist/safety/patterns.js.map +1 -1
  46. package/dist/store/config.d.ts +21 -3
  47. package/dist/store/config.js +28 -9
  48. package/dist/store/config.js.map +1 -1
  49. package/dist/store/history.d.ts +9 -0
  50. package/dist/store/history.js +58 -1
  51. package/dist/store/history.js.map +1 -1
  52. package/dist/store/keys.d.ts +2 -1
  53. package/dist/store/keys.js +7 -3
  54. package/dist/store/keys.js.map +1 -1
  55. package/dist/store/logs.d.ts +7 -0
  56. package/dist/store/logs.js +39 -1
  57. package/dist/store/logs.js.map +1 -1
  58. package/dist/store/project.d.ts +1 -0
  59. package/dist/store/project.js +34 -9
  60. package/dist/store/project.js.map +1 -1
  61. package/dist/store/scope.d.ts +29 -0
  62. package/dist/store/scope.js +113 -0
  63. package/dist/store/scope.js.map +1 -0
  64. package/dist/tools/fs.d.ts +6 -2
  65. package/dist/tools/fs.js +99 -87
  66. package/dist/tools/fs.js.map +1 -1
  67. package/dist/tools/http.d.ts +5 -3
  68. package/dist/tools/http.js +170 -31
  69. package/dist/tools/http.js.map +1 -1
  70. package/dist/tools/policies/output-policy.d.ts +13 -0
  71. package/dist/tools/policies/output-policy.js +56 -0
  72. package/dist/tools/policies/output-policy.js.map +1 -0
  73. package/dist/tools/reducers/ffuf.d.ts +6 -0
  74. package/dist/tools/reducers/ffuf.js +74 -0
  75. package/dist/tools/reducers/ffuf.js.map +1 -0
  76. package/dist/tools/reducers/generic.d.ts +2 -0
  77. package/dist/tools/reducers/generic.js +60 -0
  78. package/dist/tools/reducers/generic.js.map +1 -0
  79. package/dist/tools/reducers/gobuster.d.ts +2 -0
  80. package/dist/tools/reducers/gobuster.js +36 -0
  81. package/dist/tools/reducers/gobuster.js.map +1 -0
  82. package/dist/tools/reducers/httpx.d.ts +2 -0
  83. package/dist/tools/reducers/httpx.js +38 -0
  84. package/dist/tools/reducers/httpx.js.map +1 -0
  85. package/dist/tools/reducers/nmap.d.ts +7 -0
  86. package/dist/tools/reducers/nmap.js +82 -0
  87. package/dist/tools/reducers/nmap.js.map +1 -0
  88. package/dist/tools/reducers/nuclei.d.ts +2 -0
  89. package/dist/tools/reducers/nuclei.js +51 -0
  90. package/dist/tools/reducers/nuclei.js.map +1 -0
  91. package/dist/tools/reducers/sqlmap.d.ts +2 -0
  92. package/dist/tools/reducers/sqlmap.js +39 -0
  93. package/dist/tools/reducers/sqlmap.js.map +1 -0
  94. package/dist/tools/reducers/subdomains.d.ts +6 -0
  95. package/dist/tools/reducers/subdomains.js +31 -0
  96. package/dist/tools/reducers/subdomains.js.map +1 -0
  97. package/dist/tools/reducers/types.d.ts +14 -0
  98. package/dist/tools/reducers/types.js +2 -0
  99. package/dist/tools/reducers/types.js.map +1 -0
  100. package/dist/tools/registry.d.ts +1 -1
  101. package/dist/tools/registry.js +223 -79
  102. package/dist/tools/registry.js.map +1 -1
  103. package/dist/tools/shell.d.ts +45 -4
  104. package/dist/tools/shell.js +419 -88
  105. package/dist/tools/shell.js.map +1 -1
  106. package/dist/tools/validate.d.ts +37 -0
  107. package/dist/tools/validate.js +144 -0
  108. package/dist/tools/validate.js.map +1 -0
  109. package/dist/types.d.ts +7 -15
  110. package/dist/ui/keys.d.ts +21 -0
  111. package/dist/ui/keys.js +13 -0
  112. package/dist/ui/keys.js.map +1 -0
  113. package/dist/ui/output-pane.d.ts +31 -0
  114. package/dist/ui/output-pane.js +81 -0
  115. package/dist/ui/output-pane.js.map +1 -0
  116. package/package.json +1 -1
package/README.md CHANGED
@@ -69,11 +69,10 @@ clai -y "list the 10 largest files in my home directory"
69
69
  - **`/agent` mode** — Agentic. AI plans, then executes shell commands, edits files, installs missing tools, parses output, and continues until the goal is met.
70
70
  - **7 LLM providers** — Groq, Google Gemini, OpenRouter, OpenAI, Anthropic, NVIDIA NIM, and Ollama (local). All with streaming.
71
71
  - **10 built-in tools** — `shell.exec`, `fs.read`, `fs.write`, `fs.list`, `fs.search`, `pkg.install`, `net.scan`, `http.fetch`, `sysinfo`, `pentest.recon`.
72
- - **Smart safety gate** — Low-risk commands auto-execute; mutating, network, secret-touching, or shell-control commands require confirmation; destructive patterns are blocked.
73
- - **Bounded tool output** — Long scan output is streamed lightly while running, saved to artifacts when needed, and reduced before it reaches the model.
72
+ - **Smart safety gate** — Read-only commands auto-execute; mutating commands require confirmation; destructive patterns are blocked.
74
73
  - **Cross-platform** — macOS, Linux, and Windows. Detects OS-native package managers (brew, apt, dnf, pacman, winget, choco).
75
74
  - **Pentest-aware** — nmap, nikto, sqlmap, gobuster, ffuf, hydra, masscan, whois, dig, netcat, tshark.
76
- - **Manual update checks** — Run `/update` or `clai update` to check for new releases.
75
+ - **Auto-update** — Checks for new versions on startup; run `/update` or `clai update` to upgrade.
77
76
  - **Persistent history** — Session history with automatic key redaction in logs.
78
77
 
79
78
  ## Provider Setup
@@ -90,8 +89,6 @@ clai supports 7 LLM providers with free tiers:
90
89
  | NVIDIA NIM | `meta/llama-3.3-70b-instruct` | ✓ | `nvapi-` |
91
90
  | Ollama | `llama3.1:8b` | ✓ | (local URL) |
92
91
 
93
- `freeOnly` mode is enabled by default. Paid providers are excluded from fallback unless you explicitly opt in by disabling `freeOnly` in config or setting `CLAI_ALLOW_PAID=1`.
94
-
95
92
  ```sh
96
93
  # Store an API key
97
94
  clai set groq gsk_xxxxxxxxxxxxxxxx
@@ -151,7 +148,6 @@ export OLLAMA_HOST=http://localhost:11434
151
148
  | `/save <name>` | Save current session |
152
149
  | `/cwd <path>` | Change working directory |
153
150
  | `/allow <tool>` | Whitelist a tool for the session |
154
- | `/output [last]` | Toggle full output from the last tool |
155
151
  | `/update` | Check for updates |
156
152
  | `/exit` | Quit |
157
153
  | `/help` | List commands |
@@ -161,25 +157,25 @@ export OLLAMA_HOST=http://localhost:11434
161
157
 
162
158
  | Tool | Description | Risk Level |
163
159
  |------------------|--------------------------------------------------------------------|------------|
164
- | `shell.exec` | Run shell commands with bounded capture and live progress | smart* |
160
+ | `shell.exec` | Run shell commands via execa (120s timeout, streams output) | smart* |
165
161
  | `fs.read` | Read files (sandboxed to approved roots) | safe |
166
162
  | `fs.write` | Write files (sandboxed) | confirm |
167
163
  | `fs.list` | List directory contents | safe |
168
164
  | `fs.search` | Search files with ripgrep (falls back to grep) | safe |
169
165
  | `pkg.install` | Install packages via detected OS package manager | confirm |
170
166
  | `net.scan` | Nmap wrapper for port scanning | confirm |
171
- | `http.fetch` | HTTP GET/HEAD with streaming response limits | safe* |
167
+ | `http.fetch` | HTTP GET/POST with response size limits | safe |
172
168
  | `sysinfo` | OS, architecture, shell, and working directory info | safe |
173
169
  | `pentest.recon` | Composite: whois + dig + nmap top-100 ports | confirm |
174
170
 
175
- > \* **smart** = only low-risk commands such as `ls`, `whoami`, and `uname` auto-execute. Network scanners, shell control syntax, secret paths, mutating commands, and non-GET HTTP methods require confirmation.
171
+ > \* **smart** = read-only commands (`curl`, `ls`, `whoami`, `gobuster`, `dirb`, etc.) auto-execute; mutating commands require confirmation.
176
172
 
177
173
  ## Safety Gate
178
174
 
179
175
  Every tool call passes through a 3-tier classifier:
180
176
 
181
- - **`safe`** — Auto-run: sandboxed read-only fs, sysinfo, GET/HEAD http.fetch, and low-risk shell info commands.
182
- - **`confirm`** — User prompt: mutating shell commands, fs.write, pkg.install, net.scan, network/private HTTP targets, scanner tools, and commands touching possible secrets.
177
+ - **`safe`** — Auto-run: read-only fs, sysinfo, http.fetch, read-only shell commands (`curl`, `ls`, `whoami`, `ifconfig`, `gobuster`, `dirb`, `ffuf`, `nikto`, etc.)
178
+ - **`confirm`** — User prompt: mutating shell commands, fs.write, pkg.install, net.scan
183
179
  - **`block`** — Refuse with explanation: `rm -rf /`, fork bombs, public IP scans without authorization, exfiltration patterns
184
180
 
185
181
  ### Pentest Authorization
@@ -190,15 +186,11 @@ Security tools require a one-time acknowledgment:
190
186
  clai authorize-pentest AGREE
191
187
  ```
192
188
 
193
- Public target scanning is blocked unless the target is private/local or the tool call carries explicit structured ownership confirmation.
194
-
195
- ### Tool Output
196
-
197
- During long tool runs, clai shows live output in dim text so you can see progress. After the AI summarizes the result, raw output is collapsed. Press `Ctrl+O` on macOS, Linux, or Windows to toggle full output for the last tool. In non-interactive terminals, use `/output last` or open the saved artifact path.
189
+ Public IP scanning is blocked unless the target is private (RFC 1918) or the user explicitly confirms ownership.
198
190
 
199
191
  ## Updates
200
192
 
201
- clai does not call GitHub automatically by default. Check manually:
193
+ clai checks for updates automatically on startup (every 4 hours, non-blocking). You can also check manually:
202
194
 
203
195
  ```sh
204
196
  # CLI command
@@ -0,0 +1,27 @@
1
+ import type { ChatMessage } from "../types.js";
2
+ /**
3
+ * Crude per-char token estimator. Production-grade tokenization differs by
4
+ * provider, but for budgeting an order-of-magnitude heuristic ("chars / 4")
5
+ * is enough to decide when to compact. We deliberately err on the side of
6
+ * over-estimating — better to compact one turn too early than to lose state
7
+ * to a provider context-window error.
8
+ */
9
+ export declare function estimateTokens(text: string): number;
10
+ export declare function estimateMessagesTokens(messages: ChatMessage[]): number;
11
+ export interface CompactOptions {
12
+ /** Soft budget (tokens). When estimated tokens exceed this, compact. */
13
+ budgetTokens?: number | undefined;
14
+ /** Keep this many trailing messages verbatim (clamped to a minimum of 2); a leading system prompt is always kept in addition. */
15
+ keepRecent?: number | undefined;
16
+ }
17
+ /**
18
+ * Replace older messages with a single condensed "memory" message while
19
+ * preserving the system prompt and the most recent N messages.
20
+ *
21
+ * We do not call the LLM here — that's a future enhancement. The current
22
+ * compaction is mechanical: keep the system prompt; replace the prefix of
23
+ * older turns with a bullet list of one-line excerpts (at most 200 chars
24
+ * each) of the user, assistant, and tool messages. This is conservative and reversible
25
+ * (the artifact files still hold the raw outputs).
26
+ */
27
+ export declare function compactMessages(messages: ChatMessage[], options?: CompactOptions): ChatMessage[];
@@ -0,0 +1,75 @@
1
/**
 * Crude per-char token estimator ("chars / 4", rounded up). Production-grade
 * tokenization differs by provider, so this is only an order-of-magnitude
 * heuristic for deciding when to compact; callers should leave headroom in
 * the budget rather than treat the result as an upper bound.
 *
 * @param {string | null | undefined} text - Message content to measure.
 * @returns {number} Estimated token count; 0 for empty or missing content.
 */
export function estimateTokens(text) {
    // Missing content (e.g. a malformed or restored message) costs nothing
    // instead of throwing on `.length`.
    if (text == null)
        return 0;
    return Math.ceil(String(text).length / 4);
}
/**
 * Estimate the token cost of a whole conversation.
 *
 * @param {Array<{ role: string, content: string }>} messages - Chat messages.
 * @returns {number} Sum of per-message estimates plus 4 tokens of role
 *   overhead per message.
 */
export function estimateMessagesTokens(messages) {
    let sum = 0;
    for (const message of messages) {
        sum += estimateTokens(message.content) + 4; // role overhead
    }
    return sum;
}
const DEFAULT_BUDGET_TOKENS = 24_000;
const DEFAULT_KEEP_RECENT = 8;
/** Per-bullet excerpt length used in the memo message. */
const MEMO_BULLET_MAX_CHARS = 200;
/** Upper bound on memo bullets so repeated compaction cannot grow the memo forever. */
const MAX_MEMO_BULLETS = 120;
/** First paragraph of every memo message; also used to recognise earlier memos. */
const MEMO_HEADER = "Earlier turns in this session, summarized to fit the context budget. Full artifacts (when produced) are saved on disk and can be expanded with /output.";
/**
 * Replace older messages with a single condensed "memory" message while
 * preserving the system prompt and the most recent N messages.
 *
 * We do not call the LLM here — that's a future enhancement. The current
 * compaction is mechanical: keep the leading system prompt; replace the
 * prefix of older turns with a bullet list of one-line excerpts of the user,
 * assistant, and tool messages. A memo produced by an earlier compaction is
 * carried forward (its bullets are re-used) instead of being dropped, so
 * repeated compaction does not erase older history. Only the most recent
 * MAX_MEMO_BULLETS bullets are retained.
 *
 * @param {Array<{ role: string, content: string }>} messages - Full history.
 * @param {{ budgetTokens?: number, keepRecent?: number }} [options]
 *   `budgetTokens`: soft budget above which compaction happens (default 24000).
 *   `keepRecent`: trailing messages kept verbatim (default 8, minimum 2).
 * @returns {Array<{ role: string, content: string }>} The same array when no
 *   compaction is needed, otherwise a new array `[system?, memo, ...recent]`.
 */
export function compactMessages(messages, options = {}) {
    const budget = options.budgetTokens ?? DEFAULT_BUDGET_TOKENS;
    const keepRecent = Math.max(2, options.keepRecent ?? DEFAULT_KEEP_RECENT);
    if (messages.length <= keepRecent + 1)
        return messages;
    if (estimateMessagesTokens(messages) <= budget)
        return messages;
    // Always keep the system prompt (index 0 if it's a system message).
    const head = [];
    let start = 0;
    if (messages[0]?.role === "system") {
        head.push(messages[0]);
        start = 1;
    }
    const tail = messages.slice(Math.max(start, messages.length - keepRecent));
    const middle = messages.slice(start, messages.length - tail.length);
    if (middle.length === 0)
        return messages;
    const bullets = [];
    for (const msg of middle) {
        bullets.push(...memoBulletsFor(msg));
    }
    // Drop the oldest bullets first; the newest context matters most.
    const kept = bullets.length > MAX_MEMO_BULLETS ? bullets.slice(-MAX_MEMO_BULLETS) : bullets;
    const memo = {
        role: "system",
        content: `${MEMO_HEADER}\n\n${kept.join("\n")}`,
    };
    return [...head, memo, ...tail];
}
/** Turn one older message into zero or more memo bullet lines. */
function memoBulletsFor(msg) {
    const content = msg.content ?? "";
    switch (msg.role) {
        case "user":
            return [`- user asked: ${oneLine(content, MEMO_BULLET_MAX_CHARS)}`];
        case "assistant": {
            const line = oneLine(content, MEMO_BULLET_MAX_CHARS);
            return line ? [`- assistant: ${line}`] : [];
        }
        case "tool":
            return [`- tool result: ${oneLine(content, MEMO_BULLET_MAX_CHARS)}`];
        case "system": {
            const text = String(content);
            if (text.startsWith(MEMO_HEADER)) {
                // A memo from a previous compaction: reuse its bullets verbatim
                // (each is already a single line) rather than losing them.
                return text
                    .slice(MEMO_HEADER.length)
                    .split("\n")
                    .map((line) => line.trim())
                    .filter((line) => line.length > 0);
            }
            const line = oneLine(text, MEMO_BULLET_MAX_CHARS);
            return line ? [`- system note: ${line}`] : [];
        }
        default:
            return [];
    }
}
/** Collapse all whitespace runs to single spaces and truncate with an ellipsis. */
function oneLine(text, maxChars) {
    const cleaned = String(text ?? "").replace(/\s+/g, " ").trim();
    if (cleaned.length <= maxChars)
        return cleaned;
    return `${cleaned.slice(0, maxChars - 1)}…`;
}
75
+ //# sourceMappingURL=context-manager.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"context-manager.js","sourceRoot":"","sources":["../../src/agent/context-manager.ts"],"names":[],"mappings":"AAEA;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,QAAuB;IAC5D,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,GAAG,IAAI,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,gBAAgB;IAC9D,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AASD,MAAM,qBAAqB,GAAG,MAAM,CAAC;AACrC,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAE9B;;;;;;;;;GASG;AACH,MAAM,UAAU,eAAe,CAC7B,QAAuB,EACvB,UAA0B,EAAE;IAE5B,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,IAAI,qBAAqB,CAAC;IAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB,CAAC,CAAC;IAC1E,IAAI,QAAQ,CAAC,MAAM,IAAI,UAAU,GAAG,CAAC;QAAE,OAAO,QAAQ,CAAC;IACvD,IAAI,sBAAsB,CAAC,QAAQ,CAAC,IAAI,MAAM;QAAE,OAAO,QAAQ,CAAC;IAEhE,oEAAoE;IACpE,MAAM,IAAI,GAAkB,EAAE,CAAC;IAC/B,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,KAAK,QAAQ,EAAE,CAAC;QACnC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACvB,KAAK,GAAG,CAAC,CAAC;IACZ,CAAC;IAED,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC;IAC3E,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;IACpE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,QAAQ,CAAC;IAEzC,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YACxB,OAAO,CAAC,IAAI,CAAC,iBAAiB,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QAC7D,CAAC;aAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YACvC,IAAI,IAAI;gBAAE,OAAO,CAAC,IAAI,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC;QACjD,CAAC;aAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,kBAAkB,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAgB;QACxB,IAAI,EAAE,QAAQ;QACd,OAAO,EACL,6JAA6J;YAC7J,OAAO
,CAAC,IAAI,CAAC,IAAI,CAAC;KACrB,CAAC;IAEF,OAAO,CAAC,GAAG,IAAI,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC;AAClC,CAAC;AAED,SAAS,OAAO,CAAC,IAAY,EAAE,QAAgB;IAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACjD,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,OAAO,CAAC;IAC/C,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC;AAC9C,CAAC"}
@@ -1,4 +1,13 @@
1
1
  import type { ChatMessage, ProviderId, ToolCall, ToolResult } from "../types.js";
2
+ export interface SessionPolicy {
3
+ /** Tools the user authorized once during this REPL session. Not persisted. */
4
+ allow: Set<string>;
5
+ /** Mutable flag so the runner can flip pentest auth for this session only. */
6
+ pentestAuthorized: {
7
+ value: boolean;
8
+ };
9
+ }
10
+ export declare function createSessionPolicy(): SessionPolicy;
2
11
  export interface AgentRunOptions {
3
12
  provider?: ProviderId | undefined;
4
13
  model?: string | undefined;
@@ -8,6 +17,17 @@ export interface AgentRunOptions {
8
17
  signal?: AbortSignal | undefined;
9
18
  onToolStart?: ((call: ToolCall) => void) | undefined;
10
19
  onToolResult?: ((call: ToolCall, result: ToolResult) => void) | undefined;
20
+ session?: SessionPolicy | undefined;
21
+ }
22
+ export interface ParseToolCallOptions {
23
+ /**
24
+ * When true, only formats that are explicitly tool-call delimited are
25
+ * accepted: ```tool fenced JSON, <tool_call> XML, and the Kimi sentinel
26
+ * token format. Loose formats (any fenced block, heading-prefix, trailing
27
+ * JSON) are dropped — useful when models routinely emit JSON examples in
28
+ * prose. Default is `false` so existing free-tier models keep working.
29
+ */
30
+ strict?: boolean | undefined;
11
31
  }
12
- export declare function parseToolCall(text: string): ToolCall | undefined;
32
+ export declare function parseToolCall(text: string, options?: ParseToolCallOptions): ToolCall | undefined;
13
33
  export declare function runAgentLoop(prompt: string, options?: AgentRunOptions): Promise<string>;
@@ -1,19 +1,26 @@
1
1
  import { confirm } from "@inquirer/prompts";
2
2
  import chalk from "chalk";
3
+ import { mkdir, writeFile } from "node:fs/promises";
4
+ import { homedir } from "node:os";
5
+ import { join } from "node:path";
3
6
  import { streamWithProvider } from "../llm/router.js";
4
7
  import { renderAgentSystemPrompt } from "../prompts/index.js";
5
- import { getConfig, updateConfig } from "../store/config.js";
8
+ import { getConfig } from "../store/config.js";
6
9
  import { classifyToolCall, isPentestToolCall } from "../safety/classifier.js";
7
10
  import { availableToolNames, runToolCall } from "../tools/registry.js";
11
+ import { reduceToolOutput } from "../tools/policies/output-policy.js";
12
+ import { formatViewportHint, registerViewport } from "../ui/output-pane.js";
13
+ import { compactMessages, estimateMessagesTokens } from "./context-manager.js";
8
14
  import { auditLog } from "../store/logs.js";
9
15
  import { loadProjectContext } from "../store/project.js";
16
+ import { loadScope, isScopeActive } from "../store/scope.js";
10
17
  import { ensureProviderConfigured } from "../commands/providers.js";
11
- import { rememberThinkingFromText, renderThinkingSummary } from "../ui/thinking.js";
18
+ import { rememberThinkingFromText, renderThinkingSummary, } from "../ui/thinking.js";
12
19
  import { renderMarkdown } from "../ui/markdown.js";
13
20
  import { startThinkingSpinner } from "../ui/spinner.js";
14
- import { writeArtifact } from "../tools/artifacts.js";
15
- import { createToolLivePane, hasToolOutputSnapshot, rememberToolOutput, renderToolOutputHint, updateLastToolSummary, } from "../ui/tool-output.js";
16
- import { compactMessagesForModel, wrapUntrustedContext } from "../context/manager.js";
21
/**
 * Build a fresh per-session policy: an empty set of session-allowed tool
 * names and a mutable pentest-authorization flag that starts out false.
 */
export function createSessionPolicy() {
    const allow = new Set();
    const pentestAuthorized = { value: false };
    return { allow, pentestAuthorized };
}
17
24
  function tryParseCall(raw) {
18
25
  try {
19
26
  const parsed = JSON.parse(raw.trim());
@@ -72,7 +79,7 @@ function stripSentinelTokens(text) {
72
79
  .replace(/<\|tool_[a-z_]*\|>/gi, "")
73
80
  .trim();
74
81
  }
75
- export function parseToolCall(text) {
82
+ export function parseToolCall(text, options = {}) {
76
83
  // 1. ```tool ... ``` (standard format)
77
84
  const fenced = text.match(/```tool\s*\n?([\s\S]*?)```/i);
78
85
  if (fenced?.[1]) {
@@ -80,12 +87,50 @@ export function parseToolCall(text) {
80
87
  if (call)
81
88
  return call;
82
89
  }
83
- // 2. Kimi/Moonshot sentinel format (used by kimi-k2 family on NIM).
84
- // Keep this provider-specific compatibility path, but reject generic JSON
85
- // examples/headings/trailing objects so explanatory prose never executes.
90
+ // 2. <tool_call>...</tool_call>
91
+ const xml = text.match(/<tool_call>([\s\S]*?)<\/tool_call>/i);
92
+ if (xml?.[1]) {
93
+ const call = tryParseCall(xml[1]);
94
+ if (call)
95
+ return call;
96
+ }
97
+ // 3. Kimi/Moonshot sentinel format (used by kimi-k2 family on NIM).
86
98
  const kimi = parseKimiToolCall(text);
87
99
  if (kimi)
88
100
  return kimi;
101
+ // In strict mode, stop here. Headings, generic fenced blocks, and trailing
102
+ // JSON are too easy to accidentally trigger when the model is showing a
103
+ // worked example.
104
+ if (options.strict)
105
+ return undefined;
106
+ // 4. ### tool / ## tool / # tool heading + JSON
107
+ const heading = text.match(/#{1,3}\s*tool\s*\n\s*(\{[\s\S]*\})/i);
108
+ if (heading?.[1]) {
109
+ const call = tryParseCall(heading[1]);
110
+ if (call)
111
+ return call;
112
+ }
113
+ // 5. **tool** heading + JSON
114
+ const bold = text.match(/\*\*tool\*\*\s*\n\s*(\{[\s\S]*\})/i);
115
+ if (bold?.[1]) {
116
+ const call = tryParseCall(bold[1]);
117
+ if (call)
118
+ return call;
119
+ }
120
+ // 6. Any fenced block (```json, ```, etc.) containing name+args
121
+ const anyFenced = text.match(/```\w*\s*\n?([\s\S]*?)```/);
122
+ if (anyFenced?.[1]) {
123
+ const call = tryParseCall(anyFenced[1]);
124
+ if (call)
125
+ return call;
126
+ }
127
+ // 7. Trailing JSON object with "name" and "args"
128
+ const trailingJson = text.match(/(\{"name"\s*:\s*"[^"]+"\s*,\s*"args"\s*:\s*\{[\s\S]*?\}\s*\})\s*$/);
129
+ if (trailingJson?.[1]) {
130
+ const call = tryParseCall(trailingJson[1]);
131
+ if (call)
132
+ return call;
133
+ }
89
134
  return undefined;
90
135
  }
91
136
  /** Extract the text before the tool call block for display purposes */
@@ -130,32 +175,31 @@ function formatToolArgs(call) {
130
175
  return JSON.stringify(call.args);
131
176
  }
132
177
  function isAbortError(error, signal) {
133
- return Boolean(signal?.aborted) || (error instanceof Error && error.name === "AbortError");
178
+ return (Boolean(signal?.aborted) ||
179
+ (error instanceof Error && error.name === "AbortError"));
180
+ }
181
/**
 * Turn a tool name into a filename-safe slug: every run of characters other
 * than letters, digits, `_`, `.`, or `-` becomes a single `-`, leading and
 * trailing dashes are stripped, and an empty result falls back to
 * "tool-output".
 */
function safeArtifactName(name) {
    const dashed = name.replace(/[^a-z0-9_.-]+/gi, "-");
    const slug = dashed.replace(/^-+|-+$/g, "");
    return slug === "" ? "tool-output" : slug;
}
135
185
  async function saveToolOutput(call, output) {
136
186
  if (!output.trim())
137
187
  return undefined;
138
- return writeArtifact(call.name, output);
188
+ const dir = join(homedir(), ".clai", "outputs");
189
+ await mkdir(dir, { recursive: true });
190
+ const stamp = new Date().toISOString().replace(/[:.]/g, "-");
191
+ const path = join(dir, `${stamp}-${safeArtifactName(call.name)}.txt`);
192
+ await writeFile(path, `${output}\n`, "utf8");
193
+ return path;
139
194
  }
140
195
  function summarizeOutput(output, maxChars = 8_000) {
141
196
  if (output.length <= maxChars)
142
197
  return { text: output, truncated: false };
143
198
  const lines = output.split(/\r?\n/);
144
- const signalLines = lines.filter((line) => /\b(open|vulnerable|critical|high|medium|found|success|injectable|CVE-\d{4}-\d+|200|201|204|301|302|307|308|401|403|500|error|failed)\b/i.test(line));
145
199
  const head = [];
146
200
  const tail = [];
147
201
  let used = 0;
148
- const signalBudget = Math.floor(maxChars * 0.45);
149
- const half = Math.floor((maxChars - signalBudget) / 2);
150
- const signals = [];
151
- for (const line of signalLines) {
152
- const cost = line.length + 1;
153
- if (used + cost > signalBudget)
154
- break;
155
- signals.push(line);
156
- used += cost;
157
- }
158
- used = 0;
202
+ const half = Math.floor(maxChars / 2);
159
203
  for (const line of lines) {
160
204
  const cost = line.length + 1;
161
205
  if (used + cost > half)
@@ -175,40 +219,71 @@ function summarizeOutput(output, maxChars = 8_000) {
175
219
  return {
176
220
  text: [
177
221
  ...head,
178
- ...(signals.length > 0
179
- ? [`... high-signal lines from omitted output ...`, ...signals]
180
- : []),
181
222
  `... (${lines.length.toLocaleString()} output lines truncated) ...`,
182
223
  ...tail,
183
224
  ].join("\n"),
184
225
  truncated: true,
185
226
  };
186
227
  }
187
- function formatToolContext(result) {
188
- const output = (result.modelContext ?? result.summary ?? result.output).trim();
189
- const summary = summarizeOutput(output, 8_000);
190
- const saved = result.outputPath ? `\nFull output saved to: ${result.outputPath}` : "";
228
+ function formatToolContext(call, result) {
229
+ const output = result.output.trim();
230
+ if (!output)
231
+ return "";
232
+ let reduced;
233
+ try {
234
+ const command = call.name === "shell.exec" ? String(call.args.command ?? "") : call.name;
235
+ const policy = reduceToolOutput(output, {
236
+ toolName: call.name,
237
+ command,
238
+ });
239
+ reduced = policy.summary.trim();
240
+ }
241
+ catch {
242
+ reduced = undefined;
243
+ }
244
+ // Hard cap on the reduced text — reducers should already be small, but
245
+ // never let one accidentally explode model context.
246
+ const base = reduced && reduced.length > 0 ? reduced : output;
247
+ const summary = summarizeOutput(base, 8_000);
248
+ const saved = result.outputPath
249
+ ? `\nFull output saved to: ${result.outputPath}`
250
+ : "";
191
251
  return `${summary.text}${saved}`.trim();
192
252
  }
193
- async function ensurePentestAuthorization(call, autoConfirm) {
194
- const config = getConfig();
195
- if (!isPentestToolCall(call) || config.pentestAuthorized)
253
+ async function ensurePentestAuthorization(call, autoConfirm, session) {
254
+ if (!isPentestToolCall(call))
255
+ return true;
256
+ // Persistent auth (via `clai authorize-pentest AGREE`) wins.
257
+ if (getConfig().pentestAuthorized)
258
+ return true;
259
+ // Session auth flipped earlier in this session — no re-prompt.
260
+ if (session.pentestAuthorized.value)
196
261
  return true;
197
262
  if (autoConfirm) {
263
+ // -y is session-scoped only. We do NOT touch the persistent config so
264
+ // a one-shot `-y` cannot silently authorize later interactive runs.
265
+ session.pentestAuthorized.value = true;
198
266
  return true;
199
267
  }
200
268
  const ok = await confirm({
201
- message: chalk.red("clai only assists with security testing on systems you own or have written permission to test. Confirm?"),
269
+ message: chalk.red("clai only assists with security testing on systems you own or have written permission to test. Confirm for this session?"),
202
270
  default: false,
203
271
  });
204
272
  if (!ok)
205
273
  return false;
206
- updateConfig({ pentestAuthorized: true });
274
+ session.pentestAuthorized.value = true;
207
275
  return true;
208
276
  }
209
- async function confirmToolExecution(call, autoConfirm) {
277
+ async function confirmToolExecution(call, autoConfirm, session) {
210
278
  const config = getConfig();
211
- if (autoConfirm || config.allowAlwaysTools.includes(call.name))
279
+ if (autoConfirm)
280
+ return true;
281
+ if (session.allow.has(call.name))
282
+ return true;
283
+ // Persistent allowlist kept for backwards compat with users who set it
284
+ // through `clai config` directly, but `/allow` only mutates the session
285
+ // set so authorizations never leak across processes.
286
+ if (config.allowAlwaysTools.includes(call.name))
212
287
  return true;
213
288
  return confirm({
214
289
  message: chalk.yellow(` run ${call.name}: ${formatToolArgs(call)}?`),
@@ -221,7 +296,7 @@ export async function runAgentLoop(prompt, options = {}) {
221
296
  const projectContext = await loadProjectContext();
222
297
  const systemPrompt = renderAgentSystemPrompt(availableToolNames().join(", "));
223
298
  const fullSystemPrompt = projectContext
224
- ? `${systemPrompt}\n\n${wrapUntrustedContext("Project context from .clai/context.md", projectContext)}`
299
+ ? `${systemPrompt}\n\nProject context from .clai/context.md:\n${projectContext}`
225
300
  : systemPrompt;
226
301
  const messages = [
227
302
  { role: "system", content: fullSystemPrompt },
@@ -232,6 +307,7 @@ export async function runAgentLoop(prompt, options = {}) {
232
307
  await ensureProviderConfigured(provider);
233
308
  let model = options.model ?? config.defaultModel;
234
309
  let lastAnswer = "";
310
+ const session = options.session ?? createSessionPolicy();
235
311
  for (let step = 0; step < maxSteps; step += 1) {
236
312
  options.signal?.throwIfAborted();
237
313
  // Buffer LLM output so tool JSON and hidden thinking are not printed raw.
@@ -245,7 +321,7 @@ export async function runAgentLoop(prompt, options = {}) {
245
321
  completion = await streamWithProvider({
246
322
  provider,
247
323
  model,
248
- messages: compactMessagesForModel(messages),
324
+ messages,
249
325
  temperature: 0.2,
250
326
  // Reasoning models can spend a lot on hidden thinking; give
251
327
  // them headroom so the visible answer / tool call isn't
@@ -283,7 +359,9 @@ export async function runAgentLoop(prompt, options = {}) {
283
359
  provider = completion.provider;
284
360
  model = completion.model;
285
361
  const assistantText = rememberThinkingFromText(completion.text);
286
- const call = parseToolCall(assistantText.visible);
362
+ const call = parseToolCall(assistantText.visible, {
363
+ strict: getConfig().parserStrict,
364
+ });
287
365
  if (!call) {
288
366
  // Detect the case where the model emitted sentinel-style tool-call
289
367
  // markers but the body was malformed or truncated. Printing those
@@ -309,10 +387,6 @@ export async function runAgentLoop(prompt, options = {}) {
309
387
  if (!cleaned.endsWith("\n"))
310
388
  process.stdout.write("\n");
311
389
  }
312
- updateLastToolSummary(cleaned);
313
- if (hasToolOutputSnapshot()) {
314
- process.stdout.write(`${renderToolOutputHint()}\n`);
315
- }
316
390
  if (assistantText.hasThinking) {
317
391
  process.stdout.write(`${renderThinkingSummary(assistantText.thinkContent)}\n`);
318
392
  }
@@ -329,10 +403,17 @@ export async function runAgentLoop(prompt, options = {}) {
329
403
  process.stdout.write(`${renderThinkingSummary(assistantText.thinkContent)}\n`);
330
404
  }
331
405
  messages.push({ role: "assistant", content: assistantText.visible });
332
- const decision = classifyToolCall(call);
333
- await auditLog("tool.classified", { call, decision });
406
+ const scope = await loadScope();
407
+ const decision = classifyToolCall(call, { scope });
408
+ await auditLog("tool.classified", {
409
+ call,
410
+ decision,
411
+ scope: isScopeActive(scope) ? scope.name ?? "(unnamed)" : "(none)",
412
+ });
334
413
  // Show tool call
335
- process.stdout.write(chalk.cyan(` ▶ ${call.name}`) + chalk.gray(` ${formatToolArgs(call)}`) + "\n");
414
+ process.stdout.write(chalk.cyan(` ▶ ${call.name}`) +
415
+ chalk.gray(` ${formatToolArgs(call)}`) +
416
+ "\n");
336
417
  if (decision.level === "block") {
337
418
  process.stdout.write(chalk.red(` ✗ blocked: ${decision.reason}`) + "\n");
338
419
  lastAnswer = `Blocked: ${call.name} — ${decision.reason}`;
@@ -340,8 +421,10 @@ export async function runAgentLoop(prompt, options = {}) {
340
421
  }
341
422
  // Pentest authorization — if user confirms this, skip the per-tool confirm
342
423
  let pentestJustConfirmed = false;
343
- const needsPentestAuth = isPentestToolCall(call) && !getConfig().pentestAuthorized;
344
- const authorized = await ensurePentestAuthorization(call, Boolean(options.autoConfirm));
424
+ const needsPentestAuth = isPentestToolCall(call) &&
425
+ !getConfig().pentestAuthorized &&
426
+ !session.pentestAuthorized.value;
427
+ const authorized = await ensurePentestAuthorization(call, Boolean(options.autoConfirm), session);
345
428
  if (!authorized) {
346
429
  lastAnswer = "Pentest authorization not confirmed.";
347
430
  process.stdout.write(chalk.red(` ✗ ${lastAnswer}`) + "\n");
@@ -352,7 +435,7 @@ export async function runAgentLoop(prompt, options = {}) {
352
435
  }
353
436
  // Confirm if needed (safe tools auto-execute, pentest-auth'd tools skip)
354
437
  if (decision.level === "confirm" && !pentestJustConfirmed) {
355
- const ok = await confirmToolExecution(call, Boolean(options.autoConfirm));
438
+ const ok = await confirmToolExecution(call, Boolean(options.autoConfirm), session);
356
439
  if (!ok) {
357
440
  lastAnswer = "Cancelled.";
358
441
  process.stdout.write(chalk.yellow(` ✗ cancelled`) + "\n");
@@ -366,12 +449,13 @@ export async function runAgentLoop(prompt, options = {}) {
366
449
  let liveBytes = 0;
367
450
  const liveCap = 16_000; // Stop streaming after this many bytes to avoid flooding the terminal.
368
451
  let liveTruncatedNotified = false;
369
- const livePane = createToolLivePane(formatToolArgs(call));
370
452
  const printLive = (chunk) => {
371
453
  // Suppress live preview for fs.read / fs.list — those are read-only
372
454
  // and the final summary is already concise. Stream shell-style tools
373
455
  // (shell.exec, net.scan, pentest.recon, pkg.install).
374
- if (call.name === "fs.read" || call.name === "fs.list" || call.name === "fs.search")
456
+ if (call.name === "fs.read" ||
457
+ call.name === "fs.list" ||
458
+ call.name === "fs.search")
375
459
  return;
376
460
  if (liveBytes >= liveCap) {
377
461
  if (!liveTruncatedNotified) {
@@ -383,7 +467,11 @@ export async function runAgentLoop(prompt, options = {}) {
383
467
  const remaining = liveCap - liveBytes;
384
468
  const slice = chunk.length > remaining ? chunk.slice(0, remaining) : chunk;
385
469
  liveBytes += slice.length;
386
- livePane.append(slice);
470
+ // Indent each line so live output lines up under the tool call.
471
+ const indented = slice.replace(/\r/g, "").replace(/\n(?!$)/g, "\n ");
472
+ process.stdout.write(chalk.dim(indented.startsWith("\n")
473
+ ? indented
474
+ : ` ${indented}`.replace(/^ /, " ")));
387
475
  };
388
476
  try {
389
477
  result = await runToolCall(call, {
@@ -395,10 +483,10 @@ export async function runAgentLoop(prompt, options = {}) {
395
483
  },
396
484
  });
397
485
  // Newline separator if live output didn't already end with one.
398
- livePane.finish();
486
+ if (liveBytes > 0)
487
+ process.stdout.write("\n");
399
488
  }
400
489
  catch (toolError) {
401
- livePane.finish();
402
490
  if (isAbortError(toolError, options.signal)) {
403
491
  lastAnswer = "Aborted.";
404
492
  process.stdout.write(chalk.yellow(" ⏹ Aborted.\n"));
@@ -409,26 +497,18 @@ export async function runAgentLoop(prompt, options = {}) {
409
497
  }
410
498
  const output = result.output.trim();
411
499
  const displayMax = 6_000;
412
- const savedOutputPath = result.outputPath ?? (output.length > displayMax
413
- ? await saveToolOutput(call, output)
414
- : undefined);
500
+ // If the tool already produced an artifact (shell.exec now streams to one
501
+ // as it runs), respect that path. Otherwise, fall back to the post-hoc
502
+ // save for tools that return their full output in memory.
503
+ const savedOutputPath = result.outputPath ??
504
+ (output.length > displayMax
505
+ ? await saveToolOutput(call, output)
506
+ : undefined);
415
507
  const resultWithArtifact = {
416
508
  ...result,
417
509
  outputPath: savedOutputPath,
418
- truncated: result.truncated || Boolean(savedOutputPath),
419
- artifacts: result.artifacts ?? (savedOutputPath
420
- ? [{ path: savedOutputPath, kind: "raw", redacted: true }]
421
- : undefined),
510
+ truncated: result.truncated ?? Boolean(savedOutputPath),
422
511
  };
423
- if (output || savedOutputPath) {
424
- rememberToolOutput({
425
- id: `${Date.now()}-${step}`,
426
- label: `${call.name} ${formatToolArgs(call)}`.trim(),
427
- artifactPath: savedOutputPath,
428
- fullText: savedOutputPath ? undefined : output,
429
- summary: result.summary ?? result.modelContext,
430
- });
431
- }
432
512
  options.onToolResult?.(call, resultWithArtifact);
433
513
  await auditLog("tool.result", {
434
514
  call,
@@ -446,7 +526,7 @@ export async function runAgentLoop(prompt, options = {}) {
446
526
  : displaySummary.text;
447
527
  // If we already streamed live output for this call, skip re-printing
448
528
  // the same bytes. Just note where the full output lives if it was saved.
449
- if (liveBytes > 0 && process.stdout.isTTY) {
529
+ if (liveBytes > 0) {
450
530
  if (savedOutputPath) {
451
531
  process.stdout.write(chalk.dim(` full output saved to ${savedOutputPath}\n`));
452
532
  }
@@ -460,11 +540,34 @@ export async function runAgentLoop(prompt, options = {}) {
460
540
  process.stdout.write(chalk.yellow(" ⏹ Aborted.\n"));
461
541
  return lastAnswer;
462
542
  }
463
- const contextOutput = formatToolContext(resultWithArtifact);
543
+ const contextOutput = formatToolContext(call, resultWithArtifact);
544
+ // Register a collapse/expand viewport so the user can pull the full raw
545
+ // output back with Ctrl+O or `/output last` after the AI summary lands.
546
+ if (output) {
547
+ const viewport = registerViewport({
548
+ toolName: call.name,
549
+ argsDisplay: formatToolArgs(call),
550
+ artifactPath: savedOutputPath,
551
+ summary: contextOutput,
552
+ });
553
+ process.stdout.write(`${formatViewportHint(viewport)}\n`);
554
+ }
464
555
  messages.push({
465
556
  role: "tool",
466
557
  content: `Tool ${call.name} result (exit=${result.exitCode ?? 0}, ok=${result.ok}):\n${contextOutput}`,
467
558
  });
559
+ // Compact older messages when the running estimate exceeds budget so
560
+ // free-tier context windows are not blown by long pentest sessions.
561
+ if (estimateMessagesTokens(messages) > 24_000) {
562
+ const compacted = compactMessages(messages);
563
+ if (compacted.length < messages.length) {
564
+ messages.splice(0, messages.length, ...compacted);
565
+ await auditLog("agent.compact", {
566
+ newLength: messages.length,
567
+ estimatedTokens: estimateMessagesTokens(messages),
568
+ });
569
+ }
570
+ }
468
571
  }
469
572
  lastAnswer = `Stopped after ${maxSteps} steps.`;
470
573
  process.stdout.write(chalk.yellow(lastAnswer) + "\n");