npm - @pentoshi/clai - Versions diffs - 0.6.0 → 0.7.1 - Mend

@pentoshi/clai 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116)

package/README.md +9 -17
package/dist/agent/context-manager.d.ts +27 -0
package/dist/agent/context-manager.js +75 -0
package/dist/agent/context-manager.js.map +1 -0
package/dist/agent/runner.d.ts +21 -1
package/dist/agent/runner.js +176 -73
package/dist/agent/runner.js.map +1 -1
package/dist/commands/doctor.js +20 -2
package/dist/commands/doctor.js.map +1 -1
package/dist/commands/update.js +11 -2
package/dist/commands/update.js.map +1 -1
package/dist/index.js +156 -5
package/dist/index.js.map +1 -1
package/dist/llm/anthropic.js +29 -38
package/dist/llm/anthropic.js.map +1 -1
package/dist/llm/gemini.js +31 -40
package/dist/llm/gemini.js.map +1 -1
package/dist/llm/http.d.ts +21 -0
package/dist/llm/http.js +140 -1
package/dist/llm/http.js.map +1 -1
package/dist/llm/ollama.js +18 -27
package/dist/llm/ollama.js.map +1 -1
package/dist/llm/router.d.ts +7 -0
package/dist/llm/router.js +14 -23
package/dist/llm/router.js.map +1 -1
package/dist/modes/agent.d.ts +4 -2
package/dist/modes/agent.js +2 -2
package/dist/modes/agent.js.map +1 -1
package/dist/modes/ask.js +3 -4
package/dist/modes/ask.js.map +1 -1
package/dist/os/pkgmgr.d.ts +7 -1
package/dist/os/pkgmgr.js +97 -18
package/dist/os/pkgmgr.js.map +1 -1
package/dist/prompts/index.d.ts +7 -0
package/dist/prompts/index.js +12 -4
package/dist/prompts/index.js.map +1 -1
package/dist/repl.d.ts +1 -0
package/dist/repl.js +283 -43
package/dist/repl.js.map +1 -1
package/dist/safety/classifier.d.ts +5 -1
package/dist/safety/classifier.js +244 -88
package/dist/safety/classifier.js.map +1 -1
package/dist/safety/patterns.d.ts +48 -1
package/dist/safety/patterns.js +140 -7
package/dist/safety/patterns.js.map +1 -1
package/dist/store/config.d.ts +21 -3
package/dist/store/config.js +28 -9
package/dist/store/config.js.map +1 -1
package/dist/store/history.d.ts +9 -0
package/dist/store/history.js +58 -1
package/dist/store/history.js.map +1 -1
package/dist/store/keys.d.ts +2 -1
package/dist/store/keys.js +7 -3
package/dist/store/keys.js.map +1 -1
package/dist/store/logs.d.ts +7 -0
package/dist/store/logs.js +39 -1
package/dist/store/logs.js.map +1 -1
package/dist/store/project.d.ts +1 -0
package/dist/store/project.js +34 -9
package/dist/store/project.js.map +1 -1
package/dist/store/scope.d.ts +29 -0
package/dist/store/scope.js +113 -0
package/dist/store/scope.js.map +1 -0
package/dist/tools/fs.d.ts +6 -2
package/dist/tools/fs.js +99 -87
package/dist/tools/fs.js.map +1 -1
package/dist/tools/http.d.ts +5 -3
package/dist/tools/http.js +170 -31
package/dist/tools/http.js.map +1 -1
package/dist/tools/policies/output-policy.d.ts +13 -0
package/dist/tools/policies/output-policy.js +56 -0
package/dist/tools/policies/output-policy.js.map +1 -0
package/dist/tools/reducers/ffuf.d.ts +6 -0
package/dist/tools/reducers/ffuf.js +74 -0
package/dist/tools/reducers/ffuf.js.map +1 -0
package/dist/tools/reducers/generic.d.ts +2 -0
package/dist/tools/reducers/generic.js +60 -0
package/dist/tools/reducers/generic.js.map +1 -0
package/dist/tools/reducers/gobuster.d.ts +2 -0
package/dist/tools/reducers/gobuster.js +36 -0
package/dist/tools/reducers/gobuster.js.map +1 -0
package/dist/tools/reducers/httpx.d.ts +2 -0
package/dist/tools/reducers/httpx.js +38 -0
package/dist/tools/reducers/httpx.js.map +1 -0
package/dist/tools/reducers/nmap.d.ts +7 -0
package/dist/tools/reducers/nmap.js +82 -0
package/dist/tools/reducers/nmap.js.map +1 -0
package/dist/tools/reducers/nuclei.d.ts +2 -0
package/dist/tools/reducers/nuclei.js +51 -0
package/dist/tools/reducers/nuclei.js.map +1 -0
package/dist/tools/reducers/sqlmap.d.ts +2 -0
package/dist/tools/reducers/sqlmap.js +39 -0
package/dist/tools/reducers/sqlmap.js.map +1 -0
package/dist/tools/reducers/subdomains.d.ts +6 -0
package/dist/tools/reducers/subdomains.js +31 -0
package/dist/tools/reducers/subdomains.js.map +1 -0
package/dist/tools/reducers/types.d.ts +14 -0
package/dist/tools/reducers/types.js +2 -0
package/dist/tools/reducers/types.js.map +1 -0
package/dist/tools/registry.d.ts +1 -1
package/dist/tools/registry.js +223 -79
package/dist/tools/registry.js.map +1 -1
package/dist/tools/shell.d.ts +45 -4
package/dist/tools/shell.js +419 -88
package/dist/tools/shell.js.map +1 -1
package/dist/tools/validate.d.ts +37 -0
package/dist/tools/validate.js +144 -0
package/dist/tools/validate.js.map +1 -0
package/dist/types.d.ts +7 -15
package/dist/ui/keys.d.ts +21 -0
package/dist/ui/keys.js +13 -0
package/dist/ui/keys.js.map +1 -0
package/dist/ui/output-pane.d.ts +31 -0
package/dist/ui/output-pane.js +81 -0
package/dist/ui/output-pane.js.map +1 -0
package/package.json +1 -1

package/README.md CHANGED

@@ -69,11 +69,10 @@ clai -y "list the 10 largest files in my home directory"
 - **`/agent` mode** — Agentic. AI plans, then executes shell commands, edits files, installs missing tools, parses output, and continues until the goal is met.
 - **7 LLM providers** — Groq, Google Gemini, OpenRouter, OpenAI, Anthropic, NVIDIA NIM, and Ollama (local). All with streaming.
 - **10 built-in tools** — `shell.exec`, `fs.read`, `fs.write`, `fs.list`, `fs.search`, `pkg.install`, `net.scan`, `http.fetch`, `sysinfo`, `pentest.recon`.
-- **Smart safety gate** — Low-risk commands auto-execute; mutating, network, secret-touching, or shell-control commands require confirmation; destructive patterns are blocked.
-- **Bounded tool output** — Long scan output is streamed lightly while running, saved to artifacts when needed, and reduced before it reaches the model.
+- **Smart safety gate** — Read-only commands auto-execute; mutating commands require confirmation; destructive patterns are blocked.
 - **Cross-platform** — macOS, Linux, and Windows. Detects OS-native package managers (brew, apt, dnf, pacman, winget, choco).
 - **Pentest-aware** — nmap, nikto, sqlmap, gobuster, ffuf, hydra, masscan, whois, dig, netcat, tshark.
-- **Manual update checks** — Run `/update` or `clai update` to check for new releases.
+- **Auto-update** — Checks for new versions on startup; run `/update` or `clai update` to upgrade.
 - **Persistent history** — Session history with automatic key redaction in logs.
 ## Provider Setup
@@ -90,8 +89,6 @@ clai supports 7 LLM providers with free tiers:
 | NVIDIA NIM  | `meta/llama-3.3-70b-instruct`                | ✓     | `nvapi-`       |
 | Ollama      | `llama3.1:8b`                                | ✓     | (local URL)    |
-`freeOnly` mode is enabled by default. Paid providers are excluded from fallback unless you explicitly opt in by disabling `freeOnly` in config or setting `CLAI_ALLOW_PAID=1`.
 ```sh
 # Store an API key
 clai set groq gsk_xxxxxxxxxxxxxxxx
@@ -151,7 +148,6 @@ export OLLAMA_HOST=http://localhost:11434
 | `/save <name>`          | Save current session                               |
 | `/cwd <path>`           | Change working directory                           |
 | `/allow <tool>`         | Whitelist a tool for the session                   |
-| `/output [last]`        | Toggle full output from the last tool               |
 | `/update`               | Check for updates                                  |
 | `/exit`                 | Quit                                               |
 | `/help`                 | List commands                                      |
@@ -161,25 +157,25 @@ export OLLAMA_HOST=http://localhost:11434
 | Tool             | Description                                                        | Risk Level |
 |------------------|--------------------------------------------------------------------|------------|
-| `shell.exec`     | Run shell commands with bounded capture and live progress          | smart*     |
+| `shell.exec`     | Run shell commands via execa (120s timeout, streams output)        | smart*     |
 | `fs.read`        | Read files (sandboxed to approved roots)                           | safe       |
 | `fs.write`       | Write files (sandboxed)                                            | confirm    |
 | `fs.list`        | List directory contents                                            | safe       |
 | `fs.search`      | Search files with ripgrep (falls back to grep)                     | safe       |
 | `pkg.install`    | Install packages via detected OS package manager                   | confirm    |
 | `net.scan`       | Nmap wrapper for port scanning                                     | confirm    |
-| `http.fetch`     | HTTP GET/HEAD with streaming response limits                       | safe*      |
+| `http.fetch`     | HTTP GET/POST with response size limits                            | safe       |
 | `sysinfo`        | OS, architecture, shell, and working directory info                | safe       |
 | `pentest.recon`  | Composite: whois + dig + nmap top-100 ports                       | confirm    |
-> \* **smart** = only low-risk commands such as `ls`, `whoami`, and `uname` auto-execute. Network scanners, shell control syntax, secret paths, mutating commands, and non-GET HTTP methods require confirmation.
+> \* **smart** = read-only commands (`curl`, `ls`, `whoami`, `gobuster`, `dirb`, etc.) auto-execute; mutating commands require confirmation.
 ## Safety Gate
 Every tool call passes through a 3-tier classifier:
-- **`safe`** — Auto-run: sandboxed read-only fs, sysinfo, GET/HEAD http.fetch, and low-risk shell info commands.
-- **`confirm`** — User prompt: mutating shell commands, fs.write, pkg.install, net.scan, network/private HTTP targets, scanner tools, and commands touching possible secrets.
+- **`safe`** — Auto-run: read-only fs, sysinfo, http.fetch, read-only shell commands (`curl`, `ls`, `whoami`, `ifconfig`, `gobuster`, `dirb`, `ffuf`, `nikto`, etc.)
+- **`confirm`** — User prompt: mutating shell commands, fs.write, pkg.install, net.scan
 - **`block`** — Refuse with explanation: `rm -rf /`, fork bombs, public IP scans without authorization, exfiltration patterns
 ### Pentest Authorization
@@ -190,15 +186,11 @@ Security tools require a one-time acknowledgment:
 clai authorize-pentest AGREE
 ```
-Public target scanning is blocked unless the target is private/local or the tool call carries explicit structured ownership confirmation.
-### Tool Output
-During long tool runs, clai shows live output in dim text so you can see progress. After the AI summarizes the result, raw output is collapsed. Press `Ctrl+O` on macOS, Linux, or Windows to toggle full output for the last tool. In non-interactive terminals, use `/output last` or open the saved artifact path.
+Public IP scanning is blocked unless the target is private (RFC 1918) or the user explicitly confirms ownership.
 ## Updates
-clai does not call GitHub automatically by default. Check manually:
+clai checks for updates automatically on startup (every 4 hours, non-blocking). You can also check manually:
 ```sh
 # CLI command

package/dist/agent/context-manager.d.ts ADDED

@@ -0,0 +1,27 @@
+import type { ChatMessage } from "../types.js";
+/**
+ * Crude per-char token estimator. Production-grade tokenization differs by
+ * provider, but for budgeting an order-of-magnitude heuristic ("chars / 4")
+ * is enough to decide when to compact. We deliberately err on the side of
+ * over-estimating — better to compact one turn too early than to lose state
+ * to a provider context-window error.
+ */
+export declare function estimateTokens(text: string): number;
+export declare function estimateMessagesTokens(messages: ChatMessage[]): number;
+export interface CompactOptions {
+    /** Soft budget (tokens). When estimated tokens exceed this, compact. */
+    budgetTokens?: number | undefined;
+    /** Keep this many trailing messages (system + user/assistant pairs). */
+    keepRecent?: number | undefined;
+}
+/**
+ * Replace older messages with a single condensed "memory" message while
+ * preserving the system prompt and the most recent N messages.
+ *
+ * We do not call the LLM here — that's a future enhancement. The current
+ * compaction is mechanical: keep the system prompt; replace the prefix of
+ * older turns with a bullet list of the assistant's last lines and the
+ * tool calls that produced output. This is conservative and reversible
+ * (the artifact files still hold the raw outputs).
+ */
+export declare function compactMessages(messages: ChatMessage[], options?: CompactOptions): ChatMessage[];

package/dist/agent/context-manager.js ADDED

@@ -0,0 +1,75 @@
+/**
+ * Crude per-char token estimator. Production-grade tokenization differs by
+ * provider, but for budgeting an order-of-magnitude heuristic ("chars / 4")
+ * is enough to decide when to compact. We deliberately err on the side of
+ * over-estimating — better to compact one turn too early than to lose state
+ * to a provider context-window error.
+ */
+export function estimateTokens(text) {
+    return Math.ceil(text.length / 4);
+}
+export function estimateMessagesTokens(messages) {
+    let sum = 0;
+    for (const message of messages) {
+        sum += estimateTokens(message.content) + 4; // role overhead
+    }
+    return sum;
+}
+const DEFAULT_BUDGET_TOKENS = 24_000;
+const DEFAULT_KEEP_RECENT = 8;
+/**
+ * Replace older messages with a single condensed "memory" message while
+ * preserving the system prompt and the most recent N messages.
+ *
+ * We do not call the LLM here — that's a future enhancement. The current
+ * compaction is mechanical: keep the system prompt; replace the prefix of
+ * older turns with a bullet list of the assistant's last lines and the
+ * tool calls that produced output. This is conservative and reversible
+ * (the artifact files still hold the raw outputs).
+ */
+export function compactMessages(messages, options = {}) {
+    const budget = options.budgetTokens ?? DEFAULT_BUDGET_TOKENS;
+    const keepRecent = Math.max(2, options.keepRecent ?? DEFAULT_KEEP_RECENT);
+    if (messages.length <= keepRecent + 1)
+        return messages;
+    if (estimateMessagesTokens(messages) <= budget)
+        return messages;
+    // Always keep the system prompt (index 0 if it's a system message).
+    const head = [];
+    let start = 0;
+    if (messages[0]?.role === "system") {
+        head.push(messages[0]);
+        start = 1;
+    }
+    const tail = messages.slice(Math.max(start, messages.length - keepRecent));
+    const middle = messages.slice(start, messages.length - tail.length);
+    if (middle.length === 0)
+        return messages;
+    const bullets = [];
+    for (const msg of middle) {
+        if (msg.role === "user") {
+            bullets.push(`- user asked: ${oneLine(msg.content, 200)}`);
+        }
+        else if (msg.role === "assistant") {
+            const line = oneLine(msg.content, 200);
+            if (line)
+                bullets.push(`- assistant: ${line}`);
+        }
+        else if (msg.role === "tool") {
+            bullets.push(`- tool result: ${oneLine(msg.content, 200)}`);
+        }
+    }
+    const memo = {
+        role: "system",
+        content: `Earlier turns in this session, summarized to fit the context budget. Full artifacts (when produced) are saved on disk and can be expanded with /output.\n\n` +
+            bullets.join("\n"),
+    };
+    return [...head, memo, ...tail];
+}
+function oneLine(text, maxChars) {
+    const cleaned = text.replace(/\s+/g, " ").trim();
+    if (cleaned.length <= maxChars)
+        return cleaned;
+    return `${cleaned.slice(0, maxChars - 1)}…`;
+}
+//# sourceMappingURL=context-manager.js.map

package/dist/agent/context-manager.js.map ADDED

@@ -0,0 +1 @@

+ {"version":3,"file":"context-manager.js","sourceRoot":"","sources":["../../src/agent/context-manager.ts"],"names":[],"mappings":"AAEA;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,QAAuB;IAC5D,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,GAAG,IAAI,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,gBAAgB;IAC9D,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AASD,MAAM,qBAAqB,GAAG,MAAM,CAAC;AACrC,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAE9B;;;;;;;;;GASG;AACH,MAAM,UAAU,eAAe,CAC7B,QAAuB,EACvB,UAA0B,EAAE;IAE5B,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,IAAI,qBAAqB,CAAC;IAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB,CAAC,CAAC;IAC1E,IAAI,QAAQ,CAAC,MAAM,IAAI,UAAU,GAAG,CAAC;QAAE,OAAO,QAAQ,CAAC;IACvD,IAAI,sBAAsB,CAAC,QAAQ,CAAC,IAAI,MAAM;QAAE,OAAO,QAAQ,CAAC;IAEhE,oEAAoE;IACpE,MAAM,IAAI,GAAkB,EAAE,CAAC;IAC/B,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,KAAK,QAAQ,EAAE,CAAC;QACnC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACvB,KAAK,GAAG,CAAC,CAAC;IACZ,CAAC;IAED,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC;IAC3E,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;IACpE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,QAAQ,CAAC;IAEzC,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YACxB,OAAO,CAAC,IAAI,CAAC,iBAAiB,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QAC7D,CAAC;aAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YACvC,IAAI,IAAI;gBAAE,OAAO,CAAC,IAAI,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC;QACjD,CAAC;aAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,kBAAkB,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAgB;QACxB,IAAI,EAAE,QAAQ;QACd,OAAO,EACL,6JAA6J;YAC7J,OAAO
,CAAC,IAAI,CAAC,IAAI,CAAC;KACrB,CAAC;IAEF,OAAO,CAAC,GAAG,IAAI,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC;AAClC,CAAC;AAED,SAAS,OAAO,CAAC,IAAY,EAAE,QAAgB;IAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACjD,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,OAAO,CAAC;IAC/C,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC;AAC9C,CAAC"}

package/dist/agent/runner.d.ts CHANGED

@@ -1,4 +1,13 @@
 import type { ChatMessage, ProviderId, ToolCall, ToolResult } from "../types.js";
+export interface SessionPolicy {
+    /** Tools the user authorized once during this REPL session. Not persisted. */
+    allow: Set<string>;
+    /** Mutable flag so the runner can flip pentest auth for this session only. */
+    pentestAuthorized: {
+        value: boolean;
+    };
+}
+export declare function createSessionPolicy(): SessionPolicy;
 export interface AgentRunOptions {
     provider?: ProviderId | undefined;
     model?: string | undefined;
@@ -8,6 +17,17 @@ export interface AgentRunOptions {
     signal?: AbortSignal | undefined;
     onToolStart?: ((call: ToolCall) => void) | undefined;
     onToolResult?: ((call: ToolCall, result: ToolResult) => void) | undefined;
+    session?: SessionPolicy | undefined;
+}
+export interface ParseToolCallOptions {
+    /**
+     * When true, only formats that are explicitly tool-call delimited are
+     * accepted: ```tool fenced JSON, <tool_call> XML, and the Kimi sentinel
+     * token format. Loose formats (any fenced block, heading-prefix, trailing
+     * JSON) are dropped — useful when models routinely emit JSON examples in
+     * prose. Default is `false` so existing free-tier models keep working.
+     */
+    strict?: boolean | undefined;
 }
-export declare function parseToolCall(text: string): ToolCall | undefined;
+export declare function parseToolCall(text: string, options?: ParseToolCallOptions): ToolCall | undefined;
 export declare function runAgentLoop(prompt: string, options?: AgentRunOptions): Promise<string>;

package/dist/agent/runner.js CHANGED

@@ -1,19 +1,26 @@
 import { confirm } from "@inquirer/prompts";
 import chalk from "chalk";
+import { mkdir, writeFile } from "node:fs/promises";
+import { homedir } from "node:os";
+import { join } from "node:path";
 import { streamWithProvider } from "../llm/router.js";
 import { renderAgentSystemPrompt } from "../prompts/index.js";
-import { getConfig, updateConfig } from "../store/config.js";
+import { getConfig } from "../store/config.js";
 import { classifyToolCall, isPentestToolCall } from "../safety/classifier.js";
 import { availableToolNames, runToolCall } from "../tools/registry.js";
+import { reduceToolOutput } from "../tools/policies/output-policy.js";
+import { formatViewportHint, registerViewport } from "../ui/output-pane.js";
+import { compactMessages, estimateMessagesTokens } from "./context-manager.js";
 import { auditLog } from "../store/logs.js";
 import { loadProjectContext } from "../store/project.js";
+import { loadScope, isScopeActive } from "../store/scope.js";
 import { ensureProviderConfigured } from "../commands/providers.js";
-import { rememberThinkingFromText, renderThinkingSummary } from "../ui/thinking.js";
+import { rememberThinkingFromText, renderThinkingSummary, } from "../ui/thinking.js";
 import { renderMarkdown } from "../ui/markdown.js";
 import { startThinkingSpinner } from "../ui/spinner.js";
-import { writeArtifact } from "../tools/artifacts.js";
-import { createToolLivePane, hasToolOutputSnapshot, rememberToolOutput, renderToolOutputHint, updateLastToolSummary, } from "../ui/tool-output.js";
-import { compactMessagesForModel, wrapUntrustedContext } from "../context/manager.js";
+export function createSessionPolicy() {
+    return { allow: new Set(), pentestAuthorized: { value: false } };
+}
 function tryParseCall(raw) {
     try {
         const parsed = JSON.parse(raw.trim());
@@ -72,7 +79,7 @@ function stripSentinelTokens(text) {
         .replace(/<\|tool_[a-z_]*\|>/gi, "")
         .trim();
 }
-export function parseToolCall(text) {
+export function parseToolCall(text, options = {}) {
     // 1. ```tool ... ``` (standard format)
     const fenced = text.match(/```tool\s*\n?([\s\S]*?)```/i);
     if (fenced?.[1]) {
@@ -80,12 +87,50 @@ export function parseToolCall(text) {
         if (call)
             return call;
     }
-    // 2. Kimi/Moonshot sentinel format (used by kimi-k2 family on NIM).
-    // Keep this provider-specific compatibility path, but reject generic JSON
-    // examples/headings/trailing objects so explanatory prose never executes.
+    // 2. <tool_call>...</tool_call>
+    const xml = text.match(/<tool_call>([\s\S]*?)<\/tool_call>/i);
+    if (xml?.[1]) {
+        const call = tryParseCall(xml[1]);
+        if (call)
+            return call;
+    }
+    // 3. Kimi/Moonshot sentinel format (used by kimi-k2 family on NIM).
     const kimi = parseKimiToolCall(text);
     if (kimi)
         return kimi;
+    // In strict mode, stop here. Headings, generic fenced blocks, and trailing
+    // JSON are too easy to accidentally trigger when the model is showing a
+    // worked example.
+    if (options.strict)
+        return undefined;
+    // 4. ### tool / ## tool / # tool heading + JSON
+    const heading = text.match(/#{1,3}\s*tool\s*\n\s*(\{[\s\S]*\})/i);
+    if (heading?.[1]) {
+        const call = tryParseCall(heading[1]);
+        if (call)
+            return call;
+    }
+    // 5. **tool** heading + JSON
+    const bold = text.match(/\*\*tool\*\*\s*\n\s*(\{[\s\S]*\})/i);
+    if (bold?.[1]) {
+        const call = tryParseCall(bold[1]);
+        if (call)
+            return call;
+    }
+    // 6. Any fenced block (```json, ```, etc.) containing name+args
+    const anyFenced = text.match(/```\w*\s*\n?([\s\S]*?)```/);
+    if (anyFenced?.[1]) {
+        const call = tryParseCall(anyFenced[1]);
+        if (call)
+            return call;
+    }
+    // 7. Trailing JSON object with "name" and "args"
+    const trailingJson = text.match(/(\{"name"\s*:\s*"[^"]+"\s*,\s*"args"\s*:\s*\{[\s\S]*?\}\s*\})\s*$/);
+    if (trailingJson?.[1]) {
+        const call = tryParseCall(trailingJson[1]);
+        if (call)
+            return call;
+    }
     return undefined;
 }
 /** Extract the text before the tool call block for display purposes */
@@ -130,32 +175,31 @@ function formatToolArgs(call) {
     return JSON.stringify(call.args);
 }
 function isAbortError(error, signal) {
-    return Boolean(signal?.aborted) || (error instanceof Error && error.name === "AbortError");
+    return (Boolean(signal?.aborted) ||
+        (error instanceof Error && error.name === "AbortError"));
+}
+function safeArtifactName(name) {
+    return (name.replace(/[^a-z0-9_.-]+/gi, "-").replace(/^-+|-+$/g, "") ||
+        "tool-output");
 }
 async function saveToolOutput(call, output) {
     if (!output.trim())
         return undefined;
-    return writeArtifact(call.name, output);
+    const dir = join(homedir(), ".clai", "outputs");
+    await mkdir(dir, { recursive: true });
+    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
+    const path = join(dir, `${stamp}-${safeArtifactName(call.name)}.txt`);
+    await writeFile(path, `${output}\n`, "utf8");
+    return path;
 }
 function summarizeOutput(output, maxChars = 8_000) {
     if (output.length <= maxChars)
         return { text: output, truncated: false };
     const lines = output.split(/\r?\n/);
-    const signalLines = lines.filter((line) => /\b(open|vulnerable|critical|high|medium|found|success|injectable|CVE-\d{4}-\d+|200|201|204|301|302|307|308|401|403|500|error|failed)\b/i.test(line));
     const head = [];
     const tail = [];
     let used = 0;
-    const signalBudget = Math.floor(maxChars * 0.45);
-    const half = Math.floor((maxChars - signalBudget) / 2);
-    const signals = [];
-    for (const line of signalLines) {
-        const cost = line.length + 1;
-        if (used + cost > signalBudget)
-            break;
-        signals.push(line);
-        used += cost;
-    }
-    used = 0;
+    const half = Math.floor(maxChars / 2);
     for (const line of lines) {
         const cost = line.length + 1;
         if (used + cost > half)
@@ -175,40 +219,71 @@ function summarizeOutput(output, maxChars = 8_000) {
     return {
         text: [
             ...head,
-            ...(signals.length > 0
-                ? [`... high-signal lines from omitted output ...`, ...signals]
-                : []),
             `... (${lines.length.toLocaleString()} output lines truncated) ...`,
             ...tail,
         ].join("\n"),
         truncated: true,
     };
 }
-function formatToolContext(result) {
-    const output = (result.modelContext ?? result.summary ?? result.output).trim();
-    const summary = summarizeOutput(output, 8_000);
-    const saved = result.outputPath ? `\nFull output saved to: ${result.outputPath}` : "";
+function formatToolContext(call, result) {
+    const output = result.output.trim();
+    if (!output)
+        return "";
+    let reduced;
+    try {
+        const command = call.name === "shell.exec" ? String(call.args.command ?? "") : call.name;
+        const policy = reduceToolOutput(output, {
+            toolName: call.name,
+            command,
+        });
+        reduced = policy.summary.trim();
+    }
+    catch {
+        reduced = undefined;
+    }
+    // Hard cap on the reduced text — reducers should already be small, but
+    // never let one accidentally explode model context.
+    const base = reduced && reduced.length > 0 ? reduced : output;
+    const summary = summarizeOutput(base, 8_000);
+    const saved = result.outputPath
+        ? `\nFull output saved to: ${result.outputPath}`
+        : "";
     return `${summary.text}${saved}`.trim();
 }
-async function ensurePentestAuthorization(call, autoConfirm) {
-    const config = getConfig();
-    if (!isPentestToolCall(call) || config.pentestAuthorized)
+async function ensurePentestAuthorization(call, autoConfirm, session) {
+    if (!isPentestToolCall(call))
+        return true;
+    // Persistent auth (via `clai authorize-pentest AGREE`) wins.
+    if (getConfig().pentestAuthorized)
+        return true;
+    // Session auth flipped earlier in this session — no re-prompt.
+    if (session.pentestAuthorized.value)
         return true;
     if (autoConfirm) {
+        // -y is session-scoped only. We do NOT touch the persistent config so
+        // a one-shot `-y` cannot silently authorize later interactive runs.
+        session.pentestAuthorized.value = true;
         return true;
     }
     const ok = await confirm({
-        message: chalk.red("clai only assists with security testing on systems you own or have written permission to test. Confirm?"),
+        message: chalk.red("clai only assists with security testing on systems you own or have written permission to test. Confirm for this session?"),
         default: false,
     });
     if (!ok)
         return false;
-    updateConfig({ pentestAuthorized: true });
+    session.pentestAuthorized.value = true;
     return true;
 }
-async function confirmToolExecution(call, autoConfirm) {
+async function confirmToolExecution(call, autoConfirm, session) {
     const config = getConfig();
-    if (autoConfirm || config.allowAlwaysTools.includes(call.name))
+    if (autoConfirm)
+        return true;
+    if (session.allow.has(call.name))
+        return true;
+    // Persistent allowlist kept for backwards compat with users who set it
+    // through `clai config` directly, but `/allow` only mutates the session
+    // set so authorizations never leak across processes.
+    if (config.allowAlwaysTools.includes(call.name))
         return true;
     return confirm({
         message: chalk.yellow(`  run ${call.name}: ${formatToolArgs(call)}?`),
@@ -221,7 +296,7 @@ export async function runAgentLoop(prompt, options = {}) {
     const projectContext = await loadProjectContext();
     const systemPrompt = renderAgentSystemPrompt(availableToolNames().join(", "));
     const fullSystemPrompt = projectContext
-        ? `${systemPrompt}\n\n${wrapUntrustedContext("Project context from .clai/context.md", projectContext)}`
+        ? `${systemPrompt}\n\nProject context from .clai/context.md:\n${projectContext}`
         : systemPrompt;
     const messages = [
         { role: "system", content: fullSystemPrompt },
@@ -232,6 +307,7 @@ export async function runAgentLoop(prompt, options = {}) {
     await ensureProviderConfigured(provider);
     let model = options.model ?? config.defaultModel;
     let lastAnswer = "";
+    const session = options.session ?? createSessionPolicy();
     for (let step = 0; step < maxSteps; step += 1) {
         options.signal?.throwIfAborted();
         // Buffer LLM output so tool JSON and hidden thinking are not printed raw.
@@ -245,7 +321,7 @@ export async function runAgentLoop(prompt, options = {}) {
             completion = await streamWithProvider({
                 provider,
                 model,
-                messages: compactMessagesForModel(messages),
+                messages,
                 temperature: 0.2,
                 // Reasoning models can spend a lot on hidden thinking; give
                 // them headroom so the visible answer / tool call isn't
@@ -283,7 +359,9 @@ export async function runAgentLoop(prompt, options = {}) {
         provider = completion.provider;
         model = completion.model;
         const assistantText = rememberThinkingFromText(completion.text);
-        const call = parseToolCall(assistantText.visible);
+        const call = parseToolCall(assistantText.visible, {
+            strict: getConfig().parserStrict,
+        });
         if (!call) {
             // Detect the case where the model emitted sentinel-style tool-call
             // markers but the body was malformed or truncated. Printing those
@@ -309,10 +387,6 @@ export async function runAgentLoop(prompt, options = {}) {
                 if (!cleaned.endsWith("\n"))
                     process.stdout.write("\n");
             }
-            updateLastToolSummary(cleaned);
-            if (hasToolOutputSnapshot()) {
-                process.stdout.write(`${renderToolOutputHint()}\n`);
-            }
             if (assistantText.hasThinking) {
                 process.stdout.write(`${renderThinkingSummary(assistantText.thinkContent)}\n`);
             }
@@ -329,10 +403,17 @@ export async function runAgentLoop(prompt, options = {}) {
             process.stdout.write(`${renderThinkingSummary(assistantText.thinkContent)}\n`);
         }
         messages.push({ role: "assistant", content: assistantText.visible });
-        const decision = classifyToolCall(call);
-        await auditLog("tool.classified", { call, decision });
+        const scope = await loadScope();
+        const decision = classifyToolCall(call, { scope });
+        await auditLog("tool.classified", {
+            call,
+            decision,
+            scope: isScopeActive(scope) ? scope.name ?? "(unnamed)" : "(none)",
+        });
         // Show tool call
-        process.stdout.write(chalk.cyan(`  ▶ ${call.name}`) + chalk.gray(` ${formatToolArgs(call)}`) + "\n");
+        process.stdout.write(chalk.cyan(`  ▶ ${call.name}`) +
+            chalk.gray(` ${formatToolArgs(call)}`) +
+            "\n");
         if (decision.level === "block") {
             process.stdout.write(chalk.red(`  ✗ blocked: ${decision.reason}`) + "\n");
             lastAnswer = `Blocked: ${call.name} — ${decision.reason}`;
@@ -340,8 +421,10 @@ export async function runAgentLoop(prompt, options = {}) {
         }
         // Pentest authorization — if user confirms this, skip the per-tool confirm
         let pentestJustConfirmed = false;
-        const needsPentestAuth = isPentestToolCall(call) && !getConfig().pentestAuthorized;
-        const authorized = await ensurePentestAuthorization(call, Boolean(options.autoConfirm));
+        const needsPentestAuth = isPentestToolCall(call) &&
+            !getConfig().pentestAuthorized &&
+            !session.pentestAuthorized.value;
+        const authorized = await ensurePentestAuthorization(call, Boolean(options.autoConfirm), session);
         if (!authorized) {
             lastAnswer = "Pentest authorization not confirmed.";
             process.stdout.write(chalk.red(`  ✗ ${lastAnswer}`) + "\n");
@@ -352,7 +435,7 @@ export async function runAgentLoop(prompt, options = {}) {
         }
         // Confirm if needed (safe tools auto-execute, pentest-auth'd tools skip)
         if (decision.level === "confirm" && !pentestJustConfirmed) {
-            const ok = await confirmToolExecution(call, Boolean(options.autoConfirm));
+            const ok = await confirmToolExecution(call, Boolean(options.autoConfirm), session);
             if (!ok) {
                 lastAnswer = "Cancelled.";
                 process.stdout.write(chalk.yellow(`  ✗ cancelled`) + "\n");
@@ -366,12 +449,13 @@ export async function runAgentLoop(prompt, options = {}) {
         let liveBytes = 0;
         const liveCap = 16_000; // Stop streaming after this many bytes to avoid flooding the terminal.
         let liveTruncatedNotified = false;
-        const livePane = createToolLivePane(formatToolArgs(call));
         const printLive = (chunk) => {
             // Suppress live preview for fs.read / fs.list — those are read-only
             // and the final summary is already concise. Stream shell-style tools
             // (shell.exec, net.scan, pentest.recon, pkg.install).
-            if (call.name === "fs.read" || call.name === "fs.list" || call.name === "fs.search")
+            if (call.name === "fs.read" ||
+                call.name === "fs.list" ||
+                call.name === "fs.search")
                 return;
             if (liveBytes >= liveCap) {
                 if (!liveTruncatedNotified) {
@@ -383,7 +467,11 @@ export async function runAgentLoop(prompt, options = {}) {
             const remaining = liveCap - liveBytes;
             const slice = chunk.length > remaining ? chunk.slice(0, remaining) : chunk;
             liveBytes += slice.length;
-            livePane.append(slice);
+            // Indent each line so live output lines up under the tool call.
+            const indented = slice.replace(/\r/g, "").replace(/\n(?!$)/g, "\n  ");
+            process.stdout.write(chalk.dim(indented.startsWith("\n")
+                ? indented
+                : `  ${indented}`.replace(/^  /, "  ")));
         };
         try {
             result = await runToolCall(call, {
@@ -395,10 +483,10 @@ export async function runAgentLoop(prompt, options = {}) {
                 },
             });
             // Newline separator if live output didn't already end with one.
-            livePane.finish();
+            if (liveBytes > 0)
+                process.stdout.write("\n");
         }
         catch (toolError) {
-            livePane.finish();
             if (isAbortError(toolError, options.signal)) {
                 lastAnswer = "Aborted.";
                 process.stdout.write(chalk.yellow("  ⏹ Aborted.\n"));
@@ -409,26 +497,18 @@ export async function runAgentLoop(prompt, options = {}) {
         }
         const output = result.output.trim();
         const displayMax = 6_000;
-        const savedOutputPath = result.outputPath ?? (output.length > displayMax
-            ? await saveToolOutput(call, output)
-            : undefined);
+        // If the tool already produced an artifact (shell.exec now streams to one
+        // as it runs), respect that path. Otherwise, fall back to the post-hoc
+        // save for tools that return their full output in memory.
+        const savedOutputPath = result.outputPath ??
+            (output.length > displayMax
+                ? await saveToolOutput(call, output)
+                : undefined);
         const resultWithArtifact = {
             ...result,
             outputPath: savedOutputPath,
-            truncated: result.truncated || Boolean(savedOutputPath),
-            artifacts: result.artifacts ?? (savedOutputPath
-                ? [{ path: savedOutputPath, kind: "raw", redacted: true }]
-                : undefined),
+            truncated: result.truncated ?? Boolean(savedOutputPath),
         };
-        if (output || savedOutputPath) {
-            rememberToolOutput({
-                id: `${Date.now()}-${step}`,
-                label: `${call.name} ${formatToolArgs(call)}`.trim(),
-                artifactPath: savedOutputPath,
-                fullText: savedOutputPath ? undefined : output,
-                summary: result.summary ?? result.modelContext,
-            });
-        }
         options.onToolResult?.(call, resultWithArtifact);
         await auditLog("tool.result", {
             call,
@@ -446,7 +526,7 @@ export async function runAgentLoop(prompt, options = {}) {
                 : displaySummary.text;
             // If we already streamed live output for this call, skip re-printing
             // the same bytes. Just note where the full output lives if it was saved.
-            if (liveBytes > 0 && process.stdout.isTTY) {
+            if (liveBytes > 0) {
                 if (savedOutputPath) {
                     process.stdout.write(chalk.dim(`  full output saved to ${savedOutputPath}\n`));
                 }
@@ -460,11 +540,34 @@ export async function runAgentLoop(prompt, options = {}) {
             process.stdout.write(chalk.yellow("  ⏹ Aborted.\n"));
             return lastAnswer;
         }
-        const contextOutput = formatToolContext(resultWithArtifact);
+        const contextOutput = formatToolContext(call, resultWithArtifact);
+        // Register a collapse/expand viewport so the user can pull the full raw
+        // output back with Ctrl+O or `/output last` after the AI summary lands.
+        if (output) {
+            const viewport = registerViewport({
+                toolName: call.name,
+                argsDisplay: formatToolArgs(call),
+                artifactPath: savedOutputPath,
+                summary: contextOutput,
+            });
+            process.stdout.write(`${formatViewportHint(viewport)}\n`);
+        }
         messages.push({
             role: "tool",
             content: `Tool ${call.name} result (exit=${result.exitCode ?? 0}, ok=${result.ok}):\n${contextOutput}`,
         });
+        // Compact older messages when the running estimate exceeds budget so
+        // free-tier context windows are not blown by long pentest sessions.
+        if (estimateMessagesTokens(messages) > 24_000) {
+            const compacted = compactMessages(messages);
+            if (compacted.length < messages.length) {
+                messages.splice(0, messages.length, ...compacted);
+                await auditLog("agent.compact", {
+                    newLength: messages.length,
+                    estimatedTokens: estimateMessagesTokens(messages),
+                });
+            }
+        }
     }
     lastAnswer = `Stopped after ${maxSteps} steps.`;
     process.stdout.write(chalk.yellow(lastAnswer) + "\n");