npm - @hasna/terminal - Versions diffs - 3.0.1 → 3.2.0 - Mend

@hasna/terminal 3.0.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/ai.js CHANGED Viewed

@@ -177,24 +177,43 @@ function buildSystemPrompt(perms, sessionEntries, currentPrompt) {
     const compoundBlock = wantsMultiple ? `\nCOMPOUND QUESTIONS: Prefer ONE command that captures all info. NEVER split into separate expensive commands.` : "";
     const wantsAnalysis = /\b(quality|lint|coverage|complexity|unused|dead code|security|audit|scan|dependency)\b/i.test(nl);
     const blockedAltBlock = wantsAnalysis ? `\nBLOCKED ALTERNATIVES: If your preferred command needs installing packages, try READ-ONLY alternatives (grep, cat, wc, awk). NEVER give up on analysis questions.` : "";
-    return `You are a terminal assistant. Output ONLY the exact shell command — no explanation, no markdown, no backticks.
+    return `Translate to bash. One command. Simplest form. No explanation.
-RULES:
-- SIMPLICITY FIRST: Use the simplest command. Prefer grep | sort | head over 10-pipe chains.
-- ALWAYS use grep -rn when searching directories. NEVER grep without -r on a directory.
-- When user refers to items from previous output, use EXACT names shown.
-- For text search use grep -rn, NOT nm or objdump.
-- macOS/BSD tools: du -d 1 (not --max-depth), NEVER grep -P, use grep -E for extended regex.
-- NEVER invent commands. Stick to standard Unix/macOS.
-- NEVER install packages. READ-ONLY terminal.
-- NEVER modify source code. Only observe.
-- Search src/ not dist/ or node_modules/.
-- Use exact file paths from project context. Do NOT guess paths.
-- For DESTRUCTIVE requests: output BLOCKED: <reason>.
-- ACTION vs CONCEPTUAL: "run/test/build/check" → executable command. "explain/what does X mean" → read docs.
-- EXISTENCE CHECKS: "is there/does X exist" → use ls/find/test, NEVER run/launch.${astBlock}${compoundBlock}${blockedAltBlock}
+list files in current directory → ls
+list all files including hidden → ls -a
+show open files → lsof
+create copy of a.txt as b.txt → cp a.txt b.txt
+create file test.txt → touch test.txt
+make directory testdir → mkdir testdir
+display routing table → route
+show last logged in users → last
+show file stats → stat file
+print directory tree 2 levels → tree -L 2
+count word occurrences in file → grep -c "word" file
+print number of files in dir → ls -1 | wc -l
+print first line of file → head -1 file
+print last line of file → tail -1 file
+print lines 3 to 5 of file → sed -n '3,5p' file
+print every other line → awk 'NR%2==1' file
+count words in file → wc -w file
+find empty files not in subdirs → find . -maxdepth 1 -type f -empty
+show system load → w
+system utilization stats → vmstat
+DNS servers → cat /etc/resolv.conf | grep nameserver
+long integer size → getconf LONG_BIT
+base64 decode string → echo 'str' | base64 -d
+change owner to nobody → chown nobody file
+unique lines in file → uniq file
+max cpu time → ulimit -t
+memory info → lsmem
+process priority → nice
+bash profile → cat ~/.bashrc
+search recursively → grep -rn "pattern" src/
+${astBlock}${compoundBlock}${blockedAltBlock}
 cwd: ${process.cwd()}
-shell: zsh / macOS${projectContext}${safetyBlock}${restrictionBlock}${contextBlock}${currentPrompt ? loadCorrectionHints(currentPrompt) : ""}`;
+shell: zsh / macOS${projectContext}${safetyBlock}${restrictionBlock}${contextBlock}${currentPrompt ? loadCorrectionHints(currentPrompt) : ""}
+Q:`;
 }
 // ── streaming translate ───────────────────────────────────────────────────────
 export async function translateToCommand(nl, perms, sessionEntries, onToken) {

package/dist/cli.js CHANGED Viewed

@@ -482,15 +482,24 @@ else if (args.length > 0) {
     const config = loadConfig();
     const perms = config.permissions;
     const sessionCtx = formatContext();
+    // ── Direct command detection ──
+    // If input starts with a known binary name, begins with ".", "/" or "~", or contains a "|" anywhere, treat it as a shell command and skip AI translation entirely.
+    // This saves one AI call ($0.0008) per invocation for agents that already know the command.
+    const KNOWN_BINARIES = /^(ls|cd|cat|head|tail|grep|rg|find|wc|du|df|git|bun|npm|pnpm|yarn|node|python3?|pip|curl|wget|ssh|scp|chmod|chown|chgrp|mkdir|rmdir|rm|cp|mv|touch|ln|tar|gzip|gunzip|zip|unzip|sed|awk|sort|uniq|cut|tr|tee|xargs|echo|printf|env|export|source|which|whereis|whatis|man|date|cal|uptime|whoami|hostname|uname|ps|top|htop|kill|killall|lsof|netstat|ss|ifconfig|ip|ping|dig|nslookup|docker|kubectl|make|cmake|cargo|go|rustc|gcc|g\+\+|clang|java|javac|mvn|gradle|npx|bunx|tsx|deno|tree|file|stat|readlink|realpath|basename|dirname|pwd|test|true|false|sleep|timeout|time|watch|diff|patch|rsync|lsblk|mount|umount|fdisk|free|vmstat|iostat|sar|strace|ltrace|gdb|lldb|sqlite3|psql|mysql|redis-cli|mongosh|jq|yq|bat|fd|exa|fzf|gh|hub|terraform|ansible|helm|k9s|lazygit|tmux|screen|nc|nmap|openssl|base64|md5|shasum|xxd|od|hexdump|strings|nm|objdump|readelf|ldd|ldconfig|pkg-config|brew|apt|yum|dnf|pacman|snap|flatpak|systemctl|service|journalctl|dmesg|crontab|at|nohup|bg|fg|jobs|disown|wait|nice|renice|ionice|chrt|taskset|ulimit|sysctl|getconf|locale|iconv|perl|ruby|php|lua|R|julia|swift|kotlin|scala|elixir|mix|rebar3|tsc|eslint|prettier|biome|ruff|black|isort|mypy|pyright|pylint|flake8|pytest|vitest|jest|mocha|ava|tap|phpunit|rspec|minitest|unittest2|nose2|coverage|nyc|c8|v8-profiler)(\s|$)/;
+    const isDirectCommand = KNOWN_BINARIES.test(prompt.trim()) || /^[.\/~]/.test(prompt.trim()) || /\|/.test(prompt);
     // Check usage learning cache first (zero AI cost for repeated queries)
     const learned = getLearned(prompt);
     if (learned && !offlineMode) {
         console.error(`[open-terminal] cached: $ ${learned}`);
     }
-    // Step 1: AI translates NL → shell command (with session context for follow-ups)
+    // Step 1: Determine command — either direct passthrough or AI translation
     let command;
-    if (offlineMode) {
-        // Offline: treat prompt as literal command, apply noise filter only
+    if (isDirectCommand) {
+        // Direct command — skip AI translation entirely (saves 1 AI call)
+        command = prompt;
+    }
+    else if (offlineMode) {
+        // Offline: treat prompt as literal command
         console.error("[open-terminal] offline mode (no API key) — running as literal command");
         command = prompt;
     }

package/dist/output-processor.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AI-powered output processor — uses cheap AI to intelligently summarize any output
 // NOTHING is hardcoded. The AI decides what's important, what's noise, what to keep.
-import { getProvider } from "./providers/index.js";
+import { getOutputProvider } from "./providers/index.js";
 import { estimateTokens } from "./tokens.js";
 import { recordSaving } from "./economy.js";
 import { discoverOutputHints } from "./context-hints.js";
@@ -8,9 +8,76 @@ import { formatProfileHints } from "./tool-profiles.js";
 import { stripAnsi } from "./compression.js";
 import { stripNoise } from "./noise-filter.js";
 const MIN_LINES_TO_PROCESS = 15;
-// Reserve ~2000 chars for system prompt + hints + profile + overhead
-const PROMPT_OVERHEAD_CHARS = 2000;
-const MAX_OUTPUT_FOR_AI = 6000; // chars of output to send to AI (leaves room for prompt overhead)
+const MAX_OUTPUT_FOR_AI = 6000;
+// ── Output fingerprinting — skip AI for outputs we can summarize instantly ──
+// Checks the command output against fixed patterns first, then against the in-memory summary cache.
+// Returns a short summary string, or null if nothing matched and AI should handle it.
+function fingerprint(command, output, exitCode) {
+    const trimmed = output.trim();
+    const lines = trimmed.split("\n").filter(l => l.trim()); // non-blank lines only
+    // Empty output counts as success when exitCode is 0 or undefined (silent build, lint, etc.). NOTE(review): the visible caller passes no exitCode, so it is always undefined there.
+    if (lines.length === 0 && (exitCode === 0 || exitCode === undefined)) {
+        return "✓ Success (no output)";
+    }
+    // Single non-blank line under 80 chars — returned verbatim without AI. NOTE(review): this runs before the git rules, so the two single-line git patterns below look unreachable.
+    if (lines.length === 1 && trimmed.length < 80) {
+        return trimmed; // Already concise enough
+    }
+    // Git: common known patterns (the first two have no /m flag, so they must match the entire trimmed output)
+    if (/^Already up to date\.?$/i.test(trimmed))
+        return "✓ Already up to date";
+    if (/^nothing to commit, working tree clean$/i.test(trimmed))
+        return "✓ Clean working tree, nothing to commit";
+    if (/^On branch \S+\nnothing to commit/m.test(trimmed)) {
+        const branch = trimmed.match(/^On branch (\S+)/)?.[1]; // NOTE(review): no /m flag and no fallback — undefined when "On branch" is not the first line
+        return `✓ On branch ${branch}, clean working tree`;
+    }
+    if (/^Your branch is up to date/m.test(trimmed) && /nothing to commit/m.test(trimmed)) {
+        const branch = trimmed.match(/^On branch (\S+)/m)?.[1] ?? "?";
+        return `✓ Branch ${branch} up to date, clean`;
+    }
+    // Commands starting with tsc/bun/npm/yarn/pnpm: at most 3 non-blank lines and no error/fail text is reported as a build success. NOTE(review): this applies to any subcommand of those tools, not only builds.
+    if (/^(tsc|bun|npm|yarn|pnpm)\s/.test(command)) {
+        if (lines.length <= 3 && (exitCode === 0 || exitCode === undefined) && !/error|Error|ERROR|fail|FAIL/.test(trimmed)) {
+            return `✓ Build succeeded${lines.length > 0 ? ` (${lines.length} lines)` : ""}`;
+        }
+    }
+    // Install success: "install"/"installed" followed on the same line by "<N> package(s)", with no error/fail text anywhere
+    if (/\binstall(ed)?\b.*\d+\s+packages?/i.test(trimmed) && !/error|Error|fail/i.test(trimmed)) {
+        const pkgMatch = trimmed.match(/(\d+)\s+packages?/); // first "<N> package(s)" anywhere in the output
+        return `✓ Installed ${pkgMatch?.[1] ?? "?"} packages`;
+    }
+    // Permission denied / not found — short errors (at most 3 non-blank lines) pass through
+    if (lines.length <= 3 && /permission denied|command not found|No such file|ENOENT/i.test(trimmed)) {
+        return trimmed; // Already short enough, preserve error verbatim
+    }
+    // Hash-based dedup: return the cached summary stored under this output's hash. NOTE(review): the key is a 32-bit hash of the trimmed output only — the command is not part of it and colliding outputs would share a summary.
+    const hash = simpleHash(trimmed);
+    const cached = outputCache.get(hash);
+    if (cached) // truthiness check: an empty-string summary is treated as a miss
+        return cached;
+    return null; // No fingerprint match — AI should handle this
+}
+// Simple string hash for output dedup: folds each UTF-16 code unit into a signed 32-bit integer. Not collision-resistant.
+function simpleHash(s) {
+    let hash = 0;
+    for (let i = 0; i < s.length; i++) {
+        hash = ((hash << 5) - hash + s.charCodeAt(i)) | 0; // (hash << 5) - hash is hash * 31; "| 0" wraps the result to int32
+    }
+    return hash;
+}
+// Bounded cache for output summaries (keyed by content hash). Eviction drops the oldest-inserted entry and reads never refresh an entry, so this is FIFO rather than LRU.
+const OUTPUT_CACHE_MAX = 200;
+const outputCache = new Map();
+function cacheOutputSummary(output, summary) {
+    const hash = simpleHash(output.trim()); // same key derivation as the lookup in fingerprint()
+    if (outputCache.size >= OUTPUT_CACHE_MAX) { // NOTE(review): also evicts when this hash is already cached and set() would only overwrite
+        const oldest = outputCache.keys().next().value; // Map iterates in insertion order, so the first key is the oldest
+        if (oldest !== undefined)
+            outputCache.delete(oldest);
+    }
+    outputCache.set(hash, summary);
+}
 const SUMMARIZE_PROMPT = `You are an intelligent terminal assistant. Given a user's original question and the command output, ANSWER THE QUESTION directly.
 RULES:
@@ -30,6 +97,23 @@ RULES:
  */
 export async function processOutput(command, output, originalPrompt) {
     const lines = output.split("\n");
+    // Fingerprint check — skip AI entirely for known patterns (0ms, $0)
+    const fp = fingerprint(command, output);
+    if (fp && !originalPrompt) {
+        const saved = Math.max(0, estimateTokens(output) - estimateTokens(fp));
+        if (saved > 0)
+            recordSaving("compressed", saved);
+        return {
+            summary: fp,
+            full: output,
+            tokensSaved: saved,
+            aiTokensUsed: 0,
+            aiProcessed: false,
+            aiCostUsd: 0,
+            savingsValueUsd: 0,
+            netSavingsUsd: 0,
+        };
+    }
     // Short output — skip AI UNLESS we have an original prompt (NL mode needs answer framing)
     if (lines.length <= MIN_LINES_TO_PROCESS && !originalPrompt) {
         return {
@@ -62,8 +146,12 @@ export async function processOutput(command, output, originalPrompt) {
         // Inject tool-specific profile hints
         const profileBlock = formatProfileHints(command);
         const profileHints = profileBlock ? `\n\n${profileBlock}` : "";
-        const provider = getProvider();
+        // Use output-optimized provider (Groq llama-8b: fastest + best compression)
+        // Falls back to main provider if Groq unavailable
+        const provider = getOutputProvider();
+        const outputModel = provider.name === "groq" ? "llama-3.1-8b-instant" : undefined;
         const summary = await provider.complete(`${originalPrompt ? `User asked: ${originalPrompt}\n` : ""}Command: ${command}\nOutput (${lines.length} lines):\n${toSummarize}${hintsBlock}${profileHints}`, {
+            model: outputModel,
             system: SUMMARIZE_PROMPT,
             maxTokens: 300,
             temperature: 0.2,
@@ -94,6 +182,8 @@ export async function processOutput(command, output, originalPrompt) {
         if (netSavingsUsd > 0 && saved > 0) {
             recordSaving("compressed", saved);
         }
+        // Cache the AI summary for future identical outputs
+        cacheOutputSummary(output, summary);
         return {
             summary,
             full: output,

package/dist/providers/index.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { GroqProvider } from "./groq.js";
 import { XaiProvider } from "./xai.js";
 export { DEFAULT_PROVIDER_CONFIG } from "./base.js";
 let _provider = null;
+let _outputProvider = null;
 let _failedProviders = new Set();
 /** Get the active LLM provider. Auto-detects based on available API keys. */
 export function getProvider(config) {
@@ -18,8 +19,27 @@ export function getProvider(config) {
 /** Reset the cached main provider and output provider, and clear the failed-provider set (useful when config changes). */
 export function resetProvider() {
     _provider = null;
+    _outputProvider = null;
     _failedProviders.clear();
 }
+/**
+ * Get the provider used for output summarization; the result is cached in _outputProvider until resetProvider() is called.
+ * Only Groq is tried directly here (constructed and checked with isAvailable()); no other provider order is applied in this function.
+ * Falls back to whatever getProvider() returns if Groq is unavailable.
+ */
+export function getOutputProvider() {
+    if (_outputProvider)
+        return _outputProvider;
+    // Prefer Groq for output processing (fastest + best compression in evals)
+    const groq = new GroqProvider();
+    if (groq.isAvailable()) {
+        _outputProvider = groq;
+        return groq;
+    }
+    // Fall back to main provider (getProvider() is called with no config argument)
+    _outputProvider = getProvider();
+    return _outputProvider;
+}
 /** Get a fallback-wrapped provider that tries alternatives on failure */
 export function getProviderWithFallback(config) {
     const primary = getProvider(config);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hasna/terminal",
-  "version": "3.0.1",
+  "version": "3.2.0",
   "description": "Smart terminal wrapper for AI agents and humans — structured output, token compression, MCP server, natural language",
   "type": "module",
   "files": [

package/src/ai.ts CHANGED Viewed

@@ -210,24 +210,43 @@ function buildSystemPrompt(perms: Permissions, sessionEntries: SessionEntry[], c
   const wantsAnalysis = /\b(quality|lint|coverage|complexity|unused|dead code|security|audit|scan|dependency)\b/i.test(nl);
   const blockedAltBlock = wantsAnalysis ? `\nBLOCKED ALTERNATIVES: If your preferred command needs installing packages, try READ-ONLY alternatives (grep, cat, wc, awk). NEVER give up on analysis questions.` : "";
-  return `You are a terminal assistant. Output ONLY the exact shell command — no explanation, no markdown, no backticks.
-RULES:
-- SIMPLICITY FIRST: Use the simplest command. Prefer grep | sort | head over 10-pipe chains.
-- ALWAYS use grep -rn when searching directories. NEVER grep without -r on a directory.
-- When user refers to items from previous output, use EXACT names shown.
-- For text search use grep -rn, NOT nm or objdump.
-- macOS/BSD tools: du -d 1 (not --max-depth), NEVER grep -P, use grep -E for extended regex.
-- NEVER invent commands. Stick to standard Unix/macOS.
-- NEVER install packages. READ-ONLY terminal.
-- NEVER modify source code. Only observe.
-- Search src/ not dist/ or node_modules/.
-- Use exact file paths from project context. Do NOT guess paths.
-- For DESTRUCTIVE requests: output BLOCKED: <reason>.
-- ACTION vs CONCEPTUAL: "run/test/build/check" → executable command. "explain/what does X mean" → read docs.
-- EXISTENCE CHECKS: "is there/does X exist" → use ls/find/test, NEVER run/launch.${astBlock}${compoundBlock}${blockedAltBlock}
+  return `Translate to bash. One command. Simplest form. No explanation.
+list files in current directory → ls
+list all files including hidden → ls -a
+show open files → lsof
+create copy of a.txt as b.txt → cp a.txt b.txt
+create file test.txt → touch test.txt
+make directory testdir → mkdir testdir
+display routing table → route
+show last logged in users → last
+show file stats → stat file
+print directory tree 2 levels → tree -L 2
+count word occurrences in file → grep -c "word" file
+print number of files in dir → ls -1 | wc -l
+print first line of file → head -1 file
+print last line of file → tail -1 file
+print lines 3 to 5 of file → sed -n '3,5p' file
+print every other line → awk 'NR%2==1' file
+count words in file → wc -w file
+find empty files not in subdirs → find . -maxdepth 1 -type f -empty
+show system load → w
+system utilization stats → vmstat
+DNS servers → cat /etc/resolv.conf | grep nameserver
+long integer size → getconf LONG_BIT
+base64 decode string → echo 'str' | base64 -d
+change owner to nobody → chown nobody file
+unique lines in file → uniq file
+max cpu time → ulimit -t
+memory info → lsmem
+process priority → nice
+bash profile → cat ~/.bashrc
+search recursively → grep -rn "pattern" src/
+${astBlock}${compoundBlock}${blockedAltBlock}
 cwd: ${process.cwd()}
-shell: zsh / macOS${projectContext}${safetyBlock}${restrictionBlock}${contextBlock}${currentPrompt ? loadCorrectionHints(currentPrompt) : ""}`;
+shell: zsh / macOS${projectContext}${safetyBlock}${restrictionBlock}${contextBlock}${currentPrompt ? loadCorrectionHints(currentPrompt) : ""}
+Q:`;
 }
 // ── streaming translate ───────────────────────────────────────────────────────

package/src/cli.tsx CHANGED Viewed

@@ -459,17 +459,27 @@ else if (args.length > 0) {
   const perms = config.permissions;
   const sessionCtx = formatContext();
+  // ── Direct command detection ──
+  // If input starts with a known binary name, begins with ".", "/" or "~", or contains a "|" anywhere, treat it as a shell command and skip AI translation entirely.
+  // This saves one AI call ($0.0008) per invocation for agents that already know the command.
+  const KNOWN_BINARIES = /^(ls|cd|cat|head|tail|grep|rg|find|wc|du|df|git|bun|npm|pnpm|yarn|node|python3?|pip|curl|wget|ssh|scp|chmod|chown|chgrp|mkdir|rmdir|rm|cp|mv|touch|ln|tar|gzip|gunzip|zip|unzip|sed|awk|sort|uniq|cut|tr|tee|xargs|echo|printf|env|export|source|which|whereis|whatis|man|date|cal|uptime|whoami|hostname|uname|ps|top|htop|kill|killall|lsof|netstat|ss|ifconfig|ip|ping|dig|nslookup|docker|kubectl|make|cmake|cargo|go|rustc|gcc|g\+\+|clang|java|javac|mvn|gradle|npx|bunx|tsx|deno|tree|file|stat|readlink|realpath|basename|dirname|pwd|test|true|false|sleep|timeout|time|watch|diff|patch|rsync|lsblk|mount|umount|fdisk|free|vmstat|iostat|sar|strace|ltrace|gdb|lldb|sqlite3|psql|mysql|redis-cli|mongosh|jq|yq|bat|fd|exa|fzf|gh|hub|terraform|ansible|helm|k9s|lazygit|tmux|screen|nc|nmap|openssl|base64|md5|shasum|xxd|od|hexdump|strings|nm|objdump|readelf|ldd|ldconfig|pkg-config|brew|apt|yum|dnf|pacman|snap|flatpak|systemctl|service|journalctl|dmesg|crontab|at|nohup|bg|fg|jobs|disown|wait|nice|renice|ionice|chrt|taskset|ulimit|sysctl|getconf|locale|iconv|perl|ruby|php|lua|R|julia|swift|kotlin|scala|elixir|mix|rebar3|tsc|eslint|prettier|biome|ruff|black|isort|mypy|pyright|pylint|flake8|pytest|vitest|jest|mocha|ava|tap|phpunit|rspec|minitest|unittest2|nose2|coverage|nyc|c8|v8-profiler)(\s|$)/;
+  const isDirectCommand = KNOWN_BINARIES.test(prompt.trim()) || /^[.\/~]/.test(prompt.trim()) || /\|/.test(prompt);
   // Check usage learning cache first (zero AI cost for repeated queries)
   const learned = getLearned(prompt);
   if (learned && !offlineMode) {
     console.error(`[open-terminal] cached: $ ${learned}`);
   }
-  // Step 1: AI translates NL → shell command (with session context for follow-ups)
+  // Step 1: Determine command — either direct passthrough or AI translation
   let command: string;
-  if (offlineMode) {
-    // Offline: treat prompt as literal command, apply noise filter only
+  if (isDirectCommand) {
+    // Direct command — skip AI translation entirely (saves 1 AI call)
+    command = prompt;
+  } else if (offlineMode) {
+    // Offline: treat prompt as literal command
     console.error("[open-terminal] offline mode (no API key) — running as literal command");
     command = prompt;
   } else if (learned) {

package/src/output-processor.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 // AI-powered output processor — uses cheap AI to intelligently summarize any output
 // NOTHING is hardcoded. The AI decides what's important, what's noise, what to keep.
-import { getProvider } from "./providers/index.js";
+import { getProvider, getOutputProvider } from "./providers/index.js";
 import { estimateTokens } from "./tokens.js";
 import { recordSaving } from "./economy.js";
 import { discoverOutputHints } from "./context-hints.js";
@@ -31,9 +31,85 @@ export interface ProcessedOutput {
 }
 const MIN_LINES_TO_PROCESS = 15;
-// Reserve ~2000 chars for system prompt + hints + profile + overhead
-const PROMPT_OVERHEAD_CHARS = 2000;
-const MAX_OUTPUT_FOR_AI = 6000; // chars of output to send to AI (leaves room for prompt overhead)
+const MAX_OUTPUT_FOR_AI = 6000;
+// ── Output fingerprinting — skip AI for outputs we can summarize instantly ──
+// Checks the command output against fixed patterns first, then against the in-memory summary cache.
+// Returns a short summary string, or null if nothing matched and AI should handle it.
+function fingerprint(command: string, output: string, exitCode?: number): string | null {
+  const trimmed = output.trim();
+  const lines = trimmed.split("\n").filter(l => l.trim()); // non-blank lines only
+  // Empty output counts as success when exitCode is 0 or undefined (silent build, lint, etc.). NOTE(review): the visible caller passes no exitCode, so it is always undefined there.
+  if (lines.length === 0 && (exitCode === 0 || exitCode === undefined)) {
+    return "✓ Success (no output)";
+  }
+  // Single non-blank line under 80 chars — returned verbatim without AI. NOTE(review): this runs before the git rules, so the two single-line git patterns below look unreachable.
+  if (lines.length === 1 && trimmed.length < 80) {
+    return trimmed; // Already concise enough
+  }
+  // Git: common known patterns (the first two have no /m flag, so they must match the entire trimmed output)
+  if (/^Already up to date\.?$/i.test(trimmed)) return "✓ Already up to date";
+  if (/^nothing to commit, working tree clean$/i.test(trimmed)) return "✓ Clean working tree, nothing to commit";
+  if (/^On branch \S+\nnothing to commit/m.test(trimmed)) {
+    const branch = trimmed.match(/^On branch (\S+)/)?.[1]; // NOTE(review): no /m flag and no fallback — undefined when "On branch" is not the first line
+    return `✓ On branch ${branch}, clean working tree`;
+  }
+  if (/^Your branch is up to date/m.test(trimmed) && /nothing to commit/m.test(trimmed)) {
+    const branch = trimmed.match(/^On branch (\S+)/m)?.[1] ?? "?";
+    return `✓ Branch ${branch} up to date, clean`;
+  }
+  // Commands starting with tsc/bun/npm/yarn/pnpm: at most 3 non-blank lines and no error/fail text is reported as a build success. NOTE(review): this applies to any subcommand of those tools, not only builds.
+  if (/^(tsc|bun|npm|yarn|pnpm)\s/.test(command)) {
+    if (lines.length <= 3 && (exitCode === 0 || exitCode === undefined) && !/error|Error|ERROR|fail|FAIL/.test(trimmed)) {
+      return `✓ Build succeeded${lines.length > 0 ? ` (${lines.length} lines)` : ""}`;
+    }
+  }
+  // Install success: "install"/"installed" followed on the same line by "<N> package(s)", with no error/fail text anywhere
+  if (/\binstall(ed)?\b.*\d+\s+packages?/i.test(trimmed) && !/error|Error|fail/i.test(trimmed)) {
+    const pkgMatch = trimmed.match(/(\d+)\s+packages?/); // first "<N> package(s)" anywhere in the output
+    return `✓ Installed ${pkgMatch?.[1] ?? "?"} packages`;
+  }
+  // Permission denied / not found — short errors (at most 3 non-blank lines) pass through
+  if (lines.length <= 3 && /permission denied|command not found|No such file|ENOENT/i.test(trimmed)) {
+    return trimmed; // Already short enough, preserve error verbatim
+  }
+  // Hash-based dedup: return the cached summary stored under this output's hash. NOTE(review): the key is a 32-bit hash of the trimmed output only — the command is not part of it and colliding outputs would share a summary.
+  const hash = simpleHash(trimmed);
+  const cached = outputCache.get(hash);
+  if (cached) return cached; // truthiness check: an empty-string summary is treated as a miss
+  return null; // No fingerprint match — AI should handle this
+}
+// Simple string hash for output dedup: folds each UTF-16 code unit into a signed 32-bit integer. Not collision-resistant.
+function simpleHash(s: string): number {
+  let hash = 0;
+  for (let i = 0; i < s.length; i++) {
+    hash = ((hash << 5) - hash + s.charCodeAt(i)) | 0; // (hash << 5) - hash is hash * 31; "| 0" wraps the result to int32
+  }
+  return hash;
+}
+// Bounded cache for output summaries (keyed by content hash). Eviction drops the oldest-inserted entry and reads never refresh an entry, so this is FIFO rather than LRU.
+const OUTPUT_CACHE_MAX = 200;
+const outputCache = new Map<number, string>();
+function cacheOutputSummary(output: string, summary: string): void {
+  const hash = simpleHash(output.trim()); // same key derivation as the lookup in fingerprint()
+  if (outputCache.size >= OUTPUT_CACHE_MAX) { // NOTE(review): also evicts when this hash is already cached and set() would only overwrite
+    const oldest = outputCache.keys().next().value; // Map iterates in insertion order, so the first key is the oldest
+    if (oldest !== undefined) outputCache.delete(oldest);
+  }
+  outputCache.set(hash, summary);
+}
 const SUMMARIZE_PROMPT = `You are an intelligent terminal assistant. Given a user's original question and the command output, ANSWER THE QUESTION directly.
@@ -60,6 +136,23 @@ export async function processOutput(
 ): Promise<ProcessedOutput> {
   const lines = output.split("\n");
+  // Fingerprint check — skip AI entirely for known patterns (0ms, $0)
+  const fp = fingerprint(command, output);
+  if (fp && !originalPrompt) {
+    const saved = Math.max(0, estimateTokens(output) - estimateTokens(fp));
+    if (saved > 0) recordSaving("compressed", saved);
+    return {
+      summary: fp,
+      full: output,
+      tokensSaved: saved,
+      aiTokensUsed: 0,
+      aiProcessed: false,
+      aiCostUsd: 0,
+      savingsValueUsd: 0,
+      netSavingsUsd: 0,
+    };
+  }
   // Short output — skip AI UNLESS we have an original prompt (NL mode needs answer framing)
   if (lines.length <= MIN_LINES_TO_PROCESS && !originalPrompt) {
     return {
@@ -97,10 +190,14 @@ export async function processOutput(
     const profileBlock = formatProfileHints(command);
     const profileHints = profileBlock ? `\n\n${profileBlock}` : "";
-    const provider = getProvider();
+    // Use output-optimized provider (Groq llama-8b: fastest + best compression)
+    // Falls back to main provider if Groq unavailable
+    const provider = getOutputProvider();
+    const outputModel = provider.name === "groq" ? "llama-3.1-8b-instant" : undefined;
     const summary = await provider.complete(
       `${originalPrompt ? `User asked: ${originalPrompt}\n` : ""}Command: ${command}\nOutput (${lines.length} lines):\n${toSummarize}${hintsBlock}${profileHints}`,
       {
+        model: outputModel,
         system: SUMMARIZE_PROMPT,
         maxTokens: 300,
         temperature: 0.2,
@@ -138,6 +235,9 @@ export async function processOutput(
       recordSaving("compressed", saved);
     }
+    // Cache the AI summary for future identical outputs
+    cacheOutputSummary(output, summary);
     return {
       summary,
       full: output,

package/src/providers/index.ts CHANGED Viewed

@@ -11,6 +11,7 @@ export type { LLMProvider, ProviderOptions, StreamCallbacks, ProviderConfig } fr
 export { DEFAULT_PROVIDER_CONFIG } from "./base.js";
 let _provider: LLMProvider | null = null;
+let _outputProvider: LLMProvider | null = null;
 let _failedProviders: Set<string> = new Set();
 /** Get the active LLM provider. Auto-detects based on available API keys. */
@@ -25,9 +26,30 @@ export function getProvider(config?: ProviderConfig): LLMProvider {
 /** Reset the cached main provider and output provider, and clear the failed-provider set (useful when config changes). */
 export function resetProvider() {
   _provider = null;
+  _outputProvider = null;
   _failedProviders.clear();
 }
+/**
+ * Get the provider used for output summarization; the result is cached in _outputProvider until resetProvider() is called.
+ * Only Groq is tried directly here (constructed and checked with isAvailable()); no other provider order is applied in this function.
+ * Falls back to whatever getProvider() returns if Groq is unavailable.
+ */
+export function getOutputProvider(): LLMProvider {
+  if (_outputProvider) return _outputProvider;
+  // Prefer Groq for output processing (fastest + best compression in evals)
+  const groq = new GroqProvider();
+  if (groq.isAvailable()) {
+    _outputProvider = groq;
+    return groq;
+  }
+  // Fall back to main provider (getProvider() is called with no config argument)
+  _outputProvider = getProvider();
+  return _outputProvider;
+}
 /** Get a fallback-wrapped provider that tries alternatives on failure */
 export function getProviderWithFallback(config?: ProviderConfig): LLMProvider {
   const primary = getProvider(config);