@hasna/terminal 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -482,15 +482,24 @@ else if (args.length > 0) {
482
482
  const config = loadConfig();
483
483
  const perms = config.permissions;
484
484
  const sessionCtx = formatContext();
485
+ // ── Direct command detection ──
486
+ // If input looks like a shell command (starts with known binary), skip AI translation entirely.
487
+ // This saves one AI call ($0.0008) per invocation for agents that already know the command.
488
+ const KNOWN_BINARIES = /^(ls|cd|cat|head|tail|grep|rg|find|wc|du|df|git|bun|npm|pnpm|yarn|node|python3?|pip|curl|wget|ssh|scp|chmod|chown|chgrp|mkdir|rmdir|rm|cp|mv|touch|ln|tar|gzip|gunzip|zip|unzip|sed|awk|sort|uniq|cut|tr|tee|xargs|echo|printf|env|export|source|which|whereis|whatis|man|date|cal|uptime|whoami|hostname|uname|ps|top|htop|kill|killall|lsof|netstat|ss|ifconfig|ip|ping|dig|nslookup|docker|kubectl|make|cmake|cargo|go|rustc|gcc|g\+\+|clang|java|javac|mvn|gradle|npx|bunx|tsx|deno|tree|file|stat|readlink|realpath|basename|dirname|pwd|test|true|false|sleep|timeout|time|watch|diff|patch|rsync|lsblk|mount|umount|fdisk|free|vmstat|iostat|sar|strace|ltrace|gdb|lldb|sqlite3|psql|mysql|redis-cli|mongosh|jq|yq|bat|fd|exa|fzf|gh|hub|terraform|ansible|helm|k9s|lazygit|tmux|screen|nc|nmap|openssl|base64|md5|shasum|xxd|od|hexdump|strings|nm|objdump|readelf|ldd|ldconfig|pkg-config|brew|apt|yum|dnf|pacman|snap|flatpak|systemctl|service|journalctl|dmesg|crontab|at|nohup|bg|fg|jobs|disown|wait|nice|renice|ionice|chrt|taskset|ulimit|sysctl|getconf|locale|iconv|perl|ruby|php|lua|R|julia|swift|kotlin|scala|elixir|mix|rebar3|tsc|eslint|prettier|biome|ruff|black|isort|mypy|pyright|pylint|flake8|pytest|vitest|jest|mocha|ava|tap|phpunit|rspec|minitest|unittest2|nose2|coverage|nyc|c8|v8-profiler)(\s|$)/;
489
+ const isDirectCommand = KNOWN_BINARIES.test(prompt.trim()) || /^[.\/~]/.test(prompt.trim()) || /\|/.test(prompt);
485
490
  // Check usage learning cache first (zero AI cost for repeated queries)
486
491
  const learned = getLearned(prompt);
487
492
  if (learned && !offlineMode) {
488
493
  console.error(`[open-terminal] cached: $ ${learned}`);
489
494
  }
490
- // Step 1: AI translates NL shell command (with session context for follow-ups)
495
+ // Step 1: Determine command — either direct passthrough or AI translation
491
496
  let command;
492
- if (offlineMode) {
493
- // Offline: treat prompt as literal command, apply noise filter only
497
+ if (isDirectCommand) {
498
+ // Direct command — skip AI translation entirely (saves 1 AI call)
499
+ command = prompt;
500
+ }
501
+ else if (offlineMode) {
502
+ // Offline: treat prompt as literal command
494
503
  console.error("[open-terminal] offline mode (no API key) — running as literal command");
495
504
  command = prompt;
496
505
  }
@@ -1,6 +1,6 @@
1
1
  // AI-powered output processor — uses cheap AI to intelligently summarize any output
2
2
  // NOTHING is hardcoded. The AI decides what's important, what's noise, what to keep.
3
- import { getProvider } from "./providers/index.js";
3
+ import { getOutputProvider } from "./providers/index.js";
4
4
  import { estimateTokens } from "./tokens.js";
5
5
  import { recordSaving } from "./economy.js";
6
6
  import { discoverOutputHints } from "./context-hints.js";
@@ -8,9 +8,76 @@ import { formatProfileHints } from "./tool-profiles.js";
8
8
  import { stripAnsi } from "./compression.js";
9
9
  import { stripNoise } from "./noise-filter.js";
10
10
  const MIN_LINES_TO_PROCESS = 15;
11
- // Reserve ~2000 chars for system prompt + hints + profile + overhead
12
- const PROMPT_OVERHEAD_CHARS = 2000;
13
- const MAX_OUTPUT_FOR_AI = 6000; // chars of output to send to AI (leaves room for prompt overhead)
11
+ const MAX_OUTPUT_FOR_AI = 6000;
12
+ // ── Output fingerprinting — skip AI for outputs we can summarize instantly ──
13
+ // These patterns match common terminal outputs that don't need AI interpretation.
14
+ // Returns a short summary string, or null if AI should handle it.
15
+ function fingerprint(command, output, exitCode) {
16
+ const trimmed = output.trim();
17
+ const lines = trimmed.split("\n").filter(l => l.trim());
18
+ // Empty output with success = command succeeded silently (build, lint, etc.)
19
+ if (lines.length === 0 && (exitCode === 0 || exitCode === undefined)) {
20
+ return "✓ Success (no output)";
21
+ }
22
+ // Single-line trivial outputs — pass through without AI
23
+ if (lines.length === 1 && trimmed.length < 80) {
24
+ return trimmed; // Already concise enough
25
+ }
26
+ // Git: common known patterns
27
+ if (/^Already up to date\.?$/i.test(trimmed))
28
+ return "✓ Already up to date";
29
+ if (/^nothing to commit, working tree clean$/i.test(trimmed))
30
+ return "✓ Clean working tree, nothing to commit";
31
+ if (/^On branch \S+\nnothing to commit/m.test(trimmed)) {
32
+ const branch = trimmed.match(/^On branch (\S+)/)?.[1];
33
+ return `✓ On branch ${branch}, clean working tree`;
34
+ }
35
+ if (/^Your branch is up to date/m.test(trimmed) && /nothing to commit/m.test(trimmed)) {
36
+ const branch = trimmed.match(/^On branch (\S+)/m)?.[1] ?? "?";
37
+ return `✓ Branch ${branch} up to date, clean`;
38
+ }
39
+ // Build/compile success with no errors
40
+ if (/^(tsc|bun|npm|yarn|pnpm)\s/.test(command)) {
41
+ if (lines.length <= 3 && (exitCode === 0 || exitCode === undefined) && !/error|Error|ERROR|fail|FAIL/.test(trimmed)) {
42
+ return `✓ Build succeeded${lines.length > 0 ? ` (${lines.length} lines)` : ""}`;
43
+ }
44
+ }
45
+ // npm/bun install success
46
+ if (/\binstall(ed)?\b.*\d+\s+packages?/i.test(trimmed) && !/error|Error|fail/i.test(trimmed)) {
47
+ const pkgMatch = trimmed.match(/(\d+)\s+packages?/);
48
+ return `✓ Installed ${pkgMatch?.[1] ?? "?"} packages`;
49
+ }
50
+ // Permission denied / not found — short errors pass through
51
+ if (lines.length <= 3 && /permission denied|command not found|No such file|ENOENT/i.test(trimmed)) {
52
+ return trimmed; // Already short enough, preserve error verbatim
53
+ }
54
+ // Hash-based dedup: if we've seen this exact output before, return cached summary
55
+ const hash = simpleHash(trimmed);
56
+ const cached = outputCache.get(hash);
57
+ if (cached)
58
+ return cached;
59
+ return null; // No fingerprint match — AI should handle this
60
+ }
61
+ // Simple string hash for output dedup
62
+ function simpleHash(s) {
63
+ let hash = 0;
64
+ for (let i = 0; i < s.length; i++) {
65
+ hash = ((hash << 5) - hash + s.charCodeAt(i)) | 0;
66
+ }
67
+ return hash;
68
+ }
69
+ // LRU cache for output summaries (keyed by content hash)
70
+ const OUTPUT_CACHE_MAX = 200;
71
+ const outputCache = new Map();
72
+ function cacheOutputSummary(output, summary) {
73
+ const hash = simpleHash(output.trim());
74
+ if (outputCache.size >= OUTPUT_CACHE_MAX) {
75
+ const oldest = outputCache.keys().next().value;
76
+ if (oldest !== undefined)
77
+ outputCache.delete(oldest);
78
+ }
79
+ outputCache.set(hash, summary);
80
+ }
14
81
  const SUMMARIZE_PROMPT = `You are an intelligent terminal assistant. Given a user's original question and the command output, ANSWER THE QUESTION directly.
15
82
 
16
83
  RULES:
@@ -30,6 +97,23 @@ RULES:
30
97
  */
31
98
  export async function processOutput(command, output, originalPrompt) {
32
99
  const lines = output.split("\n");
100
+ // Fingerprint check — skip AI entirely for known patterns (0ms, $0)
101
+ const fp = fingerprint(command, output);
102
+ if (fp && !originalPrompt) {
103
+ const saved = Math.max(0, estimateTokens(output) - estimateTokens(fp));
104
+ if (saved > 0)
105
+ recordSaving("compressed", saved);
106
+ return {
107
+ summary: fp,
108
+ full: output,
109
+ tokensSaved: saved,
110
+ aiTokensUsed: 0,
111
+ aiProcessed: false,
112
+ aiCostUsd: 0,
113
+ savingsValueUsd: 0,
114
+ netSavingsUsd: 0,
115
+ };
116
+ }
33
117
  // Short output — skip AI UNLESS we have an original prompt (NL mode needs answer framing)
34
118
  if (lines.length <= MIN_LINES_TO_PROCESS && !originalPrompt) {
35
119
  return {
@@ -62,8 +146,12 @@ export async function processOutput(command, output, originalPrompt) {
62
146
  // Inject tool-specific profile hints
63
147
  const profileBlock = formatProfileHints(command);
64
148
  const profileHints = profileBlock ? `\n\n${profileBlock}` : "";
65
- const provider = getProvider();
149
+ // Use output-optimized provider (Groq llama-8b: fastest + best compression)
150
+ // Falls back to main provider if Groq unavailable
151
+ const provider = getOutputProvider();
152
+ const outputModel = provider.name === "groq" ? "llama-3.1-8b-instant" : undefined;
66
153
  const summary = await provider.complete(`${originalPrompt ? `User asked: ${originalPrompt}\n` : ""}Command: ${command}\nOutput (${lines.length} lines):\n${toSummarize}${hintsBlock}${profileHints}`, {
154
+ model: outputModel,
67
155
  system: SUMMARIZE_PROMPT,
68
156
  maxTokens: 300,
69
157
  temperature: 0.2,
@@ -94,6 +182,8 @@ export async function processOutput(command, output, originalPrompt) {
94
182
  if (netSavingsUsd > 0 && saved > 0) {
95
183
  recordSaving("compressed", saved);
96
184
  }
185
+ // Cache the AI summary for future identical outputs
186
+ cacheOutputSummary(output, summary);
97
187
  return {
98
188
  summary,
99
189
  full: output,
@@ -6,6 +6,7 @@ import { GroqProvider } from "./groq.js";
6
6
  import { XaiProvider } from "./xai.js";
7
7
  export { DEFAULT_PROVIDER_CONFIG } from "./base.js";
8
8
  let _provider = null;
9
+ let _outputProvider = null;
9
10
  let _failedProviders = new Set();
10
11
  /** Get the active LLM provider. Auto-detects based on available API keys. */
11
12
  export function getProvider(config) {
@@ -18,8 +19,27 @@ export function getProvider(config) {
18
19
  /** Reset the cached provider (useful when config changes). */
19
20
  export function resetProvider() {
20
21
  _provider = null;
22
+ _outputProvider = null;
21
23
  _failedProviders.clear();
22
24
  }
25
+ /**
26
+ * Get the provider optimized for output summarization.
27
+ * Priority: Groq (fastest, 234ms avg) > Cerebras > xAI > Anthropic.
28
+ * Falls back to the main provider if Groq is unavailable.
29
+ */
30
+ export function getOutputProvider() {
31
+ if (_outputProvider)
32
+ return _outputProvider;
33
+ // Prefer Groq for output processing (fastest + best compression in evals)
34
+ const groq = new GroqProvider();
35
+ if (groq.isAvailable()) {
36
+ _outputProvider = groq;
37
+ return groq;
38
+ }
39
+ // Fall back to main provider
40
+ _outputProvider = getProvider();
41
+ return _outputProvider;
42
+ }
23
43
  /** Get a fallback-wrapped provider that tries alternatives on failure */
24
44
  export function getProviderWithFallback(config) {
25
45
  const primary = getProvider(config);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hasna/terminal",
3
- "version": "3.1.0",
3
+ "version": "3.2.0",
4
4
  "description": "Smart terminal wrapper for AI agents and humans — structured output, token compression, MCP server, natural language",
5
5
  "type": "module",
6
6
  "files": [
package/src/cli.tsx CHANGED
@@ -459,17 +459,27 @@ else if (args.length > 0) {
459
459
  const perms = config.permissions;
460
460
  const sessionCtx = formatContext();
461
461
 
462
+ // ── Direct command detection ──
463
+ // If input looks like a shell command (starts with known binary), skip AI translation entirely.
464
+ // This saves one AI call ($0.0008) per invocation for agents that already know the command.
465
+ const KNOWN_BINARIES = /^(ls|cd|cat|head|tail|grep|rg|find|wc|du|df|git|bun|npm|pnpm|yarn|node|python3?|pip|curl|wget|ssh|scp|chmod|chown|chgrp|mkdir|rmdir|rm|cp|mv|touch|ln|tar|gzip|gunzip|zip|unzip|sed|awk|sort|uniq|cut|tr|tee|xargs|echo|printf|env|export|source|which|whereis|whatis|man|date|cal|uptime|whoami|hostname|uname|ps|top|htop|kill|killall|lsof|netstat|ss|ifconfig|ip|ping|dig|nslookup|docker|kubectl|make|cmake|cargo|go|rustc|gcc|g\+\+|clang|java|javac|mvn|gradle|npx|bunx|tsx|deno|tree|file|stat|readlink|realpath|basename|dirname|pwd|test|true|false|sleep|timeout|time|watch|diff|patch|rsync|lsblk|mount|umount|fdisk|free|vmstat|iostat|sar|strace|ltrace|gdb|lldb|sqlite3|psql|mysql|redis-cli|mongosh|jq|yq|bat|fd|exa|fzf|gh|hub|terraform|ansible|helm|k9s|lazygit|tmux|screen|nc|nmap|openssl|base64|md5|shasum|xxd|od|hexdump|strings|nm|objdump|readelf|ldd|ldconfig|pkg-config|brew|apt|yum|dnf|pacman|snap|flatpak|systemctl|service|journalctl|dmesg|crontab|at|nohup|bg|fg|jobs|disown|wait|nice|renice|ionice|chrt|taskset|ulimit|sysctl|getconf|locale|iconv|perl|ruby|php|lua|R|julia|swift|kotlin|scala|elixir|mix|rebar3|tsc|eslint|prettier|biome|ruff|black|isort|mypy|pyright|pylint|flake8|pytest|vitest|jest|mocha|ava|tap|phpunit|rspec|minitest|unittest2|nose2|coverage|nyc|c8|v8-profiler)(\s|$)/;
466
+
467
+ const isDirectCommand = KNOWN_BINARIES.test(prompt.trim()) || /^[.\/~]/.test(prompt.trim()) || /\|/.test(prompt);
468
+
462
469
  // Check usage learning cache first (zero AI cost for repeated queries)
463
470
  const learned = getLearned(prompt);
464
471
  if (learned && !offlineMode) {
465
472
  console.error(`[open-terminal] cached: $ ${learned}`);
466
473
  }
467
474
 
468
- // Step 1: AI translates NL shell command (with session context for follow-ups)
475
+ // Step 1: Determine command — either direct passthrough or AI translation
469
476
  let command: string;
470
477
 
471
- if (offlineMode) {
472
- // Offline: treat prompt as literal command, apply noise filter only
478
+ if (isDirectCommand) {
479
+ // Direct command — skip AI translation entirely (saves 1 AI call)
480
+ command = prompt;
481
+ } else if (offlineMode) {
482
+ // Offline: treat prompt as literal command
473
483
  console.error("[open-terminal] offline mode (no API key) — running as literal command");
474
484
  command = prompt;
475
485
  } else if (learned) {
@@ -1,7 +1,7 @@
1
1
  // AI-powered output processor — uses cheap AI to intelligently summarize any output
2
2
  // NOTHING is hardcoded. The AI decides what's important, what's noise, what to keep.
3
3
 
4
- import { getProvider } from "./providers/index.js";
4
+ import { getProvider, getOutputProvider } from "./providers/index.js";
5
5
  import { estimateTokens } from "./tokens.js";
6
6
  import { recordSaving } from "./economy.js";
7
7
  import { discoverOutputHints } from "./context-hints.js";
@@ -31,9 +31,85 @@ export interface ProcessedOutput {
31
31
  }
32
32
 
33
33
  const MIN_LINES_TO_PROCESS = 15;
34
- // Reserve ~2000 chars for system prompt + hints + profile + overhead
35
- const PROMPT_OVERHEAD_CHARS = 2000;
36
- const MAX_OUTPUT_FOR_AI = 6000; // chars of output to send to AI (leaves room for prompt overhead)
34
+ const MAX_OUTPUT_FOR_AI = 6000;
35
+
36
+ // ── Output fingerprinting — skip AI for outputs we can summarize instantly ──
37
+ // These patterns match common terminal outputs that don't need AI interpretation.
38
+ // Returns a short summary string, or null if AI should handle it.
39
+
40
+ function fingerprint(command: string, output: string, exitCode?: number): string | null {
41
+ const trimmed = output.trim();
42
+ const lines = trimmed.split("\n").filter(l => l.trim());
43
+
44
+ // Empty output with success = command succeeded silently (build, lint, etc.)
45
+ if (lines.length === 0 && (exitCode === 0 || exitCode === undefined)) {
46
+ return "✓ Success (no output)";
47
+ }
48
+
49
+ // Single-line trivial outputs — pass through without AI
50
+ if (lines.length === 1 && trimmed.length < 80) {
51
+ return trimmed; // Already concise enough
52
+ }
53
+
54
+ // Git: common known patterns
55
+ if (/^Already up to date\.?$/i.test(trimmed)) return "✓ Already up to date";
56
+ if (/^nothing to commit, working tree clean$/i.test(trimmed)) return "✓ Clean working tree, nothing to commit";
57
+ if (/^On branch \S+\nnothing to commit/m.test(trimmed)) {
58
+ const branch = trimmed.match(/^On branch (\S+)/)?.[1];
59
+ return `✓ On branch ${branch}, clean working tree`;
60
+ }
61
+ if (/^Your branch is up to date/m.test(trimmed) && /nothing to commit/m.test(trimmed)) {
62
+ const branch = trimmed.match(/^On branch (\S+)/m)?.[1] ?? "?";
63
+ return `✓ Branch ${branch} up to date, clean`;
64
+ }
65
+
66
+ // Build/compile success with no errors
67
+ if (/^(tsc|bun|npm|yarn|pnpm)\s/.test(command)) {
68
+ if (lines.length <= 3 && (exitCode === 0 || exitCode === undefined) && !/error|Error|ERROR|fail|FAIL/.test(trimmed)) {
69
+ return `✓ Build succeeded${lines.length > 0 ? ` (${lines.length} lines)` : ""}`;
70
+ }
71
+ }
72
+
73
+ // npm/bun install success
74
+ if (/\binstall(ed)?\b.*\d+\s+packages?/i.test(trimmed) && !/error|Error|fail/i.test(trimmed)) {
75
+ const pkgMatch = trimmed.match(/(\d+)\s+packages?/);
76
+ return `✓ Installed ${pkgMatch?.[1] ?? "?"} packages`;
77
+ }
78
+
79
+ // Permission denied / not found — short errors pass through
80
+ if (lines.length <= 3 && /permission denied|command not found|No such file|ENOENT/i.test(trimmed)) {
81
+ return trimmed; // Already short enough, preserve error verbatim
82
+ }
83
+
84
+ // Hash-based dedup: if we've seen this exact output before, return cached summary
85
+ const hash = simpleHash(trimmed);
86
+ const cached = outputCache.get(hash);
87
+ if (cached) return cached;
88
+
89
+ return null; // No fingerprint match — AI should handle this
90
+ }
91
+
92
+ // Simple string hash for output dedup
93
+ function simpleHash(s: string): number {
94
+ let hash = 0;
95
+ for (let i = 0; i < s.length; i++) {
96
+ hash = ((hash << 5) - hash + s.charCodeAt(i)) | 0;
97
+ }
98
+ return hash;
99
+ }
100
+
101
+ // LRU cache for output summaries (keyed by content hash)
102
+ const OUTPUT_CACHE_MAX = 200;
103
+ const outputCache = new Map<number, string>();
104
+
105
+ function cacheOutputSummary(output: string, summary: string): void {
106
+ const hash = simpleHash(output.trim());
107
+ if (outputCache.size >= OUTPUT_CACHE_MAX) {
108
+ const oldest = outputCache.keys().next().value;
109
+ if (oldest !== undefined) outputCache.delete(oldest);
110
+ }
111
+ outputCache.set(hash, summary);
112
+ }
37
113
 
38
114
  const SUMMARIZE_PROMPT = `You are an intelligent terminal assistant. Given a user's original question and the command output, ANSWER THE QUESTION directly.
39
115
 
@@ -60,6 +136,23 @@ export async function processOutput(
60
136
  ): Promise<ProcessedOutput> {
61
137
  const lines = output.split("\n");
62
138
 
139
+ // Fingerprint check — skip AI entirely for known patterns (0ms, $0)
140
+ const fp = fingerprint(command, output);
141
+ if (fp && !originalPrompt) {
142
+ const saved = Math.max(0, estimateTokens(output) - estimateTokens(fp));
143
+ if (saved > 0) recordSaving("compressed", saved);
144
+ return {
145
+ summary: fp,
146
+ full: output,
147
+ tokensSaved: saved,
148
+ aiTokensUsed: 0,
149
+ aiProcessed: false,
150
+ aiCostUsd: 0,
151
+ savingsValueUsd: 0,
152
+ netSavingsUsd: 0,
153
+ };
154
+ }
155
+
63
156
  // Short output — skip AI UNLESS we have an original prompt (NL mode needs answer framing)
64
157
  if (lines.length <= MIN_LINES_TO_PROCESS && !originalPrompt) {
65
158
  return {
@@ -97,10 +190,14 @@ export async function processOutput(
97
190
  const profileBlock = formatProfileHints(command);
98
191
  const profileHints = profileBlock ? `\n\n${profileBlock}` : "";
99
192
 
100
- const provider = getProvider();
193
+ // Use output-optimized provider (Groq llama-8b: fastest + best compression)
194
+ // Falls back to main provider if Groq unavailable
195
+ const provider = getOutputProvider();
196
+ const outputModel = provider.name === "groq" ? "llama-3.1-8b-instant" : undefined;
101
197
  const summary = await provider.complete(
102
198
  `${originalPrompt ? `User asked: ${originalPrompt}\n` : ""}Command: ${command}\nOutput (${lines.length} lines):\n${toSummarize}${hintsBlock}${profileHints}`,
103
199
  {
200
+ model: outputModel,
104
201
  system: SUMMARIZE_PROMPT,
105
202
  maxTokens: 300,
106
203
  temperature: 0.2,
@@ -138,6 +235,9 @@ export async function processOutput(
138
235
  recordSaving("compressed", saved);
139
236
  }
140
237
 
238
+ // Cache the AI summary for future identical outputs
239
+ cacheOutputSummary(output, summary);
240
+
141
241
  return {
142
242
  summary,
143
243
  full: output,
@@ -11,6 +11,7 @@ export type { LLMProvider, ProviderOptions, StreamCallbacks, ProviderConfig } fr
11
11
  export { DEFAULT_PROVIDER_CONFIG } from "./base.js";
12
12
 
13
13
  let _provider: LLMProvider | null = null;
14
+ let _outputProvider: LLMProvider | null = null;
14
15
  let _failedProviders: Set<string> = new Set();
15
16
 
16
17
  /** Get the active LLM provider. Auto-detects based on available API keys. */
@@ -25,9 +26,30 @@ export function getProvider(config?: ProviderConfig): LLMProvider {
25
26
  /** Reset the cached provider (useful when config changes). */
26
27
  export function resetProvider() {
27
28
  _provider = null;
29
+ _outputProvider = null;
28
30
  _failedProviders.clear();
29
31
  }
30
32
 
33
+ /**
34
+ * Get the provider optimized for output summarization.
35
+ * Priority: Groq (fastest, 234ms avg) > Cerebras > xAI > Anthropic.
36
+ * Falls back to the main provider if Groq is unavailable.
37
+ */
38
+ export function getOutputProvider(): LLMProvider {
39
+ if (_outputProvider) return _outputProvider;
40
+
41
+ // Prefer Groq for output processing (fastest + best compression in evals)
42
+ const groq = new GroqProvider();
43
+ if (groq.isAvailable()) {
44
+ _outputProvider = groq;
45
+ return groq;
46
+ }
47
+
48
+ // Fall back to main provider
49
+ _outputProvider = getProvider();
50
+ return _outputProvider;
51
+ }
52
+
31
53
  /** Get a fallback-wrapped provider that tries alternatives on failure */
32
54
  export function getProviderWithFallback(config?: ProviderConfig): LLMProvider {
33
55
  const primary = getProvider(config);