@hasna/terminal 4.3.1 → 4.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/dist/App.js +404 -0
  2. package/dist/Browse.js +79 -0
  3. package/dist/FuzzyPicker.js +47 -0
  4. package/dist/Onboarding.js +51 -0
  5. package/dist/Spinner.js +12 -0
  6. package/dist/StatusBar.js +49 -0
  7. package/dist/ai.js +316 -0
  8. package/dist/cache.js +42 -0
  9. package/dist/cli.js +778 -0
  10. package/dist/command-rewriter.js +64 -0
  11. package/dist/command-validator.js +86 -0
  12. package/dist/compression.js +91 -0
  13. package/dist/context-hints.js +285 -0
  14. package/dist/diff-cache.js +107 -0
  15. package/dist/discover.js +212 -0
  16. package/dist/economy.js +155 -0
  17. package/dist/expand-store.js +44 -0
  18. package/dist/file-cache.js +72 -0
  19. package/dist/file-index.js +62 -0
  20. package/dist/history.js +62 -0
  21. package/dist/lazy-executor.js +54 -0
  22. package/dist/line-dedup.js +59 -0
  23. package/dist/loop-detector.js +75 -0
  24. package/dist/mcp/install.js +189 -0
  25. package/dist/mcp/server.js +56 -0
  26. package/dist/mcp/tools/batch.js +111 -0
  27. package/dist/mcp/tools/execute.js +194 -0
  28. package/dist/mcp/tools/files.js +290 -0
  29. package/dist/mcp/tools/git.js +233 -0
  30. package/dist/mcp/tools/helpers.js +63 -0
  31. package/dist/mcp/tools/memory.js +151 -0
  32. package/dist/mcp/tools/meta.js +138 -0
  33. package/dist/mcp/tools/process.js +50 -0
  34. package/dist/mcp/tools/project.js +251 -0
  35. package/dist/mcp/tools/search.js +86 -0
  36. package/dist/noise-filter.js +94 -0
  37. package/dist/output-processor.js +233 -0
  38. package/dist/output-store.js +112 -0
  39. package/dist/paths.js +28 -0
  40. package/dist/providers/anthropic.js +43 -0
  41. package/dist/providers/base.js +4 -0
  42. package/dist/providers/cerebras.js +8 -0
  43. package/dist/providers/groq.js +8 -0
  44. package/dist/providers/index.js +142 -0
  45. package/dist/providers/openai-compat.js +93 -0
  46. package/dist/providers/xai.js +8 -0
  47. package/dist/recipes/model.js +20 -0
  48. package/dist/recipes/storage.js +153 -0
  49. package/dist/search/content-search.js +70 -0
  50. package/dist/search/file-search.js +61 -0
  51. package/dist/search/filters.js +34 -0
  52. package/dist/search/index.js +5 -0
  53. package/dist/search/semantic.js +346 -0
  54. package/dist/session-boot.js +59 -0
  55. package/dist/session-context.js +55 -0
  56. package/dist/sessions-db.js +240 -0
  57. package/dist/smart-display.js +286 -0
  58. package/dist/snapshots.js +51 -0
  59. package/dist/supervisor.js +112 -0
  60. package/dist/test-watchlist.js +131 -0
  61. package/dist/tokens.js +17 -0
  62. package/dist/tool-profiles.js +130 -0
  63. package/dist/tree.js +94 -0
  64. package/dist/usage-cache.js +65 -0
  65. package/package.json +2 -1
  66. package/src/Onboarding.tsx +1 -1
  67. package/src/ai.ts +5 -4
  68. package/src/cache.ts +2 -2
  69. package/src/economy.ts +3 -3
  70. package/src/history.ts +2 -2
  71. package/src/mcp/server.ts +2 -0
  72. package/src/mcp/tools/memory.ts +4 -2
  73. package/src/output-store.ts +2 -1
  74. package/src/paths.ts +32 -0
  75. package/src/recipes/storage.ts +3 -3
  76. package/src/session-context.ts +2 -2
  77. package/src/sessions-db.ts +15 -4
  78. package/src/tool-profiles.ts +4 -3
  79. package/src/usage-cache.ts +2 -2
@@ -0,0 +1,233 @@
1
+ // AI-powered output processor — uses cheap AI to intelligently summarize any output
2
+ // NOTHING is hardcoded. The AI decides what's important, what's noise, what to keep.
3
+ import { getOutputProvider } from "./providers/index.js";
4
+ import { estimateTokens } from "./tokens.js";
5
+ import { recordSaving } from "./economy.js";
6
+ import { discoverOutputHints } from "./context-hints.js";
7
+ import { formatProfileHints } from "./tool-profiles.js";
8
+ import { stripAnsi } from "./compression.js";
9
+ import { stripNoise } from "./noise-filter.js";
10
// ── Tunables ────────────────────────────────────────────────────────────────
// Outputs at or below this line count skip AI processing (outside NL mode).
const MIN_LINES_TO_PROCESS = 15;
// Upper bound on characters handed to the summarizer; longer output is truncated.
const MAX_OUTPUT_FOR_AI = 6000;

// ── Output fingerprinting — skip AI for outputs we can summarize instantly ──
// Matches common terminal outputs that need no AI interpretation.
// Returns a short summary string, or null when AI should take over.
function fingerprint(command, output, exitCode) {
    const trimmed = output.trim();
    const lines = trimmed.split("\n").filter(l => l.trim());
    const succeeded = exitCode === 0 || exitCode === undefined;

    // Empty output with success — context-aware confirmation.
    if (lines.length === 0 && succeeded) {
        // Filesystem-writing commands get a more specific confirmation.
        const looksLikeWrite = /\btee\b|>\s*\S|>>|cat\s*<<|echo\s.*>|sed\s+-i|cp\b|mv\b|mkdir\b|touch\b/.test(command);
        return looksLikeWrite ? "✓ Write succeeded (no output)" : "✓ Success (no output)";
    }

    // A single short line is already concise — pass it through untouched.
    if (lines.length === 1 && trimmed.length < 80) {
        return trimmed;
    }

    // Git: well-known status messages.
    if (/^Already up to date\.?$/i.test(trimmed))
        return "✓ Already up to date";
    if (/^nothing to commit, working tree clean$/i.test(trimmed))
        return "✓ Clean working tree, nothing to commit";
    if (/^On branch \S+\nnothing to commit/m.test(trimmed)) {
        const branch = trimmed.match(/^On branch (\S+)/)?.[1];
        return `✓ On branch ${branch}, clean working tree`;
    }
    if (/^Your branch is up to date/m.test(trimmed) && /nothing to commit/m.test(trimmed)) {
        const branch = trimmed.match(/^On branch (\S+)/m)?.[1] ?? "?";
        return `✓ Branch ${branch} up to date, clean`;
    }

    // Build/compile success with no errors.
    if (/^(tsc|bun|npm|yarn|pnpm)\s/.test(command)
        && lines.length <= 3
        && succeeded
        && !/error|Error|ERROR|fail|FAIL/.test(trimmed)) {
        return `✓ Build succeeded${lines.length > 0 ? ` (${lines.length} lines)` : ""}`;
    }

    // npm/bun install success.
    if (/\binstall(ed)?\b.*\d+\s+packages?/i.test(trimmed) && !/error|Error|fail/i.test(trimmed)) {
        const pkgMatch = trimmed.match(/(\d+)\s+packages?/);
        return `✓ Installed ${pkgMatch?.[1] ?? "?"} packages`;
    }

    // Permission denied / not found — short errors preserved verbatim.
    if (lines.length <= 3 && /permission denied|command not found|No such file|ENOENT/i.test(trimmed)) {
        return trimmed;
    }

    // Hash-based dedup: identical output seen before → reuse its cached summary.
    const cached = outputCache.get(simpleHash(trimmed));
    if (cached)
        return cached;

    return null; // No fingerprint match — AI should handle this.
}

// Cheap 32-bit rolling hash (h = 31·h + charCode) for output dedup keys.
function simpleHash(s) {
    let h = 0;
    for (let i = 0; i < s.length; i += 1) {
        h = ((h << 5) - h + s.charCodeAt(i)) | 0;
    }
    return h;
}

// Bounded summary cache keyed by content hash. Map iteration follows
// insertion order, so evicting the first key drops the oldest entry (FIFO).
const OUTPUT_CACHE_MAX = 200;
const outputCache = new Map();

// Remember an AI summary so future identical outputs skip the AI call.
function cacheOutputSummary(output, summary) {
    if (outputCache.size >= OUTPUT_CACHE_MAX) {
        const oldest = outputCache.keys().next().value;
        if (oldest !== undefined)
            outputCache.delete(oldest);
    }
    outputCache.set(simpleHash(output.trim()), summary);
}
85
// System prompt for the summarization call in processOutput(). The model is
// asked to ANSWER the user's question from the output rather than paraphrase
// it; per-tool rules below override the generic 8-line cap where noted.
// NOTE: this is a runtime string sent to the LLM — edit with care.
const SUMMARIZE_PROMPT = `You are an intelligent terminal assistant. Given a user's original question and the command output, ANSWER THE QUESTION directly.

RULES:
- If the user asked a YES/NO question, start with Yes or No, then explain briefly
- If the user asked "how many", give the number first, then context
- If the user asked "show me X", show only X, not everything
- ANSWER the question using the data — don't just summarize the raw output
- Use symbols: ✓ for success/yes, ✗ for failure/no, ⚠ for warnings
- Maximum 8 lines
- Keep errors/failures verbatim
- Be direct and concise — the user wants an ANSWER, not a data dump
- For TEST OUTPUT: look for "X pass" and "X fail" lines. These are DEFINITIVE. If you see "42 pass, 0 fail" in the output, the answer is "42 tests pass, 0 fail." NEVER say "no tests found" or "incomplete" when pass/fail counts are visible.
- For BUILD OUTPUT: if tsc/build exits 0 with no output, it SUCCEEDED. Empty output = success.
- For GREP/SEARCH OUTPUT (file:line:match format): List ALL matches grouped by file. NEVER summarize into one sentence. Format: "N matches in M files:" then list each match. The agent needs every match, not a prose interpretation.
- For FILE LISTINGS (ls, find): show count + key entries. "23 files: src/ai.ts, src/cli.tsx, ..."
- For GIT LOG/DIFF: preserve commit hashes, file names, and +/- line counts.`;
101
/**
 * Process command output through AI summarization.
 * Cheap AI call (~100 tokens) saves 1000+ tokens downstream.
 *
 * @param command        Shell command that produced the output.
 * @param output         Raw command output.
 * @param originalPrompt User's natural-language question, if any. When present,
 *                       the AI frames an answer instead of a plain summary and
 *                       the fingerprint/short-output fast paths are bypassed.
 * @param verbosity      "minimal" | "detailed" | anything else = normal.
 * @returns { summary, full, structured?, tokensSaved, aiTokensUsed,
 *            aiProcessed, aiCostUsd, savingsValueUsd, netSavingsUsd }
 */
export async function processOutput(command, output, originalPrompt, verbosity) {
    const lines = output.split("\n");
    // Fingerprint check — skip AI entirely for known patterns (0ms, $0).
    // NOTE(review): no exit code is threaded through here, so fingerprint()
    // treats the run as successful — confirm callers cannot supply it.
    const fp = fingerprint(command, output);
    if (fp && !originalPrompt) {
        const saved = Math.max(0, estimateTokens(output) - estimateTokens(fp));
        if (saved > 0)
            recordSaving("compressed", saved);
        return {
            summary: fp,
            full: output,
            tokensSaved: saved,
            aiTokensUsed: 0,
            aiProcessed: false,
            aiCostUsd: 0,
            savingsValueUsd: 0,
            netSavingsUsd: 0,
        };
    }
    // Short output — skip AI UNLESS we have an original prompt (NL mode needs answer framing)
    if (lines.length <= MIN_LINES_TO_PROCESS && !originalPrompt) {
        return {
            summary: output,
            full: output,
            tokensSaved: 0,
            aiTokensUsed: 0,
            aiProcessed: false,
            aiCostUsd: 0,
            savingsValueUsd: 0,
            netSavingsUsd: 0,
        };
    }
    // Clean output before AI processing — strip ANSI codes and noise.
    let toSummarize = stripAnsi(output);
    toSummarize = stripNoise(toSummarize).cleaned;
    if (toSummarize.length > MAX_OUTPUT_FOR_AI) {
        const headChars = Math.floor(MAX_OUTPUT_FOR_AI * 0.6);
        const tailChars = Math.floor(MAX_OUTPUT_FOR_AI * 0.3);
        // BUGFIX: slice the CLEANED text. Previously this sliced the raw
        // `output`, silently re-introducing the ANSI codes and noise that
        // were just stripped above.
        toSummarize = toSummarize.slice(0, headChars) +
            `\n\n... (${lines.length} total lines, middle truncated) ...\n\n` +
            toSummarize.slice(-tailChars);
    }
    try {
        // Discover output hints — regex discovers patterns, AI decides what matters.
        const outputHints = discoverOutputHints(output, command);
        const hintsBlock = outputHints.length > 0
            ? `\n\nOUTPUT OBSERVATIONS:\n${outputHints.join("\n")}`
            : "";
        // Inject tool-specific profile hints.
        const profileBlock = formatProfileHints(command);
        const profileHints = profileBlock ? `\n\n${profileBlock}` : "";
        // Use output-optimized provider (Groq llama-8b: fastest + best compression).
        // Falls back to main provider if Groq unavailable.
        const provider = getOutputProvider();
        const outputModel = provider.name === "groq" ? "llama-3.1-8b-instant" : undefined;
        const verbosityHint = verbosity === "minimal" ? "\nBe ULTRA concise — 1-2 lines max. Status + key number only."
            : verbosity === "detailed" ? "\nBe thorough — include all relevant details, up to 15 lines."
                : ""; // normal = default 8 lines from SUMMARIZE_PROMPT
        const maxTok = verbosity === "minimal" ? 100 : verbosity === "detailed" ? 500 : 300;
        const summary = await provider.complete(`${originalPrompt ? `User asked: ${originalPrompt}\n` : ""}Command: ${command}\nOutput (${lines.length} lines):\n${toSummarize}${hintsBlock}${profileHints}`, {
            model: outputModel,
            system: SUMMARIZE_PROMPT + verbosityHint,
            maxTokens: maxTok,
            temperature: 0.2,
        });
        const originalTokens = estimateTokens(output);
        const summaryTokens = estimateTokens(summary);
        const saved = Math.max(0, originalTokens - summaryTokens);
        // Try to extract structured JSON if the AI returned it.
        let structured;
        try {
            const jsonMatch = summary.match(/\{[\s\S]*\}/);
            if (jsonMatch) {
                structured = JSON.parse(jsonMatch[0]);
            }
        }
        catch { /* not JSON, that's fine */ }
        // Cost calculation.
        // AI input: system prompt (~200 tokens) + command + output sent to AI.
        const aiInputTokens = estimateTokens(SUMMARIZE_PROMPT) + estimateTokens(toSummarize) + 20;
        const aiOutputTokens = summaryTokens;
        const aiTokensUsed = aiInputTokens + aiOutputTokens;
        // Cerebras qwen-3-235b pricing: $0.60/M input, $1.20/M output.
        // NOTE(review): this rate is applied even when Groq served the call — confirm intended.
        const aiCostUsd = (aiInputTokens * 0.60 + aiOutputTokens * 1.20) / 1_000_000;
        // Value of tokens saved (at Claude Sonnet $3/M input — what the agent would pay).
        const savingsValueUsd = (saved * 3.0) / 1_000_000;
        const netSavingsUsd = savingsValueUsd - aiCostUsd;
        // Only record savings if net positive (AI cost < token savings value).
        if (netSavingsUsd > 0 && saved > 0) {
            recordSaving("compressed", saved);
        }
        // Cache the AI summary for future identical outputs.
        cacheOutputSummary(output, summary);
        return {
            summary,
            full: output,
            structured,
            tokensSaved: saved,
            aiTokensUsed,
            aiProcessed: true,
            aiCostUsd,
            savingsValueUsd,
            netSavingsUsd,
        };
    }
    catch {
        // AI unavailable — fall back to simple head/tail truncation.
        const hiddenCount = lines.length - 10;
        // BUGFIX: with ≤10 lines (reachable in NL mode) the old code emitted a
        // negative "lines hidden" count; short outputs now pass through whole.
        const fallback = hiddenCount > 0
            ? `${lines.slice(0, 5).join("\n")}\n ... (${hiddenCount} lines hidden) ...\n${lines.slice(-5).join("\n")}`
            : output;
        return {
            summary: fallback,
            full: output,
            tokensSaved: Math.max(0, estimateTokens(output) - estimateTokens(fallback)),
            aiTokensUsed: 0,
            aiProcessed: false,
            aiCostUsd: 0,
            savingsValueUsd: 0,
            netSavingsUsd: 0,
        };
    }
}
227
/**
 * Lightweight gate — decides IF output should be processed at all.
 * True when the output is long enough to benefit from AI summarization.
 */
export function shouldProcess(output) {
    const lineCount = output.split("\n").length;
    return lineCount > MIN_LINES_TO_PROCESS;
}
@@ -0,0 +1,112 @@
1
+ // Output store — saves full raw output to disk when AI compresses it
2
+ // Agents can read the file for full detail. Tiered retention strategy.
3
+ import { existsSync, mkdirSync, writeFileSync, readdirSync, statSync, unlinkSync } from "fs";
4
+ import { join } from "path";
5
+ import { createHash } from "crypto";
6
+ import { getTerminalDir } from "./paths.js";
7
+ const OUTPUTS_DIR = join(getTerminalDir(), "outputs");
8
/** Create the outputs directory if it does not already exist. */
function ensureDir() {
    if (existsSync(OUTPUTS_DIR))
        return;
    mkdirSync(OUTPUTS_DIR, { recursive: true });
}
13
/**
 * Derive a short stable id for a (command, output) pair.
 * Only the first 1000 chars of output contribute, so huge outputs hash fast.
 */
function hashOutput(command, output) {
    const digestInput = command + output.slice(0, 1000);
    return createHash("md5").update(digestInput).digest("hex").slice(0, 12);
}
17
/** Stat all stored outputs, newest first. Files that vanish mid-scan are skipped. */
function listOutputs() {
    const entries = [];
    for (const name of readdirSync(OUTPUTS_DIR)) {
        if (!name.endsWith(".txt"))
            continue;
        const path = join(OUTPUTS_DIR, name);
        try {
            const stat = statSync(path);
            entries.push({ name, path, mtime: stat.mtimeMs, size: stat.size });
        }
        catch { /* deleted concurrently — skip instead of aborting the rotation */ }
    }
    return entries.sort((a, b) => b.mtime - a.mtime); // newest first
}
/**
 * Tiered retention: recent = keep all, older = keep only high-value.
 * Fixes vs. previous version: the hard-cap pass no longer calls statSync
 * twice per file, and a file disappearing mid-scan no longer aborts the
 * whole rotation (per-file try/catch in listOutputs).
 */
function rotate() {
    try {
        const now = Date.now();
        const ONE_HOUR = 60 * 60 * 1000;
        const ONE_DAY = 24 * ONE_HOUR;
        for (const file of listOutputs()) {
            const age = now - file.mtime;
            // Last 1 hour: keep everything.
            if (age < ONE_HOUR)
                continue;
            // Last 24 hours: keep outputs >2KB (meaningful compression).
            if (age < ONE_DAY) {
                if (file.size < 2000) {
                    try {
                        unlinkSync(file.path);
                    }
                    catch { }
                }
                continue;
            }
            // Older than 24h: keep only >10KB (high-value saves).
            if (file.size < 10000) {
                try {
                    unlinkSync(file.path);
                }
                catch { }
                continue;
            }
            // Older than 7 days: remove everything.
            if (age > 7 * ONE_DAY) {
                try {
                    unlinkSync(file.path);
                }
                catch { }
            }
        }
        // Hard cap: never exceed 100 files or 10MB total (newest kept first).
        const remaining = listOutputs();
        let totalSize = 0;
        for (let i = 0; i < remaining.length; i++) {
            totalSize += remaining[i].size;
            if (i >= 100 || totalSize > 10 * 1024 * 1024) {
                try {
                    unlinkSync(remaining[i].path);
                }
                catch { }
            }
        }
    }
    catch { /* rotation is best-effort; never break a save over cleanup */ }
}
+ /** Save full output to disk, return the file path */
81
+ export function saveOutput(command, rawOutput) {
82
+ ensureDir();
83
+ const hash = hashOutput(command, rawOutput);
84
+ const filename = `${hash}.txt`;
85
+ const filepath = join(OUTPUTS_DIR, filename);
86
+ const content = `$ ${command}\n${"─".repeat(60)}\n${rawOutput}`;
87
+ writeFileSync(filepath, content, "utf8");
88
+ rotate();
89
+ return filepath;
90
+ }
91
/** Format the hint line that tells agents where to find full output */
export function formatOutputHint(filepath) {
    const hint = `[full output: ${filepath}]`;
    return hint;
}
95
/** Absolute path of the directory where full raw outputs are stored. */
export function getOutputsDir() {
    return OUTPUTS_DIR;
}
99
/** Manually purge all outputs. Returns the number of files removed. */
export function purgeOutputs() {
    if (!existsSync(OUTPUTS_DIR))
        return 0;
    let removed = 0;
    for (const entry of readdirSync(OUTPUTS_DIR)) {
        try {
            unlinkSync(join(OUTPUTS_DIR, entry));
            removed += 1;
        }
        catch {
            // best-effort: skip files we cannot delete
        }
    }
    return removed;
}
package/dist/paths.js ADDED
@@ -0,0 +1,28 @@
1
+ // Centralized path resolution for open-terminal global data directory.
2
+ // Migrated from ~/.terminal/ to ~/.hasna/terminal/ with backward compat.
3
+ import { existsSync, mkdirSync } from "fs";
4
+ import { homedir } from "os";
5
+ import { join } from "path";
6
/**
 * Get the global terminal data directory.
 * New default: ~/.hasna/terminal/
 * Legacy fallback: ~/.terminal/ (if it exists and new dir doesn't)
 * Env override: HASNA_TERMINAL_DIR or TERMINAL_DIR
 */
export function getTerminalDir() {
    // Env overrides win outright; `||` (not `??`) so an empty string falls through.
    const override = process.env.HASNA_TERMINAL_DIR || process.env.TERMINAL_DIR;
    if (override) {
        return override;
    }
    const home = homedir();
    const preferred = join(home, ".hasna", "terminal");
    const legacy = join(home, ".terminal");
    // Backward compat: stick with the legacy dir while it's the only one present.
    if (!existsSync(preferred) && existsSync(legacy)) {
        return legacy;
    }
    if (!existsSync(preferred)) {
        mkdirSync(preferred, { recursive: true });
    }
    return preferred;
}
@@ -0,0 +1,43 @@
1
+ import Anthropic from "@anthropic-ai/sdk";
2
/** Anthropic (Claude) backend. Both entry points share one request builder. */
export class AnthropicProvider {
    name = "anthropic";
    client;
    constructor() {
        this.client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
    }
    /** True when an Anthropic API key is present in the environment. */
    isAvailable() {
        return !!process.env.ANTHROPIC_API_KEY;
    }
    /** Build the request payload shared by complete() and stream(). */
    #requestParams(prompt, options) {
        const params = {
            model: options.model ?? "claude-haiku-4-5-20251001",
            max_tokens: options.maxTokens ?? 256,
            temperature: options.temperature ?? 0,
            // cache_control marks the system prompt for Anthropic prompt caching.
            system: [{ type: "text", text: options.system, cache_control: { type: "ephemeral" } }],
            messages: [{ role: "user", content: prompt }],
        };
        if (options.stop) {
            params.stop_sequences = options.stop;
        }
        return params;
    }
    /** One-shot completion; throws if the first content block is not text. */
    async complete(prompt, options) {
        const message = await this.client.messages.create(this.#requestParams(prompt, options));
        const [block] = message.content;
        if (block.type !== "text")
            throw new Error("Unexpected response type");
        return block.text.trim();
    }
    /** Streaming completion; onToken receives the cumulative trimmed text. */
    async stream(prompt, options, callbacks) {
        const stream = await this.client.messages.stream(this.#requestParams(prompt, options));
        let result = "";
        for await (const chunk of stream) {
            if (chunk.type === "content_block_delta" && chunk.delta.type === "text_delta") {
                result += chunk.delta.text;
                callbacks.onToken(result.trim());
            }
        }
        return result.trim();
    }
}
@@ -0,0 +1,4 @@
1
// Provider interface for LLM backends (Anthropic, Cerebras, etc.)
// "auto" defers the choice to resolveProvider(), which picks the first
// provider whose API key is present in the environment.
export const DEFAULT_PROVIDER_CONFIG = {
    provider: "auto",
};
@@ -0,0 +1,8 @@
1
// Cerebras provider — fast inference on Qwen/Llama models
// Pure configuration subclass: all request/stream logic lives in the
// OpenAI-compatible base class; only endpoint, model, and key env var differ.
import { OpenAICompatibleProvider } from "./openai-compat.js";
export class CerebrasProvider extends OpenAICompatibleProvider {
    name = "cerebras";
    baseUrl = "https://api.cerebras.ai/v1";
    defaultModel = "qwen-3-235b-a22b-instruct-2507";
    apiKeyEnvVar = "CEREBRAS_API_KEY";
}
@@ -0,0 +1,8 @@
1
// Groq provider — ultra-fast inference
// Pure configuration subclass of the OpenAI-compatible base class.
// Note: callers may override defaultModel per request (e.g. the output
// summarizer passes "llama-3.1-8b-instant").
import { OpenAICompatibleProvider } from "./openai-compat.js";
export class GroqProvider extends OpenAICompatibleProvider {
    name = "groq";
    baseUrl = "https://api.groq.com/openai/v1";
    defaultModel = "openai/gpt-oss-120b";
    apiKeyEnvVar = "GROQ_API_KEY";
}
@@ -0,0 +1,142 @@
1
+ // Provider auto-detection and management — with fallback on failure
2
+ import { DEFAULT_PROVIDER_CONFIG } from "./base.js";
3
+ import { AnthropicProvider } from "./anthropic.js";
4
+ import { CerebrasProvider } from "./cerebras.js";
5
+ import { GroqProvider } from "./groq.js";
6
+ import { XaiProvider } from "./xai.js";
7
+ export { DEFAULT_PROVIDER_CONFIG } from "./base.js";
8
// Module-level caches. Providers are resolved lazily and memoized; a provider
// that errors at request time is added to _failedProviders so the next
// resolution skips it. _failedProviders is never reassigned → const (was let).
let _provider = null;
let _outputProvider = null;
const _failedProviders = new Set();
/**
 * Get the active LLM provider. Auto-detects based on available API keys.
 * @param config optional provider config; defaults to DEFAULT_PROVIDER_CONFIG.
 */
export function getProvider(config) {
    // Reuse the cached provider unless it has since been marked as failed.
    if (_provider && !_failedProviders.has(_provider.name))
        return _provider;
    const cfg = config ?? DEFAULT_PROVIDER_CONFIG;
    _provider = resolveProvider(cfg);
    return _provider;
}
/** Reset the cached provider (useful when config changes). */
export function resetProvider() {
    _provider = null;
    _outputProvider = null;
    _failedProviders.clear();
}
25
/**
 * Get the provider optimized for output summarization.
 * Priority: Groq (fastest, 234ms avg) > Cerebras > xAI > Anthropic.
 * Falls back to the main provider if Groq is unavailable.
 */
export function getOutputProvider() {
    if (_outputProvider)
        return _outputProvider;
    // Prefer Groq for output processing (fastest + best compression in evals);
    // otherwise reuse whatever the main resolution picks.
    const groq = new GroqProvider();
    _outputProvider = groq.isAvailable() ? groq : getProvider();
    return _outputProvider;
}
/** Get a fallback-wrapped provider that tries alternatives on failure */
export function getProviderWithFallback(config) {
    return new FallbackProvider(getProvider(config));
}
48
/**
 * Resolve a concrete provider from config.
 * Explicit provider names must have their key set or we throw; unknown names
 * (and "auto") fall through to key-based auto-detection, skipping providers
 * that previously failed. If every candidate has failed, the failure set is
 * cleared and detection runs once more before giving up.
 */
function resolveProvider(config) {
    if (config.provider !== "auto") {
        const factories = {
            cerebras: () => new CerebrasProvider(),
            anthropic: () => new AnthropicProvider(),
            groq: () => new GroqProvider(),
            xai: () => new XaiProvider(),
        };
        const build = factories[config.provider];
        if (build) {
            const explicit = build();
            if (!explicit.isAvailable())
                throw new Error(`${config.provider.toUpperCase()}_API_KEY not set`);
            return explicit;
        }
    }
    // auto: prefer Cerebras, then xAI, then Groq, then Anthropic — skip failed.
    const candidates = [
        new CerebrasProvider(),
        new XaiProvider(),
        new GroqProvider(),
        new AnthropicProvider(),
    ];
    const healthy = candidates.find(p => p.isAvailable() && !_failedProviders.has(p.name));
    if (healthy)
        return healthy;
    // All available providers have failed before — clear and retry once.
    if (_failedProviders.size > 0) {
        _failedProviders.clear();
        const retry = candidates.find(p => p.isAvailable());
        if (retry)
            return retry;
    }
    throw new Error("No API key found. Set one of:\n" +
        "  export CEREBRAS_API_KEY=your-key (free, open-source)\n" +
        "  export GROQ_API_KEY=your-key (free, fast)\n" +
        "  export XAI_API_KEY=your-key (Grok, code-optimized)\n" +
        "  export ANTHROPIC_API_KEY=your-key (Claude)");
}
89
/**
 * Provider wrapper that falls back to an alternative on API errors.
 * On failure the primary is recorded in _failedProviders and the provider
 * cache is invalidated so getProvider() re-resolves to a different backend.
 */
class FallbackProvider {
    name;
    primary;
    constructor(primary) {
        this.primary = primary;
        this.name = primary.name;
    }
    isAvailable() {
        return this.primary.isAvailable();
    }
    async complete(prompt, options) {
        try {
            return await this.primary.complete(prompt, options);
        }
        catch (err) {
            const fallback = this.getFallback();
            if (fallback)
                return fallback.complete(prompt, options);
            throw err; // no alternative — surface the original failure
        }
    }
    async stream(prompt, options, callbacks) {
        try {
            return await this.primary.stream(prompt, options, callbacks);
        }
        catch (err) {
            const fallback = this.getFallback();
            if (fallback) {
                // The fallback path doesn't stream token-by-token.
                // FIX: previously the completed text was returned without ever
                // invoking onToken, so streaming consumers rendered nothing.
                const text = await fallback.complete(prompt, options);
                callbacks.onToken(text);
                return text;
            }
            throw err;
        }
    }
    /** Mark the primary as failed and try to resolve a different provider. */
    getFallback() {
        _failedProviders.add(this.primary.name);
        _provider = null; // force re-resolve
        try {
            const next = getProvider();
            if (next.name !== this.primary.name)
                return next;
        }
        catch { /* nothing else available */ }
        return null;
    }
}
134
/** List available providers (for onboarding UI). */
export function availableProviders() {
    const probes = [
        ["cerebras", new CerebrasProvider()],
        ["groq", new GroqProvider()],
        ["xai", new XaiProvider()],
        ["anthropic", new AnthropicProvider()],
    ];
    return probes.map(([name, provider]) => ({ name, available: provider.isAvailable() }));
}
@@ -0,0 +1,93 @@
1
// Shared base class for OpenAI-compatible providers (Cerebras, Groq, xAI).
// Subclasses supply name, baseUrl, defaultModel, and apiKeyEnvVar; this class
// owns request construction, error handling, and SSE stream parsing.
export class OpenAICompatibleProvider {
    /** API key, read lazily from the subclass-declared env var. */
    get apiKey() {
        return process.env[this.apiKeyEnvVar] ?? "";
    }
    isAvailable() {
        return !!process.env[this.apiKeyEnvVar];
    }
    /** Build the JSON request body shared by complete() and stream(). */
    #body(prompt, options, streaming) {
        const payload = {
            model: options.model ?? this.defaultModel,
            max_tokens: options.maxTokens ?? 256,
            temperature: options.temperature ?? 0,
            messages: [
                { role: "system", content: options.system },
                { role: "user", content: prompt },
            ],
        };
        if (streaming)
            payload.stream = true;
        if (options.stop)
            payload.stop = options.stop;
        return payload;
    }
    /** POST to the chat endpoint; throws a descriptive error on non-2xx. */
    async #post(prompt, options, streaming) {
        const res = await fetch(`${this.baseUrl}/chat/completions`, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify(this.#body(prompt, options, streaming)),
        });
        if (!res.ok) {
            const text = await res.text();
            throw new Error(`${this.name} API error ${res.status}: ${text}`);
        }
        return res;
    }
    /** One-shot completion; returns the trimmed assistant message. */
    async complete(prompt, options) {
        const res = await this.#post(prompt, options, false);
        const json = await res.json();
        return (json.choices?.[0]?.message?.content ?? "").trim();
    }
    /**
     * Streaming completion over SSE. onToken receives the cumulative trimmed
     * text after each delta; resolves to the full trimmed result.
     */
    async stream(prompt, options, callbacks) {
        const res = await this.#post(prompt, options, true);
        const reader = res.body?.getReader();
        if (!reader)
            throw new Error("No response body");
        const decoder = new TextDecoder();
        let pending = ""; // partial SSE line carried across chunks
        let result = "";
        for (;;) {
            const { done, value } = await reader.read();
            if (done)
                break;
            pending += decoder.decode(value, { stream: true });
            const rows = pending.split("\n");
            pending = rows.pop() ?? "";
            for (const row of rows) {
                const line = row.trim();
                if (!line.startsWith("data: "))
                    continue;
                const data = line.slice(6);
                if (data === "[DONE]")
                    break;
                try {
                    const parsed = JSON.parse(data);
                    const delta = parsed.choices?.[0]?.delta?.content;
                    if (delta) {
                        result += delta;
                        callbacks.onToken(result.trim());
                    }
                }
                catch {
                    // skip malformed chunks
                }
            }
        }
        return result.trim();
    }
}