osborn 0.1.6 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Conversation Brain - Gemini 2.5 Pro powered conversation manager
3
+ *
4
+ * This is the "smart brain" that:
5
+ * 1. Keeps conversation alive with relevant questions
6
+ * 2. Builds context until we understand what user wants
7
+ * 3. Dispatches background research agents
8
+ * 4. Receives progress updates and decides when to execute
9
+ * 5. Handles direct commands immediately
10
+ */
11
/** A single message in the brain's conversation history. */
export interface ConversationMessage {
    /** Who produced the message. */
    role: 'user' | 'assistant' | 'system';
    /** Raw message text. */
    content: string;
    /** When the message was recorded (local clock at push time). */
    timestamp: Date;
}
16
/** A background research job dispatched by the brain. */
export interface ResearchTask {
    /** Unique task id (generated as `research-<timestamp>-<random>` by the implementation). */
    id: string;
    /** The research question/query to investigate. */
    query: string;
    /** Lifecycle state of the task. */
    status: 'pending' | 'running' | 'completed' | 'failed';
    /** Result text, set when the task completes or fails. */
    result?: string;
    /** Set when the task is marked running. */
    startedAt?: Date;
    /** Set when the result is received. */
    completedAt?: Date;
}
24
/** The brain's decision for a single piece of user input. */
export interface BrainDecision {
    /** What to do next; produced by ConversationBrain.processUserInput. */
    action: 'speak' | 'research' | 'execute' | 'clarify' | 'direct_command';
    /** Text to speak to the user right now, if any. */
    speech?: string;
    /** Queries to dispatch to background research agents (when action === 'research'). */
    researchQueries?: string[];
    /** Detailed task description (when action === 'execute'). */
    executeTask?: string;
    /** Simple command to run immediately (when action === 'direct_command'). */
    directCommand?: string;
    /** Brief model-provided explanation of the decision. */
    reasoning?: string;
}
32
/** Mutable state the brain tracks across a conversation. */
export interface BrainState {
    /** Full message history (user, assistant, system turns). */
    conversationHistory: ConversationMessage[];
    /** Current best guess at what the user wants, or null while unknown. */
    userGoal: string | null;
    /** Confidence in userGoal, in the range 0.0-1.0. */
    userGoalConfidence: number;
    /** Research tasks not yet finished. */
    pendingResearch: ResearchTask[];
    /** Research tasks that finished (completed or failed). */
    completedResearch: ResearchTask[];
    /** True once the brain has decided to execute. */
    readyToExecute: boolean;
    /** Task description to execute, set alongside readyToExecute. */
    executionPlan: string | null;
}
41
/**
 * LLM-backed decision engine for the voice assistant.
 *
 * Tracks conversation history and research tasks, and for each user input
 * decides whether to speak, clarify, research, execute, or run a direct
 * command (see BrainDecision).
 */
export declare class ConversationBrain {
    /** LLM client used for brain decisions (separate from the realtime voice model). */
    private llm;
    /** Mutable conversation/research state; see BrainState. */
    private state;
    /** Directory the assistant operates in; included in analysis prompts. */
    private workingDir;
    /** Callback used to speak text to the user. */
    private onSpeak;
    /** Callback notified when the brain's state label changes. */
    private onStateChange;
    constructor(config: {
        workingDir: string;
        onSpeak: (text: string) => Promise<void>;
        onStateChange: (state: string) => Promise<void>;
    });
    /**
     * Process user input and decide what to do.
     * Never rejects: on LLM/stream failure the implementation returns a
     * fallback 'speak' decision.
     */
    processUserInput(input: string): Promise<BrainDecision>;
    /**
     * Receive research results from background agents; moves the matching
     * task from pendingResearch to completedResearch.
     */
    receiveResearchResult(taskId: string, result: string, success: boolean): void;
    /**
     * Check if we should provide a status update
     * (all research done, not yet ready to execute).
     */
    shouldProvideUpdate(): boolean;
    /**
     * Generate a status update based on completed research.
     * Returns null when there is nothing to report or the LLM call fails.
     */
    generateStatusUpdate(): Promise<string | null>;
    /**
     * Get current state for debugging/display (shallow copy).
     */
    getState(): BrainState;
    /**
     * Reset conversation state to its initial empty values.
     */
    reset(): void;
    private buildAnalysisPrompt;
    private parseDecision;
    private updateState;
    /**
     * Create research tasks for background agents
     * (returns only tasks still in 'pending' status).
     */
    getPendingResearchTasks(): ResearchTask[];
    /**
     * Mark a research task as running and stamp its start time.
     */
    markResearchRunning(taskId: string): void;
}
88
/**
 * Factory for ConversationBrain.
 *
 * @param config.workingDir - directory the assistant operates in
 * @param config.onSpeak - invoked to speak text to the user
 * @param config.onStateChange - invoked when the brain's state label changes
 */
export declare function createConversationBrain(config: {
    workingDir: string;
    onSpeak: (text: string) => Promise<void>;
    onStateChange: (state: string) => Promise<void>;
}): ConversationBrain;
@@ -0,0 +1,360 @@
1
+ /**
2
+ * Conversation Brain - Gemini 2.5 Pro powered conversation manager
3
+ *
4
+ * This is the "smart brain" that:
5
+ * 1. Keeps conversation alive with relevant questions
6
+ * 2. Builds context until we understand what user wants
7
+ * 3. Dispatches background research agents
8
+ * 4. Receives progress updates and decides when to execute
9
+ * 5. Handles direct commands immediately
10
+ */
11
+ import { llm } from '@livekit/agents';
12
+ import * as google from '@livekit/agents-plugin-google';
13
+ // ============================================================
14
+ // Conversation Brain Class
15
+ // ============================================================
16
export class ConversationBrain {
    llm;
    state;
    workingDir;
    onSpeak;
    onStateChange;
    constructor(config) {
        // Use Gemini 2.0 Flash for brain decisions - it's faster and less likely to conflict
        // with the Gemini Realtime voice session which uses a different model
        this.llm = new google.LLM({
            model: 'gemini-2.0-flash',
            // Set lower temperature for more consistent decisions
            temperature: 0.3,
        });
        // Add error handler to prevent unhandled rejection crashes
        this.llm.on('error', (err) => {
            const errorMsg = err.error?.message || String(err);
            // Only log non-abort errors (aborts are expected when the user interrupts)
            if (!errorMsg.includes('aborted') && !errorMsg.includes('AbortError')) {
                console.error('🧠 [Brain LLM] Error:', errorMsg);
            }
        });
        this.workingDir = config.workingDir;
        this.onSpeak = config.onSpeak;
        this.onStateChange = config.onStateChange;
        this.state = {
            conversationHistory: [],
            userGoal: null,
            userGoalConfidence: 0,
            pendingResearch: [],
            completedResearch: [],
            readyToExecute: false,
            executionPlan: null,
        };
    }
    /**
     * Process user input and decide what to do.
     * Never rejects: on any LLM/stream failure it degrades to a short
     * "try again" speak decision.
     */
    async processUserInput(input) {
        // Add to conversation history
        this.state.conversationHistory.push({
            role: 'user',
            content: input,
            timestamp: new Date(),
        });
        // Build the analysis prompt
        const prompt = this.buildAnalysisPrompt(input);
        let stream = null;
        let timeoutHandle = null;
        try {
            // Call the brain LLM for a decision
            const chatCtx = new llm.ChatContext();
            chatCtx.addMessage({ role: 'user', content: prompt });
            stream = this.llm.chat({ chatCtx });
            // Collect the streamed response text; any stream error rejects
            // this promise and is handled by the outer catch.
            const collectResponse = async () => {
                let result = '';
                for await (const chunk of stream) {
                    if (chunk.delta?.content) {
                        result += chunk.delta.content;
                    }
                }
                return result;
            };
            // Race the stream against a timeout to prevent hanging.
            // FIX: keep the timer handle so it can be cleared once the race
            // settles — previously the 30s timer was leaked on every call.
            const timeoutPromise = new Promise((_, reject) => {
                timeoutHandle = setTimeout(() => reject(new Error('Brain timeout after 30s')), 30000);
            });
            const response = await Promise.race([collectResponse(), timeoutPromise]);
            // Parse the decision
            const decision = this.parseDecision(response);
            // Update state based on decision
            this.updateState(decision);
            // Add assistant response to history
            if (decision.speech) {
                this.state.conversationHistory.push({
                    role: 'assistant',
                    content: decision.speech,
                    timestamp: new Date(),
                });
            }
            return decision;
        }
        catch (err) {
            const errorMsg = err.message || String(err);
            // Only log if not an abort (which is expected when user interrupts)
            if (!errorMsg.includes('aborted')) {
                console.error('🧠 [Brain] Stream error:', errorMsg);
            }
            // Close stream if it exists to prevent further errors
            if (stream) {
                try {
                    stream.close();
                }
                catch { }
            }
            return {
                action: 'speak',
                speech: "I'm having trouble processing that. Could you try again?",
            };
        }
        finally {
            // Always cancel the pending timeout so it cannot fire after we
            // have already returned (success or failure).
            if (timeoutHandle !== null) {
                clearTimeout(timeoutHandle);
            }
        }
    }
    /**
     * Receive research results from background agents.
     * Moves the matching task from pendingResearch to completedResearch;
     * silently ignores unknown task ids.
     */
    receiveResearchResult(taskId, result, success) {
        const task = this.state.pendingResearch.find(t => t.id === taskId);
        if (task) {
            task.status = success ? 'completed' : 'failed';
            task.result = result;
            task.completedAt = new Date();
            // Move to completed
            this.state.pendingResearch = this.state.pendingResearch.filter(t => t.id !== taskId);
            this.state.completedResearch.push(task);
            console.log(`🧠 Research completed: ${taskId.substring(0, 8)}... (${this.state.completedResearch.length} done)`);
        }
    }
    /**
     * Check if we should provide a status update:
     * some research finished, nothing still pending, and not yet executing.
     */
    shouldProvideUpdate() {
        // Provide update if research just completed
        return this.state.completedResearch.length > 0 &&
            this.state.pendingResearch.length === 0 &&
            !this.state.readyToExecute;
    }
    /**
     * Generate a status update based on completed research.
     * Returns null when there is no completed research or the LLM call fails.
     */
    async generateStatusUpdate() {
        if (this.state.completedResearch.length === 0)
            return null;
        const researchSummary = this.state.completedResearch
            .map(r => `- ${r.query}: ${r.result?.substring(0, 200) || 'No result'}`)
            .join('\n');
        const prompt = `Based on completed research, provide a brief conversational status update.

RESEARCH RESULTS:
${researchSummary}

USER GOAL (so far): ${this.state.userGoal || 'Still understanding...'}

Generate a 1-2 sentence update that:
1. Summarizes what you learned
2. Either asks a clarifying question OR proposes next steps
3. Sounds natural and conversational

Just the update text, no JSON.`;
        let stream = null;
        try {
            const chatCtx = new llm.ChatContext();
            chatCtx.addMessage({ role: 'user', content: prompt });
            let response = '';
            stream = this.llm.chat({ chatCtx });
            for await (const chunk of stream) {
                if (chunk.delta?.content) {
                    response += chunk.delta.content;
                }
            }
            return response.trim();
        }
        catch (err) {
            console.error('🧠 [Brain] Status update error:', err.message || err);
            if (stream) {
                try {
                    stream.close();
                }
                catch { }
            }
            return null;
        }
    }
    /**
     * Get current state for debugging/display.
     * NOTE: shallow copy — nested arrays/objects are shared with live state.
     */
    getState() {
        return { ...this.state };
    }
    /**
     * Reset conversation state to its initial empty values.
     */
    reset() {
        this.state = {
            conversationHistory: [],
            userGoal: null,
            userGoalConfidence: 0,
            pendingResearch: [],
            completedResearch: [],
            readyToExecute: false,
            executionPlan: null,
        };
    }
    // ============================================================
    // Private Methods
    // ============================================================
    /** Build the decision prompt from recent history, goal, and research context. */
    buildAnalysisPrompt(userInput) {
        const recentHistory = this.state.conversationHistory.slice(-6)
            .map(m => `${m.role.toUpperCase()}: ${m.content}`)
            .join('\n');
        const researchContext = this.state.completedResearch.length > 0
            ? `\nCOMPLETED RESEARCH:\n${this.state.completedResearch.map(r => `- ${r.query}: ${r.result?.substring(0, 300)}`).join('\n')}`
            : '';
        const pendingContext = this.state.pendingResearch.length > 0
            ? `\nPENDING RESEARCH: ${this.state.pendingResearch.map(r => r.query).join(', ')}`
            : '';
        return `You are the brain of a voice AI coding assistant. Analyze this input and decide what to do.

WORKING DIRECTORY: ${this.workingDir}

CAPABILITIES:
- Full internet access (web search, fetch URLs, API calls)
- Read/write files in the working directory
- Run shell commands (npm, git, etc.)
- Search and analyze codebases

CONVERSATION HISTORY:
${recentHistory}

CURRENT USER INPUT: "${userInput}"

CURRENT UNDERSTANDING:
- User Goal: ${this.state.userGoal || 'Unknown - still gathering context'}
- Confidence: ${Math.round(this.state.userGoalConfidence * 100)}%
- Ready to Execute: ${this.state.readyToExecute}
${researchContext}
${pendingContext}

DECIDE WHAT TO DO:

1. DIRECT_COMMAND - If user gives a simple, clear command:
- "read file X" → direct_command
- "run npm test" → direct_command
- "show me the package.json" → direct_command
- "search the web for X" → direct_command
- "look up X online" → direct_command

2. CLARIFY - If request is ambiguous, ask a specific question to understand better

3. RESEARCH - If we need more info, start background research (2-3 queries max)
- Search codebase, read docs, explore files
- Search the web for information
- Keep conversation going while research runs

4. EXECUTE - ONLY if we have HIGH confidence (>80%) about what user wants
- Must have clear plan
- Only ONE execution task at a time

5. SPEAK - Just respond conversationally (greetings, status, etc.)

IMPORTANT: Keep speech SHORT and conversational. No markdown formatting.

Respond in JSON:
{
"action": "speak" | "research" | "execute" | "clarify" | "direct_command",
"speech": "What to say to user RIGHT NOW (keep it short, conversational, NO markdown)",
"researchQueries": ["query1", "query2"], // Only if action=research
"executeTask": "detailed task description", // Only if action=execute
"directCommand": "simple command to run", // Only if action=direct_command
"updatedGoal": "What we think user wants now",
"goalConfidence": 0.0-1.0,
"reasoning": "Brief explanation"
}`;
    }
    /**
     * Parse the LLM's JSON decision out of the raw response text.
     * Also updates userGoal/userGoalConfidence as a side effect when present.
     * Falls back to a 'speak' decision when no JSON is found or parsing fails.
     */
    parseDecision(response) {
        try {
            // Extract JSON from response
            const jsonMatch = response.match(/\{[\s\S]*\}/);
            if (!jsonMatch) {
                return {
                    action: 'speak',
                    speech: response.substring(0, 200),
                };
            }
            const parsed = JSON.parse(jsonMatch[0]);
            // Update goal tracking
            if (parsed.updatedGoal) {
                this.state.userGoal = parsed.updatedGoal;
            }
            if (typeof parsed.goalConfidence === 'number') {
                this.state.userGoalConfidence = parsed.goalConfidence;
            }
            return {
                action: parsed.action || 'speak',
                speech: parsed.speech,
                researchQueries: parsed.researchQueries,
                executeTask: parsed.executeTask,
                directCommand: parsed.directCommand,
                reasoning: parsed.reasoning,
            };
        }
        catch (err) {
            console.error('Failed to parse brain decision:', err);
            return {
                action: 'speak',
                speech: "Let me think about that...",
            };
        }
    }
    /** Apply a decision's side effects: queue research tasks, flag execution. */
    updateState(decision) {
        // Track research tasks
        if (decision.action === 'research' && decision.researchQueries) {
            for (const query of decision.researchQueries) {
                this.state.pendingResearch.push({
                    id: `research-${Date.now()}-${Math.random().toString(36).substring(2, 6)}`,
                    query,
                    status: 'pending',
                });
            }
        }
        // Mark ready to execute
        if (decision.action === 'execute') {
            this.state.readyToExecute = true;
            this.state.executionPlan = decision.executeTask || null;
        }
    }
    /**
     * Create research tasks for background agents
     * (returns only tasks still in 'pending' status).
     */
    getPendingResearchTasks() {
        return this.state.pendingResearch.filter(t => t.status === 'pending');
    }
    /**
     * Mark a research task as running and stamp its start time.
     */
    markResearchRunning(taskId) {
        const task = this.state.pendingResearch.find(t => t.id === taskId);
        if (task) {
            task.status = 'running';
            task.startedAt = new Date();
        }
    }
}
355
+ // ============================================================
356
+ // Factory function
357
+ // ============================================================
358
/**
 * Convenience factory — identical to `new ConversationBrain(config)`.
 *
 * @param config.workingDir - directory the assistant operates in
 * @param config.onSpeak - invoked to speak text to the user
 * @param config.onStateChange - invoked when the brain's state label changes
 */
export function createConversationBrain(config) {
    const brain = new ConversationBrain(config);
    return brain;
}
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Fast Brain Agent — Middle-tier intelligence for the Voice AI System
3
+ *
4
+ * A fast intermediary between the realtime voice model and the Claude SDK agent.
5
+ * Uses direct API calls for ~2 second responses.
6
+ *
7
+ * Capabilities:
8
+ * - Read/write session files (spec.md + library/)
9
+ * - Web search for quick factual lookups
10
+ * - Record user decisions and preferences into spec.md
11
+ * - Post-research: synthesize findings into spec.md
12
+ * - Escalate to ask_agent when deeper research is needed
13
+ *
14
+ * Key constraint: The fast brain NEVER calls ask_agent. The realtime model is always the router.
15
+ *
16
+ * Auth chain (tried in order):
17
+ * 1. ANTHROPIC_API_KEY env var → Anthropic SDK (Haiku)
18
+ * 2. ANTHROPIC_AUTH_TOKEN env var → Anthropic SDK (Haiku)
19
+ * 3. GOOGLE_API_KEY env var → Gemini Flash fallback
20
+ *
21
+ * Note: Claude Code OAuth (macOS Keychain) was tested but Anthropic's Messages API
22
+ * rejects OAuth tokens with 401 "OAuth authentication is currently not supported."
23
+ */
24
+ /**
25
+ * Extract useful content snippets from tool responses, truncated by tool type.
26
+ * Returns null for tools with no useful content (Write confirmations, etc.)
27
+ */
28
+ export declare function extractToolContent(toolName: string, toolInput: any, toolResponse: any): string | null;
29
+ /** A single voice conversation turn from the realtime LLM's chatCtx */
30
+ export interface ConversationTurn {
31
+ role: 'user' | 'assistant';
32
+ text: string;
33
+ }
34
+ /** No-op — history is now sourced live from agent.chatCtx, passed per-call */
35
+ export declare function clearFastBrainHistory(): void;
36
/**
 * Ask the fast brain a question with access to session files and web search.
 * Returns an answer or "NEEDS_DEEPER_RESEARCH: ..." for escalation.
 *
 * Auth chain: Anthropic (API key → auth token → Keychain OAuth) → Gemini Flash fallback
 *
 * @param researchContext - Optional snapshot of the live research log.
 * @param chatHistory - Optional recent voice turns for conversational context.
 * ~2 second response time for most queries.
 */
export declare function askHaiku(workingDir: string, sessionId: string, question: string, researchContext?: string, chatHistory?: ConversationTurn[]): Promise<string>;
/**
 * Process a batch of research content chunks through the fast brain.
 * Updates spec.md and library/ files incrementally during research.
 *
 * @param isRefinement - true for the final post-research consolidation pass (higher token budget)
 * @returns the updated spec text and written library file names, or null on failure
 */
export declare function processResearchChunk(workingDir: string, sessionId: string, task: string, contentChunks: string[], isRefinement?: boolean): Promise<{
    spec: string | null;
    libraryFiles: string[];
} | null>;
56
/**
 * Augment agent SDK research results with context from spec.md.
 * Passes ALL specific details through verbatim — only ADDS context annotations.
 * The voice model downstream handles summarization for speech.
 *
 * Falls back to returning the original result if the fast brain is unavailable.
 */
export declare function augmentResearchResult(workingDir: string, sessionId: string, task: string, agentResult: string): Promise<string>;
/**
 * Update spec.md and library/ files after research completes.
 * Reads FULL untruncated data directly from Claude Agent SDK JSONL files
 * instead of receiving pre-truncated content chunks.
 *
 * Data sources:
 * - getRecentToolResults() — last 30 full tool results (Read, Bash, WebSearch, etc.)
 * - readSessionHistory() — last 50 assistant messages (agent reasoning/analysis)
 * - getSubagentTranscripts() — all sub-agent findings
 *
 * Returns { spec, libraryFiles } or null if update failed.
 */
export declare function updateSpecFromJSONL(workingDir: string, sessionId: string, task: string, researchLog: string[]): Promise<{
    spec: string | null;
    libraryFiles: string[];
} | null>;
80
/**
 * Fire-and-forget: Write a user question to spec.md Open Questions > From User
 * before the agent starts researching. Ensures every escalated question is tracked.
 *
 * Uses a simple LLM call to fuzzy-match existing questions and avoid duplicates.
 * Skips if spec.md doesn't exist yet or no provider is available.
 */
export declare function writeQuestionToSpec(workingDir: string, sessionId: string, question: string): Promise<void>;
/**
 * Fire-and-forget: Check if substantial agent output answers any open questions in spec.md.
 * Debounced (3s) to prevent flooding during rapid tool_result sequences.
 *
 * When a question is answered, marks it with [x] and moves the answer to Findings.
 *
 * @param outputType - whether `output` came from a tool result or assistant text
 */
export declare function checkOutputAgainstQuestions(workingDir: string, sessionId: string, output: string, outputType: 'tool_result' | 'assistant_text'): Promise<void>;
95
/**
 * Generate a natural, contextualized voice update from raw research events.
 * Called by scheduleResearchBatch() instead of injecting raw events directly.
 *
 * Returns a natural 1-2 sentence update, or null if nothing interesting to say.
 * 3-second timeout — returns null if the LLM is too slow.
 */
export declare function contextualizeResearchUpdate(workingDir: string, sessionId: string, task: string, batchEvents: string[], researchLog: string[]): Promise<string | null>;
/**
 * Generate a proactive conversational prompt to keep the user engaged during research.
 * Called periodically (every 15s) during active research.
 *
 * Can ask open questions, discuss implications of findings, or give progress with depth.
 * Returns null/NOTHING if nothing interesting to say.
 * 3-second timeout.
 *
 * @param previousPrompts - prompts already delivered, so repeats can be avoided
 */
export declare function generateProactivePrompt(workingDir: string, sessionId: string, task: string, researchLog: string[], previousPrompts: string[]): Promise<string | null>;
/**
 * Generate a structured visual document (comparison table, Mermaid diagram,
 * analysis, or summary) from research findings.
 *
 * Reads spec.md, JSONL results, and library for context.
 * Writes the result to library/ and returns the filename + content,
 * or null if generation failed.
 */
export declare function generateVisualDocument(workingDir: string, sessionId: string, request: string, documentType: 'comparison' | 'diagram' | 'analysis' | 'summary'): Promise<{
    fileName: string;
    content: string;
} | null>;