osborn 0.8.11 → 0.8.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -0
- package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
- package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
- package/.claude/skills/playwright-browser/SKILL.md +90 -0
- package/.claude/skills/shadcn/SKILL.md +232 -0
- package/.claude/skills/shadcn/image.png +0 -0
- package/.claude/skills/youtube-transcript/SKILL.md +24 -0
- package/Dockerfile.sandbox +59 -0
- package/dist/claude-auth.js +51 -13
- package/dist/claude-llm.js +42 -8
- package/dist/codex-llm.js +1 -1
- package/dist/conversation-brain.d.ts +92 -0
- package/dist/conversation-brain.js +360 -0
- package/dist/fast-llm.d.ts +15 -0
- package/dist/fast-llm.js +81 -0
- package/dist/index.js +56 -10
- package/dist/pipeline-direct-llm.js +1 -1
- package/dist/prompts.js +25 -312
- package/package.json +1 -1
package/dist/claude-llm.js
CHANGED
@@ -12,7 +12,11 @@ import { EventEmitter } from 'events';
 import { saveSessionMetadata, getSessionWorkspace } from './config.js';
 import { getResearchSystemPrompt, getDirectModeResearchPrompt } from './prompts.js';
 import { existsSync, readdirSync, readFileSync } from 'node:fs';
-import { join } from 'node:path';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+// Directory of this module — used to locate co-located prompt files (e.g., turn-shape reminder).
+const __claudeLlmDir = dirname(fileURLToPath(import.meta.url));
+const TURN_SHAPE_REMINDER_PATH = join(__claudeLlmDir, 'prompts', 'turn-shape-reminder.md');
 /**
  * Strip markdown formatting for TTS (text-to-speech)
  * Removes **bold**, ##headers, ```code```, etc. so TTS doesn't read them literally
@@ -617,7 +621,7 @@ export class ClaudeLLM extends llm.LLM {
 callbacks.eventEmitter.emit('assistant_text', { text: block.text });
 const ttsChunk = stripMarkdownForTTS(block.text);
 if (ttsChunk.trim()) {
-console.log(`🔊 TTS say (${ttsChunk.length} chars): "${ttsChunk
+console.log(`🔊 TTS say (${ttsChunk.length} chars): "${ttsChunk}"`);
 callbacks.eventEmitter.emit('tts_say', { text: ttsChunk });
 }
 }
@@ -732,7 +736,7 @@ class ClaudeLLMStream extends llm.LLMStream {
 });
 return;
 }
-console.log(`🎤 User
+console.log(`🎤 User (${userText.length} chars): "${userText}"`);
 // Build Claude Agent SDK options
 const resumeSessionId = this.#opts.resumeSessionId;
 const continueSession = this.#opts.continueSession;
@@ -746,7 +750,8 @@ class ClaudeLLMStream extends llm.LLMStream {
 cwd: this.#opts.workingDirectory,
 permissionMode: this.#opts.permissionMode,
 allowedTools,
-model: this.#opts.model || '
+model: this.#opts.model || 'haiku', // haiku for speed with limited tools, sonnet for full research capabilities (including tool use trace in response)
+// model: this.#opts.model || 'claude-sonnet-4-6', // Sonnet orchestrator with named sub-agents (Haiku tested but ignored delegation rules)
 enableFileCheckpointing: true,
 extraArgs: { 'replay-user-messages': null },
 ...(this.#abortController && { abortController: this.#abortController }),
@@ -852,6 +857,35 @@ class ClaudeLLMStream extends llm.LLMStream {
 this.#eventEmitter.emit('tool_result', { name: toolName, input: toolInput, response: toolResponse });
 return {};
 }]
+}],
+// Per-turn behavioral re-anchor. Fires on EVERY user message that reaches Claude
+// (initial requests, follow-ups, mid-flight steering, resumed-session messages).
+// Reads the reminder text from disk every call, so it's hot-editable just like the
+// main prompt — edit agent/src/prompts/turn-shape-reminder.md, reconnect, next message
+// sees the new reminder. The SDK injects `additionalContext` alongside the user's actual
+// message so the model sees both the literal user input AND the reminder, weighing them
+// together. This is what fights JSONL-history-overrides-system-prompt drift on resumed
+// sessions: the conductor pattern gets re-asserted on every turn instead of being
+// anchored only at session-init time.
+UserPromptSubmit: [{
+matcher: '.*',
+hooks: [async (input) => {
+try {
+const reminder = readFileSync(TURN_SHAPE_REMINDER_PATH, 'utf-8');
+const promptPreview = String(input?.prompt || '').substring(0, 60).replace(/\n/g, ' ');
+console.log(`📌 UserPromptSubmit: injected turn-shape reminder (${reminder.length} chars) for prompt="${promptPreview}..."`);
+return {
+hookSpecificOutput: {
+hookEventName: 'UserPromptSubmit',
+additionalContext: reminder,
+},
+};
+}
+catch (err) {
+console.error('⚠️ UserPromptSubmit: failed to load turn-shape-reminder.md:', err instanceof Error ? err.message : err);
+return { hookSpecificOutput: { hookEventName: 'UserPromptSubmit' } };
+}
+}]
 }]
 },
 // Named sub-agents — Haiku overseer delegates to these specialists.
@@ -1076,12 +1110,12 @@ class ClaudeLLMStream extends llm.LLMStream {
 if (this.#opts.skipTTSQueue) {
 // Direct mode: emit event for session.say() — bypasses LiveKit's
 // BufferedTokenStream which causes stuck/delayed/out-of-order audio
-console.log(`🔊 TTS say (${ttsChunk.length} chars): "${ttsChunk
+console.log(`🔊 TTS say (${ttsChunk.length} chars): "${ttsChunk}"`);
 this.#eventEmitter.emit('tts_say', { text: ttsChunk });
 }
 else {
 // Realtime mode: use LLM stream queue (framework handles TTS)
-console.log(`🔊 TTS stream (${ttsChunk.length} chars): "${ttsChunk
+console.log(`🔊 TTS stream (${ttsChunk.length} chars): "${ttsChunk}"`);
 this.queue.put({
 id: requestId,
 delta: { role: 'assistant', content: ttsChunk },
@@ -1101,11 +1135,11 @@ class ClaudeLLMStream extends llm.LLMStream {
 const ttsText = stripMarkdownForTTS(rawResult);
 if (ttsText.trim()) {
 if (this.#opts.skipTTSQueue) {
-console.log(`🔊 TTS say result (${ttsText.length} chars): "${ttsText
+console.log(`🔊 TTS say result (${ttsText.length} chars): "${ttsText}"`);
 this.#eventEmitter.emit('tts_say', { text: ttsText });
 }
 else {
-console.log(`🔊 TTS result (${ttsText.length} chars): "${ttsText
+console.log(`🔊 TTS result (${ttsText.length} chars): "${ttsText}"`);
 this.queue.put({
 id: requestId,
 delta: { role: 'assistant', content: ttsText },
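
The UserPromptSubmit hook is the load-bearing change in this file: the reminder is re-read from disk on every turn, so the behavioral contract stays hot-editable and gets re-asserted against whatever has accumulated in the session history. A minimal standalone sketch of the same shape follows; the hook input/output fields are copied from the diff above, while the function name and fail-open wording are illustrative, not from the package:

    import { readFileSync } from 'node:fs';

    // Only the input field the diff actually reads; the real SDK type is richer.
    interface HookInput { prompt?: string }

    // Re-read the reminder on every call so file edits take effect on the next
    // turn without restarting the agent (the hot-reload behavior described above).
    function makeTurnShapeHook(reminderPath: string) {
        return async (_input: HookInput) => {
            try {
                const reminder = readFileSync(reminderPath, 'utf-8');
                return {
                    hookSpecificOutput: {
                        hookEventName: 'UserPromptSubmit' as const,
                        additionalContext: reminder, // injected alongside the user's message
                    },
                };
            }
            catch {
                // Fail open: a missing reminder file must not block the user's turn.
                return { hookSpecificOutput: { hookEventName: 'UserPromptSubmit' as const } };
            }
        };
    }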
package/dist/codex-llm.js
CHANGED
@@ -97,7 +97,7 @@ class CodexLLMStream extends llm.LLMStream {
 });
 return;
 }
-console.log(`🎤 User
+console.log(`🎤 User (${userText.length} chars): "${userText}"`);
 // Create or reuse thread
 if (!this.#thread) {
 console.log('🆕 Starting new Codex thread');
package/dist/conversation-brain.d.ts
ADDED
@@ -0,0 +1,92 @@
+/**
+ * Conversation Brain - Gemini 2.5 Pro powered conversation manager
+ *
+ * This is the "smart brain" that:
+ * 1. Keeps conversation alive with relevant questions
+ * 2. Builds context until we understand what user wants
+ * 3. Dispatches background research agents
+ * 4. Receives progress updates and decides when to execute
+ * 5. Handles direct commands immediately
+ */
+export interface ConversationMessage {
+    role: 'user' | 'assistant' | 'system';
+    content: string;
+    timestamp: Date;
+}
+export interface ResearchTask {
+    id: string;
+    query: string;
+    status: 'pending' | 'running' | 'completed' | 'failed';
+    result?: string;
+    startedAt?: Date;
+    completedAt?: Date;
+}
+export interface BrainDecision {
+    action: 'speak' | 'research' | 'execute' | 'clarify' | 'direct_command';
+    speech?: string;
+    researchQueries?: string[];
+    executeTask?: string;
+    directCommand?: string;
+    reasoning?: string;
+}
+export interface BrainState {
+    conversationHistory: ConversationMessage[];
+    userGoal: string | null;
+    userGoalConfidence: number;
+    pendingResearch: ResearchTask[];
+    completedResearch: ResearchTask[];
+    readyToExecute: boolean;
+    executionPlan: string | null;
+}
+export declare class ConversationBrain {
+    private llm;
+    private state;
+    private workingDir;
+    private onSpeak;
+    private onStateChange;
+    constructor(config: {
+        workingDir: string;
+        onSpeak: (text: string) => Promise<void>;
+        onStateChange: (state: string) => Promise<void>;
+    });
+    /**
+     * Process user input and decide what to do
+     */
+    processUserInput(input: string): Promise<BrainDecision>;
+    /**
+     * Receive research results from background agents
+     */
+    receiveResearchResult(taskId: string, result: string, success: boolean): void;
+    /**
+     * Check if we should provide a status update
+     */
+    shouldProvideUpdate(): boolean;
+    /**
+     * Generate a status update based on completed research
+     */
+    generateStatusUpdate(): Promise<string | null>;
+    /**
+     * Get current state for debugging/display
+     */
+    getState(): BrainState;
+    /**
+     * Reset conversation state
+     */
+    reset(): void;
+    private buildAnalysisPrompt;
+    private parseDecision;
+    private updateState;
+    /**
+     * Create research tasks for background agents
+     */
+    getPendingResearchTasks(): ResearchTask[];
+    /**
+     * Mark a research task as running
+     */
+    markResearchRunning(taskId: string): void;
+}
+export declare function createConversationBrain(config: {
+    workingDir: string;
+    onSpeak: (text: string) => Promise<void>;
+    onStateChange: (state: string) => Promise<void>;
+}): ConversationBrain;
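
Taken together, these declarations describe a state machine driven by BrainDecision.action. A hypothetical consumer, sketched against nothing but this .d.ts (the callbacks here just log; the import path assumes the ESM dist layout):

    import { createConversationBrain } from './conversation-brain.js';

    const brain = createConversationBrain({
        workingDir: process.cwd(),
        onSpeak: async (text) => console.log('speak:', text),
        onStateChange: async (state) => console.log('state:', state),
    });

    const decision = await brain.processUserInput('what files are in this project?');
    if (decision.action === 'research' && decision.researchQueries) {
        // Dispatch each query to a background agent, then feed results back
        // through brain.receiveResearchResult(taskId, result, success).
    }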
package/dist/conversation-brain.js
ADDED
@@ -0,0 +1,360 @@
+/**
+ * Conversation Brain - Gemini 2.5 Pro powered conversation manager
+ *
+ * This is the "smart brain" that:
+ * 1. Keeps conversation alive with relevant questions
+ * 2. Builds context until we understand what user wants
+ * 3. Dispatches background research agents
+ * 4. Receives progress updates and decides when to execute
+ * 5. Handles direct commands immediately
+ */
+import { llm } from '@livekit/agents';
+import * as google from '@livekit/agents-plugin-google';
+// ============================================================
+// Conversation Brain Class
+// ============================================================
+export class ConversationBrain {
+    llm;
+    state;
+    workingDir;
+    onSpeak;
+    onStateChange;
+    constructor(config) {
+        // Use Gemini 2.0 Flash for brain decisions - it's faster and less likely to conflict
+        // with the Gemini Realtime voice session which uses a different model
+        this.llm = new google.LLM({
+            model: 'gemini-2.0-flash',
+            // Set lower temperature for more consistent decisions
+            temperature: 0.3,
+        });
+        // Add error handler to prevent unhandled rejection crashes
+        this.llm.on('error', (err) => {
+            const errorMsg = err.error?.message || String(err);
+            // Only log non-abort errors
+            if (!errorMsg.includes('aborted') && !errorMsg.includes('AbortError')) {
+                console.error('🧠 [Brain LLM] Error:', errorMsg);
+            }
+        });
+        this.workingDir = config.workingDir;
+        this.onSpeak = config.onSpeak;
+        this.onStateChange = config.onStateChange;
+        this.state = {
+            conversationHistory: [],
+            userGoal: null,
+            userGoalConfidence: 0,
+            pendingResearch: [],
+            completedResearch: [],
+            readyToExecute: false,
+            executionPlan: null,
+        };
+    }
+    /**
+     * Process user input and decide what to do
+     */
+    async processUserInput(input) {
+        // Add to conversation history
+        this.state.conversationHistory.push({
+            role: 'user',
+            content: input,
+            timestamp: new Date(),
+        });
+        // Build the analysis prompt
+        const prompt = this.buildAnalysisPrompt(input);
+        let stream = null;
+        let streamError = null;
+        try {
+            // Call Gemini 2.5 Pro for decision
+            const chatCtx = new llm.ChatContext();
+            chatCtx.addMessage({ role: 'user', content: prompt });
+            let response = '';
+            stream = this.llm.chat({ chatCtx });
+            // Create a promise that wraps the stream iteration with proper error handling
+            const collectResponse = async () => {
+                let result = '';
+                try {
+                    for await (const chunk of stream) {
+                        if (chunk.delta?.content) {
+                            result += chunk.delta.content;
+                        }
+                    }
+                }
+                catch (e) {
+                    streamError = e;
+                    throw e;
+                }
+                return result;
+            };
+            // Race the stream against a timeout to prevent hanging
+            const timeoutPromise = new Promise((_, reject) => {
+                setTimeout(() => reject(new Error('Brain timeout after 30s')), 30000);
+            });
+            response = await Promise.race([collectResponse(), timeoutPromise]);
+            // Parse the decision
+            const decision = this.parseDecision(response);
+            // Update state based on decision
+            this.updateState(decision);
+            // Add assistant response to history
+            if (decision.speech) {
+                this.state.conversationHistory.push({
+                    role: 'assistant',
+                    content: decision.speech,
+                    timestamp: new Date(),
+                });
+            }
+            return decision;
+        }
+        catch (err) {
+            const errorMsg = err.message || String(err);
+            // Only log if not an abort (which is expected when user interrupts)
+            if (!errorMsg.includes('aborted')) {
+                console.error('🧠 [Brain] Stream error:', errorMsg);
+            }
+            // Close stream if it exists to prevent further errors
+            if (stream) {
+                try {
+                    stream.close();
+                }
+                catch { }
+            }
+            return {
+                action: 'speak',
+                speech: "I'm having trouble processing that. Could you try again?",
+            };
+        }
+    }
+    /**
+     * Receive research results from background agents
+     */
+    receiveResearchResult(taskId, result, success) {
+        const task = this.state.pendingResearch.find(t => t.id === taskId);
+        if (task) {
+            task.status = success ? 'completed' : 'failed';
+            task.result = result;
+            task.completedAt = new Date();
+            // Move to completed
+            this.state.pendingResearch = this.state.pendingResearch.filter(t => t.id !== taskId);
+            this.state.completedResearch.push(task);
+            console.log(`🧠 Research completed: ${taskId.substring(0, 8)}... (${this.state.completedResearch.length} done)`);
+        }
+    }
+    /**
+     * Check if we should provide a status update
+     */
+    shouldProvideUpdate() {
+        // Provide update if research just completed
+        return this.state.completedResearch.length > 0 &&
+            this.state.pendingResearch.length === 0 &&
+            !this.state.readyToExecute;
+    }
+    /**
+     * Generate a status update based on completed research
+     */
+    async generateStatusUpdate() {
+        if (this.state.completedResearch.length === 0)
+            return null;
+        const researchSummary = this.state.completedResearch
+            .map(r => `- ${r.query}: ${r.result?.substring(0, 200) || 'No result'}`)
+            .join('\n');
+        const prompt = `Based on completed research, provide a brief conversational status update.
+
+RESEARCH RESULTS:
+${researchSummary}
+
+USER GOAL (so far): ${this.state.userGoal || 'Still understanding...'}
+
+Generate a 1-2 sentence update that:
+1. Summarizes what you learned
+2. Either asks a clarifying question OR proposes next steps
+3. Sounds natural and conversational
+
+Just the update text, no JSON.`;
+        let stream = null;
+        try {
+            const chatCtx = new llm.ChatContext();
+            chatCtx.addMessage({ role: 'user', content: prompt });
+            let response = '';
+            stream = this.llm.chat({ chatCtx });
+            for await (const chunk of stream) {
+                if (chunk.delta?.content) {
+                    response += chunk.delta.content;
+                }
+            }
+            return response.trim();
+        }
+        catch (err) {
+            console.error('🧠 [Brain] Status update error:', err.message || err);
+            if (stream) {
+                try {
+                    stream.close();
+                }
+                catch { }
+            }
+            return null;
+        }
+    }
+    /**
+     * Get current state for debugging/display
+     */
+    getState() {
+        return { ...this.state };
+    }
+    /**
+     * Reset conversation state
+     */
+    reset() {
+        this.state = {
+            conversationHistory: [],
+            userGoal: null,
+            userGoalConfidence: 0,
+            pendingResearch: [],
+            completedResearch: [],
+            readyToExecute: false,
+            executionPlan: null,
+        };
+    }
+    // ============================================================
+    // Private Methods
+    // ============================================================
+    buildAnalysisPrompt(userInput) {
+        const recentHistory = this.state.conversationHistory.slice(-6)
+            .map(m => `${m.role.toUpperCase()}: ${m.content}`)
+            .join('\n');
+        const researchContext = this.state.completedResearch.length > 0
+            ? `\nCOMPLETED RESEARCH:\n${this.state.completedResearch.map(r => `- ${r.query}: ${r.result?.substring(0, 300)}`).join('\n')}`
+            : '';
+        const pendingContext = this.state.pendingResearch.length > 0
+            ? `\nPENDING RESEARCH: ${this.state.pendingResearch.map(r => r.query).join(', ')}`
+            : '';
+        return `You are the brain of a voice AI coding assistant. Analyze this input and decide what to do.
+
+WORKING DIRECTORY: ${this.workingDir}
+
+CAPABILITIES:
+- Full internet access (web search, fetch URLs, API calls)
+- Read/write files in the working directory
+- Run shell commands (npm, git, etc.)
+- Search and analyze codebases
+
+CONVERSATION HISTORY:
+${recentHistory}
+
+CURRENT USER INPUT: "${userInput}"
+
+CURRENT UNDERSTANDING:
+- User Goal: ${this.state.userGoal || 'Unknown - still gathering context'}
+- Confidence: ${Math.round(this.state.userGoalConfidence * 100)}%
+- Ready to Execute: ${this.state.readyToExecute}
+${researchContext}
+${pendingContext}
+
+DECIDE WHAT TO DO:
+
+1. DIRECT_COMMAND - If user gives a simple, clear command:
+- "read file X" → direct_command
+- "run npm test" → direct_command
+- "show me the package.json" → direct_command
+- "search the web for X" → direct_command
+- "look up X online" → direct_command
+
+2. CLARIFY - If request is ambiguous, ask a specific question to understand better
+
+3. RESEARCH - If we need more info, start background research (2-3 queries max)
+- Search codebase, read docs, explore files
+- Search the web for information
+- Keep conversation going while research runs
+
+4. EXECUTE - ONLY if we have HIGH confidence (>80%) about what user wants
+- Must have clear plan
+- Only ONE execution task at a time
+
+5. SPEAK - Just respond conversationally (greetings, status, etc.)
+
+IMPORTANT: Keep speech SHORT and conversational. No markdown formatting.
+
+Respond in JSON:
+{
+"action": "speak" | "research" | "execute" | "clarify" | "direct_command",
+"speech": "What to say to user RIGHT NOW (keep it short, conversational, NO markdown)",
+"researchQueries": ["query1", "query2"], // Only if action=research
+"executeTask": "detailed task description", // Only if action=execute
+"directCommand": "simple command to run", // Only if action=direct_command
+"updatedGoal": "What we think user wants now",
+"goalConfidence": 0.0-1.0,
+"reasoning": "Brief explanation"
+}`;
+    }
+    parseDecision(response) {
+        try {
+            // Extract JSON from response
+            const jsonMatch = response.match(/\{[\s\S]*\}/);
+            if (!jsonMatch) {
+                return {
+                    action: 'speak',
+                    speech: response.substring(0, 200),
+                };
+            }
+            const parsed = JSON.parse(jsonMatch[0]);
+            // Update goal tracking
+            if (parsed.updatedGoal) {
+                this.state.userGoal = parsed.updatedGoal;
+            }
+            if (typeof parsed.goalConfidence === 'number') {
+                this.state.userGoalConfidence = parsed.goalConfidence;
+            }
+            return {
+                action: parsed.action || 'speak',
+                speech: parsed.speech,
+                researchQueries: parsed.researchQueries,
+                executeTask: parsed.executeTask,
+                directCommand: parsed.directCommand,
+                reasoning: parsed.reasoning,
+            };
+        }
+        catch (err) {
+            console.error('Failed to parse brain decision:', err);
+            return {
+                action: 'speak',
+                speech: "Let me think about that...",
+            };
+        }
+    }
+    updateState(decision) {
+        // Track research tasks
+        if (decision.action === 'research' && decision.researchQueries) {
+            for (const query of decision.researchQueries) {
+                this.state.pendingResearch.push({
+                    id: `research-${Date.now()}-${Math.random().toString(36).substring(2, 6)}`,
+                    query,
+                    status: 'pending',
+                });
+            }
+        }
+        // Mark ready to execute
+        if (decision.action === 'execute') {
+            this.state.readyToExecute = true;
+            this.state.executionPlan = decision.executeTask || null;
+        }
+    }
+    /**
+     * Create research tasks for background agents
+     */
+    getPendingResearchTasks() {
+        return this.state.pendingResearch.filter(t => t.status === 'pending');
+    }
+    /**
+     * Mark a research task as running
+     */
+    markResearchRunning(taskId) {
+        const task = this.state.pendingResearch.find(t => t.id === taskId);
+        if (task) {
+            task.status = 'running';
+            task.startedAt = new Date();
+        }
+    }
+}
+// ============================================================
+// Factory function
+// ============================================================
+export function createConversationBrain(config) {
+    return new ConversationBrain(config);
+}
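
Note that the class only tracks task state (pending → running → completed/failed); actually running the research is left to the caller. A hedged sketch of the dispatch loop the API implies, where runQuery stands in for whatever background agent executes a query (this helper is not part of the package):

    import { ConversationBrain } from './conversation-brain.js';

    async function dispatchResearch(
        brain: ConversationBrain,
        runQuery: (query: string) => Promise<string>, // stand-in background agent
    ): Promise<void> {
        for (const task of brain.getPendingResearchTasks()) {
            brain.markResearchRunning(task.id);
            runQuery(task.query)
                .then((result) => brain.receiveResearchResult(task.id, result, true))
                .catch((err) => brain.receiveResearchResult(task.id, String(err), false));
        }
        // When the last task lands, shouldProvideUpdate() flips true and
        // generateStatusUpdate() can produce the spoken summary.
    }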
package/dist/fast-llm.d.ts
ADDED
@@ -0,0 +1,15 @@
+interface AcknowledgmentResult {
+    acknowledgment: string;
+    isCodingTask: boolean;
+    intent: 'code' | 'chat' | 'question' | 'permission';
+}
+/**
+ * Fast LLM for immediate acknowledgment and intent classification
+ * Target: < 200ms response time
+ */
+export declare function getAcknowledgment(userInput: string): Promise<AcknowledgmentResult>;
+/**
+ * Summarize Claude's response for voice output
+ */
+export declare function summarizeForVoice(claudeResponse: string): Promise<string>;
+export {};
package/dist/fast-llm.js
ADDED
@@ -0,0 +1,81 @@
+import Groq from 'groq-sdk';
+const groq = new Groq({
+    apiKey: process.env.GROQ_API_KEY,
+});
+/**
+ * Fast LLM for immediate acknowledgment and intent classification
+ * Target: < 200ms response time
+ */
+export async function getAcknowledgment(userInput) {
+    const response = await groq.chat.completions.create({
+        model: 'llama-3.3-70b-versatile', // Fast model
+        messages: [
+            {
+                role: 'system',
+                content: `You are Osborn, a voice-enabled coding assistant. Classify user requests:
+
+CODING TASKS (isCodingTask: true) - Anything involving:
+- Files: read, write, create, edit, list, find, search
+- Directories: what directory, current folder, list files, project structure
+- Code: fix bugs, refactor, explain code, run tests
+- Terminal: run commands, install packages, git operations
+- Project: what's in this project, show me files, analyze codebase
+
+NOT CODING (isCodingTask: false):
+- General chat: hi, how are you, tell me a joke
+- Off-topic: weather, news, recipes
+- Permission responses: yes, no, approve, deny
+
+Respond in JSON:
+{
+"acknowledgment": "brief friendly response",
+"isCodingTask": true/false,
+"intent": "code|chat|question|permission"
+}
+
+Examples:
+- "What directory are we in?" → {"acknowledgment": "Let me check", "isCodingTask": true, "intent": "code"}
+- "What files are in this project?" → {"acknowledgment": "I'll list them", "isCodingTask": true, "intent": "code"}
+- "Read package.json" → {"acknowledgment": "On it", "isCodingTask": true, "intent": "code"}
+- "Fix the bug" → {"acknowledgment": "Let me look", "isCodingTask": true, "intent": "code"}
+- "Hello" → {"acknowledgment": "Hi there!", "isCodingTask": false, "intent": "chat"}
+- "Yes, do it" → {"acknowledgment": "Got it", "isCodingTask": false, "intent": "permission"}`,
+            },
+            {
+                role: 'user',
+                content: userInput,
+            },
+        ],
+        temperature: 0.3,
+        max_tokens: 150,
+        response_format: { type: 'json_object' },
+    });
+    const content = response.choices[0]?.message?.content || '{}';
+    return JSON.parse(content);
+}
+/**
+ * Summarize Claude's response for voice output
+ */
+export async function summarizeForVoice(claudeResponse) {
+    if (claudeResponse.length < 200) {
+        return claudeResponse;
+    }
+    const response = await groq.chat.completions.create({
+        model: 'llama-3.3-70b-versatile',
+        messages: [
+            {
+                role: 'system',
+                content: `Summarize this coding assistant response for voice output.
+Keep it under 2 sentences. Focus on what was done and the result.
+Don't include code - just describe what happened.`,
+            },
+            {
+                role: 'user',
+                content: claudeResponse,
+            },
+        ],
+        temperature: 0.3,
+        max_tokens: 100,
+    });
+    return response.choices[0]?.message?.content || claudeResponse;
+}
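
Putting the two exports together, the intended flow reads as: acknowledge in under a second, route by intent, do the slow work, then compress the answer for TTS. A sketch under those assumptions (runClaude is a placeholder for the Claude pipeline; GROQ_API_KEY must be set in the environment):

    import { getAcknowledgment, summarizeForVoice } from './fast-llm.js';

    async function handleUtterance(
        userInput: string,
        runClaude: (input: string) => Promise<string>, // placeholder for the slow path
    ): Promise<string> {
        // Fast path: sub-second acknowledgment plus intent classification.
        const ack = await getAcknowledgment(userInput);
        console.log(ack.acknowledgment); // e.g. "On it"
        if (!ack.isCodingTask) {
            return ack.acknowledgment;
        }
        // Slow path: full coding agent, then shrink the result for voice.
        const full = await runClaude(userInput);
        return summarizeForVoice(full);
    }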