osborn 0.5.2 → 0.5.5

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (37)
  1. package/.claude/settings.local.json +9 -0
  2. package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
  3. package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
  4. package/.claude/skills/playwright-browser/SKILL.md +75 -0
  5. package/.claude/skills/youtube-transcript/SKILL.md +24 -0
  6. package/dist/claude-llm.d.ts +29 -1
  7. package/dist/claude-llm.js +346 -79
  8. package/dist/config.d.ts +6 -2
  9. package/dist/config.js +6 -1
  10. package/dist/fast-brain.d.ts +124 -12
  11. package/dist/fast-brain.js +1361 -96
  12. package/dist/index-3-2-26-legacy.d.ts +1 -0
  13. package/dist/index-3-2-26-legacy.js +2233 -0
  14. package/dist/index.js +889 -394
  15. package/dist/jsonl-search.d.ts +66 -0
  16. package/dist/jsonl-search.js +274 -0
  17. package/dist/leagcyprompts2.d.ts +0 -0
  18. package/dist/leagcyprompts2.js +573 -0
  19. package/dist/pipeline-direct-llm.d.ts +77 -0
  20. package/dist/pipeline-direct-llm.js +216 -0
  21. package/dist/pipeline-fastbrain.d.ts +45 -0
  22. package/dist/pipeline-fastbrain.js +367 -0
  23. package/dist/prompts-2-25-26.d.ts +0 -0
  24. package/dist/prompts-2-25-26.js +518 -0
  25. package/dist/prompts-3-2-26.d.ts +78 -0
  26. package/dist/prompts-3-2-26.js +1319 -0
  27. package/dist/prompts.d.ts +83 -8
  28. package/dist/prompts.js +1990 -374
  29. package/dist/session-access.d.ts +60 -2
  30. package/dist/session-access.js +172 -2
  31. package/dist/summary-index.d.ts +87 -0
  32. package/dist/summary-index.js +570 -0
  33. package/dist/turn-detector-shim.d.ts +24 -0
  34. package/dist/turn-detector-shim.js +83 -0
  35. package/dist/voice-io.d.ts +9 -3
  36. package/dist/voice-io.js +39 -20
  37. package/package.json +18 -11
@@ -1,33 +1,35 @@
  /**
- * Fast Brain Agent Middle-tier intelligence for the Voice AI System
+ * Fast Brain — Central Orchestrator for the Voice AI System
  *
- * A fast intermediary between the realtime voice model and the Claude SDK agent.
- * Uses direct API calls for ~2 second responses.
+ * The sole intelligence layer between the user and all backend capabilities.
+ * The realtime voice model is a thin teleprompter — it speaks what this module returns.
  *
  * Capabilities:
  * - Read/write session files (spec.md + library/)
  * - Web search for quick factual lookups
  * - Record user decisions and preferences into spec.md
- * - Post-research: synthesize findings into spec.md
- * - Escalate to ask_agent when deeper research is needed
+ * - Trigger deep research (via callbacks to index.ts)
+ * - Generate teleprompter scripts for ALL voice output
+ * - Post-research: synthesize findings from JSONL into spec.md + voice scripts
+ * - Generate visual documents (comparison, diagram, analysis, summary)
  *
- * Key constraint: The fast brain NEVER calls ask_agent. The realtime model is always the router.
+ * Central function: askFastBrain() ALL user questions route here.
+ * It returns a FastBrainResponse with a teleprompter script the voice model reads verbatim.
  *
  * Auth chain (tried in order):
  * 1. ANTHROPIC_API_KEY env var → Anthropic SDK (Haiku)
  * 2. ANTHROPIC_AUTH_TOKEN env var → Anthropic SDK (Haiku)
  * 3. GOOGLE_API_KEY env var → Gemini Flash fallback
- *
- * Note: Claude Code OAuth (macOS Keychain) was tested but Anthropic's Messages API
- * rejects OAuth tokens with 401 "OAuth authentication is currently not supported."
  */
  import Anthropic from '@anthropic-ai/sdk';
+ import { query as sdkQuery, tool as sdkTool, createSdkMcpServer } from '@anthropic-ai/claude-agent-sdk';
  import { GoogleGenAI } from '@google/genai';
  import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync } from 'fs';
  import { dirname, basename } from 'path';
- import { getSessionWorkspace, listLibraryFiles } from './config.js';
- import { FAST_BRAIN_SYSTEM_PROMPT, CHUNK_PROCESS_SYSTEM, REFINEMENT_PROCESS_SYSTEM } from './prompts.js';
- import { getRecentToolResults, readSessionHistory, getSubagentTranscripts } from './session-access.js';
+ import { z } from 'zod';
+ import { getSessionWorkspace, readSessionSpec, listLibraryFiles } from './config.js';
+ import { FAST_BRAIN_SYSTEM_PROMPT, CHUNK_PROCESS_SYSTEM, REFINEMENT_PROCESS_SYSTEM, AUGMENT_RESULT_SYSTEM, CONTEXTUALIZE_UPDATE_SYSTEM, PROACTIVE_PROMPT_SYSTEM, VISUAL_DOCUMENT_SYSTEM, RESEARCH_COMPLETION_SYSTEM, buildFastBrainSdkPrompt } from './prompts.js';
+ import { getRecentToolResults, readSessionHistory, getSubagentTranscripts, getConversationText, getSessionTranscripts, searchSessionJsonl, getSessionStats } from './session-access.js';
  // ============================================================
  // Content extraction — pulls useful snippets from tool responses
  // ============================================================
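The rewritten header says askFastBrain() returns a FastBrainResponse whose script the voice model reads verbatim, but this hunk never shows the type itself (it presumably lives in the updated fast-brain.d.ts). Inferred from the return sites later in this file, a minimal sketch of the contract might look like this; the names and shape are assumptions, not confirmed declarations:

```ts
// Sketch only — inferred from return sites in this diff, not copied from fast-brain.d.ts.
type FastBrainResponseType = 'answer' | 'recorded' | 'question' | 'research_started';

interface FastBrainResponse {
  script: string;               // teleprompter text the realtime voice model speaks verbatim
  type: FastBrainResponseType;  // how the orchestrator routed the question
}

declare function speak(text: string): void; // stand-in for the realtime TTS path

// Hypothetical caller in the realtime layer:
async function handleUserQuestion(ask: (q: string) => Promise<FastBrainResponse>, q: string) {
  const res = await ask(q);
  speak(res.script); // the voice model is a "thin teleprompter": it reads, it does not reason
}
```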
@@ -70,51 +72,81 @@ let initialized = false;
  // Model IDs — configurable per provider
  const ANTHROPIC_FAST_MODEL = 'claude-haiku-4-5-20251001';
  const GEMINI_FAST_MODEL = 'gemini-2.0-flash';
+ // Agent SDK session tracking — resume across voice questions for context continuity
+ let fastBrainSessionId = null;
+ // Gemini Chat session — persists across voice questions for context continuity.
+ // The Chat object auto-manages full conversation history (messages + tool calls).
+ // Cleared on disconnect/reconnect/session switch via clearFastBrainSession().
+ let geminiChat = null;
+ const MAX_FAST_BRAIN_HISTORY = 30;
+ let fastBrainHistory = [];
+ /** Clear fast brain session state — call on disconnect/reconnect/session switch */
+ export function clearFastBrainSession() {
+     fastBrainSessionId = null;
+     geminiChat = null;
+     fastBrainHistory = [];
+     console.log('🧠 Fast brain: session cleared (SDK + Gemini chat + Anthropic history)');
+ }
+ /** @deprecated Use clearFastBrainSession() instead */
+ export function clearFastBrainHistory() {
+     clearFastBrainSession();
+ }
  function initProvider() {
      if (initialized)
          return;
      initialized = true;
-     // 1. ANTHROPIC_API_KEY
+     // Initialize fallback clients (Gemini for fallback, Anthropic direct API if key available)
+     const googleKey = process.env.GOOGLE_API_KEY;
+     if (googleKey) {
+         geminiClient = new GoogleGenAI({ apiKey: googleKey });
+     }
      const apiKey = process.env.ANTHROPIC_API_KEY;
      if (apiKey) {
          anthropicClient = new Anthropic({ apiKey });
-         provider = 'anthropic';
-         console.log('🧠 Fast brain: using Anthropic API (ANTHROPIC_API_KEY)');
-         return;
      }
-     // 2. ANTHROPIC_AUTH_TOKEN (if user sets it explicitly)
-     const authToken = process.env.ANTHROPIC_AUTH_TOKEN;
-     if (authToken) {
-         anthropicClient = new Anthropic({ authToken });
-         provider = 'anthropic';
-         console.log('🧠 Fast brain: using Anthropic API (ANTHROPIC_AUTH_TOKEN)');
-         return;
+     else {
+         const authToken = process.env.ANTHROPIC_AUTH_TOKEN;
+         if (authToken) {
+             anthropicClient = new Anthropic({ authToken });
+         }
      }
-     // NOTE: Claude Code OAuth (macOS Keychain) was tested but Anthropic's Messages API
-     // returns 401 "OAuth authentication is currently not supported." cannot reuse it.
-     // 3. Gemini Flash fallback (uses GOOGLE_API_KEY already in .env)
-     const googleKey = process.env.GOOGLE_API_KEY;
-     if (googleKey) {
-         geminiClient = new GoogleGenAI({ apiKey: googleKey });
+     // PRIMARY: Gemini Flash fastest (~1-2s), handles 1M tokens, no cold start.
+     // Agent SDK Haiku is too slow (~10-15s) due to CLI process spawn + session overhead.
+     if (geminiClient) {
          provider = 'gemini';
-         console.log(`🧠 Fast brain: using Gemini Flash fallback (${GEMINI_FAST_MODEL})`);
-         return;
+         console.log(`🧠 Fast brain: using Gemini Flash (primary) — fastest response time`);
+         if (anthropicClient) {
+             console.log(`🧠 Fast brain: Direct Anthropic API available as fallback`);
+         }
+     }
+     else if (anthropicClient) {
+         provider = 'anthropic';
+         console.log(`🧠 Fast brain: using Anthropic API (primary) — no Gemini key available`);
+     }
+     else {
+         // Last resort: Agent SDK is slow but functional
+         provider = 'agent-sdk';
+         console.log(`🧠 Fast brain: using Claude Agent SDK (fallback) — no API keys available`);
      }
-     // No provider available
-     provider = 'none';
-     console.error('⚠️ Fast brain: no API key available — fast brain disabled');
-     console.error(' Set ANTHROPIC_API_KEY or GOOGLE_API_KEY in agent/.env');
  }
  // ============================================================
  // Tool execution (shared across providers)
  // ============================================================
- function executeTool(toolName, toolInput, workspace, sessionId, workingDir) {
+ // Track whether send_to_chat was called during a fast brain conversation.
+ // If the LLM calls send_to_chat but returns no text, we use a fallback
+ // instead of "No answer found."
+ let sendToChatCalledThisTurn = false;
+ function executeTool(toolName, toolInput, workspace, sessionId, workingDir, sendToChat) {
      try {
          switch (toolName) {
              case 'read_file': {
                  const relPath = toolInput.path;
                  if (relPath.includes('..'))
                      return 'Error: path traversal not allowed';
+                 const ext = relPath.toLowerCase().split('.').pop() || '';
+                 const BINARY_EXTS = ['png', 'jpg', 'jpeg', 'gif', 'webp', 'pdf', 'mp3', 'wav', 'mp4', 'mov'];
+                 if (BINARY_EXTS.includes(ext))
+                     return `Binary file (${ext}) — cannot read as text.`;
                  const fullPath = `${workspace}/${relPath}`;
                  if (!existsSync(fullPath))
                      return `File not found: ${relPath}`;
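The new clearFastBrainSession() resets all three pieces of per-session state (Agent SDK session id, Gemini chat, Anthropic history), and its doc comment says to call it on disconnect, reconnect, or session switch. A plausible caller-side wiring, sketched with hypothetical event names since index.ts is not shown in this hunk:

```ts
// Hypothetical wiring — the real event plumbing lives in index.ts, which this hunk does not show.
import { clearFastBrainSession } from './fast-brain.js';

interface VoiceConnection {
  on(event: 'disconnect' | 'reconnect' | 'session_switch', handler: () => void): void;
}

function wireFastBrainLifecycle(conn: VoiceConnection) {
  // Any of these events invalidates the cached conversation state, so the next
  // askHaiku()/askFastBrain() call starts from a clean slate.
  for (const ev of ['disconnect', 'reconnect', 'session_switch'] as const) {
    conn.on(ev, () => clearFastBrainSession());
  }
}
```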
@@ -147,27 +179,139 @@ function executeTool(toolName, toolInput, workspace, sessionId, workingDir) {
              case 'read_agent_results': {
                  if (!sessionId || !workingDir)
                      return 'Error: no active research session';
-                 const lastN = toolInput.lastN || 5;
-                 const results = getRecentToolResults(sessionId, workingDir, lastN);
+                 const lastN = toolInput.lastN || 40;
+                 const toolFilter = toolInput.toolFilter;
+                 const results = getRecentToolResults(sessionId, workingDir, lastN, { toolNameFilter: toolFilter });
                  if (results.length === 0)
-                     return 'No tool results found in agent JSONL.';
-                 return results.map(tr => {
-                     const inputPreview = JSON.stringify(tr.toolInput).substring(0, 200);
-                     return `[${tr.toolName}: ${inputPreview}]\n${tr.resultContent}`;
-                 }).join('\n\n---\n\n');
+                     return `No tool results found${toolFilter ? ` for tools: ${toolFilter.join(', ')}` : ''}.`;
+                 return `[${results.length} results${toolFilter ? ` filtered by: ${toolFilter.join(', ')}` : ''}]\n\n` +
+                     results.map(tr => {
+                         const inputPreview = JSON.stringify(tr.toolInput).substring(0, 200);
+                         return `[${tr.toolName}: ${inputPreview}]\n${tr.resultContent}`;
+                     }).join('\n\n---\n\n');
              }
              case 'read_agent_text': {
                  if (!sessionId || !workingDir)
                      return 'Error: no active research session';
-                 const lastN = toolInput.lastN || 10;
-                 const messages = readSessionHistory(sessionId, workingDir, {
-                     lastN,
-                     types: ['assistant']
-                 });
+                 const lastN = toolInput.lastN || 60;
+                 const opts = lastN === 0
+                     ? { types: ['assistant'] }
+                     : { lastN, types: ['assistant'] };
+                 const messages = readSessionHistory(sessionId, workingDir, opts);
                  const texts = messages.filter(m => m.text && m.text.length > 20);
                  if (texts.length === 0)
                      return 'No agent reasoning text found in JSONL.';
-                 return texts.map(m => m.text).join('\n\n---\n\n');
+                 return `[${texts.length} agent messages]\n\n` + texts.map(m => m.text).join('\n\n---\n\n');
+             }
+             case 'read_subagents': {
+                 if (!sessionId || !workingDir)
+                     return 'Error: no active research session';
+                 const transcripts = getSubagentTranscripts(sessionId, workingDir);
+                 if (transcripts.length === 0)
+                     return 'No sub-agent transcripts found.';
+                 return transcripts.map(sa => {
+                     const texts = sa.messages
+                         .filter(m => m.text && m.text.length > 20)
+                         .map(m => `[${m.type}] ${m.text}`);
+                     return `=== Sub-agent ${sa.taskId} (${sa.messages.length} msgs) ===\n${texts.join('\n')}`;
+                 }).join('\n\n');
+             }
+             case 'search_jsonl': {
+                 if (!sessionId || !workingDir)
+                     return 'Error: no active research session';
+                 const keyword = toolInput.keyword;
+                 if (!keyword)
+                     return 'Error: keyword is required';
+                 const maxResults = toolInput.maxResults || 20;
+                 const results = searchSessionJsonl(sessionId, workingDir, keyword, { maxResults });
+                 if (results.length === 0)
+                     return `No matches for "${keyword}" in agent JSONL.`;
+                 return results.map(r => `[${r.type}${r.timestamp ? ` @ ${r.timestamp}` : ''}] ${r.text}`).join('\n\n---\n\n');
+             }
+             case 'read_conversation': {
+                 if (!sessionId || !workingDir)
+                     return 'Error: no active research session';
+                 const lastN = toolInput.lastN || 30;
+                 const exchanges = getConversationText(sessionId, workingDir, lastN, 2000);
+                 if (exchanges.length === 0)
+                     return 'No conversation history found.';
+                 return exchanges.map(e => `${e.role}: ${e.text}`).join('\n\n');
+             }
+             case 'get_session_stats': {
+                 if (!sessionId || !workingDir)
+                     return 'Error: no active research session';
+                 const stats = getSessionStats(sessionId, workingDir);
+                 if (!stats)
+                     return 'No session data found.';
+                 const toolList = Object.entries(stats.toolBreakdown)
+                     .sort(([, a], [, b]) => b - a)
+                     .map(([name, count]) => ` ${name}: ${count}`)
+                     .join('\n');
+                 return `Session Stats:
+ Total messages: ${stats.totalMessages}
+ User messages: ${stats.userMessages}
+ Agent messages: ${stats.assistantMessages}
+ Tool calls: ${stats.toolUseCount}
+ Tool results: ${stats.toolResultCount}
+ Sub-agents: ${stats.subagentCount}
+ File size: ${(stats.fileSizeBytes / 1024).toFixed(1)} KB
+ Time range: ${stats.firstTimestamp || '?'} → ${stats.lastTimestamp || '?'}
+
+ Tool breakdown:
+ ${toolList}`;
+             }
+             case 'deep_read_results': {
+                 if (!sessionId || !workingDir)
+                     return 'Error: no active research session';
+                 const toolFilter = toolInput.toolFilter;
+                 const allResults = getRecentToolResults(sessionId, workingDir, 0, { toolNameFilter: toolFilter });
+                 if (allResults.length === 0)
+                     return `No tool results found${toolFilter ? ` for tools: ${toolFilter.join(', ')}` : ''}.`;
+                 return `[${allResults.length} total results${toolFilter ? ` filtered by: ${toolFilter.join(', ')}` : ' (all tools)'}]\n\n` +
+                     allResults.map(tr => {
+                         const inputPreview = JSON.stringify(tr.toolInput).substring(0, 200);
+                         return `[${tr.toolName}: ${inputPreview}]\n${tr.resultContent}`;
+                     }).join('\n\n---\n\n');
+             }
+             case 'deep_read_text': {
+                 if (!sessionId || !workingDir)
+                     return 'Error: no active research session';
+                 const allMessages = readSessionHistory(sessionId, workingDir, {
+                     types: ['assistant']
+                 });
+                 const allTexts = allMessages.filter(m => m.text && m.text.length > 20);
+                 if (allTexts.length === 0)
+                     return 'No agent reasoning text found in JSONL.';
+                 return `[${allTexts.length} total agent messages across entire session]\n\n` + allTexts.map(m => m.text).join('\n\n---\n\n');
+             }
+             case 'get_full_transcript': {
+                 if (!sessionId || !workingDir)
+                     return 'Error: no active research session';
+                 const transcripts = getSessionTranscripts(sessionId, workingDir);
+                 const agentTexts = transcripts.agent.messages
+                     .filter(m => m.text && m.text.length > 20)
+                     .map(m => `[${m.type}${m.toolName ? ': ' + m.toolName : ''}] ${m.text}`);
+                 let output = `=== Agent Transcript (${transcripts.agent.messages.length} msgs, ${transcripts.agent.fileSize} bytes) ===\n${agentTexts.join('\n\n')}`;
+                 if (transcripts.subagents.length > 0) {
+                     const subTexts = transcripts.subagents.map(sa => {
+                         const texts = sa.messages.filter(m => m.text).map(m => `[${m.type}] ${m.text}`);
+                         return `=== Sub-agent ${sa.taskId} ===\n${texts.join('\n')}`;
+                     });
+                     output += '\n\n' + subTexts.join('\n\n');
+                 }
+                 return output;
+             }
+             case 'send_to_chat': {
+                 const text = toolInput.text;
+                 if (!text)
+                     return 'Error: text is required';
+                 if (sendToChat) {
+                     console.log(`💬 [fast brain] send_to_chat: ${text.substring(0, 80)}...`);
+                     sendToChat(text);
+                     sendToChatCalledThisTurn = true;
+                     return `Sent to chat successfully. Now return a brief spoken summary — do NOT repeat the content you just sent.`;
+                 }
+                 return 'Error: chat sending not available';
              }
              default:
                  return `Unknown tool: ${toolName}`;
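Most of the new cases above delegate to helpers imported from session-access.js (searchSessionJsonl, getConversationText, getSessionStats, getSessionTranscripts) whose bodies never appear in this diff. As a rough mental model only, a keyword search over a JSONL session transcript could look like the sketch below; the record shape and truncation are assumptions, not the package's actual implementation:

```ts
// Illustrative sketch — the real searchSessionJsonl lives in session-access.js (not shown in this diff).
import { readFileSync } from 'fs';

interface JsonlMatch { type: string; timestamp?: string; text: string; }

function searchJsonlSketch(path: string, keyword: string, maxResults = 20): JsonlMatch[] {
  const needle = keyword.toLowerCase();
  const matches: JsonlMatch[] = [];
  for (const line of readFileSync(path, 'utf-8').split('\n')) {
    if (!line.trim()) continue;
    let entry: any;
    try { entry = JSON.parse(line); } catch { continue; } // skip partial or corrupt lines
    const text = JSON.stringify(entry.message ?? entry);
    if (text.toLowerCase().includes(needle)) {
      matches.push({ type: entry.type ?? 'unknown', timestamp: entry.timestamp, text: text.slice(0, 500) });
      if (matches.length >= maxResults) break;
    }
  }
  return matches;
}
```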
@@ -212,23 +356,90 @@ function buildAnthropicTools() {
          },
          {
              name: 'read_agent_results',
-             description: 'Read recent tool results from the research agent JSONL. Returns FULL untruncated tool outputs (file contents, command outputs, web search results).',
+             description: 'Read the research agent\'s FULL memory complete untruncated tool outputs including entire file contents the agent read, full bash command outputs, web search results, and web page fetches. This is the agent\'s raw data. Use this FIRST when asked about anything the agent just researched. Default: last 40 results.',
              input_schema: {
                  type: 'object',
                  properties: {
-                     lastN: { type: 'number', description: 'Number of recent results to return (default: 5)' }
+                     lastN: { type: 'number', description: 'Number of recent results to return (default: 40, max: 80)' }
                  }
              }
          },
          {
              name: 'read_agent_text',
-             description: 'Read recent agent reasoning and analysis text from JSONL. Returns the agent\'s thinking and conclusions.',
+             description: 'Read the research agent\'s reasoning, analysis, and conclusions from JSONL. Contains the agent\'s step-by-step thinking, synthesis of findings, comparisons, and recommendations. Use this alongside read_agent_results to get the COMPLETE picture of what the agent researched and concluded. Default: last 60 messages.',
+             input_schema: {
+                 type: 'object',
+                 properties: {
+                     lastN: { type: 'number', description: 'Number of recent text messages to return (default: 60, max: 100)' }
+                 }
+             }
+         },
+         {
+             name: 'read_subagents',
+             description: 'Read all sub-agent (parallel Task) transcripts. Contains the detailed work done by sub-agents spawned during research. Use when the main agent delegated parts of the research to sub-agents working in parallel.',
+             input_schema: { type: 'object', properties: {} }
+         },
+         {
+             name: 'search_jsonl',
+             description: 'Search the agent\'s JSONL transcript for a keyword. Returns matching entries across all tool results, agent reasoning, and conversation history. Use to find specific mentions of a topic, file, function, or concept.',
+             input_schema: {
+                 type: 'object',
+                 properties: {
+                     keyword: { type: 'string', description: 'The keyword to search for (case-insensitive)' },
+                     maxResults: { type: 'number', description: 'Maximum number of results (default: 20)' }
+                 },
+                 required: ['keyword']
+             }
+         },
+         {
+             name: 'read_conversation',
+             description: 'Read the user/assistant conversation exchange history. Shows what the user asked and what the agent responded, without tool call details. Use for understanding conversation flow, user intent, and what was discussed.',
+             input_schema: {
+                 type: 'object',
+                 properties: {
+                     lastN: { type: 'number', description: 'Number of recent exchanges to return (default: 30)' }
+                 }
+             }
+         },
+         {
+             name: 'get_full_transcript',
+             description: 'Read the COMPLETE agent transcript + all sub-agent transcripts. This is the most comprehensive view of everything the agent did — use when targeted tools (read_agent_results, read_agent_text) aren\'t enough and you need the full picture. Large output.',
+             input_schema: { type: 'object', properties: {} }
+         },
+         {
+             name: 'get_session_stats',
+             description: 'Get session statistics: total messages, tool call counts by name, sub-agent count, data size, time range. Use this to understand how much data is in the session before deciding whether to use deep tools.',
+             input_schema: { type: 'object', properties: {} }
+         },
+         {
+             name: 'deep_read_results',
+             description: 'Read ALL tool results across the ENTIRE session — not just recent ones. Returns every file read, bash output, web search, web fetch, etc. Use toolFilter to narrow by tool type. Use this for generating detailed analyses, overviews, diagrams, answering specific questions requiring full context, or when the user wants comprehensive details.',
              input_schema: {
                  type: 'object',
                  properties: {
-                     lastN: { type: 'number', description: 'Number of recent text messages to return (default: 10)' }
+                     toolFilter: {
+                         type: 'array',
+                         items: { type: 'string' },
+                         description: 'Only return results from these tools. E.g., ["Read"] for file reads, ["WebSearch","WebFetch"] for web data, ["Bash"] for commands, ["Grep","Glob"] for code searches. Omit for all tools.'
+                     }
                  }
              }
+         },
+         {
+             name: 'deep_read_text',
+             description: 'Read ALL agent reasoning and analysis across the ENTIRE session — not just recent messages. Returns every piece of thinking, synthesis, comparison, and recommendation the agent produced. Use this for generating comprehensive overviews or when the user asks for detailed explanations of what the agent found.',
+             input_schema: { type: 'object', properties: {} }
+         },
+         {
+             name: 'send_to_chat',
+             description: 'Send formatted content to the user\'s chat panel. Use for URLs, links, lists, prices, code snippets, or anything that\'s better read than spoken. The content appears as a chat message in the frontend. You should STILL speak a brief summary — use this tool for the detailed/visual content.',
+             input_schema: {
+                 type: 'object',
+                 properties: {
+                     text: { type: 'string', description: 'The formatted text to display in chat. Supports markdown.' }
+                 },
+                 required: ['text']
+             }
          }
      ];
  }
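One detail worth flagging: the new descriptions advertise caps ("max: 80", "max: 100"), but the executeTool cases earlier in the diff only apply defaults (toolInput.lastN || 40) and never clamp the value. If those caps are meant to be enforced rather than advisory, a small guard would close the gap; this is an editor's suggestion, not code from the package:

```ts
// Suggested guard (not present in the diff): enforce the advertised lastN ceiling.
function clampLastN(requested: number | undefined, fallback: number, max: number): number {
  const n = requested ?? fallback;
  return Math.min(Math.max(1, n), max); // e.g. clampLastN(toolInput.lastN, 40, 80)
}
```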
@@ -288,23 +499,90 @@ function buildGeminiTools() {
          },
          {
              name: 'read_agent_results',
-             description: 'Read recent tool results from the research agent JSONL. Returns FULL untruncated tool outputs (file contents, command outputs, web search results).',
+             description: 'Read the research agent\'s FULL memory complete untruncated tool outputs including entire file contents the agent read, full bash command outputs, web search results, and web page fetches. This is the agent\'s raw data. Use this FIRST when asked about anything the agent just researched. Default: last 40 results.',
              parameters: {
                  type: 'object',
                  properties: {
-                     lastN: { type: 'number', description: 'Number of recent results to return (default: 5)' }
+                     lastN: { type: 'number', description: 'Number of recent results to return (default: 40, max: 60)' }
                  }
              }
          },
          {
              name: 'read_agent_text',
-             description: 'Read recent agent reasoning and analysis text from JSONL. Returns the agent\'s thinking and conclusions.',
+             description: 'Read the research agent\'s reasoning, analysis, and conclusions from JSONL. Contains the agent\'s step-by-step thinking, synthesis of findings, comparisons, and recommendations. Use this alongside read_agent_results to get the COMPLETE picture of what the agent researched and concluded. Default: last 60 messages.',
+             parameters: {
+                 type: 'object',
+                 properties: {
+                     lastN: { type: 'number', description: 'Number of recent text messages to return (default: 60, max: 100)' }
+                 }
+             }
+         },
+         {
+             name: 'read_subagents',
+             description: 'Read all sub-agent (parallel Task) transcripts. Contains the detailed work done by sub-agents spawned during research.',
+             parameters: { type: 'object', properties: {} }
+         },
+         {
+             name: 'search_jsonl',
+             description: 'Search the agent\'s JSONL transcript for a keyword. Returns matching entries across all tool results, agent reasoning, and conversation history.',
+             parameters: {
+                 type: 'object',
+                 properties: {
+                     keyword: { type: 'string', description: 'The keyword to search for (case-insensitive)' },
+                     maxResults: { type: 'number', description: 'Maximum number of results (default: 20)' }
+                 },
+                 required: ['keyword']
+             }
+         },
+         {
+             name: 'read_conversation',
+             description: 'Read the user/assistant conversation exchange history. Shows what the user asked and what the agent responded.',
              parameters: {
                  type: 'object',
                  properties: {
-                     lastN: { type: 'number', description: 'Number of recent text messages to return (default: 10)' }
+                     lastN: { type: 'number', description: 'Number of recent exchanges to return (default: 30)' }
                  }
              }
+         },
+         {
+             name: 'get_full_transcript',
+             description: 'Read the COMPLETE agent transcript + all sub-agent transcripts. Most comprehensive view — use when targeted tools aren\'t enough.',
+             parameters: { type: 'object', properties: {} }
+         },
+         {
+             name: 'get_session_stats',
+             description: 'Get session statistics: total messages, tool call counts by name, sub-agent count, data size, time range. Use to understand how much data is in the session before using deep tools.',
+             parameters: { type: 'object', properties: {} }
+         },
+         {
+             name: 'deep_read_results',
+             description: 'Read ALL tool results across the ENTIRE session — not just recent ones. Use toolFilter to narrow by tool type. For detailed analyses, overviews, diagrams, specific questions requiring full context.',
+             parameters: {
+                 type: 'object',
+                 properties: {
+                     toolFilter: {
+                         type: 'array',
+                         items: { type: 'string' },
+                         description: 'Only return results from these tools. E.g., ["Read"] for file reads, ["WebSearch","WebFetch"] for web data. Omit for all.'
+                     }
+                 }
+             }
+         },
+         {
+             name: 'deep_read_text',
+             description: 'Read ALL agent reasoning across the ENTIRE session. For comprehensive overviews or detailed explanations of what the agent found throughout the session.',
+             parameters: { type: 'object', properties: {} }
+         },
+         {
+             name: 'send_to_chat',
+             description: 'Send formatted content to the user\'s chat panel. Use for URLs, links, lists, prices, code snippets, or anything better read than spoken. Still speak a brief summary — use this for the detailed/visual content.',
+             parameters: {
+                 type: 'object',
+                 properties: {
+                     text: { type: 'string', description: 'The formatted text to display in chat. Supports markdown.' }
+                 },
+                 required: ['text']
+             }
          }
      ]
  }
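buildGeminiTools() is a near line-for-line duplicate of buildAnthropicTools(): the declarations differ only in the schema key (input_schema for Anthropic, parameters for Gemini), and the two copies have already drifted (read_agent_results advertises max 80 on the Anthropic side but max 60 on the Gemini side). Declaring each tool once and deriving both wire formats would prevent that drift; a refactoring sketch, assuming the JSON-schema bodies remain compatible with both APIs:

```ts
// Refactoring sketch, not package code: define each tool once, derive both wire formats.
interface ToolDef { name: string; description: string; schema: object; }

const toAnthropic = (t: ToolDef) => ({ name: t.name, description: t.description, input_schema: t.schema });
const toGemini = (t: ToolDef) => ({ name: t.name, description: t.description, parameters: t.schema });

const TOOL_DEFS: ToolDef[] = [
  {
    name: 'search_jsonl',
    description: "Search the agent's JSONL transcript for a keyword.",
    schema: { type: 'object', properties: { keyword: { type: 'string' } }, required: ['keyword'] },
  },
  // ...remaining tools declared once, in one place
];

const anthropicTools = TOOL_DEFS.map(toAnthropic);
const geminiTools = TOOL_DEFS.map(toGemini);
```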
@@ -333,18 +611,150 @@ async function geminiWebSearch(query) {
      }
  }
  // ============================================================
- // Anthropic Q&A implementation
+ // Agent SDK Q&A implementation — replaces direct Anthropic API for Q&A
+ // ============================================================
+ /**
+  * Create an in-process MCP server with the send_to_chat tool for the Agent SDK fast brain.
+  */
+ function createFastBrainMcpServer(sendToChat) {
+     const tools = [];
+     if (sendToChat) {
+         tools.push(sdkTool('send_to_chat', 'Send formatted content to the user\'s chat panel. Use for URLs, links, lists, prices, code snippets, tables, or anything better read than spoken. Supports markdown. You should STILL speak a brief summary — the chat content is supplementary.', { text: z.string().describe('The formatted text to display in chat. Supports markdown.') }, async ({ text }) => {
+             sendToChat(text);
+             sendToChatCalledThisTurn = true;
+             return { content: [{ type: 'text', text: 'Sent to chat. Now give a brief spoken summary of what you sent.' }] };
+         }));
+     }
+     return createSdkMcpServer({
+         name: 'osborn-fast-brain',
+         version: '1.0.0',
+         tools,
+     });
+ }
+ /**
+  * Ask via Claude Agent SDK — the agent traverses JSONL files natively using Read/Grep/Glob.
+  * Falls back to Gemini on timeout or error.
+  */
+ async function askViaAgentSdk(question, workspace, researchContext, sessionId, workingDir, chatHistory, sendToChat, sessionBaseDir) {
+     sendToChatCalledThisTurn = false;
+     // Build the prompt with conversation context
+     let prompt = question;
+     if (researchContext) {
+         prompt += `\n\n[LIVE RESEARCH CONTEXT — the deep research agent is currently working]\n${researchContext}`;
+     }
+     if (chatHistory && chatHistory.length > 0) {
+         const historyStr = chatHistory.slice(-15).map(t => `${t.role}: ${t.text}`).join('\n');
+         prompt = `[Recent voice conversation]\n${historyStr}\n\n[Current question]\n${prompt}`;
+     }
+     // Create MCP server for send_to_chat
+     const mcpServer = createFastBrainMcpServer(sendToChat);
+     // Build system prompt with computed paths
+     const systemPrompt = buildFastBrainSdkPrompt(workingDir || workspace, sessionId || '', sessionBaseDir || workingDir || workspace);
+     // Tools: Read/Write/Edit for files, Grep/Glob for search, WebSearch/WebFetch for web
+     const toolNames = ['Read', 'Write', 'Edit', 'Grep', 'Glob', 'WebSearch', 'WebFetch'];
+     const mcpToolPatterns = sendToChat ? ['mcp__osborn-fast-brain__*'] : [];
+     const options = {
+         model: ANTHROPIC_FAST_MODEL,
+         cwd: workingDir,
+         systemPrompt,
+         maxTurns: 8,
+         tools: toolNames,
+         allowedTools: [...toolNames, ...mcpToolPatterns],
+         mcpServers: { 'osborn-fast-brain': mcpServer },
+     };
+     if (fastBrainSessionId) {
+         options.resume = fastBrainSessionId;
+     }
+     // Run with 15s timeout — falls back to Gemini on timeout
+     const TIMEOUT_MS = 15000;
+     let timeoutHandle;
+     const timeoutPromise = new Promise((_, reject) => {
+         timeoutHandle = setTimeout(() => reject(new Error('fast-brain-timeout')), TIMEOUT_MS);
+     });
+     const queryPromise = (async () => {
+         let result = '';
+         try {
+             for await (const message of sdkQuery({ prompt, options })) {
+                 if (message.type === 'result') {
+                     result = message.result || '';
+                 }
+                 // Capture session ID eagerly — even if we timeout, next call can resume
+                 if (message.type === 'assistant' && message.session_id) {
+                     const sid = message.session_id;
+                     if (sid !== fastBrainSessionId) {
+                         fastBrainSessionId = sid;
+                         console.log(`🧠 Fast brain session: ${sid.substring(0, 12)}... (${options.resume ? 'resumed' : 'new'})`);
+                     }
+                 }
+             }
+         }
+         catch (err) {
+             console.error('❌ Agent SDK query error:', err);
+             throw err;
+         }
+         clearTimeout(timeoutHandle);
+         return result;
+     })();
+     try {
+         const result = await Promise.race([queryPromise, timeoutPromise]);
+         if (!result || result.trim().length === 0) {
+             if (sendToChatCalledThisTurn)
+                 return "I've sent the details to your chat panel.";
+             return 'No answer found.';
+         }
+         console.log(`🧠 Agent SDK fast brain: ${result.length} chars (session: ${fastBrainSessionId?.substring(0, 8) || 'new'})`);
+         return result;
+     }
+     catch (err) {
+         clearTimeout(timeoutHandle);
+         if (err.message === 'fast-brain-timeout') {
+             console.log('⏱️ Agent SDK fast brain timed out (15s), falling back to Gemini');
+         }
+         else {
+             console.error('❌ Agent SDK fast brain error:', err.message || err);
+         }
+         // Fall back to Gemini if available
+         if (geminiClient) {
+             console.log('🔄 Falling back to Gemini fast brain');
+             return askViaGemini(question, workspace, researchContext, sessionId, workingDir, chatHistory, sendToChat);
+         }
+         // Fall back to direct Anthropic API if no Gemini
+         if (anthropicClient) {
+             console.log('🔄 Falling back to direct Anthropic API');
+             return askViaAnthropic(question, workspace, researchContext, sessionId, workingDir, chatHistory, sendToChat);
+         }
+         return 'Fast brain unavailable. Try asking me to research it.';
+     }
+ }
+ // ============================================================
+ // Direct Anthropic API Q&A — kept as fallback for Agent SDK failures
  // ============================================================
- async function askViaAnthropic(question, workspace, researchContext, sessionId, workingDir) {
+ async function askViaAnthropic(question, workspace, researchContext, sessionId, workingDir, chatHistory, sendToChat) {
      const client = anthropicClient;
      const tools = buildAnthropicTools();
+     sendToChatCalledThisTurn = false;
      const userContent = researchContext
          ? `${question}\n\n[LIVE RESEARCH CONTEXT — the research agent is currently working]\n${researchContext}`
          : question;
-     const messages = [
-         { role: 'user', content: userContent }
-     ];
+     // Build messages: persistent fast brain history + live voice history + current question
+     const messages = [];
+     // 1. Inject persistent fast brain history (prior exchanges from this session)
+     for (const exchange of fastBrainHistory) {
+         messages.push({ role: 'user', content: exchange.question });
+         messages.push({ role: 'assistant', content: exchange.answer });
+     }
+     // 2. Inject live voice conversation history (from agent.chatCtx — what user/model actually said)
+     if (chatHistory && chatHistory.length > 0) {
+         for (const turn of chatHistory) {
+             messages.push({ role: turn.role, content: turn.text });
+         }
+     }
+     // 3. Current question
+     messages.push({ role: 'user', content: userContent });
      const allTools = [...tools, ANTHROPIC_WEB_SEARCH];
+     const noAnswerFallback = () => sendToChatCalledThisTurn
+         ? "I've sent the details to your chat panel."
+         : 'No answer found.';
      for (let i = 0; i < 10; i++) {
          const response = await client.messages.create({
              model: ANTHROPIC_FAST_MODEL,
@@ -355,63 +765,97 @@ async function askViaAnthropic(question, workspace, researchContext, sessionId,
          });
          if (response.stop_reason === 'end_turn') {
              const textBlock = response.content.find((b) => b.type === 'text');
-             return textBlock?.text || 'No answer found.';
+             const answer = textBlock?.text || noAnswerFallback();
+             // Persist this exchange for future calls
+             fastBrainHistory.push({ question: userContent, answer });
+             if (fastBrainHistory.length > MAX_FAST_BRAIN_HISTORY)
+                 fastBrainHistory.shift();
+             return answer;
          }
          const toolUseBlocks = response.content.filter((b) => b.type === 'tool_use');
          if (toolUseBlocks.length === 0 && response.stop_reason !== 'tool_use') {
              const textBlock = response.content.find((b) => b.type === 'text');
-             return textBlock?.text || 'No answer found.';
+             const answer = textBlock?.text || noAnswerFallback();
+             fastBrainHistory.push({ question: userContent, answer });
+             if (fastBrainHistory.length > MAX_FAST_BRAIN_HISTORY)
+                 fastBrainHistory.shift();
+             return answer;
          }
          messages.push({ role: 'assistant', content: response.content });
          if (toolUseBlocks.length > 0) {
              const toolResults = toolUseBlocks.map(toolUse => ({
                  type: 'tool_result',
                  tool_use_id: toolUse.id,
-                 content: executeTool(toolUse.name, toolUse.input, workspace, sessionId, workingDir),
+                 content: executeTool(toolUse.name, toolUse.input, workspace, sessionId, workingDir, sendToChat),
              }));
              messages.push({ role: 'user', content: toolResults });
          }
      }
+     if (sendToChatCalledThisTurn) {
+         const answer = "I've sent the full details to your chat. Let me know if you want to dive deeper into anything.";
+         fastBrainHistory.push({ question: userContent, answer });
+         if (fastBrainHistory.length > MAX_FAST_BRAIN_HISTORY)
+             fastBrainHistory.shift();
+         return answer;
+     }
      return 'Fast brain reached maximum tool iterations. Try ask_agent for a deeper search.';
  }
  // ============================================================
  // Gemini Q&A implementation
  // ============================================================
- async function askViaGemini(question, workspace, researchContext, sessionId, workingDir) {
+ async function askViaGemini(question, workspace, researchContext, sessionId, workingDir, chatHistory, sendToChat, sessionBaseDir) {
      const ai = geminiClient;
      const tools = buildGeminiTools();
+     sendToChatCalledThisTurn = false;
      const userContent = researchContext
          ? `${question}\n\n[LIVE RESEARCH CONTEXT — the research agent is currently working]\n${researchContext}`
          : question;
-     // Gemini uses a different content format
-     const contents = [
-         { role: 'user', parts: [{ text: userContent }] }
-     ];
-     for (let i = 0; i < 10; i++) {
-         const response = await ai.models.generateContent({
+     // Create or reuse persistent Gemini Chat session.
+     // The Chat object auto-manages full conversation history (messages + tool calls).
+     // Cleared on disconnect/reconnect/session switch via clearFastBrainSession().
+     if (!geminiChat) {
+         // Seed with live voice conversation history so Gemini knows what user/model said
+         const history = [];
+         if (chatHistory && chatHistory.length > 0) {
+             for (const turn of chatHistory) {
+                 history.push({
+                     role: turn.role === 'assistant' ? 'model' : 'user',
+                     parts: [{ text: turn.text }],
+                 });
+             }
+         }
+         geminiChat = ai.chats.create({
              model: GEMINI_FAST_MODEL,
-             contents,
              config: {
                  systemInstruction: FAST_BRAIN_SYSTEM_PROMPT,
                  tools,
-             }
+             },
+             history,
          });
+         console.log(`🧠 Gemini fast brain: new chat session (history: ${history.length} turns)`);
+     }
+     // Send user message via the persistent chat — history accumulates automatically.
+     // The Chat object tracks all messages + tool calls internally.
+     let response = await geminiChat.sendMessage({ message: userContent });
+     // Tool call loop: execute tools and send results back, up to 10 rounds
+     for (let i = 0; i < 10; i++) {
          const functionCalls = response.functionCalls;
          if (!functionCalls || functionCalls.length === 0) {
-             return response.text || 'No answer found.';
-         }
-         // Add model response to conversation
-         if (response.candidates?.[0]?.content) {
-             contents.push(response.candidates[0].content);
+             const text = response.text;
+             if (text)
+                 return text;
+             if (sendToChatCalledThisTurn)
+                 return "I've sent the details to your chat panel.";
+             return 'No answer found.';
          }
-         // Execute tools and send results back (web_search is async, others are sync)
+         // Execute tools
          const functionResponses = await Promise.all(functionCalls.map(async (call) => {
              let result;
              if (call.name === 'web_search') {
                  result = await geminiWebSearch(call.args?.query || question);
             }
              else {
-                 result = executeTool(call.name, call.args || {}, workspace, sessionId, workingDir);
+                 result = executeTool(call.name, call.args || {}, workspace, sessionId, workingDir, sendToChat);
              }
              return {
                  functionResponse: {
@@ -420,7 +864,11 @@ async function askViaGemini(question, workspace, researchContext, sessionId, wor
                  }
              };
          }));
-         contents.push({ role: 'user', parts: functionResponses });
+         // Send tool results back — chat auto-tracks the full exchange
+         response = await geminiChat.sendMessage({ message: functionResponses });
+     }
+     if (sendToChatCalledThisTurn) {
+         return "I've sent the full details to your chat. Let me know if you want to dive deeper into anything.";
      }
      return 'Fast brain reached maximum tool iterations. Try ask_agent for a deeper search.';
  }
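askViaAgentSdk above wraps the SDK's streamed query in a Promise.race against a 15 s timer, then falls back to Gemini or the direct Anthropic API on timeout or error. Distilled out of that hunk, the pattern is generic; a self-contained sketch of the same idea:

```ts
// The timeout-then-fallback pattern used by askViaAgentSdk, reduced to its core.
async function withTimeout<T>(work: Promise<T>, ms: number, onTimeout: () => Promise<T>): Promise<T> {
  let handle: ReturnType<typeof setTimeout> | undefined;
  const timer = new Promise<never>((_, reject) => {
    handle = setTimeout(() => reject(new Error('fast-brain-timeout')), ms);
  });
  try {
    return await Promise.race([work, timer]);
  } catch (err) {
    if (err instanceof Error && err.message === 'fast-brain-timeout') {
      return onTimeout(); // e.g. fall back to the Gemini path
    }
    throw err; // non-timeout errors propagate to the caller
  } finally {
    if (handle) clearTimeout(handle); // always clear the timer, win or lose
  }
}
```

Note one deliberate difference in the real code: the losing SDK query is not cancelled, so it can keep streaming in the background and still capture a session id that the next call resumes.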
@@ -436,19 +884,181 @@ async function askViaGemini(question, workspace, researchContext, sessionId, wor
   * @param researchContext - Optional snapshot of the live research log.
   * ~2 second response time for most queries.
   */
- export async function askHaiku(workingDir, sessionId, question, researchContext) {
+ export async function askHaiku(workingDir, sessionId, question, researchContext, chatHistory, sendToChat, sessionBaseDir) {
      initProvider();
-     if (provider === 'none') {
-         return 'NEEDS_DEEPER_RESEARCH: Fast brain unavailable (no API key). Try ask_agent instead.';
+     // workspace uses sessionBaseDir (Osborn install dir) for spec.md/library
+     // workingDir is for JSONL access (matches Claude SDK cwd)
+     const wsDir = sessionBaseDir || workingDir;
+     const workspace = getSessionWorkspace(wsDir, sessionId);
+     // Primary: Gemini Flash (~1-2s) with pre-loaded JSONL context
+     // Fallback: Anthropic direct API or Agent SDK (slower but functional)
+     if (provider === 'gemini') {
+         return askViaGemini(question, workspace, researchContext, sessionId, workingDir, chatHistory, sendToChat, wsDir);
      }
-     const workspace = getSessionWorkspace(workingDir, sessionId);
-     if (provider === 'anthropic') {
-         return askViaAnthropic(question, workspace, researchContext, sessionId, workingDir);
+     else if (provider === 'anthropic' || provider === 'agent-sdk') {
+         return askViaAgentSdk(question, workspace, researchContext, sessionId, workingDir, chatHistory, sendToChat, wsDir);
      }
      else {
-         return askViaGemini(question, workspace, researchContext, sessionId, workingDir);
+         return 'NEEDS_DEEPER_RESEARCH: Fast brain unavailable (no API key or CLI auth). Try ask_agent instead.';
      }
  }
+ let researchTaskCounter = 0;
+ /**
+  * Central orchestrator — ALL user questions from the realtime model come here.
+  * Routes to: direct answer, research triggering, decision recording, or document generation.
+  * Returns a teleprompter script the voice model reads verbatim.
+  */
+ export async function askFastBrain(workingDir, sessionId, question, opts) {
+     const { chatHistory, researchContext, callbacks } = opts;
+     const wsDir = opts.sessionBaseDir || workingDir;
+     // Detect document generation requests
+     const docMatch = detectDocumentRequest(question);
+     if (docMatch) {
+         try {
+             const result = await generateVisualDocument(workingDir, sessionId, question, docMatch, wsDir);
+             if (result) {
+                 const fullPath = `${wsDir}/.osborn/sessions/${sessionId}/library/${result.fileName}`;
+                 callbacks.sendToFrontend({
+                     type: 'research_artifact_updated',
+                     filePath: fullPath,
+                     fileName: result.fileName,
+                 });
+                 return {
+                     script: `I've created a ${docMatch} document called ${result.fileName}. You can see it in the files panel.`,
+                     type: 'answer',
+                 };
+             }
+         }
+         catch (err) {
+             console.error('❌ askFastBrain: document generation failed:', err);
+         }
+         // Fall through to regular handling if document gen fails
+     }
+     // Create sendToChat wrapper that sends assistant_response to frontend
+     const sendToChat = (text) => {
+         callbacks.sendToFrontend({ type: 'assistant_response', text });
+     };
+     // Core: ask the fast brain LLM
+     const answer = await askHaiku(workingDir, sessionId, question, researchContext, chatHistory, sendToChat, wsDir);
+     // Parse the response to determine routing
+     if (answer.startsWith('RECORDED:') || answer.includes('\nRECORDED:')) {
+         // Decision was recorded — extract the confirmation
+         const recordedLine = answer.split('\n').find(l => l.startsWith('RECORDED:'));
+         const confirmation = recordedLine
+             ? recordedLine.replace('RECORDED:', '').trim()
+             : 'Got it, noted.';
+         // Notify frontend about spec update
+         const specPath = `${wsDir}/.osborn/sessions/${sessionId}/spec.md`;
+         callbacks.sendToFrontend({
+             type: 'research_artifact_updated',
+             filePath: specPath,
+             fileName: 'spec.md',
+         });
+         return { script: confirmation, type: 'recorded' };
+     }
+     // Handle ASK_USER — questions directed at the user (not research tasks)
+     if (answer.startsWith('ASK_USER:') || answer.includes('\nASK_USER:')) {
+         const askLine = answer.split('\n').find(l => l.includes('ASK_USER:'));
+         const userQuestion = askLine
+             ? askLine.replace(/^ASK_USER:\s*/, '').trim()
+             : answer.replace(/^ASK_USER:\s*/, '').trim();
+         return { script: userQuestion, type: 'question' };
+     }
+     if (answer.includes('NEEDS_DEEPER_RESEARCH')) {
+         // Extract the research task context
+         const needsLine = answer.split('\n').find(l => l.includes('NEEDS_DEEPER_RESEARCH'));
+         const contextLine = answer.split('\n').find(l => l.startsWith('CONTEXT:'));
+         const researchTask = needsLine
+             ? needsLine.replace(/^(PARTIAL:\s*)?NEEDS_DEEPER_RESEARCH:\s*/, '').trim()
+             : question;
+         const contextStr = contextLine ? contextLine.replace('CONTEXT:', '').trim() : '';
+         // Safety check: if the "research task" looks like a question for the user
+         // (ends with ?, asks about preferences/needs, is very short), treat it as ASK_USER instead.
+         // This catches the common Gemini bug where clarification questions are formatted as research tasks.
+         const taskLower = researchTask.toLowerCase();
+         const looksLikeUserQuestion = (researchTask.endsWith('?') && (taskLower.includes('would you') ||
+             taskLower.includes('do you') ||
+             taskLower.includes('could you') ||
+             taskLower.includes('what kind of') ||
+             taskLower.includes('which') ||
+             taskLower.includes('your needs') ||
+             taskLower.includes('your preference') ||
+             taskLower.includes('more details') ||
+             taskLower.includes('clarif') ||
+             taskLower.includes('specify') ||
+             taskLower.includes('interested in') ||
+             researchTask.length < 80 // Very short "tasks" ending in ? are almost always user questions
+         ));
+         if (looksLikeUserQuestion) {
+             console.log(`🧠 [fast brain] Caught question-as-research-task, redirecting to ASK_USER: "${researchTask.substring(0, 100)}"`);
+             return { script: researchTask, type: 'question' };
+         }
+         const fullTask = contextStr ? `${researchTask}\n\nContext: ${contextStr}` : researchTask;
+         // Extract any partial answer (spoken script before NEEDS_DEEPER_RESEARCH)
+         const partialMatch = answer.match(/^PARTIAL:\s*([\s\S]*?)(?=\nNEEDS_DEEPER_RESEARCH)/m);
+         const partialScript = partialMatch ? partialMatch[1].trim() : '';
+         // Generate a task ID for frontend tracking
+         researchTaskCounter++;
+         const taskId = `research-${researchTaskCounter}-${Date.now()}`;
+         // Trigger research in background
+         callbacks.triggerResearch(fullTask);
+         callbacks.sendToFrontend({
+             type: 'research_task_started',
+             task: researchTask.substring(0, 200),
+             taskId,
+         });
+         // Generate acknowledgment script
+         let script;
+         if (partialScript) {
+             script = `${partialScript} Let me dig deeper on the rest.`;
+         }
+         else {
+             // Generate a contextual ack based on conversation flow
+             script = generateResearchAck(question, chatHistory);
+         }
+         return { script, type: 'research_started' };
+     }
+     // Direct answer — the response IS the teleprompter script
+     return { script: answer, type: 'answer' };
+ }
+ /** Detect if the user's question is an EXPLICIT document generation request.
+  * Must be very specific — don't catch general questions about analysis or comparisons.
+  * Only triggers when the user explicitly asks for a written document/artifact. */
+ function detectDocumentRequest(question) {
+     const q = question.toLowerCase();
+     // Only match explicit document requests — "create a comparison", "make a diagram", "write a summary"
+     // Do NOT match: "compare X and Y", "analyze the code", "give me an overview"
+     const docVerbs = /(create|make|generate|write|build|produce|draw)\s+(a\s+|an\s+|the\s+)?/;
+     if (!docVerbs.test(q))
+         return null;
+     if (q.includes('comparison') || q.includes('comparison table') || q.includes('comparison document'))
+         return 'comparison';
+     if (q.includes('diagram') || q.includes('flow chart') || q.includes('architecture diagram'))
+         return 'diagram';
+     if (q.includes('analysis document') || q.includes('tradeoff document'))
+         return 'analysis';
+     if (q.includes('summary document') || q.includes('overview document'))
+         return 'summary';
+     return null;
+ }
+ /** Generate a natural research acknowledgment based on conversation context */
+ function generateResearchAck(question, chatHistory) {
+     // Use simple heuristics for a natural ack — no LLM call needed
+     const q = question.toLowerCase();
+     if (q.includes('how') && (q.includes('work') || q.includes('implement'))) {
+         return "Let me look into how that works. I'll have the details for you shortly.";
+     }
+     if (q.includes('what') && (q.includes('option') || q.includes('available') || q.includes('choice'))) {
+         return "Let me research the options for you.";
+     }
+     if (q.includes('why') || q.includes('explain')) {
+         return "Good question. Let me dig into that.";
+     }
+     if (q.includes('find') || q.includes('search') || q.includes('look')) {
+         return "On it. Give me a moment to look into that.";
+     }
+     return "Let me research that for you. I'll have findings shortly.";
+ }
  // ============================================================
  // processResearchChunk — Incremental content processing during research
  // ============================================================
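The routing in askFastBrain is driven entirely by string markers the fast-brain LLM is prompted to emit, checked in a fixed order. The protocol is easiest to see with concrete responses; the model outputs below are hypothetical, and the helper simplifies the real code (which additionally strips PARTIAL/CONTEXT lines and can re-route a question-shaped "research task" back to the user):

```ts
// Sketch of the marker protocol, mirroring the order of askFastBrain's checks.
function routeOf(answer: string): 'recorded' | 'question' | 'research_started' | 'answer' {
  if (answer.startsWith('RECORDED:') || answer.includes('\nRECORDED:')) return 'recorded';
  if (answer.startsWith('ASK_USER:') || answer.includes('\nASK_USER:')) return 'question';
  if (answer.includes('NEEDS_DEEPER_RESEARCH')) return 'research_started';
  return 'answer';
}

routeOf('RECORDED: Noted your preference for PostgreSQL.');              // 'recorded'
routeOf('ASK_USER: Which cloud provider are you deploying to?');         // 'question'
routeOf('PARTIAL: Quick take.\nNEEDS_DEEPER_RESEARCH: Compare X vs Y.'); // 'research_started'
routeOf('The default port is 5432.');                                    // 'answer'
```

The looksLikeUserQuestion heuristic then acts as a backstop on the third route: a short task ending in "?" (or one mentioning preferences, clarification, and so on) is spoken back to the user instead of spawning research.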
@@ -458,7 +1068,7 @@ export async function askHaiku(workingDir, sessionId, question, researchContext)
   *
   * @param isRefinement - true for the final post-research consolidation pass (higher token budget)
   */
- export async function processResearchChunk(workingDir, sessionId, task, contentChunks, isRefinement) {
+ export async function processResearchChunk(workingDir, sessionId, task, contentChunks, isRefinement, sessionBaseDir) {
      initProvider();
      if (provider === 'none')
          return null;
@@ -470,8 +1080,9 @@ export async function processResearchChunk(workingDir, sessionId, task, contentC
          return null;
      }
      specUpdateInProgress = true;
+     const wsDir = sessionBaseDir || workingDir;
      try {
-         const workspace = getSessionWorkspace(workingDir, sessionId);
+         const workspace = getSessionWorkspace(wsDir, sessionId);
          const specPath = `${workspace}/spec.md`;
          if (!existsSync(specPath)) {
              console.log('⚠️ processResearchChunk: spec.md not found, skipping');
@@ -483,7 +1094,7 @@ export async function processResearchChunk(workingDir, sessionId, task, contentC
          // Mid-research: skip library entirely to stay fast and avoid file proliferation
          let existingSection = '';
          if (isRefinement) {
-             const existingFiles = listLibraryFiles(workingDir, sessionId);
+             const existingFiles = listLibraryFiles(wsDir, sessionId);
              const existingContents = [];
              for (const file of existingFiles) {
                  const filePath = `${libraryDir}/${file}`;
@@ -516,7 +1127,7 @@ ${chunksText}
 
  Return ONLY valid JSON — no code fences, no explanation.`;
      let responseText = null;
-     if (provider === 'anthropic') {
+     if (anthropicClient) {
          const response = await anthropicClient.messages.create({
              model: ANTHROPIC_FAST_MODEL,
              max_tokens: isRefinement ? 20000 : 10000,
@@ -525,7 +1136,7 @@ Return ONLY valid JSON — no code fences, no explanation.`;
          });
          responseText = response.content[0].type === 'text' ? response.content[0].text : null;
      }
-     else {
+     else if (geminiClient) {
          const response = await geminiClient.models.generateContent({
              model: GEMINI_FAST_MODEL,
              contents: userMessage,
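These two hunks, like several later ones, swap provider === 'anthropic' checks for client-presence checks (anthropicClient / geminiClient), so helper functions can use whichever client exists even when the other is the "primary" for Q&A. The repeated if/else could live in one dispatch helper; a consolidation sketch that reuses the module's own client handles and model constants, assuming every call site varies only the system prompt, user message, and token budget:

```ts
// Consolidation sketch, not package code: one home for the "Anthropic if present, else Gemini" branch.
async function completeWithAnyClient(system: string, userMessage: string, maxTokens: number): Promise<string | null> {
  if (anthropicClient) {
    const response = await anthropicClient.messages.create({
      model: ANTHROPIC_FAST_MODEL,
      max_tokens: maxTokens,
      system,
      messages: [{ role: 'user', content: userMessage }],
    });
    return response.content[0].type === 'text' ? response.content[0].text : null;
  }
  if (geminiClient) {
    const response = await geminiClient.models.generateContent({
      model: GEMINI_FAST_MODEL,
      contents: userMessage,
      config: { systemInstruction: system },
    });
    return response.text || null;
  }
  return null; // no provider configured
}
```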
@@ -627,6 +1238,68 @@ function parseChunkResponse(responseText) {
      return null;
  }
  // ============================================================
+ // augmentResearchResult — Fast brain adds spec context to agent results (NO summarization)
+ // ============================================================
+ /**
+  * Augment agent SDK research results with context from spec.md.
+  * Passes ALL specific details through verbatim — only ADDS context annotations.
+  * The voice model downstream handles summarization for speech.
+  *
+  * Falls back to returning the original result if the fast brain is unavailable.
+  */
+ export async function augmentResearchResult(workingDir, sessionId, task, agentResult) {
+     initProvider();
+     if (provider === 'none')
+         return agentResult;
+     try {
+         // Read spec for context
+         const specContent = readSessionSpec(workingDir, sessionId);
+         const libraryFiles = listLibraryFiles(workingDir, sessionId);
+         const specSection = specContent
+             ? `\n\nCurrent spec.md:\n${specContent}`
+             : '';
+         const libSection = libraryFiles.length > 0
+             ? `\n\nLibrary files available: ${libraryFiles.join(', ')}`
+             : '';
+         const userMessage = `Research task: "${task}"
+
+ Agent findings:
+ ${agentResult}
+ ${specSection}${libSection}
+
+ Augment the agent's findings with relevant context from the spec. Pass ALL details through verbatim.`;
+         let responseText = null;
+         if (anthropicClient) {
+             const response = await anthropicClient.messages.create({
+                 model: ANTHROPIC_FAST_MODEL,
+                 max_tokens: 16000,
+                 system: AUGMENT_RESULT_SYSTEM,
+                 messages: [{ role: 'user', content: userMessage }]
+             });
+             responseText = response.content[0].type === 'text' ? response.content[0].text : null;
+         }
+         else if (geminiClient) {
+             const response = await geminiClient.models.generateContent({
+                 model: GEMINI_FAST_MODEL,
+                 contents: userMessage,
+                 config: { systemInstruction: AUGMENT_RESULT_SYSTEM }
+             });
+             responseText = response.text || null;
+         }
+         if (!responseText || responseText.length < agentResult.length * 0.5) {
+             // If augmented result is suspiciously shorter, the LLM likely summarized — use original
+             console.log('⚠️ augmentResearchResult: augmented result too short, using original');
+             return agentResult;
+         }
+         console.log(`🔄 augmentResearchResult: augmented ${agentResult.length} → ${responseText.length} chars`);
+         return responseText;
+     }
+     catch (err) {
+         console.error('❌ augmentResearchResult failed:', err);
+         return agentResult; // Fallback to original on error
+     }
+ }
+ // ============================================================
  // updateSpecFromJSONL — Post-research spec consolidation via JSONL
  // ============================================================
  /**
@@ -641,7 +1314,7 @@ function parseChunkResponse(responseText) {
641
1314
  *
642
1315
  * Returns { spec, libraryFiles } or null if update failed.
643
1316
  */
644
- export async function updateSpecFromJSONL(workingDir, sessionId, task, researchLog) {
1317
+ export async function updateSpecFromJSONL(workingDir, sessionId, task, researchLog, sessionBaseDir) {
645
1318
  initProvider();
646
1319
  if (provider === 'none')
647
1320
  return null;
@@ -693,10 +1366,602 @@ export async function updateSpecFromJSONL(workingDir, sessionId, task, researchL
693
1366
  const totalChars = contentChunks.reduce((sum, c) => sum + c.length, 0);
694
1367
  console.log(`📖 updateSpecFromJSONL: read ${toolResults.length} tool results, ${agentTexts.length} agent messages, ${subagents.length} sub-agents (${totalChars} total chars)`);
695
1368
  // 3. Pass to processResearchChunk with isRefinement=true
696
- return processResearchChunk(workingDir, sessionId, task, contentChunks, true);
1369
+ return processResearchChunk(workingDir, sessionId, task, contentChunks, true, sessionBaseDir);
697
1370
  }
698
1371
  catch (err) {
699
1372
  console.error('❌ updateSpecFromJSONL failed:', err);
700
1373
  return null;
701
1374
  }
702
1375
  }
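A post-research hook might call `updateSpecFromJSONL` like this; the return shape `{ spec, libraryFiles }` follows the doc comment above, and the ids and log entries are placeholders:

```js
// Illustrative post-research hook; ids and log entries are made up.
import { updateSpecFromJSONL } from './dist/fast-brain.js';

const researchLog = ['searched npm', 'read 3 READMEs', 'compared bundle sizes'];
const result = await updateSpecFromJSONL(
  process.cwd(), 'session-42', 'compare JS frameworks', researchLog);

if (result) {
  console.log(`spec refreshed; ${result.libraryFiles.length} library file(s)`);
} else {
  console.log('update skipped: no provider, or the JSONL read failed');
}
```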
1376
+ // ============================================================
1377
+ // Fire-and-forget: Question Writer — writes user question to spec BEFORE agent starts
1378
+ // ============================================================
1379
+ /**
1380
+ * Fire-and-forget: Write a user question to spec.md Open Questions > From User
1381
+ * before the agent starts researching. Ensures every escalated question is tracked.
1382
+ *
1383
+ * Uses a simple LLM call to fuzzy-match existing questions and avoid duplicates.
1384
+ * Skips if spec.md doesn't exist yet or no provider is available.
1385
+ */
1386
+ export async function writeQuestionToSpec(workingDir, sessionId, question) {
1387
+ initProvider();
1388
+ if (provider === 'none')
1389
+ return;
1390
+ try {
1391
+ const workspace = getSessionWorkspace(workingDir, sessionId);
1392
+ const specPath = `${workspace}/spec.md`;
1393
+ if (!existsSync(specPath))
1394
+ return;
1395
+ const currentSpec = readFileSync(specPath, 'utf-8');
1396
+ // Quick check: if the question (or something very similar) is already in the spec, skip
1397
+ const normalizedQ = question.toLowerCase().replace(/[^a-z0-9\s]/g, '').trim();
1398
+ if (normalizedQ.length < 10)
1399
+ return; // Too short to track
1400
+ const systemPrompt = `You manage the "Open Questions" section of a research spec file.
1401
+
1402
+ Given the current spec.md and a new user question, decide:
1403
+ 1. Is this question (or something very similar) already tracked? If yes, output: SKIP
1404
+ 2. If not, output the COMPLETE updated spec.md with the question added under "## Open Questions > ### From User (unanswered)" as a checkbox: - [ ] Question
1405
+
1406
+ Rules:
1407
+ - Add a timestamp: (asked ${new Date().toLocaleTimeString()})
1408
+ - Do NOT modify any other section of the spec
1409
+ - Do NOT mark existing questions as answered
1410
+ - Output ONLY the full spec.md content or the word SKIP — nothing else`;
1411
+ const userMessage = `Current spec.md:\n\`\`\`\n${currentSpec}\n\`\`\`\n\nNew user question to track:\n"${question}"`;
1412
+ let responseText = null;
1413
+ if (anthropicClient) {
1414
+ const response = await anthropicClient.messages.create({
1415
+ model: ANTHROPIC_FAST_MODEL,
1416
+ max_tokens: 8000,
1417
+ system: systemPrompt,
1418
+ messages: [{ role: 'user', content: userMessage }]
1419
+ });
1420
+ responseText = response.content[0].type === 'text' ? response.content[0].text : null;
1421
+ }
1422
+ else if (geminiClient) {
1423
+ const response = await geminiClient.models.generateContent({
1424
+ model: GEMINI_FAST_MODEL,
1425
+ contents: userMessage,
1426
+ config: { systemInstruction: systemPrompt }
1427
+ });
1428
+ responseText = response.text || null;
1429
+ }
1430
+ if (!responseText || responseText.trim() === 'SKIP') {
1431
+ console.log(`📝 writeQuestionToSpec: question already tracked or skipped`);
1432
+ return;
1433
+ }
1434
+ // Strip code fences if present
1435
+ let updatedSpec = responseText.trim();
1436
+ if (updatedSpec.startsWith('```')) {
1437
+ updatedSpec = updatedSpec.replace(/^```(?:markdown)?\n?/, '').replace(/\n?```$/, '');
1438
+ }
1439
+ // Sanity check: reject the update if the spec shrank by more than 20% (likely truncation)
1440
+ if (updatedSpec.length >= currentSpec.length * 0.8) {
1441
+ writeFileSync(specPath, updatedSpec, 'utf-8');
1442
+ console.log(`📝 writeQuestionToSpec: added question to spec (${updatedSpec.length} chars)`);
1443
+ }
1444
+ }
1445
+ catch (err) {
1446
+ console.error('❌ writeQuestionToSpec failed:', err);
1447
+ }
1448
+ }
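Because errors are swallowed internally and nothing is returned, the natural call site skips `await` entirely. A sketch, with an assumed import path:

```js
// Fire-and-forget: not awaited, so the user's turn latency is unaffected.
import { writeQuestionToSpec } from './dist/fast-brain.js';

void writeQuestionToSpec(process.cwd(), 'session-42',
  'Which of the candidate frameworks still ships CommonJS builds?');
```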
1449
+ // ============================================================
1450
+ // Fire-and-forget: Answer Checker — checks agent output against open questions
1451
+ // ============================================================
1452
+ // Batching guard: coalesce rapid tool_result sequences into one check per 3s window
1453
+ let answerCheckTimer = null;
1454
+ let pendingAnswerCheck = null;
1455
+ /**
1456
+ * Fire-and-forget: Check if substantial agent output answers any open questions in spec.md.
1457
+ * Batched (3s window): rapid tool_result sequences coalesce into one check, and the latest output wins.
1458
+ *
1459
+ * When a question is answered, marks it with [x] and moves the answer to Findings.
1460
+ */
1461
+ export async function checkOutputAgainstQuestions(workingDir, sessionId, output, outputType) {
1462
+ // Store the latest check request (newer output replaces older)
1463
+ pendingAnswerCheck = { workingDir, sessionId, output, outputType };
1464
+ // Batch: the first call arms a 3s timer; later calls just update the pending payload
1465
+ if (answerCheckTimer)
1466
+ return;
1467
+ answerCheckTimer = setTimeout(async () => {
1468
+ answerCheckTimer = null;
1469
+ const check = pendingAnswerCheck;
1470
+ pendingAnswerCheck = null;
1471
+ if (!check)
1472
+ return;
1473
+ await executeAnswerCheck(check.workingDir, check.sessionId, check.output, check.outputType);
1474
+ }, 3000);
1475
+ }
1476
+ async function executeAnswerCheck(workingDir, sessionId, output, outputType) {
1477
+ initProvider();
1478
+ if (provider === 'none')
1479
+ return;
1480
+ try {
1481
+ const workspace = getSessionWorkspace(workingDir, sessionId);
1482
+ const specPath = `${workspace}/spec.md`;
1483
+ if (!existsSync(specPath))
1484
+ return;
1485
+ const currentSpec = readFileSync(specPath, 'utf-8');
1486
+ // Quick check: are there any open questions?
1487
+ if (!currentSpec.includes('- [ ]')) {
1488
+ return; // No open questions to check against
1489
+ }
1490
+ const systemPrompt = `You check if research output answers any open questions in a spec file.
1491
+
1492
+ Given the current spec.md and a piece of agent output (${outputType}), decide:
1493
+ 1. Does this output answer (fully or partially) any "- [ ]" questions in "## Open Questions"?
1494
+ 2. If YES: output the COMPLETE updated spec.md with:
1495
+ - Answered questions marked: - [x] Question → Brief answer summary (from research)
1496
+ - Key findings added to "## Findings & Resources" section
1497
+ 3. If NO questions are answered: output NONE
1498
+
1499
+ Rules:
1500
+ - Only mark a question answered if the output CLEARLY provides the answer
1501
+ - Keep the answer summary brief (1-2 sentences)
1502
+ - Do NOT modify questions that aren't answered by this output
1503
+ - Do NOT remove or rewrite existing Findings
1504
+ - Output ONLY the full spec.md content or the word NONE — nothing else`;
1505
+ // Truncate output to avoid overwhelming the model on very large tool results
1506
+ const truncatedOutput = output.length > 15000 ? output.substring(0, 15000) + '\n[... truncated]' : output;
1507
+ const userMessage = `Current spec.md:\n\`\`\`\n${currentSpec}\n\`\`\`\n\nAgent output (${outputType}):\n\`\`\`\n${truncatedOutput}\n\`\`\``;
1508
+ let responseText = null;
1509
+ if (anthropicClient) {
1510
+ const response = await anthropicClient.messages.create({
1511
+ model: ANTHROPIC_FAST_MODEL,
1512
+ max_tokens: 8000,
1513
+ system: systemPrompt,
1514
+ messages: [{ role: 'user', content: userMessage }]
1515
+ });
1516
+ responseText = response.content[0].type === 'text' ? response.content[0].text : null;
1517
+ }
1518
+ else if (geminiClient) {
1519
+ const response = await geminiClient.models.generateContent({
1520
+ model: GEMINI_FAST_MODEL,
1521
+ contents: userMessage,
1522
+ config: { systemInstruction: systemPrompt }
1523
+ });
1524
+ responseText = response.text || null;
1525
+ }
1526
+ if (!responseText || responseText.trim() === 'NONE') {
1527
+ return;
1528
+ }
1529
+ // Strip code fences if present
1530
+ let updatedSpec = responseText.trim();
1531
+ if (updatedSpec.startsWith('```')) {
1532
+ updatedSpec = updatedSpec.replace(/^```(?:markdown)?\n?/, '').replace(/\n?```$/, '');
1533
+ }
1534
+ // Sanity check: reject if the updated spec shrank by more than 20% (likely truncation)
1535
+ if (updatedSpec.length >= currentSpec.length * 0.8) {
1536
+ writeFileSync(specPath, updatedSpec, 'utf-8');
1537
+ console.log(`✅ checkOutputAgainstQuestions: marked question(s) as answered in spec (${updatedSpec.length} chars)`);
1538
+ }
1539
+ }
1540
+ catch (err) {
1541
+ console.error('❌ checkOutputAgainstQuestions failed:', err);
1542
+ }
1543
+ }
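The guard above is worth reading twice: it is not a classic trailing debounce. The first call arms a 3-second timer; later calls inside the window return early but overwrite the pending payload, so exactly one check runs, against the newest output. A sketch of that behavior (ids are placeholders):

```js
// Three results land inside one 3s window; only 'final result C' gets checked,
// because each call replaces pendingAnswerCheck while the first timer is armed.
import { checkOutputAgainstQuestions } from './dist/fast-brain.js';

for (const chunk of ['partial result A', 'partial result B', 'final result C']) {
  checkOutputAgainstQuestions(process.cwd(), 'session-42', chunk, 'tool_result');
}
// ~3s later: executeAnswerCheck runs once, with 'final result C'.
```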
1544
+ // ============================================================
1545
+ // contextualizeResearchUpdate — Fast brain generates natural voice updates during research
1546
+ // ============================================================
1547
+ /**
1548
+ * Generate a natural, contextualized voice update from raw research events.
1549
+ * Called by scheduleResearchBatch() instead of injecting raw events directly.
1550
+ *
1551
+ * Returns a natural 1-2 sentence update, or null if nothing interesting to say.
1552
+ * 3-second timeout — returns null if the LLM is too slow.
1553
+ */
1554
+ export async function contextualizeResearchUpdate(workingDir, sessionId, task, batchEvents, researchLog, chatHistory, sessionBaseDir) {
1555
+ initProvider();
1556
+ if (provider === 'none')
1557
+ return null;
1558
+ const wsDir = sessionBaseDir || workingDir;
1559
+ try {
1560
+ const specContent = readSessionSpec(wsDir, sessionId);
1561
+ const specTruncated = specContent ? specContent.substring(0, 1500) : '';
1562
+ // Read last 5 tool results for what was just found
1563
+ const recentResults = getRecentToolResults(sessionId, workingDir, 5);
1564
+ const resultsSummary = recentResults.map(tr => {
1565
+ const inputPreview = JSON.stringify(tr.toolInput).substring(0, 100);
1566
+ const resultPreview = tr.resultContent.substring(0, 200);
1567
+ return `[${tr.toolName}: ${inputPreview}] ${resultPreview}`;
1568
+ }).join('\n');
1569
+ const userMessage = `Research question: "${task}"
1570
+
1571
+ Recent events: ${batchEvents.slice(-10).join('. ')}
1572
+
1573
+ Research log (${researchLog.length} total steps): ${researchLog.slice(-15).join('. ')}
1574
+
1575
+ Recent findings:
1576
+ ${resultsSummary}
1577
+
1578
+ ${specTruncated ? `Spec context:\n${specTruncated}` : ''}`;
1579
+ const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 3000));
1580
+ let responsePromise;
1581
+ if (anthropicClient) {
1582
+ responsePromise = anthropicClient.messages.create({
1583
+ model: ANTHROPIC_FAST_MODEL,
1584
+ max_tokens: 200,
1585
+ system: CONTEXTUALIZE_UPDATE_SYSTEM,
1586
+ messages: [{ role: 'user', content: userMessage }]
1587
+ }).then(r => r.content[0].type === 'text' ? r.content[0].text : null);
1588
+ }
1589
+ else if (geminiClient) {
1590
+ responsePromise = geminiClient.models.generateContent({
1591
+ model: GEMINI_FAST_MODEL,
1592
+ contents: userMessage,
1593
+ config: { systemInstruction: CONTEXTUALIZE_UPDATE_SYSTEM }
1594
+ }).then(r => r.text || null);
1595
+ }
1596
+ else {
1597
+ return null;
1598
+ }
1599
+ const result = await Promise.race([responsePromise, timeoutPromise]);
1600
+ if (!result || result.trim() === 'NOTHING')
1601
+ return null;
1602
+ return result.trim();
1603
+ }
1604
+ catch (err) {
1605
+ console.error('❌ contextualizeResearchUpdate failed:', err);
1606
+ return null;
1607
+ }
1608
+ }
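The 3-second cutoff is a plain `Promise.race` against a timer that resolves to `null`. Since the same pattern appears again in `generateProactivePrompt` below, a tiny helper could keep it in one place; this helper is illustrative and not part of the module:

```js
// Illustrative helper, not exported by fast-brain.js. Unlike the inline version,
// it clears the timer so short-lived scripts are not kept alive by a stray timeout.
function withTimeout(promise, ms, fallback = null) {
  let timer;
  const expiry = new Promise((resolve) => {
    timer = setTimeout(() => resolve(fallback), ms);
  });
  return Promise.race([promise, expiry]).finally(() => clearTimeout(timer));
}

const slow = new Promise((resolve) => setTimeout(() => resolve('late'), 5000));
console.log(await withTimeout(slow, 3000)); // null: the 3s timer wins
```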
1609
+ // ============================================================
1610
+ // generateProactivePrompt — Fast brain generates conversation during research silence
1611
+ // ============================================================
1612
+ /**
1613
+ * Generate a proactive conversational prompt to keep the user engaged during research.
1614
+ * Called periodically (every 15s) during active research.
1615
+ *
1616
+ * Can ask open questions, discuss implications of findings, or give progress with depth.
1617
+ * Returns null when there is nothing interesting to say (the model signals that by replying NOTHING).
1618
+ * 3-second timeout.
1619
+ */
1620
+ export async function generateProactivePrompt(workingDir, sessionId, task, researchLog, previousPrompts, sessionBaseDir) {
1621
+ initProvider();
1622
+ if (provider === 'none')
1623
+ return null;
1624
+ const wsDir = sessionBaseDir || workingDir;
1625
+ try {
1626
+ const specContent = readSessionSpec(wsDir, sessionId);
1627
+ const specTruncated = specContent ? specContent.substring(0, 2000) : '';
1628
+ // Read recent discoveries from JSONL
1629
+ const recentResults = getRecentToolResults(sessionId, workingDir, 8);
1630
+ const resultsSummary = recentResults.map(tr => {
1631
+ const inputPreview = JSON.stringify(tr.toolInput).substring(0, 100);
1632
+ const resultPreview = tr.resultContent.substring(0, 300);
1633
+ return `[${tr.toolName}: ${inputPreview}] ${resultPreview}`;
1634
+ }).join('\n');
1635
+ // Read recent agent reasoning
1636
+ const recentText = readSessionHistory(sessionId, workingDir, {
1637
+ lastN: 5,
1638
+ types: ['assistant']
1639
+ });
1640
+ const reasoningSummary = recentText
1641
+ .filter(m => m.text && m.text.length > 20)
1642
+ .map(m => m.text.substring(0, 300))
1643
+ .join('\n');
1644
+ const userMessage = `Research question: "${task}"
1645
+
1646
+ Research progress (${researchLog.length} steps so far): ${researchLog.slice(-10).join('. ')}
1647
+
1648
+ Recent findings:
1649
+ ${resultsSummary}
1650
+
1651
+ Agent reasoning:
1652
+ ${reasoningSummary}
1653
+
1654
+ ${specTruncated ? `Session spec:\n${specTruncated}` : ''}
1655
+
1656
+ Previous things already said (DO NOT repeat):
1657
+ ${previousPrompts.length > 0 ? previousPrompts.join('\n') : '(none yet)'}`;
1658
+ const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 3000));
1659
+ let responsePromise;
1660
+ if (anthropicClient) {
1661
+ responsePromise = anthropicClient.messages.create({
1662
+ model: ANTHROPIC_FAST_MODEL,
1663
+ max_tokens: 200,
1664
+ system: PROACTIVE_PROMPT_SYSTEM,
1665
+ messages: [{ role: 'user', content: userMessage }]
1666
+ }).then(r => r.content[0].type === 'text' ? r.content[0].text : null);
1667
+ }
1668
+ else if (geminiClient) {
1669
+ responsePromise = geminiClient.models.generateContent({
1670
+ model: GEMINI_FAST_MODEL,
1671
+ contents: userMessage,
1672
+ config: { systemInstruction: PROACTIVE_PROMPT_SYSTEM }
1673
+ }).then(r => r.text || null);
1674
+ }
1675
+ else {
1676
+ return null;
1677
+ }
1678
+ const result = await Promise.race([responsePromise, timeoutPromise]);
1679
+ if (!result || result.trim() === 'NOTHING')
1680
+ return null;
1681
+ return result.trim();
1682
+ }
1683
+ catch (err) {
1684
+ console.error('❌ generateProactivePrompt failed:', err);
1685
+ return null;
1686
+ }
1687
+ }
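The `previousPrompts` parameter is the dedup memory: the caller owns the accumulator and feeds every spoken line back in on the next tick. A sketch of the 15-second loop described above (interval, ids, and log entries are assumptions):

```js
// Hypothetical engagement loop for an active research session.
import { generateProactivePrompt } from './dist/fast-brain.js';

const spoken = [];
const tick = setInterval(async () => {
  const next = await generateProactivePrompt(
    process.cwd(), 'session-42', 'compare JS frameworks',
    ['step 1', 'step 2', 'step 3', 'step 4', 'step 5'], spoken);
  if (next) spoken.push(next); // lands in the "DO NOT repeat" list next tick
}, 15000);
// clearInterval(tick) when research completes.
```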
1688
+ // ============================================================
1689
+ // generateVisualDocument — Fast brain generates structured visual documents
1690
+ // ============================================================
1691
+ /**
1692
+ * Generate a structured visual document (comparison table, Mermaid diagram,
1693
+ * analysis, or summary) from research findings.
1694
+ *
1695
+ * Reads spec.md, JSONL results, and library for context.
1696
+ * Writes the result to library/ and returns the filename + content.
1697
+ */
1698
+ export async function generateVisualDocument(workingDir, sessionId, request, documentType, sessionBaseDir) {
1699
+ initProvider();
1700
+ if (provider === 'none')
1701
+ return null;
1702
+ const wsDir = sessionBaseDir || workingDir;
1703
+ try {
1704
+ const workspace = getSessionWorkspace(wsDir, sessionId);
1705
+ const specContent = readSessionSpec(wsDir, sessionId) || '';
1706
+ const libraryFiles = listLibraryFiles(wsDir, sessionId);
1707
+ // Read library contents for context
1708
+ const libraryDir = `${workspace}/library`;
1709
+ const libraryContents = [];
1710
+ for (const file of libraryFiles.slice(0, 5)) {
1711
+ const filePath = `${libraryDir}/${file}`;
1712
+ if (existsSync(filePath)) {
1713
+ try {
1714
+ const content = readFileSync(filePath, 'utf-8');
1715
+ libraryContents.push(`--- ${file} ---\n${content.substring(0, 3000)}`);
1716
+ }
1717
+ catch { /* skip */ }
1718
+ }
1719
+ }
1720
+ // Read recent JSONL results for raw data
1721
+ const toolResults = getRecentToolResults(sessionId, workingDir, 20);
1722
+ const toolResultsSummary = toolResults.map(tr => {
1723
+ const inputPreview = JSON.stringify(tr.toolInput).substring(0, 150);
1724
+ return `[${tr.toolName}: ${inputPreview}]\n${tr.resultContent.substring(0, 1000)}`;
1725
+ }).join('\n\n---\n\n');
1726
+ const userMessage = `Document request: "${request}"
1727
+ Document type: ${documentType}
1728
+
1729
+ Session spec:
1730
+ ${specContent}
1731
+
1732
+ ${libraryContents.length > 0 ? `Library files:\n${libraryContents.join('\n\n')}` : ''}
1733
+
1734
+ Recent research data:
1735
+ ${toolResultsSummary}
1736
+
1737
+ Return JSON: {"fileName": "descriptive-name.md", "content": "full markdown content"}`;
1738
+ let responseText = null;
1739
+ if (anthropicClient) {
1740
+ const response = await anthropicClient.messages.create({
1741
+ model: ANTHROPIC_FAST_MODEL,
1742
+ max_tokens: 16000,
1743
+ system: VISUAL_DOCUMENT_SYSTEM,
1744
+ messages: [{ role: 'user', content: userMessage }]
1745
+ });
1746
+ responseText = response.content[0].type === 'text' ? response.content[0].text : null;
1747
+ }
1748
+ else if (geminiClient) {
1749
+ const response = await geminiClient.models.generateContent({
1750
+ model: GEMINI_FAST_MODEL,
1751
+ contents: userMessage,
1752
+ config: { systemInstruction: VISUAL_DOCUMENT_SYSTEM }
1753
+ });
1754
+ responseText = response.text || null;
1755
+ }
1756
+ if (!responseText)
1757
+ return null;
1758
+ // Parse JSON response
1759
+ const cleaned = responseText.replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/i, '').trim(); // (?:json)? groups the optional tag so a bare fence is stripped too
1760
+ let parsed;
1761
+ try {
1762
+ parsed = JSON.parse(cleaned);
1763
+ }
1764
+ catch {
1765
+ // Try to extract from malformed response
1766
+ const fnMatch = cleaned.match(/"fileName"\s*:\s*"([^"]+)"/);
1767
+ const ctMatch = cleaned.match(/"content"\s*:\s*"((?:[^"\\]|\\.)*)"/s);
1768
+ if (fnMatch && ctMatch) {
1769
+ try {
1770
+ parsed = { fileName: fnMatch[1], content: JSON.parse(`"${ctMatch[1]}"`) };
1771
+ }
1772
+ catch {
1773
+ console.error('⚠️ generateVisualDocument: failed to parse response');
1774
+ return null;
1775
+ }
1776
+ }
1777
+ else {
1778
+ return null;
1779
+ }
1780
+ }
1781
+ if (!parsed.fileName || !parsed.content)
1782
+ return null;
1783
+ // Write to library
1784
+ const safeName = parsed.fileName.replace(/[^a-zA-Z0-9._-]/g, '-');
1785
+ const libraryPath = `${workspace}/library`;
1786
+ mkdirSync(libraryPath, { recursive: true });
1787
+ const filePath = `${libraryPath}/${safeName}`;
1788
+ writeFileSync(filePath, parsed.content, 'utf-8');
1789
+ console.log(`📊 generateVisualDocument: wrote ${safeName} (${parsed.content.length} chars)`);
1790
+ return { fileName: safeName, content: parsed.content };
1791
+ }
1792
+ catch (err) {
1793
+ console.error('❌ generateVisualDocument failed:', err);
1794
+ return null;
1795
+ }
1796
+ }
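A caller sketch: the document type strings named in the header comment (comparison, diagram, analysis, summary) are passed through as-is, and the return value mirrors what was written to library/. Request text and ids below are placeholders:

```js
// Illustrative call site for generateVisualDocument.
import { generateVisualDocument } from './dist/fast-brain.js';

const doc = await generateVisualDocument(
  process.cwd(), 'session-42',
  'table comparing the three candidate frameworks', 'comparison');

if (doc) {
  // doc.fileName is already sanitized ([^a-zA-Z0-9._-] replaced with '-')
  console.log(`wrote library/${doc.fileName} (${doc.content.length} chars)`);
}
```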
1797
+ // ============================================================
1798
+ // processResearchCompletion — Generate teleprompter script from research results
1799
+ // ============================================================
1800
+ /**
1801
+ * Generate a complete teleprompter script from research results.
1802
+ * Replaces augmentResearchResult + extractPriorityContent.
1803
+ * Reads full JSONL and produces a spoken monologue.
1804
+ */
1805
+ export async function processResearchCompletion(workingDir, sessionId, task, agentResult, chatHistory, sendToChat, sessionBaseDir) {
1806
+ initProvider();
1807
+ if (provider === 'none')
1808
+ return agentResult.substring(0, 500);
1809
+ const wsDir = sessionBaseDir || workingDir;
1810
+ try {
1811
+ // Read spec for context
1812
+ const specContent = readSessionSpec(wsDir, sessionId) || '';
1813
+ // Read the last 30 tool results with full, untruncated result content. The user
1814
+ // waited for this research, so give the completion generator the complete picture.
1815
+ const toolResults = getRecentToolResults(sessionId, workingDir, 30);
1816
+ const toolSummary = toolResults.map(tr => {
1817
+ const inputPreview = JSON.stringify(tr.toolInput).substring(0, 200);
1818
+ return `[${tr.toolName}: ${inputPreview}]\n${tr.resultContent}`;
1819
+ }).join('\n\n---\n\n');
1820
+ // Also read agent reasoning for synthesis context
1821
+ const agentTexts = readSessionHistory(sessionId, workingDir, {
1822
+ lastN: 20,
1823
+ types: ['assistant']
1824
+ }).filter(m => m.text && m.text.length > 30)
1825
+ .map(m => m.text)
1826
+ .join('\n\n');
1827
+ // Read sub-agent findings if any
1828
+ const subagents = getSubagentTranscripts(sessionId, workingDir);
1829
+ const subagentSummary = subagents.length > 0
1830
+ ? subagents.map(sa => {
1831
+ const texts = sa.messages.filter(m => m.text && m.text.length > 30).map(m => m.text);
1832
+ return `[Sub-agent ${sa.taskId}]\n${texts.join('\n')}`;
1833
+ }).join('\n\n')
1834
+ : '';
1835
+ const historyStr = chatHistory
1836
+ ? chatHistory.slice(-10).map(t => `${t.role}: ${t.text}`).join('\n')
1837
+ : '';
1838
+ const userMessage = `Research task: "${task}"
1839
+
1840
+ Agent's headline findings:
1841
+ ${agentResult}
1842
+
1843
+ Full tool outputs (${toolResults.length} results):
1844
+ ${toolSummary}
1845
+
1846
+ ${agentTexts ? `Agent reasoning and analysis:\n${agentTexts.substring(0, 8000)}` : ''}
1847
+
1848
+ ${subagentSummary ? `Sub-agent findings:\n${subagentSummary.substring(0, 4000)}` : ''}
1849
+
1850
+ ${specContent ? `Session spec (for context):\n${specContent.substring(0, 3000)}` : ''}
1851
+
1852
+ ${historyStr ? `Recent conversation (match this vocabulary):\n${historyStr}` : ''}
1853
+
1854
+ Write the spoken monologue now. The user waited for this research — be comprehensive.${sendToChat ? ' If you have structured data (lists, URLs, code, steps), include a CHAT_CONTENT section at the end after a line "---CHAT---" with markdown content to send to the chat panel.' : ''}`;
1855
+ let script = null;
1856
+ if (anthropicClient) {
1857
+ const response = await anthropicClient.messages.create({
1858
+ model: ANTHROPIC_FAST_MODEL,
1859
+ max_tokens: 4000,
1860
+ system: RESEARCH_COMPLETION_SYSTEM,
1861
+ messages: [{ role: 'user', content: userMessage }]
1862
+ });
1863
+ script = response.content[0].type === 'text' ? response.content[0].text : null;
1864
+ }
1865
+ else if (geminiClient) {
1866
+ const response = await geminiClient.models.generateContent({
1867
+ model: GEMINI_FAST_MODEL,
1868
+ contents: userMessage,
1869
+ config: { systemInstruction: RESEARCH_COMPLETION_SYSTEM }
1870
+ });
1871
+ script = response.text || null;
1872
+ }
1873
+ if (!script)
1874
+ return agentResult.substring(0, 500);
1875
+ // Check for chat content section
1876
+ if (sendToChat && script.includes('---CHAT---')) {
1877
+ const parts = script.split('---CHAT---');
1878
+ const spokenPart = parts[0].trim();
1879
+ const chatPart = parts[1]?.trim();
1880
+ if (chatPart) {
1881
+ console.log(`💬 processResearchCompletion: sending ${chatPart.length} chars to chat`);
1882
+ sendToChat(chatPart);
1883
+ }
1884
+ console.log(`🎙️ processResearchCompletion: generated ${spokenPart.length} char script + ${chatPart?.length || 0} char chat content`);
1885
+ return spokenPart;
1886
+ }
1887
+ console.log(`🎙️ processResearchCompletion: generated ${script.length} char script`);
1888
+ return script;
1889
+ }
1890
+ catch (err) {
1891
+ console.error('❌ processResearchCompletion failed:', err);
1892
+ // Fallback: return truncated agent result as-is
1893
+ return agentResult.substring(0, 500);
1894
+ }
1895
+ }
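From the caller's side, the `---CHAT---` contract looks like this: the spoken script comes back as the return value, while structured markdown, if any, is pushed through the `sendToChat` callback. A sketch with placeholder data:

```js
// Hypothetical call site; history and agent result are placeholders.
import { processResearchCompletion } from './dist/fast-brain.js';

const chatPanel = [];
const script = await processResearchCompletion(
  process.cwd(), 'session-42', 'compare JS frameworks',
  'Framework X wins on bundle size; Y wins on ecosystem.',
  [{ role: 'user', text: 'so which one should we pick?' }],
  (markdown) => chatPanel.push(markdown)); // receives the part after ---CHAT---

// `script` holds only the spoken monologue; lists/URLs/code went to chatPanel.
```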
1896
+ // ============================================================
1897
+ // handleResearchBatch — Decide whether research events are worth speaking
1898
+ // ============================================================
1899
+ /**
1900
+ * Process a batch of research events and decide whether to speak.
1901
+ * Gates contextualizeResearchUpdate (callers should use this entry point instead) and usually returns null (silent).
1902
+ * Only speaks when something genuinely critical is found.
1903
+ */
1904
+ export async function handleResearchBatch(workingDir, sessionId, task, batchEvents, researchLog, chatHistory, sessionBaseDir) {
1905
+ // Usually: stay silent. The frontend spinner handles visual feedback.
1906
+ // Only speak if the batch contains something genuinely interesting.
1907
+ // Quick heuristic: if fewer than 5 research steps, too early to say anything useful
1908
+ if (researchLog.length < 5)
1909
+ return null;
1910
+ // Check if any event mentions something critical (error, user-impacting finding)
1911
+ const hasCritical = batchEvents.some(e => e.toLowerCase().includes('error') ||
1912
+ e.toLowerCase().includes('warning') ||
1913
+ e.toLowerCase().includes('breaking') ||
1914
+ e.toLowerCase().includes('deprecated'));
1915
+ if (!hasCritical)
1916
+ return null;
1917
+ // Something interesting — generate a brief spoken update via contextualizeResearchUpdate
1918
+ return contextualizeResearchUpdate(workingDir, sessionId, task, batchEvents, researchLog, chatHistory, sessionBaseDir);
1919
+ }
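The gate is cheap string matching before any LLM spend, so most batches short-circuit to `null`. For instance (ids and events are placeholders):

```js
// Keyword gate sketch: the first batch stays silent, the second may speak.
import { handleResearchBatch } from './dist/fast-brain.js';

const log = ['s1', 's2', 's3', 's4', 's5', 's6']; // >= 5 steps, past the early gate
const quiet = await handleResearchBatch(process.cwd(), 'session-42',
  'compare JS frameworks', ['fetched README', 'parsed changelog'], log, []);
console.log(quiet); // null: no critical keyword in the batch

await handleResearchBatch(process.cwd(), 'session-42', 'compare JS frameworks',
  ['found a DEPRECATED notice in the docs'], log, []);
// delegates to contextualizeResearchUpdate; still null if that call times out
```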
1920
+ // ============================================================
1921
+ // prepareBriefingScript — Session resume/switch spoken briefing
1922
+ // ============================================================
1923
+ /**
1924
+ * Generate a brief spoken script for session resume or switch.
1925
+ * Replaces buildContextBriefing + getSpecForVoiceModel.
1926
+ */
1927
+ export async function prepareBriefingScript(workingDir, sessionId, conversationHistory, type = 'default') {
1928
+ initProvider();
1929
+ // Read spec for context
1930
+ const specContent = readSessionSpec(workingDir, sessionId);
1931
+ if (!specContent && (!conversationHistory || conversationHistory.length === 0)) {
1932
+ return type === 'switch'
1933
+ ? 'Switched sessions. What would you like to work on?'
1934
+ : 'Welcome back. What would you like to work on?';
1935
+ }
1936
+ // Extract goal and last topic from spec
1937
+ const goalMatch = specContent?.match(/## Goal\s*\n([\s\S]*?)(?=\n##|$)/);
1938
+ const goal = goalMatch ? goalMatch[1].trim().substring(0, 200) : '';
1939
+ const prefix = type === 'switch' ? 'Switched over.' : 'Welcome back.';
1940
+ // If we have a goal, build a brief spoken briefing from templates (no LLM call)
1941
+ if (goal) {
1942
+ const lastExchanges = conversationHistory
1943
+ ? conversationHistory.slice(-3).map(e => `${e.role}: ${e.text.substring(0, 100)}`).join('. ')
1944
+ : '';
1945
+ if (lastExchanges) {
1946
+ return `${prefix} We were working on ${goal}. Last time we discussed ${lastExchanges.substring(0, 150)}. Where would you like to pick up?`;
1947
+ }
1948
+ return `${prefix} We were working on ${goal}. Where would you like to pick up?`;
1949
+ }
1950
+ return type === 'switch'
1951
+ ? 'Switched sessions. What would you like to work on?'
1952
+ : 'Session resumed. What would you like to work on?';
1953
+ }
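Unlike most functions above, this one is pure templating with no LLM round trip, so it is safe to call on every resume without latency concerns. A sketch (ids and history are placeholders):

```js
// Resume-time sketch; the spec's "## Goal" section drives the wording.
import { prepareBriefingScript } from './dist/fast-brain.js';

const line = await prepareBriefingScript(process.cwd(), 'session-42',
  [{ role: 'user', text: 'let us lock in Framework X' }], 'switch');
console.log(line);
// e.g. "Switched over. We were working on <goal>. Last time we discussed ..."
```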
1954
+ // ============================================================
1955
+ // prepareRecoveryScript — Gemini crash recovery spoken script
1956
+ // ============================================================
1957
+ /**
1958
+ * Generate a spoken script after Gemini auto-recovery.
1959
+ * Replaces inline recovery logic in index.ts.
1960
+ */
1961
+ export async function prepareRecoveryScript(conversationHistory) {
1962
+ if (conversationHistory && conversationHistory.length > 0) {
1963
+ const lastTopic = conversationHistory[conversationHistory.length - 1];
1964
+ return `Voice session was briefly interrupted but I'm back. We were talking about ${lastTopic.text.substring(0, 100)}. Where were we?`;
1965
+ }
1966
+ return 'Voice session was briefly interrupted but I\'m back. What were we working on?';
1967
+ }