osborn 0.5.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/.claude/settings.local.json +9 -0
  2. package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
  3. package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
  4. package/.claude/skills/playwright-browser/SKILL.md +90 -0
  5. package/.claude/skills/shadcn/SKILL.md +232 -0
  6. package/.claude/skills/shadcn/image.png +0 -0
  7. package/.claude/skills/youtube-transcript/SKILL.md +24 -0
  8. package/.dockerignore +13 -0
  9. package/Dockerfile +103 -0
  10. package/deploy.sh +70 -0
  11. package/dist/claude-auth.d.ts +60 -0
  12. package/dist/claude-auth.js +334 -0
  13. package/dist/claude-llm.d.ts +51 -2
  14. package/dist/claude-llm.js +619 -86
  15. package/dist/config.d.ts +5 -1
  16. package/dist/config.js +4 -1
  17. package/dist/fast-brain.d.ts +70 -16
  18. package/dist/fast-brain.js +662 -99
  19. package/dist/index-3-2-26-legacy.d.ts +1 -0
  20. package/dist/index-3-2-26-legacy.js +2233 -0
  21. package/dist/index.js +979 -429
  22. package/dist/jsonl-search.d.ts +66 -0
  23. package/dist/jsonl-search.js +274 -0
  24. package/dist/leagcyprompts2.d.ts +0 -0
  25. package/dist/leagcyprompts2.js +573 -0
  26. package/dist/pipeline-direct-llm.d.ts +77 -0
  27. package/dist/pipeline-direct-llm.js +221 -0
  28. package/dist/pipeline-fastbrain.d.ts +45 -0
  29. package/dist/pipeline-fastbrain.js +373 -0
  30. package/dist/prompts-2-25-26.d.ts +0 -0
  31. package/dist/prompts-2-25-26.js +518 -0
  32. package/dist/prompts-3-2-26.d.ts +78 -0
  33. package/dist/prompts-3-2-26.js +1319 -0
  34. package/dist/prompts.d.ts +83 -12
  35. package/dist/prompts.js +2064 -587
  36. package/dist/recall-client.d.ts +33 -0
  37. package/dist/recall-client.js +101 -0
  38. package/dist/session-access.d.ts +24 -0
  39. package/dist/session-access.js +74 -0
  40. package/dist/summary-index.d.ts +87 -0
  41. package/dist/summary-index.js +570 -0
  42. package/dist/turn-detector-shim.d.ts +24 -0
  43. package/dist/turn-detector-shim.js +83 -0
  44. package/dist/voice-io.d.ts +15 -5
  45. package/dist/voice-io.js +52 -20
  46. package/fly.toml +30 -0
  47. package/package.json +18 -13
@@ -10,7 +10,9 @@ import { llm, shortuuid, DEFAULT_API_CONNECT_OPTIONS } from '@livekit/agents';
10
10
  import { query } from '@anthropic-ai/claude-agent-sdk';
11
11
  import { EventEmitter } from 'events';
12
12
  import { saveSessionMetadata } from './config.js';
13
- import { getResearchSystemPrompt } from './prompts.js';
13
+ import { getResearchSystemPrompt, getDirectModeResearchPrompt } from './prompts.js';
14
+ import { existsSync, readdirSync, readFileSync } from 'node:fs';
15
+ import { join } from 'node:path';
14
16
  /**
15
17
  * Strip markdown formatting for TTS (text-to-speech)
16
18
  * Removes **bold**, ##headers, ```code```, etc. so TTS doesn't read them literally
@@ -47,40 +49,30 @@ function stripMarkdownForTTS(text) {
47
49
  .trim();
48
50
  }
49
51
  /**
50
- * Summarize text for TTS - create short spoken summaries
51
- * Full output goes to frontend, this condensed version is spoken
52
+ * Load skill files from agent/.claude/skills/{name}/SKILL.md
53
+ * Injects into system prompt so Claude sees them as available capabilities.
54
+ * Skills execute via Bash — no SDK settingSources needed.
52
55
  */
53
- function summarizeForTTS(text, maxLength = 500) {
54
- // First strip markdown
55
- let summary = stripMarkdownForTTS(text);
56
- // Remove file paths (keep just filename)
57
- summary = summary.replace(/\/[\w\-\.\/]+\/([\w\-\.]+)/g, '$1');
58
- // Remove code block placeholders if too many
59
- const codeBlockCount = (summary.match(/\[code block\]/g) || []).length;
60
- if (codeBlockCount > 1) {
61
- summary = summary.replace(/\[code block\]/g, '').replace(/\s+/g, ' ');
62
- summary = summary.trim() + ` I've included ${codeBlockCount} code examples.`;
63
- }
64
- // If still too long, take first sentence(s) up to maxLength
65
- if (summary.length > maxLength) {
66
- // Try to break at sentence boundaries
67
- const sentences = summary.match(/[^.!?]+[.!?]+/g) || [summary];
68
- let result = '';
69
- for (const sentence of sentences) {
70
- if ((result + sentence).length <= maxLength) {
71
- result += sentence;
56
+ function loadSkillsFromDir(agentDir) {
57
+ const skillsDir = join(agentDir, '.claude', 'skills');
58
+ if (!existsSync(skillsDir))
59
+ return '';
60
+ const skills = [];
61
+ try {
62
+ for (const skillName of readdirSync(skillsDir)) {
63
+ const skillFile = join(skillsDir, skillName, 'SKILL.md');
64
+ if (existsSync(skillFile)) {
65
+ skills.push(readFileSync(skillFile, 'utf-8').trim());
72
66
  }
73
- else {
74
- break;
75
- }
76
- }
77
- // If no complete sentence fits, truncate with ellipsis
78
- if (!result) {
79
- result = summary.substring(0, maxLength - 3) + '...';
80
67
  }
81
- summary = result.trim();
82
68
  }
83
- return summary || 'Done.';
69
+ catch (err) {
70
+ console.warn('⚠️ Failed to load skills:', err);
71
+ }
72
+ if (skills.length === 0)
73
+ return '';
74
+ console.log(`📚 Loaded ${skills.length} skill(s) from ${skillsDir}`);
75
+ return `<available-skills>\n${skills.join('\n\n---\n\n')}\n</available-skills>`;
84
76
  }
85
77
  // Research mode tools — full research capabilities
86
78
  const RESEARCH_TOOLS = [
@@ -88,6 +80,49 @@ const RESEARCH_TOOLS = [
88
80
  'Bash', 'WebSearch', 'WebFetch',
89
81
  'LSP', 'Task', 'TodoWrite',
90
82
  ];
83
+ /**
84
+ * Pushable async iterable — allows pushing SDKUserMessages into a query's
85
+ * streaming input. The query subprocess stays alive between pushes (no JSONL replay).
86
+ */
87
+ class MessageChannel {
88
+ #queue = [];
89
+ #waiting = null;
90
+ #done = false;
91
+ push(item) {
92
+ if (this.#done)
93
+ return;
94
+ if (this.#waiting) {
95
+ const resolve = this.#waiting;
96
+ this.#waiting = null;
97
+ resolve({ value: item, done: false });
98
+ }
99
+ else {
100
+ this.#queue.push(item);
101
+ }
102
+ }
103
+ close() {
104
+ this.#done = true;
105
+ if (this.#waiting) {
106
+ const resolve = this.#waiting;
107
+ this.#waiting = null;
108
+ resolve({ value: undefined, done: true });
109
+ }
110
+ }
111
+ get closed() { return this.#done; }
112
+ [Symbol.asyncIterator]() {
113
+ return {
114
+ next: () => {
115
+ if (this.#queue.length > 0) {
116
+ return Promise.resolve({ value: this.#queue.shift(), done: false });
117
+ }
118
+ if (this.#done) {
119
+ return Promise.resolve({ value: undefined, done: true });
120
+ }
121
+ return new Promise(resolve => { this.#waiting = resolve; });
122
+ },
123
+ };
124
+ }
125
+ }
91
126
  /**
92
127
  * Claude LLM - Wraps Claude Agent SDK for LiveKit
93
128
  * Research mode: reads anything, writes only to session workspace
@@ -104,6 +139,15 @@ export class ClaudeLLM extends llm.LLM {
104
139
  #latestCheckpoint = null;
105
140
  // Pending permission request (for voice approval flow)
106
141
  #pendingPermission = null;
142
+ // Persistent session — single query() with AsyncIterable<SDKUserMessage> input.
143
+ // Subprocess spawns once on first chat(), stays alive for all subsequent messages.
144
+ // No JSONL replay after the first cold start.
145
+ #persistentQuery = null;
146
+ #messageChannel = null;
147
+ #backgroundConsumerRunning = false;
148
+ // Active queries — multiple can be running (SDK queues them internally).
149
+ // We keep ALL references so interrupt() can stop whatever is currently executing.
150
+ #activeQueries = new Set();
107
151
  constructor(opts = {}) {
108
152
  super();
109
153
  // Session resume/continue options
@@ -113,15 +157,21 @@ export class ClaudeLLM extends llm.LLM {
113
157
  this.#mcpServers = opts.mcpServers || {};
114
158
  this.#opts = {
115
159
  workingDirectory: opts.workingDirectory || process.cwd(),
160
+ sessionBaseDir: opts.sessionBaseDir || opts.workingDirectory || process.cwd(),
116
161
  permissionMode: opts.permissionMode || 'default',
117
162
  allowedTools: opts.allowedTools || RESEARCH_TOOLS,
118
163
  resumeSessionId: this.#resumeSessionId || undefined,
119
164
  continueSession: this.#continueSession,
120
165
  mcpServers: this.#mcpServers,
166
+ voiceMode: opts.voiceMode || 'realtime',
167
+ skipTTSQueue: opts.skipTTSQueue || false,
121
168
  };
122
169
  this.#eventEmitter = opts.eventEmitter || new EventEmitter();
123
170
  console.log('🟠 ClaudeLLM initialized (Research Mode)');
124
- console.log(` 📁 Working dir: ${this.#opts.workingDirectory}`);
171
+ console.log(` 📁 Working dir (cwd): ${this.#opts.workingDirectory}`);
172
+ if (this.#opts.sessionBaseDir !== this.#opts.workingDirectory) {
173
+ console.log(` 📁 Session base dir: ${this.#opts.sessionBaseDir}`);
174
+ }
125
175
  console.log(` 🔧 Allowed tools: ${this.#opts.allowedTools?.join(', ')}`);
126
176
  const mcpCount = Object.keys(this.#mcpServers).length;
127
177
  if (mcpCount > 0) {
@@ -223,7 +273,7 @@ export class ClaudeLLM extends llm.LLM {
223
273
  return 'claude.agent-sdk';
224
274
  }
225
275
  get model() {
226
- return this.#opts.model || 'claude-sonnet-4-6';
276
+ return this.#opts.model || 'claude-sonnet-4-6'; // Sonnet orchestrator with named sub-agents
227
277
  }
228
278
  get sessionId() {
229
279
  return this.#sessionId;
@@ -245,9 +295,10 @@ export class ClaudeLLM extends llm.LLM {
245
295
  * Clears pending permissions and resets conversation tracking
246
296
  */
247
297
  resetForSessionSwitch() {
298
+ // Kill persistent session — new session needs fresh subprocess
299
+ this.closeSession();
248
300
  // Clear any pending permission request from previous session
249
301
  if (this.#pendingPermission) {
250
- // Deny the pending permission to clean up
251
302
  this.#pendingPermission.resolve({
252
303
  behavior: 'deny',
253
304
  message: 'Session switched - permission request cancelled',
@@ -335,13 +386,277 @@ export class ClaudeLLM extends llm.LLM {
335
386
  hasCheckpoints() {
336
387
  return this.#checkpoints.length > 0;
337
388
  }
338
- chat({ chatCtx, toolCtx, connOptions = DEFAULT_API_CONNECT_OPTIONS, }) {
389
+ // ============================================================
390
+ // AGENT CONTROL — interrupt, abort, rewind (for fast brain)
391
+ // ============================================================
392
+ /**
393
+ * Interrupt the current Claude query gracefully (like pressing Esc).
394
+ * Stops current tool execution but keeps the process alive.
395
+ * Returns true if interrupted, false if no active query.
396
+ */
397
+ async interruptQuery() {
398
+ // Prefer persistent query's interrupt() — graceful Esc that keeps subprocess alive
399
+ if (this.#persistentQuery && typeof this.#persistentQuery.interrupt === 'function') {
400
+ try {
401
+ await this.#persistentQuery.interrupt();
402
+ console.log('🛑 Interrupted persistent session (Esc equivalent — subprocess stays alive)');
403
+ return true;
404
+ }
405
+ catch (err) {
406
+ console.error('⚠️ Persistent interrupt failed:', err?.message);
407
+ }
408
+ }
409
+ // Fallback: interrupt any active one-shot queries (realtime mode research)
410
+ if (this.#activeQueries.size === 0)
411
+ return false;
412
+ const queriesToInterrupt = [...this.#activeQueries];
413
+ let interrupted = false;
414
+ for (const q of queriesToInterrupt) {
415
+ if (typeof q.interrupt === 'function') {
416
+ try {
417
+ await q.interrupt();
418
+ interrupted = true;
419
+ }
420
+ catch (err) {
421
+ console.error('⚠️ Interrupt failed:', err?.message);
422
+ }
423
+ }
424
+ }
425
+ if (interrupted) {
426
+ console.log(`🛑 Interrupted ${queriesToInterrupt.length} active query(s) (Esc equivalent)`);
427
+ }
428
+ return interrupted;
429
+ }
430
+ /**
431
+ * Hard abort all active queries (like Ctrl+C).
432
+ * Kills subprocesses. Next message will spawn new processes.
433
+ */
434
+ abortQuery() {
435
+ // Kill persistent session first (if alive)
436
+ this.closeSession();
437
+ // Also kill any one-shot queries (realtime research)
438
+ for (const q of this.#activeQueries) {
439
+ try {
440
+ q.return?.();
441
+ }
442
+ catch { }
443
+ }
444
+ this.#activeQueries.clear();
445
+ console.log('🛑 All queries aborted (Ctrl+C equivalent)');
446
+ }
447
+ /**
448
+ * Rewind file changes to a specific checkpoint.
449
+ * Uses the most recently added query (most likely to have the rewind capability).
450
+ */
451
+ async rewindToCheckpoint(checkpointId) {
452
+ const id = checkpointId || this.#latestCheckpoint;
453
+ if (!id) {
454
+ console.log('⚠️ No checkpoint available for rewind');
455
+ return false;
456
+ }
457
+ // Prefer persistent query (has the full session context)
458
+ if (this.#persistentQuery && typeof this.#persistentQuery.rewindFiles === 'function') {
459
+ try {
460
+ await this.#persistentQuery.rewindFiles(id);
461
+ console.log(`🔄 Files rewound to checkpoint: ${id.substring(0, 8)}...`);
462
+ return true;
463
+ }
464
+ catch (err) {
465
+ console.error('⚠️ Rewind failed:', err?.message);
466
+ }
467
+ }
468
+ // Fallback: try latest one-shot query
469
+ const queries = [...this.#activeQueries];
470
+ const latest = queries[queries.length - 1];
471
+ if (latest && typeof latest.rewindFiles === 'function') {
472
+ try {
473
+ await latest.rewindFiles(id);
474
+ console.log(`🔄 Files rewound to checkpoint: ${id.substring(0, 8)}...`);
475
+ return true;
476
+ }
477
+ catch (err) {
478
+ console.error('⚠️ Rewind failed:', err?.message);
479
+ }
480
+ }
481
+ return false;
482
+ }
483
+ /**
484
+ * Check if there are active queries that can be interrupted
485
+ */
486
+ hasActiveQuery() {
487
+ return this.#activeQueries.size > 0;
488
+ }
489
+ /** Add an active query (called from ClaudeLLMStream when query starts) */
490
+ setActiveQuery(q) {
491
+ if (q) {
492
+ this.#activeQueries.add(q);
493
+ }
494
+ }
495
+ /** Remove an active query (called from ClaudeLLMStream when query completes) */
496
+ removeActiveQuery(q) {
497
+ this.#activeQueries.delete(q);
498
+ }
499
+ // ============================================================
500
+ // PERSISTENT SESSION — V1 query() with AsyncIterable<SDKUserMessage>
501
+ // Single subprocess per voice session. First chat() does JSONL cold
502
+ // start; subsequent chat() calls push messages to the existing
503
+ // subprocess via the MessageChannel — no JSONL replay.
504
+ // ============================================================
505
+ /** Whether a persistent session is alive and consuming messages */
506
+ hasSession() {
507
+ return this.#persistentQuery !== null && !this.#messageChannel?.closed;
508
+ }
509
+ /**
510
+ * Close the persistent session (kills subprocess).
511
+ * Call on disconnect, session switch, or recovery.
512
+ */
513
+ closeSession() {
514
+ if (this.#messageChannel) {
515
+ this.#messageChannel.close();
516
+ }
517
+ if (this.#persistentQuery) {
518
+ try {
519
+ this.#persistentQuery.close();
520
+ }
521
+ catch { }
522
+ this.#activeQueries.delete(this.#persistentQuery);
523
+ }
524
+ this.#persistentQuery = null;
525
+ this.#messageChannel = null;
526
+ this.#backgroundConsumerRunning = false;
527
+ console.log('🔒 Persistent session closed');
528
+ }
529
+ /**
530
+ * Push a user message into the persistent session.
531
+ * If no session exists yet, creates one (cold start with JSONL replay).
532
+ * If a session exists, instantly delivers the message (no replay).
533
+ *
534
+ * @param userText - The user's message text
535
+ * @param sdkOptions - Full V1 Options (only used on first call to create the query)
536
+ * @param callbacks - Event callbacks for the background consumer
537
+ */
538
+ pushMessage(userText, sdkOptions, callbacks) {
539
+ const userMessage = {
540
+ type: 'user',
541
+ message: { role: 'user', content: [{ type: 'text', text: userText }] },
542
+ parent_tool_use_id: null,
543
+ session_id: this.#sessionId || '',
544
+ };
545
+ if (this.#persistentQuery && this.#messageChannel && !this.#messageChannel.closed) {
546
+ // Fast path — push to existing subprocess (no cold start)
547
+ console.log('⚡ Persistent session: pushing message (no JSONL replay)');
548
+ this.#messageChannel.push(userMessage);
549
+ return;
550
+ }
551
+ // Cold start — create channel, push first message, start query + background consumer
552
+ console.log('🔄 Persistent session: cold start (first message, JSONL replay)');
553
+ this.#messageChannel = new MessageChannel();
554
+ this.#messageChannel.push(userMessage);
555
+ this.#persistentQuery = query({ prompt: this.#messageChannel, options: sdkOptions });
556
+ this.#activeQueries.add(this.#persistentQuery);
557
+ this.#startBackgroundConsumer(callbacks);
558
+ }
559
+ /**
560
+ * Background consumer — runs for the lifetime of the persistent session.
561
+ * Consumes all SDKMessage events from the query and routes them to
562
+ * the event emitter (same events as the old per-query skipTTSQueue path).
563
+ */
564
+ async #startBackgroundConsumer(callbacks) {
565
+ if (this.#backgroundConsumerRunning)
566
+ return;
567
+ this.#backgroundConsumerRunning = true;
568
+ const pq = this.#persistentQuery;
569
+ try {
570
+ for await (const message of pq) {
571
+ const msg = message;
572
+ // Session ID capture
573
+ if (msg.type === 'system' && msg.subtype === 'init') {
574
+ const mcpServers = msg.mcp_servers;
575
+ if (mcpServers && Array.isArray(mcpServers)) {
576
+ for (const s of mcpServers) {
577
+ const status = s.status === 'connected' ? '✅' : '❌';
578
+ console.log(`${status} MCP server ${s.name}: ${s.status}`);
579
+ }
580
+ }
581
+ const newSessionId = msg.session_id;
582
+ if (newSessionId) {
583
+ callbacks.onSessionId(newSessionId);
584
+ const isNew = !this.#sessionId;
585
+ if (isNew)
586
+ console.log(`📋 New session: ${newSessionId}`);
587
+ this.#sessionId = newSessionId;
588
+ if (isNew && this.#opts.workingDirectory) {
589
+ saveSessionMetadata(this.#opts.workingDirectory, {
590
+ sessionId: newSessionId,
591
+ lastUpdated: new Date().toISOString(),
592
+ projectPath: this.#opts.workingDirectory,
593
+ });
594
+ }
595
+ const requestedResumeId = this.#opts.resumeSessionId;
596
+ if (requestedResumeId && newSessionId !== requestedResumeId) {
597
+ console.error(`❌ Session resume FAILED: Expected ${requestedResumeId.substring(0, 8)}..., got ${newSessionId.substring(0, 8)}...`);
598
+ callbacks.eventEmitter.emit('session_resume_failed', { requestedSessionId: requestedResumeId, actualSessionId: newSessionId });
599
+ }
600
+ else if (requestedResumeId && newSessionId === requestedResumeId) {
601
+ console.log(`✅ Session resumed successfully: ${newSessionId.substring(0, 8)}...`);
602
+ }
603
+ }
604
+ }
605
+ // Checkpoint capture
606
+ if (msg.type === 'user' && msg.uuid) {
607
+ callbacks.onCheckpoint(msg.uuid);
608
+ }
609
+ // SDK request ID
610
+ if (msg.requestId) {
611
+ callbacks.eventEmitter.emit('query_request_id', { requestId: msg.requestId });
612
+ }
613
+ // Stream assistant text → tts_say events
614
+ if (msg.type === 'assistant' && msg.message?.content) {
615
+ for (const block of msg.message.content) {
616
+ if (block.type === 'text' && block.text) {
617
+ callbacks.eventEmitter.emit('assistant_text', { text: block.text });
618
+ const ttsChunk = stripMarkdownForTTS(block.text);
619
+ if (ttsChunk.trim()) {
620
+ console.log(`🔊 TTS say (${ttsChunk.length} chars): "${ttsChunk.substring(0, 60)}..."`);
621
+ callbacks.eventEmitter.emit('tts_say', { text: ttsChunk });
622
+ }
623
+ }
624
+ }
625
+ }
626
+ // Result — marks end of a turn (but we keep consuming for next turn)
627
+ if (msg.type === 'result') {
628
+ if (msg.result) {
629
+ callbacks.eventEmitter.emit('assistant_result', { text: msg.result });
630
+ }
631
+ console.log('✅ Claude turn complete (persistent session stays alive)');
632
+ }
633
+ }
634
+ }
635
+ catch (error) {
636
+ if (error?.message?.includes('aborted') || error?.message?.includes('AbortError')) {
637
+ console.log('🛑 Persistent session query aborted');
638
+ }
639
+ else {
640
+ console.error('❌ Persistent session error:', error);
641
+ callbacks.eventEmitter.emit('tts_say', { text: 'Sorry, I encountered an error.' });
642
+ }
643
+ }
644
+ finally {
645
+ this.#backgroundConsumerRunning = false;
646
+ this.#activeQueries.delete(pq);
647
+ this.#persistentQuery = null;
648
+ this.#messageChannel = null;
649
+ console.log('🔒 Persistent session background consumer exited');
650
+ }
651
+ }
652
+ chat({ chatCtx, toolCtx, connOptions = DEFAULT_API_CONNECT_OPTIONS, abortController, }) {
339
653
  return new ClaudeLLMStream(this, {
340
654
  chatCtx,
341
655
  toolCtx,
342
656
  connOptions,
343
657
  opts: this.#opts,
344
658
  sessionId: this.#sessionId,
659
+ abortController,
345
660
  onSessionId: (id) => {
346
661
  const isFirst = !this.#sessionId;
347
662
  this.#sessionId = id;
@@ -375,17 +690,23 @@ class ClaudeLLMStream extends llm.LLMStream {
375
690
  #eventEmitter;
376
691
  #onPermissionRequest;
377
692
  #onCheckpoint;
378
- constructor(llmInstance, { chatCtx, toolCtx, connOptions, opts, sessionId, onSessionId, eventEmitter, onCheckpoint, onPermissionRequest, }) {
693
+ #abortController;
694
+ #llmRef;
695
+ #approvedWriterToolUseIds = new Set();
696
+ constructor(llmInstance, { chatCtx, toolCtx, connOptions, opts, sessionId, onSessionId, eventEmitter, onCheckpoint, onPermissionRequest, abortController, }) {
379
697
  super(llmInstance, { chatCtx, toolCtx, connOptions });
698
+ this.#llmRef = llmInstance;
380
699
  this.#opts = opts;
381
700
  this.#sessionId = sessionId;
382
701
  this.#onSessionId = onSessionId;
383
702
  this.#eventEmitter = eventEmitter;
384
703
  this.#onCheckpoint = onCheckpoint;
385
704
  this.#onPermissionRequest = onPermissionRequest;
705
+ this.#abortController = abortController;
386
706
  }
387
707
  async run() {
388
708
  const requestId = `claude_${shortuuid()}`;
709
+ let activeQuery = null;
389
710
  try {
390
711
  // Extract user's message from chat context
391
712
  // ChatContext has .items which are ChatItem[] (ChatMessage | FunctionCall | FunctionCallOutput)
@@ -415,46 +736,45 @@ class ClaudeLLMStream extends llm.LLMStream {
415
736
  // Build Claude Agent SDK options
416
737
  const resumeSessionId = this.#opts.resumeSessionId;
417
738
  const continueSession = this.#opts.continueSession;
418
- // Session workspace path for system prompt — only available after SDK assigns a real session ID
739
+ // Session workspace path for system prompt — uses sessionBaseDir (not cwd) so
740
+ // workspace always lives in the Osborn install dir regardless of cwd setting
419
741
  const sessionId = this.#sessionId || this.#opts.resumeSessionId || null;
742
+ const baseDir = this.#opts.sessionBaseDir || this.#opts.workingDirectory;
420
743
  const workspacePath = sessionId
421
- ? (this.#opts.workingDirectory
422
- ? `${this.#opts.workingDirectory}/.osborn/sessions/${sessionId}/`
744
+ ? (baseDir
745
+ ? `${baseDir}/.osborn/sessions/${sessionId}/`
423
746
  : `.osborn/sessions/${sessionId}/`)
424
747
  : null;
425
- // Build allowedTools with MCP wildcard patterns
426
- const mcpKeys = Object.keys(this.#opts.mcpServers || {});
427
- const mcpPatterns = mcpKeys.map(key => `mcp__${key}__*`);
428
- const allowedTools = [
429
- ...(this.#opts.allowedTools || []),
430
- ...mcpPatterns,
431
- ];
748
+ const allowedTools = this.#opts.allowedTools || [];
432
749
  const sdkOptions = {
433
750
  cwd: this.#opts.workingDirectory,
434
751
  permissionMode: this.#opts.permissionMode,
435
752
  allowedTools,
436
- model: this.#opts.model || 'claude-sonnet-4-6',
753
+ model: this.#opts.model || 'claude-sonnet-4-6', // Sonnet orchestrator with named sub-agents (Haiku tested but ignored delegation rules)
437
754
  enableFileCheckpointing: true,
438
755
  extraArgs: { 'replay-user-messages': null },
756
+ ...(this.#abortController && { abortController: this.#abortController }),
439
757
  ...(resumeSessionId && { resume: resumeSessionId }),
440
758
  ...(continueSession && !resumeSessionId && { continue: true }),
441
759
  ...(this.#sessionId && !resumeSessionId && !continueSession && { resume: this.#sessionId }),
442
- ...(mcpKeys.length > 0 && {
443
- mcpServers: this.#opts.mcpServers,
444
- }),
445
- ...(mcpKeys.length > 0 && (() => {
446
- for (const [key, cfg] of Object.entries(this.#opts.mcpServers || {})) {
447
- const cfgType = cfg.type || 'stdio';
448
- console.log(`🔌 SDK query MCP: ${key} [type=${cfgType}]`);
449
- }
450
- return {};
451
- })()),
452
- // Research mode system prompt — always injected
453
- systemPrompt: getResearchSystemPrompt(workspacePath),
760
+ // System prompt direct mode gets speech-optimized prompt, realtime gets structured research prompt
761
+ // Skills from agent/.claude/skills/ are appended if present
762
+ systemPrompt: [
763
+ this.#opts.voiceMode === 'direct'
764
+ ? getDirectModeResearchPrompt(workspacePath)
765
+ : getResearchSystemPrompt(workspacePath),
766
+ loadSkillsFromDir(this.#opts.sessionBaseDir || this.#opts.workingDirectory || process.cwd()),
767
+ ].filter(Boolean).join('\n\n'),
454
768
  canUseTool: async (toolName, input, _options) => {
455
769
  // Auto-approve writes to session workspace (but block spec.md and library/ — fast brain manages those)
456
770
  if (toolName === 'Write' || toolName === 'Edit') {
457
771
  const filePath = String(input?.file_path || '');
772
+ const agentType = input?.agent_type || null;
773
+ const toolUseId = _options?.toolUseID;
774
+ const toolInput = input?.tool_input || {};
775
+ console.log('input,', input, 'input.file_path', filePath, 'agent_type', agentType);
776
+ console.log(`🔍 canUseTool: ${toolName} filePath="${filePath}" keys=${Object.keys(input || {}).join(',')}`);
777
+ console.log(`🔍 canUseTool _options keys=[${Object.keys(_options || {}).join(', ')}] title="${_options?.title || ''}" decisionReason="${_options?.decisionReason || ''}" blockedPath="${_options?.blockedPath || ''}"`);
458
778
  if (filePath.includes('.osborn/sessions/') || filePath.includes('.osborn/research/')) {
459
779
  // Block writes to spec.md and library/ — the fast brain manages these
460
780
  const fileName = filePath.split('/').pop() || '';
@@ -465,6 +785,11 @@ class ClaudeLLMStream extends llm.LLMStream {
465
785
  console.log(`✅ Auto-approved ${toolName} to workspace: ${filePath}`);
466
786
  return { behavior: 'allow', updatedInput: input };
467
787
  }
788
+ // if (toolUseId && this.#approvedWriterToolUseIds.has(toolUseId)) {
789
+ // this.#approvedWriterToolUseIds.delete(toolUseId)
790
+ // console.log(`✅ Writer pre-approved ${toolName}: ${filePath}`)
791
+ // return { behavior: 'allow', updatedInput: input }
792
+ // }
468
793
  }
469
794
  // Auto-approve AskUserQuestion — research agent should freely ask clarifying questions
470
795
  if (toolName === 'AskUserQuestion') {
@@ -485,13 +810,24 @@ class ClaudeLLMStream extends llm.LLMStream {
485
810
  hooks: [async (input) => {
486
811
  const toolName = input?.tool_name || 'unknown';
487
812
  const toolInput = input?.tool_input || {};
488
- // Safety: block Write/Edit outside session workspace
489
- if (toolName === 'Write' || toolName === 'Edit') {
813
+ const agentType = input?.agent_type || null;
814
+ console.log(`🔍 PreToolUse: toolName=${toolName} agent_type=${agentType} agent_id=${input?.agent_id || 'none'} all_keys=[${Object.keys(input || {}).join(', ')}]`);
815
+ // Write/Edit/MultiEdit access control
816
+ if (toolName === 'Write' || toolName === 'Edit' || toolName === 'MultiEdit') {
817
+ // Writer sub-agent gets full write access everywhere
818
+ console.log('verifying agent_type', agentType);
819
+ // Writer agent: no longer auto-approved — falls through to canUseTool for permission dialog
820
+ if (agentType === 'writer') {
821
+ console.log(`✍️ Writer agent: deferring to canUseTool for permission`);
822
+ this.#eventEmitter.emit('tool_use', { name: toolName, input: toolInput });
823
+ return { hookSpecificOutput: { hookEventName: 'PreToolUse', permissionDecision: 'ask' } };
824
+ }
825
+ // All other agents (main, researcher, reasoner, etc.): workspace only
490
826
  const filePath = String(toolInput.file_path || '');
491
827
  if (filePath && !filePath.includes('.osborn/sessions/') && !filePath.includes('.osborn/research/')) {
492
- console.log(`🚫 Research mode: blocked write to ${filePath}`);
828
+ console.log(`🚫 Research mode: blocked write to ${filePath} (agent_type: ${agentType ?? 'main'})`);
493
829
  this.#eventEmitter.emit('tool_blocked', { name: toolName, reason: 'Research mode: writes restricted to session workspace' });
494
- return { decision: 'block', reason: 'Research mode: write to .osborn/sessions/ only.' };
830
+ return { hookSpecificOutput: { hookEventName: 'PreToolUse', permissionDecision: 'deny' }, reason: 'Research mode: write to .osborn/sessions/ only.' };
495
831
  }
496
832
  }
497
833
  console.log(`🔧 Claude: ${toolName}`);
@@ -510,12 +846,160 @@ class ClaudeLLMStream extends llm.LLMStream {
510
846
  return {};
511
847
  }]
512
848
  }]
513
- }
849
+ },
850
+ // Named sub-agents — Haiku overseer delegates to these specialists.
851
+ // Each has a specific role, model, and tool set.
852
+ agents: {
853
+ researcher: {
854
+ description: [
855
+ 'Information gathering agent (Sonnet). Use for: codebase exploration, web research,',
856
+ 'finding patterns, reading multiple files, searching for examples.',
857
+ 'Returns structured findings — does NOT make decisions or edit files.',
858
+ 'Use this for ANY task that needs more than 2 tool calls to gather information.',
859
+ ].join(' '),
860
+ tools: ['Read', 'Glob', 'Grep', 'Bash', 'WebSearch', 'WebFetch', 'Task'],
861
+ model: 'sonnet',
862
+ prompt: [
863
+ 'You are Osborn\'s research agent. Your job is information gathering — thorough, structured, factual.',
864
+ '',
865
+ '## Your role',
866
+ 'Gather information the main agent needs to answer the user\'s question or make a decision.',
867
+ 'You are a scout — go find things, read them carefully, and report back.',
868
+ '',
869
+ '## How to work',
870
+ '1. Understand what information is needed and why.',
871
+ '2. Search broadly first (Glob, Grep, WebSearch), then read deeply (Read specific files).',
872
+ '3. For large investigations, use the Task tool to run parallel searches.',
873
+ '4. Cap yourself at 5-8 tool calls unless the task clearly requires more.',
874
+ '',
875
+ '## What to return',
876
+ 'Structured findings with specifics:',
877
+ '- File paths and line numbers where you found relevant code',
878
+ '- Exact values, configs, versions — not paraphrases',
879
+ '- Direct quotes from documentation or web sources',
880
+ '- What you looked for but did NOT find (negative results matter)',
881
+ '',
882
+ '## What NOT to do',
883
+ '- Do NOT make recommendations or decisions — just surface facts',
884
+ '- Do NOT edit or write any files',
885
+ '- Do NOT run destructive commands (no rm, no git push, no npm publish)',
886
+ '- If you need clarification, ask the main agent — it will relay to the user if needed',
887
+ ].join('\n'),
888
+ },
889
+ reasoner: {
890
+ description: [
891
+ 'Deep reasoning agent (Opus). Use for: architecture decisions, complex problem analysis,',
892
+ 'tradeoff evaluation, generating implementation plans, understanding hard problems.',
893
+ 'Slow but thorough — only use for genuinely complex problems that need careful thought.',
894
+ 'Does NOT edit files — returns a clear plan for the writer agent to execute.',
895
+ ].join(' '),
896
+ tools: ['Read', 'Glob', 'Grep', 'WebSearch', 'WebFetch'],
897
+ model: 'opus',
898
+ prompt: [
899
+ 'You are Osborn\'s reasoning agent. Your job is deep analysis, architectural thinking, and decision-making.',
900
+ '',
901
+ '## Your role',
902
+ 'Think hard about complex problems. Consider multiple approaches. Identify risks and edge cases.',
903
+ 'Return a clear, opinionated recommendation with reasoning — not just a list of options.',
904
+ '',
905
+ '## How to work',
906
+ '1. Read and understand the full context before forming an opinion.',
907
+ '2. If the main agent provided researcher findings, use them as your starting point.',
908
+ '3. Consider at least 2-3 alternative approaches before recommending one.',
909
+ '4. Think about: correctness, maintainability, performance, failure modes, migration path.',
910
+ '5. Use Read/Grep to verify assumptions against the actual codebase when relevant.',
911
+ '',
912
+ '## What to return',
913
+ '- RECOMMENDATION: what to do (one clear answer, not "it depends")',
914
+ '- REASONING: why this approach wins over alternatives (2-3 sentences)',
915
+ '- PLAN: step-by-step implementation instructions specific enough for the writer agent',
916
+ '- RISKS: what could go wrong and how to mitigate',
917
+ '- If the problem is genuinely ambiguous, say what additional information would resolve it',
918
+ '',
919
+ '## What NOT to do',
920
+ '- Do NOT edit or write files — return a plan for the writer agent',
921
+ '- Do NOT give wishy-washy "both options are valid" non-answers — commit to a recommendation',
922
+ '- If you need more information, ask the main agent to delegate to the researcher',
923
+ ].join('\n'),
924
+ },
925
+ writer: {
926
+ description: [
927
+ 'Execution agent with file write/edit permissions (Sonnet).',
928
+ 'Handles ALL file operations: code, config, docs, scripts, data files.',
929
+ 'VERIFY-FIRST workflow: checks assumptions before making changes, runs tests after.',
930
+ 'If anything is unclear, asks the main agent for clarification before touching files.',
931
+ ].join(' '),
932
+ tools: ['Read', 'Write', 'Edit', 'MultiEdit', 'Bash', 'Glob', 'Grep', 'NotebookRead', 'NotebookEdit'],
933
+ model: 'sonnet',
934
+ prompt: [
935
+ 'You are Osborn\'s writer agent. You execute file changes with a verify-first approach.',
936
+ '',
937
+ '## Your role',
938
+ 'Handle ALL file operations — code, config, documentation, scripts, data files.',
939
+ 'You are the only agent that writes. The main agent and reasoner produce plans; you execute them.',
940
+ '',
941
+ '## VERIFY-FIRST workflow (mandatory)',
942
+ '',
943
+ '### Step 1: Verify assumptions',
944
+ '1. Read the files you\'re about to modify. Confirm they match what the plan expects.',
945
+ '2. If the plan references specific code patterns, grep to confirm they exist.',
946
+ '3. If applicable, run the current test suite or build to confirm the starting state works.',
947
+ '4. If ANYTHING has drifted from the plan (file moved, code refactored, dependency changed):',
948
+ ' STOP and report back to the main agent. Do NOT improvise.',
949
+ '',
950
+ '### Step 2: Clarify unknowns',
951
+ '1. If the plan is vague or ambiguous — ask the main agent a specific clarifying question.',
952
+ ' Examples: "Which config format — YAML or JSON?", "New file or extend existing auth.ts?"',
953
+ '2. The main agent will answer from context or relay to the user.',
954
+ '3. Do NOT guess. One clear question is better than a wrong assumption.',
955
+ '4. Restate what you will do before doing it: which files, what changes, in what order.',
956
+ '',
957
+ '### Step 3: Execute changes',
958
+ '- Make ONLY the changes described in the plan.',
959
+ '- Do NOT refactor adjacent code, fix unrelated issues, add unrequested comments/docs.',
960
+ '- If you hit an unexpected issue, STOP and report to the main agent.',
961
+ '',
962
+ '### Step 4: Verify results',
963
+ '1. Run tests if available (npm test, pytest, cargo test, etc.).',
964
+ '2. Run the build if applicable (npm run build, tsc --noEmit, etc.).',
965
+ '3. If tests or build fail: attempt to fix the issue you introduced. Re-run.',
966
+ '4. Report: files changed, what changed in each, test results, any failures.',
967
+ ].join('\n'),
968
+ },
969
+ },
514
970
  };
515
971
  // Run Claude Agent SDK query() and stream results
516
972
  let hasOutput = false;
517
973
  let fullResponse = ''; // Collect full response for frontend
518
- for await (const message of query({ prompt: userText, options: sdkOptions })) {
974
+ // DIRECT MODE OPTIMIZATION: When skipTTSQueue is true, we run the Claude query
975
+ // in the background and return from run() immediately. This is critical because:
976
+ //
977
+ // LiveKit's main speech loop (agent_activity.ts) processes one SpeechHandle at a time.
978
+ // The LLM's SpeechHandle blocks the queue until run() returns (which closes the queue
979
+ // → pipeline completes → _markGenerationDone()). If we await the full query() here,
980
+ // the pipeline is blocked for the entire duration of tool execution (10-30s).
981
+ // Meanwhile, session.say() SpeechHandles queue up but can't play.
982
+ //
983
+ // By returning early, the pipeline completes in milliseconds. The say() handles
984
+ // created by tts_say events get processed by the main loop immediately.
985
+ // The query continues in the background — text arrives via tts_say, tools via hooks.
986
+ if (this.#opts.skipTTSQueue) {
987
+ // PERSISTENT SESSION: Push message to existing subprocess (no JSONL replay).
988
+ // First call creates the query (cold start). Subsequent calls are instant.
989
+ // The background consumer in ClaudeLLM handles all message routing (TTS, tools, etc.)
990
+ this.#llmRef.pushMessage(userText, sdkOptions, {
991
+ onSessionId: this.#onSessionId,
992
+ onCheckpoint: this.#onCheckpoint,
993
+ eventEmitter: this.#eventEmitter,
994
+ });
995
+ // Return immediately — queue closes, pipeline completes, say() handles play
996
+ console.log('🚀 Direct mode: Claude query running in background, pipeline released');
997
+ return;
998
+ }
999
+ // Store active query for interrupt/rewind access
1000
+ activeQuery = query({ prompt: userText, options: sdkOptions });
1001
+ this.#llmRef.setActiveQuery(activeQuery);
1002
+ for await (const message of activeQuery) {
519
1003
  // Capture session ID for context continuity
520
1004
  if (message.type === 'system' && message.subtype === 'init') {
521
1005
  // Log MCP server connection status
@@ -565,53 +1049,102 @@ class ClaudeLLMStream extends llm.LLMStream {
565
1049
  const checkpointId = message.uuid;
566
1050
  this.#onCheckpoint(checkpointId);
567
1051
  }
568
- // Stream text chunks
1052
+ // Stream text chunks — send each assistant text block to TTS
569
1053
  if (message.type === 'assistant' && message.message?.content) {
1054
+ // Emit SDK requestId on first assistant message — identifies this query()
1055
+ // in the JSONL for tracking which research task produced which output
1056
+ const sdkRequestId = message.requestId;
1057
+ if (sdkRequestId) {
1058
+ this.#eventEmitter.emit('query_request_id', { requestId: sdkRequestId });
1059
+ }
570
1060
  for (const block of message.message.content) {
571
1061
  if (block.type === 'text' && block.text) {
572
1062
  hasOutput = true;
573
1063
  const rawText = block.text;
574
1064
  // Emit RAW text to frontend (for chat bubbles with full formatting)
575
1065
  this.#eventEmitter.emit('assistant_text', { text: rawText });
576
- // Collect for final TTS summary
577
- fullResponse += rawText + ' ';
1066
+ // Strip markdown for clean speech
1067
+ const ttsChunk = stripMarkdownForTTS(rawText);
1068
+ if (ttsChunk.trim()) {
1069
+ if (this.#opts.skipTTSQueue) {
1070
+ // Direct mode: emit event for session.say() — bypasses LiveKit's
1071
+ // BufferedTokenStream which causes stuck/delayed/out-of-order audio
1072
+ console.log(`🔊 TTS say (${ttsChunk.length} chars): "${ttsChunk.substring(0, 60)}..."`);
1073
+ this.#eventEmitter.emit('tts_say', { text: ttsChunk });
1074
+ }
1075
+ else {
1076
+ // Realtime mode: use LLM stream queue (framework handles TTS)
1077
+ console.log(`🔊 TTS stream (${ttsChunk.length} chars): "${ttsChunk.substring(0, 60)}..."`);
1078
+ this.queue.put({
1079
+ id: requestId,
1080
+ delta: { role: 'assistant', content: ttsChunk },
1081
+ });
1082
+ }
1083
+ }
578
1084
  }
579
1085
  }
580
1086
  }
581
- // Final result
1087
+ // Final result — only speak if no text blocks were streamed already
582
1088
  if (message.type === 'result' && message.result) {
583
1089
  const rawResult = message.result;
584
1090
  // Emit RAW result to frontend
585
1091
  this.#eventEmitter.emit('assistant_result', { text: rawResult });
586
1092
  if (!hasOutput) {
587
- fullResponse = rawResult;
588
1093
  hasOutput = true;
1094
+ const ttsText = stripMarkdownForTTS(rawResult);
1095
+ if (ttsText.trim()) {
1096
+ if (this.#opts.skipTTSQueue) {
1097
+ console.log(`🔊 TTS say result (${ttsText.length} chars): "${ttsText.substring(0, 60)}..."`);
1098
+ this.#eventEmitter.emit('tts_say', { text: ttsText });
1099
+ }
1100
+ else {
1101
+ console.log(`🔊 TTS result (${ttsText.length} chars): "${ttsText.substring(0, 60)}..."`);
1102
+ this.queue.put({
1103
+ id: requestId,
1104
+ delta: { role: 'assistant', content: ttsText },
1105
+ });
1106
+ }
1107
+ }
589
1108
  }
590
1109
  }
591
1110
  }
592
- // Send SUMMARIZED output to TTS (spoken)
593
- if (hasOutput && fullResponse.trim()) {
594
- const ttsText = summarizeForTTS(fullResponse.trim());
595
- console.log(`🔊 TTS (summarized ${fullResponse.length} → ${ttsText.length} chars): "${ttsText.substring(0, 80)}..."`);
596
- this.queue.put({
597
- id: requestId,
598
- delta: { role: 'assistant', content: ttsText },
599
- });
1111
+ // If Claude produced no output at all, say "Done."
1112
+ if (!hasOutput) {
1113
+ if (this.#opts.skipTTSQueue) {
1114
+ this.#eventEmitter.emit('tts_say', { text: 'Done.' });
1115
+ }
1116
+ else {
1117
+ this.queue.put({
1118
+ id: requestId,
1119
+ delta: { role: 'assistant', content: 'Done.' },
1120
+ });
1121
+ }
1122
+ }
1123
+ console.log('✅ Claude response complete');
1124
+ }
1125
+ catch (error) {
1126
+ // AbortError = clean abort (disconnect, new research, recovery) — don't push
1127
+ // garbage text that would flow through the post-research pipeline
1128
+ if (this.#abortController?.signal.aborted) {
1129
+ console.log('🛑 Claude Agent SDK query aborted');
1130
+ if (!this.#opts.skipTTSQueue) {
1131
+ this.queue.put({ id: requestId, delta: { role: 'assistant', content: '' } });
1132
+ }
1133
+ return;
1134
+ }
1135
+ console.error('❌ Claude Agent SDK error:', error);
1136
+ if (this.#opts.skipTTSQueue) {
1137
+ this.#eventEmitter.emit('tts_say', { text: 'Sorry, I encountered an error.' });
600
1138
  }
601
1139
  else {
602
1140
  this.queue.put({
603
1141
  id: requestId,
604
- delta: { role: 'assistant', content: 'Done.' },
1142
+ delta: { role: 'assistant', content: 'Sorry, I encountered an error.' },
605
1143
  });
606
1144
  }
607
- console.log('✅ Claude response complete');
608
1145
  }
609
- catch (error) {
610
- console.error('❌ Claude Agent SDK error:', error);
611
- this.queue.put({
612
- id: requestId,
613
- delta: { role: 'assistant', content: 'Sorry, I encountered an error.' },
614
- });
1146
+ finally {
1147
+ this.#llmRef.removeActiveQuery(activeQuery);
615
1148
  }
616
1149
  }
617
1150
  }