osborn 0.5.2 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/.claude/settings.local.json +9 -0
  2. package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
  3. package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
  4. package/.claude/skills/playwright-browser/SKILL.md +75 -0
  5. package/.claude/skills/youtube-transcript/SKILL.md +24 -0
  6. package/dist/claude-llm.d.ts +29 -1
  7. package/dist/claude-llm.js +346 -79
  8. package/dist/config.d.ts +6 -2
  9. package/dist/config.js +6 -1
  10. package/dist/fast-brain.d.ts +124 -12
  11. package/dist/fast-brain.js +1361 -96
  12. package/dist/index-3-2-26-legacy.d.ts +1 -0
  13. package/dist/index-3-2-26-legacy.js +2233 -0
  14. package/dist/index.js +889 -394
  15. package/dist/jsonl-search.d.ts +66 -0
  16. package/dist/jsonl-search.js +274 -0
  17. package/dist/leagcyprompts2.d.ts +0 -0
  18. package/dist/leagcyprompts2.js +573 -0
  19. package/dist/pipeline-direct-llm.d.ts +77 -0
  20. package/dist/pipeline-direct-llm.js +216 -0
  21. package/dist/pipeline-fastbrain.d.ts +45 -0
  22. package/dist/pipeline-fastbrain.js +367 -0
  23. package/dist/prompts-2-25-26.d.ts +0 -0
  24. package/dist/prompts-2-25-26.js +518 -0
  25. package/dist/prompts-3-2-26.d.ts +78 -0
  26. package/dist/prompts-3-2-26.js +1319 -0
  27. package/dist/prompts.d.ts +83 -8
  28. package/dist/prompts.js +1990 -374
  29. package/dist/session-access.d.ts +60 -2
  30. package/dist/session-access.js +172 -2
  31. package/dist/summary-index.d.ts +87 -0
  32. package/dist/summary-index.js +570 -0
  33. package/dist/turn-detector-shim.d.ts +24 -0
  34. package/dist/turn-detector-shim.js +83 -0
  35. package/dist/voice-io.d.ts +9 -3
  36. package/dist/voice-io.js +39 -20
  37. package/package.json +18 -11
@@ -10,7 +10,9 @@ import { llm, shortuuid, DEFAULT_API_CONNECT_OPTIONS } from '@livekit/agents';
10
10
  import { query } from '@anthropic-ai/claude-agent-sdk';
11
11
  import { EventEmitter } from 'events';
12
12
  import { saveSessionMetadata } from './config.js';
13
- import { getResearchSystemPrompt } from './prompts.js';
13
+ import { getResearchSystemPrompt, getDirectModeResearchPrompt } from './prompts.js';
14
+ import { existsSync, readdirSync, readFileSync } from 'node:fs';
15
+ import { join } from 'node:path';
14
16
  /**
15
17
  * Strip markdown formatting for TTS (text-to-speech)
16
18
  * Removes **bold**, ##headers, ```code```, etc. so TTS doesn't read them literally
@@ -47,40 +49,30 @@ function stripMarkdownForTTS(text) {
47
49
  .trim();
48
50
  }
49
51
  /**
50
- * Summarize text for TTS - create short spoken summaries
51
- * Full output goes to frontend, this condensed version is spoken
52
+ * Load skill files from agent/.claude/skills/{name}/SKILL.md
53
+ * Injects into system prompt so Claude sees them as available capabilities.
54
+ * Skills execute via Bash — no SDK settingSources needed.
52
55
  */
53
- function summarizeForTTS(text, maxLength = 500) {
54
- // First strip markdown
55
- let summary = stripMarkdownForTTS(text);
56
- // Remove file paths (keep just filename)
57
- summary = summary.replace(/\/[\w\-\.\/]+\/([\w\-\.]+)/g, '$1');
58
- // Remove code block placeholders if too many
59
- const codeBlockCount = (summary.match(/\[code block\]/g) || []).length;
60
- if (codeBlockCount > 1) {
61
- summary = summary.replace(/\[code block\]/g, '').replace(/\s+/g, ' ');
62
- summary = summary.trim() + ` I've included ${codeBlockCount} code examples.`;
63
- }
64
- // If still too long, take first sentence(s) up to maxLength
65
- if (summary.length > maxLength) {
66
- // Try to break at sentence boundaries
67
- const sentences = summary.match(/[^.!?]+[.!?]+/g) || [summary];
68
- let result = '';
69
- for (const sentence of sentences) {
70
- if ((result + sentence).length <= maxLength) {
71
- result += sentence;
56
+ function loadSkillsFromDir(agentDir) {
57
+ const skillsDir = join(agentDir, '.claude', 'skills');
58
+ if (!existsSync(skillsDir))
59
+ return '';
60
+ const skills = [];
61
+ try {
62
+ for (const skillName of readdirSync(skillsDir)) {
63
+ const skillFile = join(skillsDir, skillName, 'SKILL.md');
64
+ if (existsSync(skillFile)) {
65
+ skills.push(readFileSync(skillFile, 'utf-8').trim());
72
66
  }
73
- else {
74
- break;
75
- }
76
- }
77
- // If no complete sentence fits, truncate with ellipsis
78
- if (!result) {
79
- result = summary.substring(0, maxLength - 3) + '...';
80
67
  }
81
- summary = result.trim();
82
68
  }
83
- return summary || 'Done.';
69
+ catch (err) {
70
+ console.warn('⚠️ Failed to load skills:', err);
71
+ }
72
+ if (skills.length === 0)
73
+ return '';
74
+ console.log(`📚 Loaded ${skills.length} skill(s) from ${skillsDir}`);
75
+ return `<available-skills>\n${skills.join('\n\n---\n\n')}\n</available-skills>`;
84
76
  }
85
77
  // Research mode tools — full research capabilities
86
78
  const RESEARCH_TOOLS = [
@@ -104,6 +96,10 @@ export class ClaudeLLM extends llm.LLM {
104
96
  #latestCheckpoint = null;
105
97
  // Pending permission request (for voice approval flow)
106
98
  #pendingPermission = null;
99
+ // Persistent session: single process, no JSONL replay on follow-up messages
100
+ // Active queries — multiple can be running (SDK queues them internally).
101
+ // We keep ALL references so interrupt() can stop whatever is currently executing.
102
+ #activeQueries = new Set();
107
103
  constructor(opts = {}) {
108
104
  super();
109
105
  // Session resume/continue options
@@ -113,15 +109,21 @@ export class ClaudeLLM extends llm.LLM {
113
109
  this.#mcpServers = opts.mcpServers || {};
114
110
  this.#opts = {
115
111
  workingDirectory: opts.workingDirectory || process.cwd(),
112
+ sessionBaseDir: opts.sessionBaseDir || opts.workingDirectory || process.cwd(),
116
113
  permissionMode: opts.permissionMode || 'default',
117
114
  allowedTools: opts.allowedTools || RESEARCH_TOOLS,
118
115
  resumeSessionId: this.#resumeSessionId || undefined,
119
116
  continueSession: this.#continueSession,
120
117
  mcpServers: this.#mcpServers,
118
+ voiceMode: opts.voiceMode || 'realtime',
119
+ skipTTSQueue: opts.skipTTSQueue || false,
121
120
  };
122
121
  this.#eventEmitter = opts.eventEmitter || new EventEmitter();
123
122
  console.log('🟠 ClaudeLLM initialized (Research Mode)');
124
- console.log(` 📁 Working dir: ${this.#opts.workingDirectory}`);
123
+ console.log(` 📁 Working dir (cwd): ${this.#opts.workingDirectory}`);
124
+ if (this.#opts.sessionBaseDir !== this.#opts.workingDirectory) {
125
+ console.log(` 📁 Session base dir: ${this.#opts.sessionBaseDir}`);
126
+ }
125
127
  console.log(` 🔧 Allowed tools: ${this.#opts.allowedTools?.join(', ')}`);
126
128
  const mcpCount = Object.keys(this.#mcpServers).length;
127
129
  if (mcpCount > 0) {
@@ -335,13 +337,98 @@ export class ClaudeLLM extends llm.LLM {
335
337
  hasCheckpoints() {
336
338
  return this.#checkpoints.length > 0;
337
339
  }
338
- chat({ chatCtx, toolCtx, connOptions = DEFAULT_API_CONNECT_OPTIONS, }) {
340
+ // ============================================================
341
+ // AGENT CONTROL — interrupt, abort, rewind (for fast brain)
342
+ // ============================================================
343
+ /**
344
+ * Interrupt the current Claude query gracefully (like pressing Esc).
345
+ * Stops current tool execution but keeps the process alive.
346
+ * Returns true if interrupted, false if no active query.
347
+ */
348
+ async interruptQuery() {
349
+ if (this.#activeQueries.size === 0)
350
+ return false;
351
+ let interrupted = false;
352
+ // Interrupt ALL active queries — stops the current task + any queued ones
353
+ for (const q of this.#activeQueries) {
354
+ if (typeof q.interrupt === 'function') {
355
+ try {
356
+ await q.interrupt();
357
+ interrupted = true;
358
+ }
359
+ catch (err) {
360
+ console.error('⚠️ Interrupt failed:', err?.message);
361
+ }
362
+ }
363
+ }
364
+ if (interrupted) {
365
+ console.log(`🛑 Interrupted ${this.#activeQueries.size} active query(s) (Esc equivalent)`);
366
+ }
367
+ return interrupted;
368
+ }
369
+ /**
370
+ * Hard abort all active queries (like Ctrl+C).
371
+ * Kills subprocesses. Next message will spawn new processes.
372
+ */
373
+ abortQuery() {
374
+ for (const q of this.#activeQueries) {
375
+ try {
376
+ q.return?.();
377
+ }
378
+ catch { }
379
+ }
380
+ this.#activeQueries.clear();
381
+ console.log('🛑 All queries aborted (Ctrl+C equivalent)');
382
+ }
383
+ /**
384
+ * Rewind file changes to a specific checkpoint.
385
+ * Uses the most recently added query (most likely to have the rewind capability).
386
+ */
387
+ async rewindToCheckpoint(checkpointId) {
388
+ const id = checkpointId || this.#latestCheckpoint;
389
+ if (!id) {
390
+ console.log('⚠️ No checkpoint available for rewind');
391
+ return false;
392
+ }
393
+ // Try rewind on the latest query
394
+ const queries = [...this.#activeQueries];
395
+ const latest = queries[queries.length - 1];
396
+ if (latest && typeof latest.rewindFiles === 'function') {
397
+ try {
398
+ await latest.rewindFiles(id);
399
+ console.log(`🔄 Files rewound to checkpoint: ${id.substring(0, 8)}...`);
400
+ return true;
401
+ }
402
+ catch (err) {
403
+ console.error('⚠️ Rewind failed:', err?.message);
404
+ }
405
+ }
406
+ return false;
407
+ }
408
+ /**
409
+ * Check if there are active queries that can be interrupted
410
+ */
411
+ hasActiveQuery() {
412
+ return this.#activeQueries.size > 0;
413
+ }
414
+ /** Add an active query (called from ClaudeLLMStream when query starts) */
415
+ setActiveQuery(q) {
416
+ if (q) {
417
+ this.#activeQueries.add(q);
418
+ }
419
+ }
420
+ /** Remove an active query (called from ClaudeLLMStream when query completes) */
421
+ removeActiveQuery(q) {
422
+ this.#activeQueries.delete(q);
423
+ }
424
+ chat({ chatCtx, toolCtx, connOptions = DEFAULT_API_CONNECT_OPTIONS, abortController, }) {
339
425
  return new ClaudeLLMStream(this, {
340
426
  chatCtx,
341
427
  toolCtx,
342
428
  connOptions,
343
429
  opts: this.#opts,
344
430
  sessionId: this.#sessionId,
431
+ abortController,
345
432
  onSessionId: (id) => {
346
433
  const isFirst = !this.#sessionId;
347
434
  this.#sessionId = id;
@@ -375,17 +462,22 @@ class ClaudeLLMStream extends llm.LLMStream {
375
462
  #eventEmitter;
376
463
  #onPermissionRequest;
377
464
  #onCheckpoint;
378
- constructor(llmInstance, { chatCtx, toolCtx, connOptions, opts, sessionId, onSessionId, eventEmitter, onCheckpoint, onPermissionRequest, }) {
465
+ #abortController;
466
+ #llmRef;
467
+ constructor(llmInstance, { chatCtx, toolCtx, connOptions, opts, sessionId, onSessionId, eventEmitter, onCheckpoint, onPermissionRequest, abortController, }) {
379
468
  super(llmInstance, { chatCtx, toolCtx, connOptions });
469
+ this.#llmRef = llmInstance;
380
470
  this.#opts = opts;
381
471
  this.#sessionId = sessionId;
382
472
  this.#onSessionId = onSessionId;
383
473
  this.#eventEmitter = eventEmitter;
384
474
  this.#onCheckpoint = onCheckpoint;
385
475
  this.#onPermissionRequest = onPermissionRequest;
476
+ this.#abortController = abortController;
386
477
  }
387
478
  async run() {
388
479
  const requestId = `claude_${shortuuid()}`;
480
+ let activeQuery = null;
389
481
  try {
390
482
  // Extract user's message from chat context
391
483
  // ChatContext has .items which are ChatItem[] (ChatMessage | FunctionCall | FunctionCallOutput)
@@ -415,20 +507,16 @@ class ClaudeLLMStream extends llm.LLMStream {
415
507
  // Build Claude Agent SDK options
416
508
  const resumeSessionId = this.#opts.resumeSessionId;
417
509
  const continueSession = this.#opts.continueSession;
418
- // Session workspace path for system prompt — only available after SDK assigns a real session ID
510
+ // Session workspace path for system prompt — uses sessionBaseDir (not cwd) so
511
+ // workspace always lives in the Osborn install dir regardless of cwd setting
419
512
  const sessionId = this.#sessionId || this.#opts.resumeSessionId || null;
513
+ const baseDir = this.#opts.sessionBaseDir || this.#opts.workingDirectory;
420
514
  const workspacePath = sessionId
421
- ? (this.#opts.workingDirectory
422
- ? `${this.#opts.workingDirectory}/.osborn/sessions/${sessionId}/`
515
+ ? (baseDir
516
+ ? `${baseDir}/.osborn/sessions/${sessionId}/`
423
517
  : `.osborn/sessions/${sessionId}/`)
424
518
  : null;
425
- // Build allowedTools with MCP wildcard patterns
426
- const mcpKeys = Object.keys(this.#opts.mcpServers || {});
427
- const mcpPatterns = mcpKeys.map(key => `mcp__${key}__*`);
428
- const allowedTools = [
429
- ...(this.#opts.allowedTools || []),
430
- ...mcpPatterns,
431
- ];
519
+ const allowedTools = this.#opts.allowedTools || [];
432
520
  const sdkOptions = {
433
521
  cwd: this.#opts.workingDirectory,
434
522
  permissionMode: this.#opts.permissionMode,
@@ -436,30 +524,38 @@ class ClaudeLLMStream extends llm.LLMStream {
436
524
  model: this.#opts.model || 'claude-sonnet-4-6',
437
525
  enableFileCheckpointing: true,
438
526
  extraArgs: { 'replay-user-messages': null },
527
+ ...(this.#abortController && { abortController: this.#abortController }),
439
528
  ...(resumeSessionId && { resume: resumeSessionId }),
440
529
  ...(continueSession && !resumeSessionId && { continue: true }),
441
530
  ...(this.#sessionId && !resumeSessionId && !continueSession && { resume: this.#sessionId }),
442
- ...(mcpKeys.length > 0 && {
443
- mcpServers: this.#opts.mcpServers,
444
- }),
445
- ...(mcpKeys.length > 0 && (() => {
446
- for (const [key, cfg] of Object.entries(this.#opts.mcpServers || {})) {
447
- const cfgType = cfg.type || 'stdio';
448
- console.log(`🔌 SDK query MCP: ${key} [type=${cfgType}]`);
449
- }
450
- return {};
451
- })()),
452
- // Research mode system prompt — always injected
453
- systemPrompt: getResearchSystemPrompt(workspacePath),
531
+ // System prompt direct mode gets speech-optimized prompt, realtime gets structured research prompt
532
+ // Skills from agent/.claude/skills/ are appended if present
533
+ systemPrompt: [
534
+ this.#opts.voiceMode === 'direct'
535
+ ? getDirectModeResearchPrompt(workspacePath)
536
+ : getResearchSystemPrompt(workspacePath),
537
+ loadSkillsFromDir(this.#opts.sessionBaseDir || this.#opts.workingDirectory || process.cwd()),
538
+ ].filter(Boolean).join('\n\n'),
454
539
  canUseTool: async (toolName, input, _options) => {
455
- // Auto-approve writes to session workspace
540
+ // Auto-approve writes to session workspace (but block spec.md and library/ — fast brain manages those)
456
541
  if (toolName === 'Write' || toolName === 'Edit') {
457
542
  const filePath = String(input?.file_path || '');
458
543
  if (filePath.includes('.osborn/sessions/') || filePath.includes('.osborn/research/')) {
544
+ // Block writes to spec.md and library/ — the fast brain manages these
545
+ const fileName = filePath.split('/').pop() || '';
546
+ if (fileName === 'spec.md' || filePath.includes('/library/')) {
547
+ console.log(`🚫 Blocked research agent write to managed file: ${filePath} (fast brain handles spec.md and library/)`);
548
+ return { behavior: 'deny', message: 'spec.md and library/ are managed by the fast brain sub-agent. Do NOT write to them. Return your findings in your response text — the fast brain will organize them into spec.md and library/ automatically.' };
549
+ }
459
550
  console.log(`✅ Auto-approved ${toolName} to workspace: ${filePath}`);
460
551
  return { behavior: 'allow', updatedInput: input };
461
552
  }
462
553
  }
554
+ // Auto-approve AskUserQuestion — research agent should freely ask clarifying questions
555
+ if (toolName === 'AskUserQuestion') {
556
+ console.log(`✅ Auto-approved ${toolName}`);
557
+ return { behavior: 'allow', updatedInput: input };
558
+ }
463
559
  // Auto-deny tools the research agent should never use
464
560
  if (toolName === 'EnterPlanMode' || toolName === 'ExitPlanMode') {
465
561
  console.log(`🚫 Auto-denied ${toolName} (not used in research mode)`);
@@ -504,7 +600,129 @@ class ClaudeLLMStream extends llm.LLMStream {
504
600
  // Run Claude Agent SDK query() and stream results
505
601
  let hasOutput = false;
506
602
  let fullResponse = ''; // Collect full response for frontend
507
- for await (const message of query({ prompt: userText, options: sdkOptions })) {
603
+ // DIRECT MODE OPTIMIZATION: When skipTTSQueue is true, we run the Claude query
604
+ // in the background and return from run() immediately. This is critical because:
605
+ //
606
+ // LiveKit's main speech loop (agent_activity.ts) processes one SpeechHandle at a time.
607
+ // The LLM's SpeechHandle blocks the queue until run() returns (which closes the queue
608
+ // → pipeline completes → _markGenerationDone()). If we await the full query() here,
609
+ // the pipeline is blocked for the entire duration of tool execution (10-30s).
610
+ // Meanwhile, session.say() SpeechHandles queue up but can't play.
611
+ //
612
+ // By returning early, the pipeline completes in milliseconds. The say() handles
613
+ // created by tts_say events get processed by the main loop immediately.
614
+ // The query continues in the background — text arrives via tts_say, tools via hooks.
615
+ if (this.#opts.skipTTSQueue) {
616
+ const bgAbortController = this.#abortController;
617
+ const bgEventEmitter = this.#eventEmitter;
618
+ const bgOpts = this.#opts;
619
+ const bgOnSessionId = this.#onSessionId;
620
+ const bgOnCheckpoint = this.#onCheckpoint;
621
+ const self = this;
622
+ (async () => {
623
+ // Declare outside try so finally can access it
624
+ const activeQuery = query({ prompt: userText, options: sdkOptions });
625
+ self.#llmRef.setActiveQuery(activeQuery);
626
+ try {
627
+ for await (const message of activeQuery) {
628
+ // Abort check
629
+ if (bgAbortController?.signal.aborted)
630
+ break;
631
+ // Session ID capture (same as synchronous path)
632
+ if (message.type === 'system' && message.subtype === 'init') {
633
+ const mcpServers = message.mcp_servers;
634
+ if (mcpServers && Array.isArray(mcpServers)) {
635
+ for (const s of mcpServers) {
636
+ const status = s.status === 'connected' ? '✅' : '❌';
637
+ console.log(`${status} MCP server ${s.name}: ${s.status}`);
638
+ if (s.status !== 'connected') {
639
+ console.log(` 🔍 MCP error:`, JSON.stringify(s));
640
+ }
641
+ }
642
+ }
643
+ const newSessionId = message.session_id;
644
+ if (newSessionId) {
645
+ bgOnSessionId(newSessionId);
646
+ const isNewSession = !self.#sessionId;
647
+ if (isNewSession)
648
+ console.log(`📋 New session: ${newSessionId}`);
649
+ self.#sessionId = newSessionId;
650
+ if (isNewSession && bgOpts.workingDirectory) {
651
+ saveSessionMetadata(bgOpts.workingDirectory, {
652
+ sessionId: newSessionId,
653
+ lastUpdated: new Date().toISOString(),
654
+ projectPath: bgOpts.workingDirectory,
655
+ });
656
+ }
657
+ const requestedResumeId = bgOpts.resumeSessionId;
658
+ if (requestedResumeId && newSessionId !== requestedResumeId) {
659
+ console.error(`❌ Session resume FAILED: Expected ${requestedResumeId.substring(0, 8)}..., got ${newSessionId.substring(0, 8)}...`);
660
+ bgEventEmitter.emit('session_resume_failed', { requestedSessionId: requestedResumeId, actualSessionId: newSessionId });
661
+ }
662
+ else if (requestedResumeId && newSessionId === requestedResumeId) {
663
+ console.log(`✅ Session resumed successfully: ${newSessionId.substring(0, 8)}...`);
664
+ }
665
+ }
666
+ }
667
+ // Checkpoint capture
668
+ if (message.type === 'user' && message.uuid) {
669
+ bgOnCheckpoint(message.uuid);
670
+ }
671
+ // Stream text → tts_say events (the whole point of background mode)
672
+ if (message.type === 'assistant' && message.message?.content) {
673
+ const sdkRequestId = message.requestId;
674
+ if (sdkRequestId)
675
+ bgEventEmitter.emit('query_request_id', { requestId: sdkRequestId });
676
+ for (const block of message.message.content) {
677
+ if (block.type === 'text' && block.text) {
678
+ hasOutput = true;
679
+ bgEventEmitter.emit('assistant_text', { text: block.text });
680
+ const ttsChunk = stripMarkdownForTTS(block.text);
681
+ if (ttsChunk.trim()) {
682
+ console.log(`🔊 TTS say (${ttsChunk.length} chars): "${ttsChunk.substring(0, 60)}..."`);
683
+ bgEventEmitter.emit('tts_say', { text: ttsChunk });
684
+ }
685
+ }
686
+ }
687
+ }
688
+ // Final result
689
+ if (message.type === 'result' && message.result) {
690
+ bgEventEmitter.emit('assistant_result', { text: message.result });
691
+ if (!hasOutput) {
692
+ hasOutput = true;
693
+ const ttsText = stripMarkdownForTTS(message.result);
694
+ if (ttsText.trim()) {
695
+ console.log(`🔊 TTS say result (${ttsText.length} chars): "${ttsText.substring(0, 60)}..."`);
696
+ bgEventEmitter.emit('tts_say', { text: ttsText });
697
+ }
698
+ }
699
+ }
700
+ }
701
+ if (!hasOutput) {
702
+ bgEventEmitter.emit('tts_say', { text: 'Done.' });
703
+ }
704
+ console.log('✅ Claude response complete (background)');
705
+ }
706
+ catch (error) {
707
+ if (bgAbortController?.signal.aborted) {
708
+ console.log('🛑 Claude Agent SDK query aborted (background)');
709
+ return;
710
+ }
711
+ console.error('❌ Claude Agent SDK error (background):', error);
712
+ bgEventEmitter.emit('tts_say', { text: 'Sorry, I encountered an error.' });
713
+ }
714
+ finally {
715
+ self.#llmRef.removeActiveQuery(activeQuery);
716
+ }
717
+ })();
718
+ // Return immediately — queue closes, pipeline completes, say() handles play
719
+ console.log('🚀 Direct mode: Claude query running in background, pipeline released');
720
+ return;
721
+ }
722
+ // Store active query for interrupt/rewind access
723
+ activeQuery = query({ prompt: userText, options: sdkOptions });
724
+ this.#llmRef.setActiveQuery(activeQuery);
725
+ for await (const message of activeQuery) {
508
726
  // Capture session ID for context continuity
509
727
  if (message.type === 'system' && message.subtype === 'init') {
510
728
  // Log MCP server connection status
@@ -554,53 +772,102 @@ class ClaudeLLMStream extends llm.LLMStream {
554
772
  const checkpointId = message.uuid;
555
773
  this.#onCheckpoint(checkpointId);
556
774
  }
557
- // Stream text chunks
775
+ // Stream text chunks — send each assistant text block to TTS
558
776
  if (message.type === 'assistant' && message.message?.content) {
777
+ // Emit SDK requestId on first assistant message — identifies this query()
778
+ // in the JSONL for tracking which research task produced which output
779
+ const sdkRequestId = message.requestId;
780
+ if (sdkRequestId) {
781
+ this.#eventEmitter.emit('query_request_id', { requestId: sdkRequestId });
782
+ }
559
783
  for (const block of message.message.content) {
560
784
  if (block.type === 'text' && block.text) {
561
785
  hasOutput = true;
562
786
  const rawText = block.text;
563
787
  // Emit RAW text to frontend (for chat bubbles with full formatting)
564
788
  this.#eventEmitter.emit('assistant_text', { text: rawText });
565
- // Collect for final TTS summary
566
- fullResponse += rawText + ' ';
789
+ // Strip markdown for clean speech
790
+ const ttsChunk = stripMarkdownForTTS(rawText);
791
+ if (ttsChunk.trim()) {
792
+ if (this.#opts.skipTTSQueue) {
793
+ // Direct mode: emit event for session.say() — bypasses LiveKit's
794
+ // BufferedTokenStream which causes stuck/delayed/out-of-order audio
795
+ console.log(`🔊 TTS say (${ttsChunk.length} chars): "${ttsChunk.substring(0, 60)}..."`);
796
+ this.#eventEmitter.emit('tts_say', { text: ttsChunk });
797
+ }
798
+ else {
799
+ // Realtime mode: use LLM stream queue (framework handles TTS)
800
+ console.log(`🔊 TTS stream (${ttsChunk.length} chars): "${ttsChunk.substring(0, 60)}..."`);
801
+ this.queue.put({
802
+ id: requestId,
803
+ delta: { role: 'assistant', content: ttsChunk },
804
+ });
805
+ }
806
+ }
567
807
  }
568
808
  }
569
809
  }
570
- // Final result
810
+ // Final result — only speak if no text blocks were streamed already
571
811
  if (message.type === 'result' && message.result) {
572
812
  const rawResult = message.result;
573
813
  // Emit RAW result to frontend
574
814
  this.#eventEmitter.emit('assistant_result', { text: rawResult });
575
815
  if (!hasOutput) {
576
- fullResponse = rawResult;
577
816
  hasOutput = true;
817
+ const ttsText = stripMarkdownForTTS(rawResult);
818
+ if (ttsText.trim()) {
819
+ if (this.#opts.skipTTSQueue) {
820
+ console.log(`🔊 TTS say result (${ttsText.length} chars): "${ttsText.substring(0, 60)}..."`);
821
+ this.#eventEmitter.emit('tts_say', { text: ttsText });
822
+ }
823
+ else {
824
+ console.log(`🔊 TTS result (${ttsText.length} chars): "${ttsText.substring(0, 60)}..."`);
825
+ this.queue.put({
826
+ id: requestId,
827
+ delta: { role: 'assistant', content: ttsText },
828
+ });
829
+ }
830
+ }
578
831
  }
579
832
  }
580
833
  }
581
- // Send SUMMARIZED output to TTS (spoken)
582
- if (hasOutput && fullResponse.trim()) {
583
- const ttsText = summarizeForTTS(fullResponse.trim());
584
- console.log(`🔊 TTS (summarized ${fullResponse.length} → ${ttsText.length} chars): "${ttsText.substring(0, 80)}..."`);
585
- this.queue.put({
586
- id: requestId,
587
- delta: { role: 'assistant', content: ttsText },
588
- });
834
+ // If Claude produced no output at all, say "Done."
835
+ if (!hasOutput) {
836
+ if (this.#opts.skipTTSQueue) {
837
+ this.#eventEmitter.emit('tts_say', { text: 'Done.' });
838
+ }
839
+ else {
840
+ this.queue.put({
841
+ id: requestId,
842
+ delta: { role: 'assistant', content: 'Done.' },
843
+ });
844
+ }
845
+ }
846
+ console.log('✅ Claude response complete');
847
+ }
848
+ catch (error) {
849
+ // AbortError = clean abort (disconnect, new research, recovery) — don't push
850
+ // garbage text that would flow through the post-research pipeline
851
+ if (this.#abortController?.signal.aborted) {
852
+ console.log('🛑 Claude Agent SDK query aborted');
853
+ if (!this.#opts.skipTTSQueue) {
854
+ this.queue.put({ id: requestId, delta: { role: 'assistant', content: '' } });
855
+ }
856
+ return;
857
+ }
858
+ console.error('❌ Claude Agent SDK error:', error);
859
+ if (this.#opts.skipTTSQueue) {
860
+ this.#eventEmitter.emit('tts_say', { text: 'Sorry, I encountered an error.' });
589
861
  }
590
862
  else {
591
863
  this.queue.put({
592
864
  id: requestId,
593
- delta: { role: 'assistant', content: 'Done.' },
865
+ delta: { role: 'assistant', content: 'Sorry, I encountered an error.' },
594
866
  });
595
867
  }
596
- console.log('✅ Claude response complete');
597
868
  }
598
- catch (error) {
599
- console.error('❌ Claude Agent SDK error:', error);
600
- this.queue.put({
601
- id: requestId,
602
- delta: { role: 'assistant', content: 'Sorry, I encountered an error.' },
603
- });
869
+ finally {
870
+ this.#llmRef.removeActiveQuery(activeQuery);
604
871
  }
605
872
  }
606
873
  }
package/dist/config.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { McpServerConfig } from './claude-handler.js';
2
- export type VoiceMode = 'direct' | 'realtime';
2
+ export type VoiceMode = 'direct' | 'realtime' | 'pipeline';
3
3
  export type EditMode = 'read-only' | 'edit';
4
4
  export type AgentMode = 'plan' | 'execute' | 'research';
5
5
  export type RealtimeProvider = 'openai' | 'gemini';
@@ -25,6 +25,9 @@ export interface DirectConfig {
25
25
  voice?: string;
26
26
  };
27
27
  }
28
+ export interface PipelineDirectConfig extends DirectConfig {
29
+ enableCollisionGuard?: boolean;
30
+ }
28
31
  export interface PipelinedConfig {
29
32
  stt?: {
30
33
  provider?: STTProvider;
@@ -49,6 +52,7 @@ export interface OsbornConfig {
49
52
  voiceMode?: VoiceMode;
50
53
  realtime?: RealtimeConfig;
51
54
  direct?: DirectConfig;
55
+ 'pipeline-direct'?: PipelineDirectConfig;
52
56
  pipelined?: PipelinedConfig;
53
57
  }
54
58
  interface McpServerConfigYaml {
@@ -245,7 +249,7 @@ export declare function listLibraryFiles(projectPath: string, sessionId: string)
245
249
  export interface ResearchArtifact {
246
250
  fileName: string;
247
251
  filePath: string;
248
- type: 'plan' | 'diagram' | 'notes' | 'image' | 'summary' | 'other';
252
+ type: 'plan' | 'diagram' | 'notes' | 'image' | 'summary' | 'html' | 'other';
249
253
  size: number;
250
254
  updatedAt: string;
251
255
  }
package/dist/config.js CHANGED
@@ -58,7 +58,7 @@ const DEFAULT_CONFIG = {
58
58
  },
59
59
  tts: {
60
60
  provider: 'deepgram',
61
- voice: 'aura-asteria-en',
61
+ voice: 'aura-2-asteria-en',
62
62
  },
63
63
  },
64
64
  mcpServers: {
@@ -842,6 +842,8 @@ function classifyFile(fileName) {
842
842
  return 'plan';
843
843
  if (ext === 'mmd' || ext === 'mermaid')
844
844
  return 'diagram';
845
+ if (ext === 'html' || ext === 'htm')
846
+ return 'html';
845
847
  if (ext === 'md')
846
848
  return 'notes';
847
849
  if (['png', 'jpg', 'jpeg', 'svg', 'gif', 'webp'].includes(ext))
@@ -861,6 +863,9 @@ function scanDirForArtifacts(dir) {
861
863
  scan(fullPath);
862
864
  }
863
865
  else {
866
+ // Skip internal index files and .index/ folder
867
+ if (entry.startsWith('search-index') || entry === '.index')
868
+ continue;
864
869
  results.push({
865
870
  fileName: entry,
866
871
  filePath: fullPath,