npm - osborn - Versions diffs - 0.9.67 → 0.9.71 - Mend

osborn 0.9.67 → 0.9.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -15,6 +15,9 @@ import { dirname, join } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { spawn } from 'node:child_process';
 import { randomUUID } from 'node:crypto';
+import { createRequire } from 'node:module';
+// 0.9.71: ES modules have no built-in require(); createRequire(import.meta.url) builds one anchored at this file, used by the [BE-SDK-VERSIONS] snapshot to load `<pkg>/package.json` and read its version.
+const __sdkVersionRequire = createRequire(import.meta.url);
 import { homedir, tmpdir } from 'node:os';
 import { PassThrough } from 'node:stream';
 import { createGunzip } from 'node:zlib';
@@ -2175,6 +2178,34 @@ async function main() {
                 },
             },
         });
+        // 0.9.71: log the options read back from the constructed session (session.sessionOptions —
+        // presumably with SDK defaults already applied; confirm) so prod logs show what tuning is live.
+        try {
+            const so = session.sessionOptions ?? {}; // {} fallback: a missing options object just yields a mostly-empty record
+            const detect = session.interruptionDetection;
+            const turn = so.turnHandling ?? {}; // same fallback for the nested turn-handling group
+            console.log('🧪 [BE-AGENT-SESSION-CONFIG]', JSON.stringify({ // fields whose value is undefined are omitted by JSON.stringify
+                t: new Date().toISOString(),
+                maxToolSteps: so.maxToolSteps,
+                userAwayTimeout: so.userAwayTimeout,
+                aecWarmupDuration: so.aecWarmupDuration,
+                ttsReadIdleTimeout: so.ttsReadIdleTimeout,
+                forwardAudioIdleTimeout: so.forwardAudioIdleTimeout,
+                useTtsAlignedTranscript: so.useTtsAlignedTranscript,
+                ttsTextTransforms: so.ttsTextTransforms,
+                interruptionDetectionMode: detect, // 'vad' | 'adaptive' | undefined
+                turnHandling: {
+                    turnDetection: turn.turnDetection,
+                    endpointing: turn.endpointing,
+                    interruption: turn.interruption,
+                    preemptiveGeneration: turn.preemptiveGeneration,
+                    userTurnLimit: turn.userTurnLimit,
+                },
+            }));
+        }
+        catch (err) { // diagnostics only: report the failure and carry on to the return that follows
+            console.log('🧪 [BE-AGENT-SESSION-CONFIG] failed:', err instanceof Error ? err.message : String(err));
+        }
         return { session, agent };
     }
     // ============================================================
@@ -2703,7 +2734,45 @@ async function main() {
     // Room Event Handlers
     // ============================================================
     room.on(RoomEvent.Connected, () => {
-        console.log('✅ Connected to room:', roomName);
+        // 0.9.68: log Room SID + name PROMINENTLY so we can cross-reference
+        // this specific session in LiveKit Cloud dashboard → Sessions tab.
+        // The SID comes from room.getSid(), which returns a promise; it is not awaited here,
+        // so the [LIVEKIT-DASHBOARD] line is logged whenever that promise settles.
+        console.log(`✅ Connected to room: ${roomName} | t=${new Date().toISOString()}`);
+        room.getSid().then((sid) => {
+            console.log(`🔗 [LIVEKIT-DASHBOARD] room sid=${sid} name=${roomName} — search at https://cloud.livekit.io/projects → Sessions → "${sid}"`);
+        }).catch((err) => { // a failed lookup is logged rather than left as an unhandled rejection
+            console.log(`⚠️ [LIVEKIT-DASHBOARD] failed to fetch room SID: ${err instanceof Error ? err.message : String(err)}`);
+        });
+        // 0.9.71: SDK + runtime snapshot — proves what's actually running so
+        // future log forensics can rule out version drift in one grep.
+        try {
+            const pkgs = {};
+            for (const name of [
+                'osborn',
+                '@livekit/agents',
+                '@livekit/agents-plugin-openai',
+                '@livekit/agents-plugin-deepgram',
+                '@livekit/agents-plugin-silero',
+                '@livekit/agents-plugin-google',
+                '@livekit/agents-plugin-elevenlabs',
+                '@livekit/agents-plugin-livekit',
+                '@livekit/rtc-node',
+                'livekit-server-sdk',
+                '@anthropic-ai/claude-agent-sdk',
+                '@google/genai',
+                'openai',
+            ]) {
+                try {
+                    pkgs[name] = __sdkVersionRequire(`${name}/package.json`).version;
+                }
+                catch { }
+            }
+            console.log('🧪 [BE-SDK-VERSIONS]', JSON.stringify({ t: new Date().toISOString(), node: process.version, pkgs }));
+        }
+        catch (err) {
+            console.log('🧪 [BE-SDK-VERSIONS] failed:', err instanceof Error ? err.message : String(err));
+        }
         localParticipant = room.localParticipant;
         // Arm the alone timer: if we connected but no user joins within the grace
         // window (e.g. machine woken then abandoned mid-handshake), leave the room
@@ -2720,6 +2789,46 @@ async function main() {
     // Flux STT's speech-vs-noise classification: slower (~100-300ms) but
     // confidence-aware. The latency tradeoff is worth eliminating the false
     // interrupts at the root.
+    // 0.9.71: Room-level audio observability — observe-only logs so we can
+    // cross-reference user mic mute/quality changes against TTS cutoffs without
+    // re-introducing the over-eager ActiveSpeakers interrupt. Each handler below only formats one log line; its empty catch drops any logging failure.
+    room.on(RoomEvent.ActiveSpeakersChanged, (speakers) => {
+        try {
+            const ids = (speakers || []).map((s) => s?.identity).filter(Boolean); // identities only; entries without one are dropped
+            console.log(`🎙️ [ROOM-SPEAKERS] count=${ids.length} ids=${JSON.stringify(ids)} t=${new Date().toISOString()}`);
+        }
+        catch { }
+    });
+    room.on(RoomEvent.ConnectionQualityChanged, (quality, participant) => {
+        try {
+            console.log(`📶 [ROOM-QUALITY] participant=${participant?.identity} quality=${quality} t=${new Date().toISOString()}`); // quality is interpolated as-is
+        }
+        catch { }
+    });
+    room.on(RoomEvent.TrackMuted, (publication, participant) => {
+        try {
+            console.log(`🔇 [ROOM-TRACK-MUTED] participant=${participant?.identity} kind=${publication?.kind} source=${publication?.source} sid=${publication?.sid} t=${new Date().toISOString()}`);
+        }
+        catch { }
+    });
+    room.on(RoomEvent.TrackUnmuted, (publication, participant) => {
+        try {
+            console.log(`🔊 [ROOM-TRACK-UNMUTED] participant=${participant?.identity} kind=${publication?.kind} source=${publication?.source} sid=${publication?.sid} t=${new Date().toISOString()}`);
+        }
+        catch { }
+    });
+    room.on(RoomEvent.TrackSubscribed, (track, publication, participant) => {
+        try {
+            console.log(`📥 [ROOM-TRACK-SUBSCRIBED] participant=${participant?.identity} kind=${track?.kind} source=${publication?.source} sid=${publication?.sid} t=${new Date().toISOString()}`); // kind comes from the track here, not the publication
+        }
+        catch { }
+    });
+    room.on(RoomEvent.TrackUnsubscribed, (track, publication, participant) => {
+        try {
+            console.log(`📤 [ROOM-TRACK-UNSUBSCRIBED] participant=${participant?.identity} kind=${track?.kind} source=${publication?.source} sid=${publication?.sid} t=${new Date().toISOString()}`);
+        }
+        catch { }
+    });
     room.on(RoomEvent.Disconnected, () => {
         console.log('👋 Disconnected from room');
         // Clean up active research and voice queue
@@ -3179,13 +3288,101 @@ async function main() {
             // FALLBACK: playout_completed
             sess.on('playout_completed', (ev) => {
                 const message = ev.message || ev.text || ev.content;
+                console.log(`🎧 PLAYOUT COMPLETED [${new Date().toISOString()}]:`, JSON.stringify({ // logged for every event, before the transcript check below
+                    speechId: ev.speechHandle?.id ?? ev.speechId, // prefer the speech handle's id, else a flat speechId field
+                    interrupted: ev.interrupted,
+                    durationMs: ev.durationMs,
+                    messageLen: message ? message.length : 0, // length only — the text itself is not logged here
+                }));
                 if (message && message.length > 0) {
                     sendAgentTranscript(message, 'playout');
                 }
             });
+            // 0.9.71: metrics_collected — per-call latency for STT/TTS/LLM/VAD/EOU/Interruption.
+            // SINGLE highest-signal event for diagnosing audio cutoffs.
+            //   • TTSMetrics.ttfbMs / durationMs / audioDurationMs / cancelled → directly answers
+            //     "did the OpenAI HTTP fetch hang or did it complete and the SDK aborted?"
+            //   • STTMetrics.audioDurationMs / durationMs → Deepgram latency per utterance
+            //   • LLMMetrics.ttftMs → cold-vs-warm Claude subprocess
+            //   • EOUMetrics.endOfUtteranceDelayMs / transcriptionDelayMs → end-of-turn timing
+            //   • InterruptionMetrics.{detectionDelay, numInterruptions, numBackchannels} →
+            //     turn-detector signal at the source
+            sess.on('metrics_collected', (ev) => {
+                const m = ev?.metrics;
+                if (!m)
+                    return;
+                const compact = { type: m.type, label: m.label, t: new Date().toISOString() };
+                // Per-type subset — keep tight
+                if (m.type === 'tts_metrics') {
+                    compact.ttfbMs = Math.round(m.ttfbMs ?? -1);
+                    compact.durationMs = Math.round(m.durationMs ?? -1);
+                    compact.audioDurationMs = Math.round(m.audioDurationMs ?? -1);
+                    compact.cancelled = !!m.cancelled;
+                    compact.charactersCount = m.charactersCount;
+                    compact.streamed = !!m.streamed;
+                    compact.speechId = m.speechId;
+                }
+                else if (m.type === 'stt_metrics') {
+                    compact.audioDurationMs = Math.round(m.audioDurationMs ?? -1);
+                    compact.durationMs = Math.round(m.durationMs ?? -1);
+                    compact.streamed = !!m.streamed;
+                }
+                else if (m.type === 'llm_metrics') {
+                    compact.ttftMs = Math.round(m.ttftMs ?? -1);
+                    compact.durationMs = Math.round(m.durationMs ?? -1);
+                    compact.cancelled = !!m.cancelled;
+                    compact.completionTokens = m.completionTokens;
+                    compact.promptTokens = m.promptTokens;
+                    compact.speechId = m.speechId;
+                }
+                else if (m.type === 'vad_metrics') {
+                    compact.idleTimeMs = Math.round(m.idleTimeMs ?? -1);
+                    compact.inferenceCount = m.inferenceCount;
+                }
+                else if (m.type === 'eou_metrics') {
+                    compact.endOfUtteranceDelayMs = Math.round(m.endOfUtteranceDelayMs ?? -1);
+                    compact.transcriptionDelayMs = Math.round(m.transcriptionDelayMs ?? -1);
+                    compact.onUserTurnCompletedDelayMs = Math.round(m.onUserTurnCompletedDelayMs ?? -1);
+                    compact.speechId = m.speechId;
+                }
+                else if (m.type === 'interruption_metrics') {
+                    compact.detectionDelay = Math.round(m.detectionDelay ?? -1);
+                    compact.predictionDuration = Math.round(m.predictionDuration ?? -1);
+                    compact.numInterruptions = m.numInterruptions;
+                    compact.numBackchannels = m.numBackchannels;
+                    compact.numRequests = m.numRequests;
+                }
+                console.log(`📈 [METRICS]`, JSON.stringify(compact));
+            });
+            // 0.9.71: function_tools_executed — logs how many calls/outputs the event carries (presumably emitted when a tool batch completes inside the SDK — confirm).
+            sess.on('function_tools_executed', (ev) => {
+                try {
+                    const calls = ev?.functionCalls?.length ?? 0; // counts only; a missing list logs as 0
+                    const outputs = ev?.functionOutputs?.length ?? 0;
+                    console.log(`🛠️ [TOOLS-EXECUTED] calls=${calls} outputs=${outputs} t=${new Date().toISOString()}`);
+                }
+                catch { } // observe-only: a logging failure is deliberately ignored
+            });
+            // 0.9.68: mirror SDK's internal unrecoverable-error counters so we can
+            // see EXACTLY how close we are to closeImpl() firing (default threshold 3).
+            // These are local shadows kept for logging only — nothing here reads the SDK's own counters. They are zeroed on a "speaking" transition (the original note cites agent_session.js:740 for the SDK's matching reset).
+            let __ttsErrorCounter = 0;
+            let __llmErrorCounter = 0;
+            const __maxUnrecov = 3; // SDK default DEFAULT_SESSION_CONNECT_OPTIONS.maxUnrecoverableErrors — NOTE(review): hard-coded copy; it will drift if the session is created with a different limit
             // Error handler
             sess.on('error', (ev) => {
                 const msg = ev.error?.message || String(ev.error);
+                const errType = ev.type || 'unknown';
+                const recoverable = ev.recoverable;
+                // 0.9.68: counter mirror — increment for recoverable:false same as SDK does
+                if (recoverable === false) { // strict check: an event with no `recoverable` field is not counted
+                    if (errType === 'tts_error')
+                        __ttsErrorCounter++;
+                    else if (errType === 'llm_error')
+                        __llmErrorCounter++;
+                }
+                const willCloseNext = (__ttsErrorCounter > __maxUnrecov || __llmErrorCounter > __maxUnrecov); // NOTE(review): only true once a counter is already PAST the threshold (e.g. 4/3), so it cannot flag the error *before* a close — confirm against the SDK's comparison and consider >=
+                console.log(`📊 [ERROR-COUNTER] type=${errType} recoverable=${recoverable} ttsErrorCount=${__ttsErrorCounter}/${__maxUnrecov} llmErrorCount=${__llmErrorCounter}/${__maxUnrecov} willCloseNext=${willCloseNext} t=${new Date().toISOString()}`);
                 // OpenAI race: voice queue collided with server-side VAD auto-response
                 if (msg.includes('conversation_already_has_active_response') || msg.includes('active_response')) {
                     console.log('⚠️ OpenAI active response collision — queue will retry on next listening state');
@@ -3198,6 +3395,15 @@ async function main() {
                 }
                 console.error('❌ Session error:', ev.error);
             });
+            // 0.9.68: zero the mirrored error counters when the agent enters 'speaking' — the transition on which the SDK is said to reset its own.
+            // Added as a separate agent_state_changed listener; presumably runs after the existing one (registration order) — confirm.
+            sess.on('agent_state_changed', (ev) => {
+                if (ev.newState === 'speaking' && (__ttsErrorCounter > 0 || __llmErrorCounter > 0)) { // skip the log line when there is nothing to clear
+                    console.log(`📊 [COUNTER-RESET] speaking transition cleared ttsErrorCount=${__ttsErrorCounter}→0 llmErrorCount=${__llmErrorCounter}→0`);
+                    __ttsErrorCounter = 0;
+                    __llmErrorCounter = 0;
+                }
+            });
             // Capture voice mode at session creation — prevents state confusion
             // if currentVoiceMode changes between session start and crash recovery
             const sessionVoiceMode = currentVoiceMode;

package/dist/voice-io.js CHANGED Viewed

@@ -137,7 +137,8 @@ export const DIRECT_MODE_STT = {
 export const DIRECT_MODE_TTS = {
     // provider: 'deepgram', model: 'aura-2-asteria-en',  // WebSocket-based: handles TTS abort cleanly (no unrecoverable crash on interruption)
     // provider: 'gemini', model: 'gemini-2.5-flash-preview-tts', voice: 'apollo',
-    provider: 'openai', model: 'tts-1', voice: 'fable', // HTTP streaming: throws APIUserAbortError on interrupt → unrecoverable session crash
+    // provider: 'openai', model: 'tts-1', voice: 'fable',  // HTTP streaming: throws APIUserAbortError on interrupt → unrecoverable session crash
+    provider: 'openai', model: 'tts-1-hd', voice: 'fable', // 0.9.70: trial of tts-1-hd — tts-1 had chronic per-sentence HTTP hangs (40s SDK watchdog → APIUserAbortError mid-message). NOTE(review): provider is still 'openai', so the abort-on-interrupt caveat noted for tts-1 presumably still applies — confirm
     // provider: 'groq-orpheus', model: 'canopylabs/orpheus-v1-english', voice: 'autumn',  // $22/M chars — voices: autumn, diana, hannah, austin, daniel, troy
 };
 /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "osborn",
-  "version": "0.9.67",
+  "version": "0.9.71",
   "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
   "type": "module",
   "bin": {