osborn 0.9.65 → 0.9.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,7 +71,7 @@ let geminiClient = null;
71
71
  let initialized = false;
72
72
  // Model IDs — configurable per provider
73
73
  const ANTHROPIC_FAST_MODEL = 'claude-haiku-4-5-20251001';
74
- const GEMINI_FAST_MODEL = 'gemini-2.0-flash';
74
+ const GEMINI_FAST_MODEL = 'gemini-2.5-flash'; // 0.9.67: was gemini-2.0-flash — 404 deprecated by Google (verified against @livekit/agents-plugin-google test default)
75
75
  // Agent SDK session tracking — resume across voice questions for context continuity
76
76
  let fastBrainSessionId = null;
77
77
  // Gemini Chat session — persists across voice questions for context continuity.
package/dist/index.js CHANGED
@@ -2703,7 +2703,16 @@ async function main() {
2703
2703
  // Room Event Handlers
2704
2704
  // ============================================================
2705
2705
  room.on(RoomEvent.Connected, () => {
2706
- console.log('✅ Connected to room:', roomName);
2706
+ // 0.9.68: log Room SID + name PROMINENTLY so we can cross-reference
2707
+ // this specific session in LiveKit Cloud dashboard → Sessions tab.
2708
+ // @livekit/rtc-node Room exposes SID via async getSid() (it's resolved
2709
+ // after WebRTC handshake), so we fetch it asynchronously and log when ready.
2710
+ console.log(`✅ Connected to room: ${roomName} | t=${new Date().toISOString()}`);
2711
+ room.getSid().then((sid) => {
2712
+ console.log(`🔗 [LIVEKIT-DASHBOARD] room sid=${sid} name=${roomName} — search at https://cloud.livekit.io/projects → Sessions → "${sid}"`);
2713
+ }).catch((err) => {
2714
+ console.log(`⚠️ [LIVEKIT-DASHBOARD] failed to fetch room SID: ${err instanceof Error ? err.message : String(err)}`);
2715
+ });
2707
2716
  localParticipant = room.localParticipant;
2708
2717
  // Arm the alone timer: if we connected but no user joins within the grace
2709
2718
  // window (e.g. machine woken then abandoned mid-handshake), leave the room
@@ -3075,17 +3084,47 @@ async function main() {
3075
3084
  userState = ev.newState;
3076
3085
  console.log(`👤 User state: ${prev} → ${ev.newState} (agent: ${agentState})`);
3077
3086
  if (ev.newState === 'speaking' && agentState === 'speaking' && sessionVoiceMode !== 'realtime') {
3078
- // Simple manual interrupt for echo-side defense fallback. With 1.4.x
3079
- // the SDK's interrupt-by-audio-activity path is properly gated by
3080
- // turnHandling.interruption.{minDuration, minWords, falseInterruptionTimeout},
3081
- // and resumeFalseInterruption auto-recovers if echo was misclassified.
3082
- // This handler stays as a secondary trigger only.
3087
+ // 0.9.67: action commented out, condition + debug kept.
3088
+ //
3089
+ // Why removed: in @livekit/agents 1.4.x SpeechHandle.interrupt() calls
3090
+ // replyAbortController.abort() → AbortSignal.any composes into the
3091
+ // OpenAI TTS HTTP fetch → arrayBuffer() throws AbortError →
3092
+ // APIUserAbortError (openai/client.mjs:364) → SDK marks the error
3093
+ // recoverable:false → connOptions.maxUnrecoverableErrors counter trips
3094
+ // → session collapses. In 1.2.1 the same call was a hard-kill that
3095
+ // never reached an HTTP fetch — that's why it ran fine for ~1 month
3096
+ // under the silently caret-resolved 1.4.5 (which had inherited the
3097
+ // abort plumbing) until it crossed the unrecoverable-errors threshold.
3098
+ //
3099
+ // What handles interruption now: SDK 1.4.x's gated path —
3100
+ // turnHandling.interruption.{minDuration:2500, minWords:4,
3101
+ // falseInterruptionTimeout:4000, resumeFalseInterruption:true}
3102
+ // pauses TTS via audioOutput.pause() (no abort) and either resumes
3103
+ // on a false trigger or hard-interrupts on a confirmed barge-in.
3104
+ //
3105
+ // Debug: this block now ONLY observes — logs what we'd have interrupted
3106
+ // on so we can compare against the SDK's own behavior. If the SDK
3107
+ // under-reacts to real barge-ins we can re-enable selectively.
3083
3108
  try {
3084
- console.log('🎤 user_state_changed=speaking + agent speaking interrupting TTS');
3085
- currentSession?.interrupt();
3109
+ const evKeys = ev && typeof ev === 'object' ? Object.keys(ev) : [];
3110
+ const evShape = evKeys.reduce((acc, k) => {
3111
+ const v = ev[k];
3112
+ acc[k] = (v && typeof v === 'object') ? `<object:${Object.keys(v).join(',')}>` : v;
3113
+ return acc;
3114
+ }, {});
3115
+ console.log('🔎 [DEBUG] manual-interrupt WOULD HAVE FIRED — SDK gated path now owns it:', JSON.stringify({
3116
+ t: new Date().toISOString(),
3117
+ userPrev: prev,
3118
+ userNew: ev.newState,
3119
+ agentState,
3120
+ sessionVoiceMode,
3121
+ evKeys,
3122
+ evShape,
3123
+ }));
3124
+ // currentSession?.interrupt() // ← 0.9.67 DISABLED: cascades to APIUserAbortError → recoverable:false → session collapse
3086
3125
  }
3087
3126
  catch (err) {
3088
- console.warn('⚠️ user-state interrupt failed:', err instanceof Error ? err.message : err);
3127
+ console.warn('⚠️ user-state interrupt debug failed:', err instanceof Error ? err.message : err);
3089
3128
  }
3090
3129
  }
3091
3130
  // When user stops speaking, retry voice queue — items may be waiting
@@ -3153,9 +3192,26 @@ async function main() {
3153
3192
  sendAgentTranscript(message, 'playout');
3154
3193
  }
3155
3194
  });
3195
+ // 0.9.68: mirror SDK's internal unrecoverable-error counters so we can
3196
+ // see EXACTLY how close we are to closeImpl() firing (default threshold 3).
3197
+ // Counter resets on each successful "speaking" transition (agent_session.js:740).
3198
+ let __ttsErrorCounter = 0;
3199
+ let __llmErrorCounter = 0;
3200
+ const __maxUnrecov = 3; // SDK default DEFAULT_SESSION_CONNECT_OPTIONS.maxUnrecoverableErrors
3156
3201
  // Error handler
3157
3202
  sess.on('error', (ev) => {
3158
3203
  const msg = ev.error?.message || String(ev.error);
3204
+ const errType = ev.type || 'unknown';
3205
+ const recoverable = ev.recoverable;
3206
+ // 0.9.68: counter mirror — increment for recoverable:false same as SDK does
3207
+ if (recoverable === false) {
3208
+ if (errType === 'tts_error')
3209
+ __ttsErrorCounter++;
3210
+ else if (errType === 'llm_error')
3211
+ __llmErrorCounter++;
3212
+ }
3213
+ const willCloseNext = (__ttsErrorCounter > __maxUnrecov || __llmErrorCounter > __maxUnrecov);
3214
+ console.log(`📊 [ERROR-COUNTER] type=${errType} recoverable=${recoverable} ttsErrorCount=${__ttsErrorCounter}/${__maxUnrecov} llmErrorCount=${__llmErrorCounter}/${__maxUnrecov} willCloseNext=${willCloseNext} t=${new Date().toISOString()}`);
3159
3215
  // OpenAI race: voice queue collided with server-side VAD auto-response
3160
3216
  if (msg.includes('conversation_already_has_active_response') || msg.includes('active_response')) {
3161
3217
  console.log('⚠️ OpenAI active response collision — queue will retry on next listening state');
@@ -3168,6 +3224,15 @@ async function main() {
3168
3224
  }
3169
3225
  console.error('❌ Session error:', ev.error);
3170
3226
  });
3227
+ // 0.9.68: reset error counter mirror when SDK does (on speaking transition).
3228
+ // Registered as an additional agent_state_changed listener — fires AFTER the existing handler.
3229
+ sess.on('agent_state_changed', (ev) => {
3230
+ if (ev.newState === 'speaking' && (__ttsErrorCounter > 0 || __llmErrorCounter > 0)) {
3231
+ console.log(`📊 [COUNTER-RESET] speaking transition cleared ttsErrorCount=${__ttsErrorCounter}→0 llmErrorCount=${__llmErrorCounter}→0`);
3232
+ __ttsErrorCounter = 0;
3233
+ __llmErrorCounter = 0;
3234
+ }
3235
+ });
3171
3236
  // Capture voice mode at session creation — prevents state confusion
3172
3237
  // if currentVoiceMode changes between session start and crash recovery
3173
3238
  const sessionVoiceMode = currentVoiceMode;
@@ -17,7 +17,7 @@ import { GoogleGenAI } from '@google/genai';
17
17
  // ============================================================
18
18
  // CONSTANTS
19
19
  // ============================================================
20
- const GEMINI_MODEL = 'gemini-2.0-flash';
20
+ const GEMINI_MODEL = 'gemini-2.5-flash'; // 0.9.67: was gemini-2.0-flash — 404 deprecated by Google
21
21
  const TIMEOUT_MS = 20_000; // AFC needs time for tool calls + processing + synthesis
22
22
  const MAX_AFC_CALLS = 4;
23
23
  // ============================================================
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "osborn",
3
- "version": "0.9.65",
3
+ "version": "0.9.68",
4
4
  "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
5
5
  "type": "module",
6
6
  "bin": {