osborn 0.9.64 → 0.9.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,7 +71,7 @@ let geminiClient = null;
71
71
  let initialized = false;
72
72
  // Model IDs — configurable per provider
73
73
  const ANTHROPIC_FAST_MODEL = 'claude-haiku-4-5-20251001';
74
- const GEMINI_FAST_MODEL = 'gemini-2.0-flash';
74
+ const GEMINI_FAST_MODEL = 'gemini-2.5-flash'; // 0.9.67: was gemini-2.0-flash — 404 deprecated by Google (verified against @livekit/agents-plugin-google test default)
75
75
  // Agent SDK session tracking — resume across voice questions for context continuity
76
76
  let fastBrainSessionId = null;
77
77
  // Gemini Chat session — persists across voice questions for context continuity.
package/dist/index.js CHANGED
@@ -2151,8 +2151,8 @@ async function main() {
2151
2151
  // Bumping both watchdogs to 30s gives the forwarder room to ride out
2152
2152
  // normal between-message pauses without timing out. Independent of the
2153
2153
  // interruption block above, which is doing its job (0 interrupts fired).
2154
- ttsReadIdleTimeout: 30_000, // default 10000 → 30000
2155
- forwardAudioIdleTimeout: 30_000, // default 10000 → 30000
2154
+ ttsReadIdleTimeout: 40_000, // default 10000 → 40000
2155
+ forwardAudioIdleTimeout: 40_000, // default 10000 → 40000
2156
2156
  // connOptions: {
2157
2157
  // maxUnrecoverableErrors: 15, // default 3 (left at default)
2158
2158
  // },
@@ -2167,9 +2167,9 @@ async function main() {
2167
2167
  // a full 3s window to keep talking before deciding it was false and
2168
2168
  // resuming. Other two knobs left at SDK defaults.
2169
2169
  interruption: {
2170
- minDuration: 2000, // default 500 — require 2s sustained speech
2171
- minWords: 3, // default 0 — require ≥3 transcript words
2172
- falseInterruptionTimeout: 3000, // default 2000 — wait 3s before auto-resume
2170
+ minDuration: 2500, // default 500 — require 2.5s sustained speech
2171
+ minWords: 4, // default 0 — require ≥4 transcript words
2172
+ falseInterruptionTimeout: 4000, // default 2000 — wait 4s before auto-resume
2173
2173
  // resumeFalseInterruption: true, // default true (unchanged)
2174
2174
  // discardAudioIfUninterruptible: true,// default true (unchanged)
2175
2175
  },
@@ -3075,17 +3075,47 @@ async function main() {
3075
3075
  userState = ev.newState;
3076
3076
  console.log(`👤 User state: ${prev} → ${ev.newState} (agent: ${agentState})`);
3077
3077
  if (ev.newState === 'speaking' && agentState === 'speaking' && sessionVoiceMode !== 'realtime') {
3078
- // Simple manual interrupt for echo-side defense fallback. With 1.4.x
3079
- // the SDK's interrupt-by-audio-activity path is properly gated by
3080
- // turnHandling.interruption.{minDuration, minWords, falseInterruptionTimeout},
3081
- // and resumeFalseInterruption auto-recovers if echo was misclassified.
3082
- // This handler stays as a secondary trigger only.
3078
+ // 0.9.67: action commented out, condition + debug kept.
3079
+ //
3080
+ // Why removed: in @livekit/agents 1.4.x SpeechHandle.interrupt() calls
3081
+ // replyAbortController.abort() → AbortSignal.any composes into the
3082
+ // OpenAI TTS HTTP fetch → arrayBuffer() throws AbortError →
3083
+ // APIUserAbortError (openai/client.mjs:364) → SDK marks the error
3084
+ // recoverable:false → connOptions.maxUnrecoverableErrors counter trips
3085
+ // → session collapses. In 1.2.1 the same call was a hard-kill that
3086
+ // never reached an HTTP fetch — that's why it ran fine for ~1 month
3087
+ // under the silently caret-resolved 1.4.5 (which had inherited the
3088
+ // abort plumbing) until it crossed the unrecoverable-errors threshold.
3089
+ //
3090
+ // What handles interruption now: SDK 1.4.x's gated path —
3091
+ // turnHandling.interruption.{minDuration:2500, minWords:4,
3092
+ // falseInterruptionTimeout:4000, resumeFalseInterruption:true}
3093
+ // pauses TTS via audioOutput.pause() (no abort) and either resumes
3094
+ // on a false trigger or hard-interrupts on a confirmed barge-in.
3095
+ //
3096
+ // Debug: this block now ONLY observes — logs what we'd have interrupted
3097
+ // on so we can compare against the SDK's own behavior. If the SDK
3098
+ // under-reacts to real barge-ins we can re-enable selectively.
3083
3099
  try {
3084
- console.log('🎤 user_state_changed=speaking + agent speaking interrupting TTS');
3085
- currentSession?.interrupt();
3100
+ const evKeys = ev && typeof ev === 'object' ? Object.keys(ev) : [];
3101
+ const evShape = evKeys.reduce((acc, k) => {
3102
+ const v = ev[k];
3103
+ acc[k] = (v && typeof v === 'object') ? `<object:${Object.keys(v).join(',')}>` : v;
3104
+ return acc;
3105
+ }, {});
3106
+ console.log('🔎 [DEBUG] manual-interrupt WOULD HAVE FIRED — SDK gated path now owns it:', JSON.stringify({
3107
+ t: new Date().toISOString(),
3108
+ userPrev: prev,
3109
+ userNew: ev.newState,
3110
+ agentState,
3111
+ sessionVoiceMode,
3112
+ evKeys,
3113
+ evShape,
3114
+ }));
3115
+ // currentSession?.interrupt() // ← 0.9.67 DISABLED: cascades to APIUserAbortError → recoverable:false → session collapse
3086
3116
  }
3087
3117
  catch (err) {
3088
- console.warn('⚠️ user-state interrupt failed:', err instanceof Error ? err.message : err);
3118
+ console.warn('⚠️ user-state interrupt debug failed:', err instanceof Error ? err.message : err);
3089
3119
  }
3090
3120
  }
3091
3121
  // When user stops speaking, retry voice queue — items may be waiting
@@ -17,7 +17,7 @@ import { GoogleGenAI } from '@google/genai';
17
17
  // ============================================================
18
18
  // CONSTANTS
19
19
  // ============================================================
20
- const GEMINI_MODEL = 'gemini-2.0-flash';
20
+ const GEMINI_MODEL = 'gemini-2.5-flash'; // 0.9.67: was gemini-2.0-flash — 404 deprecated by Google
21
21
  const TIMEOUT_MS = 20_000; // AFC needs time for tool calls + processing + synthesis
22
22
  const MAX_AFC_CALLS = 4;
23
23
  // ============================================================
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "osborn",
3
- "version": "0.9.64",
3
+ "version": "0.9.67",
4
4
  "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
5
5
  "type": "module",
6
6
  "bin": {