osborn 0.9.62 → 0.9.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +74 -10
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -2142,11 +2142,19 @@ async function main() {
2142
2142
  // SDK 1.4.6 (matching what silently ran via caret-resolved 1.4.5 throughout
2143
2143
  // the user's last-working month) are restored by leaving these unset.
2144
2144
  //
2145
- // aecWarmupDuration: 5000, // default 3000
2146
- // ttsReadIdleTimeout: 30_000, // default 10000
2147
- // forwardAudioIdleTimeout: 30_000, // default 10000
2145
+ // aecWarmupDuration: 5000, // default 3000 (left at default)
2146
+ // 0.9.64 evidence (0.9.63 osbornojure logs): the ONE stall in a long
2147
+ // session fired AFTER session.say DONE during a ~73s silent gap before
2148
+ // the next agent response — the forwarder's 10s idle timer fired during
2149
+ // an LLM-think pause, not from an interrupt (OVERLAPPING SPEECH: 0,
2150
+ // AGENT FALSE INTERRUPTION: 0, interrupting TTS: 0 in that session).
2151
+ // Bumping both watchdogs to 30s gives the forwarder room to ride out
2152
+ // normal between-message pauses without timing out. Independent of the
2153
+ // interruption block above, which is doing its job (0 interrupts fired).
2154
+ ttsReadIdleTimeout: 30_000, // default 10000 → 30000
2155
+ forwardAudioIdleTimeout: 30_000, // default 10000 → 30000
2148
2156
  // connOptions: {
2149
- // maxUnrecoverableErrors: 15, // default 3
2157
+ // maxUnrecoverableErrors: 15, // default 3 (left at default)
2150
2158
  // },
2151
2159
  turnHandling: {
2152
2160
  endpointing: {
@@ -2154,13 +2162,16 @@ async function main() {
2154
2162
  minDelay: 500, // Wait 500ms after STT commits before generating reply
2155
2163
  maxDelay: 2000, // Force end-of-turn after 2s to prevent hangs
2156
2164
  },
2157
- // Commented out see note above the AgentSession constructor.
2165
+ // Tightened gates: only commit to the pause path when the STT layer is
2166
+ // confident this is real speech, not echo. Once paused, give the user
2167
+ // a full 3s window to keep talking before deciding it was false and
2168
+ // resuming. Other two knobs left at SDK defaults.
2158
2169
  interruption: {
2159
- minDuration: 2000, // default 500
2160
- minWords: 3, // default 0
2161
- falseInterruptionTimeout: 3000, // default 2000 (same)
2162
- // resumeFalseInterruption: true, // default true (same)
2163
- // discardAudioIfUninterruptible: true,// default true (same)
2170
+ minDuration: 2000, // default 500 — require 2s sustained speech
2171
+ minWords: 3, // default 0 — require ≥3 transcript words
2172
+ falseInterruptionTimeout: 3000, // default 2000 — wait 3s before auto-resume
2173
+ // resumeFalseInterruption: true, // default true (unchanged)
2174
+ // discardAudioIfUninterruptible: true,// default true (unchanged)
2164
2175
  },
2165
2176
  },
2166
2177
  });
@@ -3082,6 +3093,59 @@ async function main() {
3082
3093
  setTimeout(() => processVoiceQueue(), 500);
3083
3094
  }
3084
3095
  });
3096
+ // ============================================================
3097
+ // Interrupt-debug instrumentation (0.9.63) — log every SDK event
3098
+ // that touches the pause/resume + transcript path so we can correlate
3099
+ // a "TTS stream stalled" or visible cutoff to the exact transcript
3100
+ // text + timing that triggered it.
3101
+ //
3102
+ // The events below are emitted by AgentSession in @livekit/agents 1.4.6.
3103
+ // Each line prints with a wall-clock timestamp so it can be cross-referenced
3104
+ // against the WARN/ERROR lines from the SDK itself.
3105
+ // ============================================================
3106
+ // user_input_transcribed — the actual transcript Deepgram emitted.
3107
+ // Fires for BOTH interim and final transcripts. This is the smoking-gun
3108
+ // log for false interrupts: if echo bleeds through and Deepgram transcribes
3109
+ // a 1-2 word fragment, you'll see it here a fraction of a second before
3110
+ // user_state_changed=speaking or the SDK fires interruptByAudioActivity.
3111
+ sess.on('user_input_transcribed', (ev) => {
3112
+ const t = ev.transcript ?? '';
3113
+ const isFinal = !!ev.isFinal;
3114
+ const words = t.trim().split(/\s+/).filter(Boolean).length;
3115
+ const tag = isFinal ? '📝 FINAL' : '✏️ interim';
3116
+ console.log(`${tag} transcript (${words}w, ${t.length}c) [${new Date().toISOString()}]: "${t.slice(0, 120)}${t.length > 120 ? '…' : ''}"`);
3117
+ });
3118
+ // overlapping_speech — SDK detected user audio while agent was speaking.
3119
+ // This is the moment the pause path fires (before any interrupt() call).
3120
+ sess.on('overlapping_speech', (ev) => {
3121
+ console.log(`🔁 OVERLAPPING SPEECH detected [${new Date().toISOString()}]:`, JSON.stringify({
3122
+ type: ev.type,
3123
+ isInterruption: ev.isInterruption,
3124
+ interruptedAt: ev.interruptedAt,
3125
+ // Whatever else SDK provides — dump it all for now
3126
+ fields: Object.keys(ev),
3127
+ }));
3128
+ });
3129
+ // agent_false_interruption — the SDK's "actually that was a false alarm,
3130
+ // resuming TTS" event. Fires falseInterruptionTimeout after a pause.
3131
+ // resumed:true means the TTS audio was resumed cleanly; resumed:false
3132
+ // means resume was attempted but blocked (canPause check, etc.) — the
3133
+ // canonical signal for our deadlock scenario.
3134
+ sess.on('agent_false_interruption', (ev) => {
3135
+ console.log(`✅ AGENT FALSE INTERRUPTION [${new Date().toISOString()}]:`, JSON.stringify({
3136
+ resumed: ev.resumed,
3137
+ createdAt: ev.createdAt,
3138
+ }));
3139
+ });
3140
+ // speech_created — every time TTS audio is queued. Lets us correlate
3141
+ // a speech-handle id back to the transcript that triggered it.
3142
+ sess.on('speech_created', (ev) => {
3143
+ console.log(`🗣️ SPEECH CREATED [${new Date().toISOString()}]:`, JSON.stringify({
3144
+ speechId: ev.speechHandle?.id,
3145
+ source: ev.source,
3146
+ userInitiated: ev.userInitiated,
3147
+ }));
3148
+ });
3085
3149
  // FALLBACK: playout_completed
3086
3150
  sess.on('playout_completed', (ev) => {
3087
3151
  const message = ev.message || ev.text || ev.content;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "osborn",
3
- "version": "0.9.62",
3
+ "version": "0.9.64",
4
4
  "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
5
5
  "type": "module",
6
6
  "bin": {