osborn 0.9.62 → 0.9.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +62 -6
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -2154,13 +2154,16 @@ async function main() {
|
|
|
2154
2154
|
minDelay: 500, // Wait 500ms after STT commits before generating reply
|
|
2155
2155
|
maxDelay: 2000, // Force end-of-turn after 2s to prevent hangs
|
|
2156
2156
|
},
|
|
2157
|
-
//
|
|
2157
|
+
// Tightened gates: only commit to the pause path when the STT layer is
|
|
2158
|
+
// confident this is real speech, not echo. Once paused, give the user
|
|
2159
|
+
// a full 3s window to keep talking before deciding it was false and
|
|
2160
|
+
// resuming. The other two knobs are left at their SDK defaults.
|
|
2158
2161
|
interruption: {
|
|
2159
|
-
minDuration: 2000, // default 500
|
|
2160
|
-
minWords: 3, // default 0
|
|
2161
|
-
falseInterruptionTimeout: 3000, // default 2000
|
|
2162
|
-
//
|
|
2163
|
-
//
|
|
2162
|
+
minDuration: 2000, // default 500 — require 2s sustained speech
|
|
2163
|
+
minWords: 3, // default 0 — require ≥3 transcript words
|
|
2164
|
+
falseInterruptionTimeout: 3000, // default 2000 — wait 3s before auto-resume
|
|
2165
|
+
// resumeFalseInterruption: true, // default true (unchanged)
|
|
2166
|
+
// discardAudioIfUninterruptible: true,// default true (unchanged)
|
|
2164
2167
|
},
|
|
2165
2168
|
},
|
|
2166
2169
|
});
|
|
@@ -3082,6 +3085,59 @@ async function main() {
|
|
|
3082
3085
|
setTimeout(() => processVoiceQueue(), 500);
|
|
3083
3086
|
}
|
|
3084
3087
|
});
|
|
3088
|
+
// ============================================================
|
|
3089
|
+
// Interrupt-debug instrumentation (0.9.63) — log every SDK event
|
|
3090
|
+
// that touches the pause/resume + transcript path so we can correlate
|
|
3091
|
+
// a "TTS stream stalled" or visible cutoff to the exact transcript
|
|
3092
|
+
// text + timing that triggered it.
|
|
3093
|
+
//
|
|
3094
|
+
// The events below are emitted by AgentSession in @livekit/agents 1.4.6.
|
|
3095
|
+
// Each line prints with a wall-clock timestamp so it can be cross-referenced
|
|
3096
|
+
// against the WARN/ERROR lines from the SDK itself.
|
|
3097
|
+
// ============================================================
|
|
3098
|
+
// user_input_transcribed — the actual transcript Deepgram emitted.
|
|
3099
|
+
// Fires for BOTH interim and final transcripts. This is the smoking-gun
|
|
3100
|
+
// log for false interrupts: if echo bleeds through and Deepgram transcribes
|
|
3101
|
+
// a 1-2 word fragment, you'll see it here a fraction of a second before
|
|
3102
|
+
// user_state_changed=speaking or the SDK fires interruptByAudioActivity.
|
|
3103
|
+
sess.on('user_input_transcribed', (ev) => {
|
|
3104
|
+
const t = ev.transcript ?? '';
|
|
3105
|
+
const isFinal = !!ev.isFinal;
|
|
3106
|
+
const words = t.trim().split(/\s+/).filter(Boolean).length;
|
|
3107
|
+
const tag = isFinal ? '📝 FINAL' : '✏️ interim';
|
|
3108
|
+
console.log(`${tag} transcript (${words}w, ${t.length}c) [${new Date().toISOString()}]: "${t.slice(0, 120)}${t.length > 120 ? '…' : ''}"`);
|
|
3109
|
+
});
|
|
3110
|
+
// overlapping_speech — SDK detected user audio while agent was speaking.
|
|
3111
|
+
// This is the moment the pause path fires (before any interrupt() call).
|
|
3112
|
+
sess.on('overlapping_speech', (ev) => {
|
|
3113
|
+
console.log(`🔁 OVERLAPPING SPEECH detected [${new Date().toISOString()}]:`, JSON.stringify({
|
|
3114
|
+
type: ev.type,
|
|
3115
|
+
isInterruption: ev.isInterruption,
|
|
3116
|
+
interruptedAt: ev.interruptedAt,
|
|
3117
|
+
// Whatever else the SDK provides — list the field names for now (values are not dumped)
|
|
3118
|
+
fields: Object.keys(ev),
|
|
3119
|
+
}));
|
|
3120
|
+
});
|
|
3121
|
+
// agent_false_interruption — the SDK's "actually that was a false alarm,
|
|
3122
|
+
// resuming TTS" event. Fires falseInterruptionTimeout ms after a pause.
|
|
3123
|
+
// resumed:true means the TTS audio was resumed cleanly; resumed:false
|
|
3124
|
+
// means resume was attempted but blocked (canPause check, etc.) — the
|
|
3125
|
+
// canonical signal for our deadlock scenario.
|
|
3126
|
+
sess.on('agent_false_interruption', (ev) => {
|
|
3127
|
+
console.log(`✅ AGENT FALSE INTERRUPTION [${new Date().toISOString()}]:`, JSON.stringify({
|
|
3128
|
+
resumed: ev.resumed,
|
|
3129
|
+
createdAt: ev.createdAt,
|
|
3130
|
+
}));
|
|
3131
|
+
});
|
|
3132
|
+
// speech_created — every time TTS audio is queued. Lets us correlate
|
|
3133
|
+
// a speech-handle id back to the transcript that triggered it.
|
|
3134
|
+
sess.on('speech_created', (ev) => {
|
|
3135
|
+
console.log(`🗣️ SPEECH CREATED [${new Date().toISOString()}]:`, JSON.stringify({
|
|
3136
|
+
speechId: ev.speechHandle?.id,
|
|
3137
|
+
source: ev.source,
|
|
3138
|
+
userInitiated: ev.userInitiated,
|
|
3139
|
+
}));
|
|
3140
|
+
});
|
|
3085
3141
|
// FALLBACK: playout_completed
|
|
3086
3142
|
sess.on('playout_completed', (ev) => {
|
|
3087
3143
|
const message = ev.message || ev.text || ev.content;
|