osborn 0.9.58 → 0.9.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +40 -22
- package/package.json +9 -9
package/dist/index.js
CHANGED
|
@@ -2116,28 +2116,48 @@ async function main() {
|
|
|
2116
2116
|
const session = new voice.AgentSession({
|
|
2117
2117
|
turnDetection: 'stt',
|
|
2118
2118
|
preemptiveGeneration: false, // Only fire LLM on final committed transcript, not partial preemptives
|
|
2119
|
+
// First-line echo defense: drop mic frames from BOTH the recognition stream
|
|
2120
|
+
// and the realtime audio stream for this many ms after the agent first
|
|
2121
|
+
// enters 'speaking' state. STT receives no audio during the warmup → no
|
|
2122
|
+
// interim/final transcripts can fire → echo cannot trigger an interrupt.
|
|
2123
|
+
// 1.4.x default is 3000; bumping to 5000 widens the safe zone at session start.
|
|
2124
|
+
// One-shot per session (NOT re-armed each turn), so this protects only the
|
|
2125
|
+
// first agent response. After that the in-block interruption settings handle it.
|
|
2126
|
+
aecWarmupDuration: 5000,
|
|
2127
|
+
// TTS stall mitigations (0.9.61). The 1.4.x SDK added a 10s default
|
|
2128
|
+
// readIdleTimeout in generation.js:519 (PR livekit/agents-js#1461) — when
|
|
2129
|
+
// the TTS stream goes silent for >10s, it force-closes via reader.cancel()
|
|
2130
|
+
// which trips the OpenAI SDK's AbortSignal → APIUserAbortError →
|
|
2131
|
+
// tts_error recoverable:false. Root cause is upstream: the OpenAI plugin
|
|
2132
|
+
// BUFFERS the entire tts-1 PCM response (arrayBuffer()) before emitting a
|
|
2133
|
+
// single frame. Long sentences intermittently exceed 10s end-to-end with
|
|
2134
|
+
// tts-1. Raising both watchdogs to 30s gives slow OpenAI responses room
|
|
2135
|
+
// to complete; raising maxUnrecoverableErrors from default 3 to 15 prevents
|
|
2136
|
+
// a transient burst of stalls from killing the AgentSession outright (the
|
|
2137
|
+
// counter resets on every successful speaking transition).
|
|
2138
|
+
ttsReadIdleTimeout: 30_000,
|
|
2139
|
+
forwardAudioIdleTimeout: 30_000,
|
|
2140
|
+
connOptions: {
|
|
2141
|
+
maxUnrecoverableErrors: 15,
|
|
2142
|
+
},
|
|
2119
2143
|
turnHandling: {
|
|
2120
2144
|
endpointing: {
|
|
2121
2145
|
mode: 'fixed',
|
|
2122
2146
|
minDelay: 500, // Wait 500ms after STT commits before generating reply
|
|
2123
2147
|
maxDelay: 2000, // Force end-of-turn after 2s to prevent hangs
|
|
2124
2148
|
},
|
|
2125
|
-
//
|
|
2126
|
-
//
|
|
2127
|
-
//
|
|
2128
|
-
//
|
|
2129
|
-
// accumulate a clean silence, which helps when echo or ambient noise
|
|
2130
|
-
// keeps resetting the 2s window. Other tunables in this same block
|
|
2131
|
-
// (NOT changed yet — try the timeout first, escalate if needed):
|
|
2132
|
-
// - minDuration (default 500ms) — minimum sustained speech to count
|
|
2133
|
-
// - minWords (default 0) — minimum word count in interim transcript
|
|
2134
|
-
// - enabled (default true) — kept ON (auto-interrupt path active)
|
|
2135
|
-
// - resumeFalseInterruption (default true) — auto-resume kept ON
|
|
2136
|
-
// - discardAudioIfUninterruptible (default true)
|
|
2149
|
+
// 1.4.x SDK fully wires these — minDuration now applies to the STT path
|
|
2150
|
+
// (not just VAD), falseInterruptionTimeout actually fires the
|
|
2151
|
+
// agentFalseInterruption event with auto-resume, discardAudioIfUninterruptible
|
|
2152
|
+
// is checked at runtime. All inert in 1.2.1; live in 1.4.x.
|
|
2137
2153
|
interruption: {
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2154
|
+
// enabled defaults true — kept default (don't set to false; cascades into
|
|
2155
|
+
// allowInterruptions:false which breaks manual interrupt() calls).
|
|
2156
|
+
minDuration: 1000, // 1.4.x: now gates STT-path; require 1s sustained speech
|
|
2157
|
+
minWords: 3, // require ≥3 words in interim transcript
|
|
2158
|
+
falseInterruptionTimeout: 2000, // emit agentFalseInterruption after 2s silence
|
|
2159
|
+
resumeFalseInterruption: true, // auto-resume TTS on false interrupt detection
|
|
2160
|
+
discardAudioIfUninterruptible: true, // drop buffered echo audio
|
|
2141
2161
|
},
|
|
2142
2162
|
},
|
|
2143
2163
|
});
|
|
@@ -3041,13 +3061,11 @@ async function main() {
|
|
|
3041
3061
|
userState = ev.newState;
|
|
3042
3062
|
console.log(`👤 User state: ${prev} → ${ev.newState} (agent: ${agentState})`);
|
|
3043
3063
|
if (ev.newState === 'speaking' && agentState === 'speaking' && sessionVoiceMode !== 'realtime') {
|
|
3044
|
-
//
|
|
3045
|
-
//
|
|
3046
|
-
//
|
|
3047
|
-
//
|
|
3048
|
-
//
|
|
3049
|
-
// (2 of 3 interrupts that session were from this handler firing on echo).
|
|
3050
|
-
// Echo prevention moved to browser AEC on the publisher side.
|
|
3064
|
+
// Simple manual interrupt for echo-side defense fallback. With 1.4.x
|
|
3065
|
+
// the SDK's interrupt-by-audio-activity path is properly gated by
|
|
3066
|
+
// turnHandling.interruption.{minDuration, minWords, falseInterruptionTimeout},
|
|
3067
|
+
// and resumeFalseInterruption auto-recovers if echo was misclassified.
|
|
3068
|
+
// This handler stays as a secondary trigger only.
|
|
3051
3069
|
try {
|
|
3052
3070
|
console.log('🎤 user_state_changed=speaking + agent speaking → interrupting TTS');
|
|
3053
3071
|
currentSession?.interrupt();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "osborn",
|
|
3
|
-
"version": "0.9.58",
|
|
3
|
+
"version": "0.9.61",
|
|
4
4
|
"description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -33,14 +33,14 @@
|
|
|
33
33
|
"@anthropic-ai/claude-agent-sdk": "^0.2.91",
|
|
34
34
|
"@anthropic-ai/sdk": "^0.80.0",
|
|
35
35
|
"@google/genai": "^1.0.0",
|
|
36
|
-
"@livekit/agents": "1.
|
|
37
|
-
"@livekit/agents-plugin-deepgram": "1.
|
|
38
|
-
"@livekit/agents-plugin-elevenlabs": "1.
|
|
39
|
-
"@livekit/agents-plugin-google": "1.
|
|
40
|
-
"@livekit/agents-plugin-livekit": "1.
|
|
41
|
-
"@livekit/agents-plugin-openai": "1.
|
|
42
|
-
"@livekit/agents-plugin-silero": "1.
|
|
43
|
-
"@livekit/rtc-node": "0.13.
|
|
36
|
+
"@livekit/agents": "1.4.6",
|
|
37
|
+
"@livekit/agents-plugin-deepgram": "1.4.6",
|
|
38
|
+
"@livekit/agents-plugin-elevenlabs": "1.4.6",
|
|
39
|
+
"@livekit/agents-plugin-google": "1.4.6",
|
|
40
|
+
"@livekit/agents-plugin-livekit": "1.4.6",
|
|
41
|
+
"@livekit/agents-plugin-openai": "1.4.6",
|
|
42
|
+
"@livekit/agents-plugin-silero": "1.4.6",
|
|
43
|
+
"@livekit/rtc-node": "0.13.29",
|
|
44
44
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
45
45
|
"@openai/codex-sdk": "^0.77.0",
|
|
46
46
|
"@smithery/api": "^0.48.0",
|