osborn 0.9.67 → 0.9.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +207 -1
- package/dist/voice-io.js +2 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -15,6 +15,9 @@ import { dirname, join } from 'node:path';
|
|
|
15
15
|
import { fileURLToPath } from 'node:url';
|
|
16
16
|
import { spawn } from 'node:child_process';
|
|
17
17
|
import { randomUUID } from 'node:crypto';
|
|
18
|
+
import { createRequire } from 'node:module';
|
|
19
|
+
// 0.9.71: createRequire for resolving package.json versions inside ESM
|
|
20
|
+
const __sdkVersionRequire = createRequire(import.meta.url);
|
|
18
21
|
import { homedir, tmpdir } from 'node:os';
|
|
19
22
|
import { PassThrough } from 'node:stream';
|
|
20
23
|
import { createGunzip } from 'node:zlib';
|
|
@@ -2175,6 +2178,34 @@ async function main() {
|
|
|
2175
2178
|
},
|
|
2176
2179
|
},
|
|
2177
2180
|
});
|
|
2181
|
+
// 0.9.71: log the AgentSession options as read back from the constructed
// session (presumably after the SDK has applied its defaults — confirm), so
// production logs show which tuning values are live for a given session.
// Best-effort only: a failure to read or serialize the options is logged and
// never rethrown. Fields that are undefined are omitted by JSON.stringify.
try {
    const resolved = session.sessionOptions ?? {};
    const detectionMode = session.interruptionDetection; // 'vad' | 'adaptive' | undefined
    const turnCfg = resolved.turnHandling ?? {};
    const snapshot = {
        t: new Date().toISOString(),
        maxToolSteps: resolved.maxToolSteps,
        userAwayTimeout: resolved.userAwayTimeout,
        aecWarmupDuration: resolved.aecWarmupDuration,
        ttsReadIdleTimeout: resolved.ttsReadIdleTimeout,
        forwardAudioIdleTimeout: resolved.forwardAudioIdleTimeout,
        useTtsAlignedTranscript: resolved.useTtsAlignedTranscript,
        ttsTextTransforms: resolved.ttsTextTransforms,
        interruptionDetectionMode: detectionMode,
        turnHandling: {
            turnDetection: turnCfg.turnDetection,
            endpointing: turnCfg.endpointing,
            interruption: turnCfg.interruption,
            preemptiveGeneration: turnCfg.preemptiveGeneration,
            userTurnLimit: turnCfg.userTurnLimit,
        },
    };
    console.log('๐งช [BE-AGENT-SESSION-CONFIG]', JSON.stringify(snapshot));
}
catch (err) {
    let reason;
    if (err instanceof Error) {
        reason = err.message;
    }
    else {
        reason = String(err);
    }
    console.log('๐งช [BE-AGENT-SESSION-CONFIG] failed:', reason);
}
|
|
2178
2209
|
return { session, agent };
|
|
2179
2210
|
}
|
|
2180
2211
|
// ============================================================
|
|
@@ -2703,7 +2734,45 @@ async function main() {
|
|
|
2703
2734
|
// Room Event Handlers
|
|
2704
2735
|
// ============================================================
|
|
2705
2736
|
room.on(RoomEvent.Connected, () => {
|
|
2706
|
-
|
|
2737
|
+
// 0.9.68: log the room name and SID prominently so this exact session can be
// cross-referenced in the LiveKit Cloud dashboard (Sessions tab).
// room.getSid() returns a promise (per the original note, the SID is only
// resolved after the WebRTC handshake), so the SID line is logged whenever
// that promise settles rather than inline here.
// NOTE(review): the line break inside the first template literal is kept
// exactly as it appears in the source; it looks like encoding damage to a
// leading emoji — confirm against the published file.
console.log(`โ
Connected to room: ${roomName} | t=${new Date().toISOString()}`);
const __logRoomSid = (sid) => {
    console.log(`๐ [LIVEKIT-DASHBOARD] room sid=${sid} name=${roomName} โ search at https://cloud.livekit.io/projects โ Sessions โ "${sid}"`);
};
const __logRoomSidFailure = (err) => {
    // Non-Error rejections are stringified so the log line is always readable.
    const reason = err instanceof Error ? err.message : String(err);
    console.log(`โ ๏ธ [LIVEKIT-DASHBOARD] failed to fetch room SID: ${reason}`);
};
room.getSid().then(__logRoomSid).catch(__logRoomSidFailure);
|
|
2747
|
+
// 0.9.71: SDK + runtime snapshot. Logs the Node version plus the `version`
// field of each listed package's package.json so version drift can be ruled
// out from a single log line.
// A package whose package.json cannot be loaded (not installed, or the
// `./package.json` subpath is not exposed by the package's "exports" map) is
// reported under `unresolved` with the error code, instead of being silently
// left out — a missing entry would otherwise be indistinguishable from
// "never checked". The whole snapshot stays best-effort and never throws.
try {
    const trackedPackages = [
        'osborn',
        '@livekit/agents',
        '@livekit/agents-plugin-openai',
        '@livekit/agents-plugin-deepgram',
        '@livekit/agents-plugin-silero',
        '@livekit/agents-plugin-google',
        '@livekit/agents-plugin-elevenlabs',
        '@livekit/agents-plugin-livekit',
        '@livekit/rtc-node',
        'livekit-server-sdk',
        '@anthropic-ai/claude-agent-sdk',
        '@google/genai',
        'openai',
    ];
    const pkgs = {};
    const unresolved = {};
    for (const name of trackedPackages) {
        try {
            pkgs[name] = __sdkVersionRequire(`${name}/package.json`).version;
        }
        catch (err) {
            // Prefer Node's error code (e.g. MODULE_NOT_FOUND); fall back to the
            // error name or a stringified value for anything else.
            unresolved[name] = err?.code ?? (err instanceof Error ? err.name : String(err));
        }
    }
    console.log('๐งช [BE-SDK-VERSIONS]', JSON.stringify({ t: new Date().toISOString(), node: process.version, pkgs, unresolved }));
}
catch (err) {
    console.log('๐งช [BE-SDK-VERSIONS] failed:', err instanceof Error ? err.message : String(err));
}
|
|
2707
2776
|
localParticipant = room.localParticipant;
|
|
2708
2777
|
// Arm the alone timer: if we connected but no user joins within the grace
|
|
2709
2778
|
// window (e.g. machine woken then abandoned mid-handshake), leave the room
|
|
@@ -2720,6 +2789,46 @@ async function main() {
|
|
|
2720
2789
|
// Flux STT's speech-vs-noise classification: slower (~100-300ms) but
|
|
2721
2790
|
// confidence-aware. The latency tradeoff is worth eliminating the false
|
|
2722
2791
|
// interrupts at the root.
|
|
2792
|
+
// 0.9.71: room-level audio observability. These listeners only log, so that
// user mic mute / connection-quality changes can be cross-referenced against
// TTS cutoffs without re-introducing the over-eager ActiveSpeakers interrupt.
//
// __observeRoomEvent registers one listener that logs whatever `describe`
// returns for the event arguments. Any error raised while building or
// printing the line is deliberately swallowed: logging must never affect
// the room.
const __observeRoomEvent = (eventName, describe) => {
    room.on(eventName, (...eventArgs) => {
        try {
            console.log(describe(...eventArgs));
        }
        catch { }
    });
};
__observeRoomEvent(RoomEvent.ActiveSpeakersChanged, (speakers) => {
    // Entries without a truthy identity are left out of the list.
    const ids = (speakers || []).map((s) => s?.identity).filter(Boolean);
    return `๐๏ธ [ROOM-SPEAKERS] count=${ids.length} ids=${JSON.stringify(ids)} t=${new Date().toISOString()}`;
});
__observeRoomEvent(RoomEvent.ConnectionQualityChanged, (quality, participant) => {
    return `๐ถ [ROOM-QUALITY] participant=${participant?.identity} quality=${quality} t=${new Date().toISOString()}`;
});
__observeRoomEvent(RoomEvent.TrackMuted, (publication, participant) => {
    return `๐ [ROOM-TRACK-MUTED] participant=${participant?.identity} kind=${publication?.kind} source=${publication?.source} sid=${publication?.sid} t=${new Date().toISOString()}`;
});
__observeRoomEvent(RoomEvent.TrackUnmuted, (publication, participant) => {
    return `๐ [ROOM-TRACK-UNMUTED] participant=${participant?.identity} kind=${publication?.kind} source=${publication?.source} sid=${publication?.sid} t=${new Date().toISOString()}`;
});
__observeRoomEvent(RoomEvent.TrackSubscribed, (track, publication, participant) => {
    return `๐ฅ [ROOM-TRACK-SUBSCRIBED] participant=${participant?.identity} kind=${track?.kind} source=${publication?.source} sid=${publication?.sid} t=${new Date().toISOString()}`;
});
__observeRoomEvent(RoomEvent.TrackUnsubscribed, (track, publication, participant) => {
    return `๐ค [ROOM-TRACK-UNSUBSCRIBED] participant=${participant?.identity} kind=${track?.kind} source=${publication?.source} sid=${publication?.sid} t=${new Date().toISOString()}`;
});
|
|
2723
2832
|
room.on(RoomEvent.Disconnected, () => {
|
|
2724
2833
|
console.log('๐ Disconnected from room');
|
|
2725
2834
|
// Clean up active research and voice queue
|
|
@@ -3179,13 +3288,101 @@ async function main() {
|
|
|
3179
3288
|
// FALLBACK: playout_completed
// Logs a compact summary of the playout event, then forwards the spoken text
// (first truthy of ev.message / ev.text / ev.content) as an agent transcript
// tagged 'playout'. Nothing is forwarded when no non-empty text is present.
sess.on('playout_completed', (ev) => {
    const spokenText = ev.message || ev.text || ev.content;
    const stamp = new Date().toISOString();
    const summary = {
        // Prefer the id on the speech handle; fall back to a flat speechId field.
        speechId: ev.speechHandle?.id ?? ev.speechId,
        interrupted: ev.interrupted,
        durationMs: ev.durationMs,
        messageLen: spokenText ? spokenText.length : 0,
    };
    console.log(`๐ง PLAYOUT COMPLETED [${stamp}]:`, JSON.stringify(summary));
    if (!spokenText || !(spokenText.length > 0)) {
        return;
    }
    sendAgentTranscript(spokenText, 'playout');
});
|
|
3301
|
+
// 0.9.71: metrics_collected — per-call latency for STT/TTS/LLM/VAD/EOU/Interruption.
// Single highest-signal event for diagnosing audio cutoffs:
//   • TTSMetrics.ttfbMs / durationMs / audioDurationMs / cancelled — directly answers
//     "did the OpenAI HTTP fetch hang or did it complete and the SDK aborted?"
//   • STTMetrics.audioDurationMs / durationMs — Deepgram latency per utterance
//   • LLMMetrics.ttftMs — cold-vs-warm Claude subprocess
//   • EOUMetrics.endOfUtteranceDelayMs / transcriptionDelayMs — end-of-turn timing
//   • InterruptionMetrics.{detectionDelay, numInterruptions, numBackchannels} —
//     turn-detector signal at the source
//
// How each logged field is derived from the raw metric value:
//   ms   -> rounded to an integer, -1 when the value is null/undefined
//   bool -> coerced to true/false
//   raw  -> passed through unchanged (undefined values are omitted by JSON.stringify)
const __metricReaders = {
    ms: (value) => Math.round(value ?? -1),
    bool: (value) => !!value,
    raw: (value) => value,
};
// Per-type subset of fields to log, in output order — keep tight.
// A Map is used so an unexpected `type` string can never hit an inherited
// object property.
const __metricFields = new Map([
    ['tts_metrics', [
            ['ttfbMs', 'ms'],
            ['durationMs', 'ms'],
            ['audioDurationMs', 'ms'],
            ['cancelled', 'bool'],
            ['charactersCount', 'raw'],
            ['streamed', 'bool'],
            ['speechId', 'raw'],
        ]],
    ['stt_metrics', [
            ['audioDurationMs', 'ms'],
            ['durationMs', 'ms'],
            ['streamed', 'bool'],
        ]],
    ['llm_metrics', [
            ['ttftMs', 'ms'],
            ['durationMs', 'ms'],
            ['cancelled', 'bool'],
            ['completionTokens', 'raw'],
            ['promptTokens', 'raw'],
            ['speechId', 'raw'],
        ]],
    ['vad_metrics', [
            ['idleTimeMs', 'ms'],
            ['inferenceCount', 'raw'],
        ]],
    ['eou_metrics', [
            ['endOfUtteranceDelayMs', 'ms'],
            ['transcriptionDelayMs', 'ms'],
            ['onUserTurnCompletedDelayMs', 'ms'],
            ['speechId', 'raw'],
        ]],
    ['interruption_metrics', [
            ['detectionDelay', 'ms'],
            ['predictionDuration', 'ms'],
            ['numInterruptions', 'raw'],
            ['numBackchannels', 'raw'],
            ['numRequests', 'raw'],
        ]],
]);
/**
 * Build the compact log record for one metrics object.
 * Always contains `type`, `label` and an ISO timestamp `t`; known metric
 * types add their per-type fields, unknown types get the base record only.
 * @param {object} m - metrics object taken from the event's `metrics` property
 * @returns {object} plain object ready for JSON.stringify
 */
function __compactMetrics(m) {
    const compact = { type: m.type, label: m.label, t: new Date().toISOString() };
    for (const [field, kind] of __metricFields.get(m.type) ?? []) {
        compact[field] = __metricReaders[kind](m[field]);
    }
    return compact;
}
sess.on('metrics_collected', (ev) => {
    // Observe-only, like the other diagnostic listeners: a malformed metric
    // must not throw out of this listener, so failures are logged instead.
    try {
        const m = ev?.metrics;
        if (!m)
            return;
        console.log(`๐ [METRICS]`, JSON.stringify(__compactMetrics(m)));
    }
    catch (err) {
        console.log('[METRICS] failed:', err instanceof Error ? err.message : String(err));
    }
});
|
|
3357
|
+
// 0.9.71: function_tools_executed — logged when a tool batch completes inside
// the SDK (per the original note). Records how many calls and outputs the
// event carried; a missing list counts as 0. Observe-only: any error while
// logging is swallowed.
sess.on('function_tools_executed', (ev) => {
    try {
        const { functionCalls, functionOutputs } = ev ?? {};
        const callCount = functionCalls?.length ?? 0;
        const outputCount = functionOutputs?.length ?? 0;
        console.log(`๐ ๏ธ [TOOLS-EXECUTED] calls=${callCount} outputs=${outputCount} t=${new Date().toISOString()}`);
    }
    catch { }
});
|
|
3366
|
+
// 0.9.68: mirror SDK's internal unrecoverable-error counters so we can
|
|
3367
|
+
// see EXACTLY how close we are to closeImpl() firing (default threshold 3).
|
|
3368
|
+
// Counter resets on each successful "speaking" transition (agent_session.js:740).
|
|
3369
|
+
let __ttsErrorCounter = 0;
|
|
3370
|
+
let __llmErrorCounter = 0;
|
|
3371
|
+
const __maxUnrecov = 3; // SDK default DEFAULT_SESSION_CONNECT_OPTIONS.maxUnrecoverableErrors
|
|
3186
3372
|
// Error handler
|
|
3187
3373
|
sess.on('error', (ev) => {
|
|
3188
3374
|
const msg = ev.error?.message || String(ev.error);
|
|
3375
|
+
const errType = ev.type || 'unknown';
|
|
3376
|
+
const recoverable = ev.recoverable;
|
|
3377
|
+
// 0.9.68: counter mirror — increment for recoverable:false same as SDK does
|
|
3378
|
+
if (recoverable === false) {
|
|
3379
|
+
if (errType === 'tts_error')
|
|
3380
|
+
__ttsErrorCounter++;
|
|
3381
|
+
else if (errType === 'llm_error')
|
|
3382
|
+
__llmErrorCounter++;
|
|
3383
|
+
}
|
|
3384
|
+
const willCloseNext = (__ttsErrorCounter > __maxUnrecov || __llmErrorCounter > __maxUnrecov);
|
|
3385
|
+
console.log(`๐ [ERROR-COUNTER] type=${errType} recoverable=${recoverable} ttsErrorCount=${__ttsErrorCounter}/${__maxUnrecov} llmErrorCount=${__llmErrorCounter}/${__maxUnrecov} willCloseNext=${willCloseNext} t=${new Date().toISOString()}`);
|
|
3189
3386
|
// OpenAI race: voice queue collided with server-side VAD auto-response
|
|
3190
3387
|
if (msg.includes('conversation_already_has_active_response') || msg.includes('active_response')) {
|
|
3191
3388
|
console.log('โ ๏ธ OpenAI active response collision โ queue will retry on next listening state');
|
|
@@ -3198,6 +3395,15 @@ async function main() {
|
|
|
3198
3395
|
}
|
|
3199
3396
|
console.error('โ Session error:', ev.error);
|
|
3200
3397
|
});
|
|
3398
|
+
// 0.9.68: reset the mirrored error counters when the agent enters the
// 'speaking' state, which (per the original note) is when the SDK resets its
// own counters. This is an additional agent_state_changed listener; the
// original note says it fires after the existing handler.
// Nothing is logged or changed when both counters are already zero.
sess.on('agent_state_changed', (ev) => {
    if (ev.newState !== 'speaking') {
        return;
    }
    const hasPendingErrors = __ttsErrorCounter > 0 || __llmErrorCounter > 0;
    if (!hasPendingErrors) {
        return;
    }
    console.log(`๐ [COUNTER-RESET] speaking transition cleared ttsErrorCount=${__ttsErrorCounter}โ0 llmErrorCount=${__llmErrorCounter}โ0`);
    __ttsErrorCounter = 0;
    __llmErrorCounter = 0;
});
|
|
3201
3407
|
// Capture voice mode at session creation โ prevents state confusion
|
|
3202
3408
|
// if currentVoiceMode changes between session start and crash recovery
|
|
3203
3409
|
const sessionVoiceMode = currentVoiceMode;
|
package/dist/voice-io.js
CHANGED
|
@@ -137,7 +137,8 @@ export const DIRECT_MODE_STT = {
|
|
|
137
137
|
/**
 * TTS provider selection for direct mode.
 * Exactly one entry is active; the commented-out lines are alternatives kept
 * together with the reason each one was (or was not) chosen.
 */
export const DIRECT_MODE_TTS = {
    // provider: 'deepgram', model: 'aura-2-asteria-en', // WebSocket-based: handles TTS abort cleanly (no unrecoverable crash on interruption)
    // provider: 'gemini', model: 'gemini-2.5-flash-preview-tts', voice: 'apollo',
    // provider: 'openai', model: 'tts-1', voice: 'fable', // HTTP streaming: throws APIUserAbortError on interrupt → unrecoverable session crash
    provider: 'openai', model: 'tts-1-hd', voice: 'fable', // 0.9.70: test tts-1-hd — tts-1 had chronic per-sentence HTTP hangs (40s SDK watchdog → APIUserAbortError mid-message)
    // provider: 'groq-orpheus', model: 'canopylabs/orpheus-v1-english', voice: 'autumn', // $22/M chars — voices: autumn, diana, hannah, austin, daniel, troy
};
|
|
143
144
|
/**
|