osborn 0.9.51 → 0.9.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +198 -6
- package/package.json +9 -9
package/dist/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// Load environment variables FIRST before any other imports
|
|
2
2
|
import 'dotenv/config';
|
|
3
3
|
import { voice, initializeLogger } from '@livekit/agents';
|
|
4
|
-
import { Room, RoomEvent, } from '@livekit/rtc-node';
|
|
4
|
+
import { Room, RoomEvent, RemoteParticipant, } from '@livekit/rtc-node';
|
|
5
5
|
import { AccessToken } from 'livekit-server-sdk';
|
|
6
6
|
// Initialize logger before anything else
|
|
7
7
|
initializeLogger({ pretty: true, level: 'info' });
|
|
@@ -165,6 +165,30 @@ const livekitState = {
|
|
|
165
165
|
lastAttemptAt: null,
|
|
166
166
|
attemptCount: 0,
|
|
167
167
|
};
|
|
168
|
+
// ── Room-presence lifecycle (2026-06-09) ──────────────────────────────────────
|
|
169
|
+
// The agent used to eager-connect to LiveKit on boot and hold the room for the
|
|
170
|
+
// machine's entire life. With 1 participant (the agent itself), LiveKit never
|
|
171
|
+
// considers the room empty, so it never closes — a single forgotten session
|
|
172
|
+
// burned 25h of connection-minutes (room osborn-jzs94j) before we caught it.
|
|
173
|
+
//
|
|
174
|
+
// Fix: the agent now LEAVES the LiveKit room when no user is present, and only
|
|
175
|
+
// rejoins when a user actually connects. Two triggers, both feeding room.disconnect():
|
|
176
|
+
// 1. Agent-side "alone" timer — armed in ParticipantDisconnected once a real
|
|
177
|
+
// session has ended; if no user rejoins within ALONE_GRACE_MS, the agent
|
|
178
|
+
// leaves on its own. This is tab-close-proof (does not depend on the
|
|
179
|
+
// frontend's JS still running — the exact gap that let the 25h room linger).
|
|
180
|
+
// 2. POST /leave-room — the frontend's explicit "leave" button leaves instantly.
|
|
181
|
+
// Rejoin happens via POST /connect-room (frontend connect flow) which re-runs the
|
|
182
|
+
// connect-with-retry loop.
|
|
183
|
+
//
|
|
184
|
+
// `intentionalLeave` distinguishes a voluntary leave from an involuntary LiveKit
|
|
185
|
+
// eviction. The ghost-agent fix in RoomEvent.Disconnected auto-rejoins on drop;
|
|
186
|
+
// that must NOT fire after a voluntary leave (it would recreate the burn we just
|
|
187
|
+
// stopped). The hooks below are populated by main() (which owns `room` and the
|
|
188
|
+
// connect-with-retry loop) so the module-level HTTP server can drive them.
|
|
189
|
+
let intentionalLeave = false;
|
|
190
|
+
let connectRoomHook = null;
|
|
191
|
+
let leaveRoomHook = null;
|
|
168
192
|
function startApiServer(workingDir, port) {
|
|
169
193
|
const server = createServer(async (req, res) => {
|
|
170
194
|
// CORS headers for cloud frontend
|
|
@@ -282,6 +306,32 @@ function startApiServer(workingDir, port) {
|
|
|
282
306
|
res.end(JSON.stringify({ roomCode: currentRoomCode }));
|
|
283
307
|
return;
|
|
284
308
|
}
|
|
309
|
+
// POST /connect-room — the frontend connect flow calls this so the agent
|
|
310
|
+
// joins LiveKit for an incoming user. Idempotent: if already connected it's
|
|
311
|
+
// a no-op. Must complete (agent in room) BEFORE the user joins, because the
|
|
312
|
+
// voice session is created from the ParticipantConnected event, which only
|
|
313
|
+
// fires for participants who join AFTER the agent. The frontend polls
|
|
314
|
+
// /health for livekit.status==='connected' before minting its token.
|
|
315
|
+
if (req.method === 'POST' && url.pathname === '/connect-room') {
|
|
316
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
317
|
+
res.end(JSON.stringify({ ok: true, status: livekitState.status }));
|
|
318
|
+
if (connectRoomHook) {
|
|
319
|
+
connectRoomHook().catch((e) => console.error('❌ /connect-room hook failed:', e));
|
|
320
|
+
}
|
|
321
|
+
return;
|
|
322
|
+
}
|
|
323
|
+
// POST /leave-room — the frontend's explicit "leave"/disconnect leaves the
|
|
324
|
+
// LiveKit room immediately so connection-minute burn stops the instant the
|
|
325
|
+
// user is done (no waiting for the agent-side alone timer). Sets
|
|
326
|
+
// intentionalLeave so the Disconnected handler does NOT auto-rejoin.
|
|
327
|
+
if (req.method === 'POST' && url.pathname === '/leave-room') {
|
|
328
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
329
|
+
res.end(JSON.stringify({ ok: true }));
|
|
330
|
+
if (leaveRoomHook) {
|
|
331
|
+
leaveRoomHook('frontend_leave').catch((e) => console.error('❌ /leave-room hook failed:', e));
|
|
332
|
+
}
|
|
333
|
+
return;
|
|
334
|
+
}
|
|
285
335
|
// POST /restart — graceful process restart (process manager will restart)
|
|
286
336
|
if (req.method === 'POST' && url.pathname === '/restart') {
|
|
287
337
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
@@ -1069,6 +1119,35 @@ async function main() {
|
|
|
1069
1119
|
let currentSession = null;
|
|
1070
1120
|
let currentAgent = null; // For updateChatCtx() context injection
|
|
1071
1121
|
let currentLLM = null;
|
|
1122
|
+
// Agent-side "alone in room" leave timer (see Room-presence lifecycle note up
|
|
1123
|
+
// top). Armed in ParticipantDisconnected once a user has left; if no one
|
|
1124
|
+
// rejoins within the grace window the agent leaves LiveKit on its own.
|
|
1125
|
+
// Cancelled in ParticipantConnected. 3 min: long enough to ride out a brief
|
|
1126
|
+
// reconnect (page refresh, network blip), short enough that a forgotten
|
|
1127
|
+
// session costs ~3 min of connection-minutes instead of hours.
|
|
1128
|
+
let aloneTimer = null;
|
|
1129
|
+
const ALONE_GRACE_MS = 3 * 60 * 1000;
|
|
1130
|
+
// Arm (or re-arm) the alone timer: if no remote participant is present, leave
|
|
1131
|
+
// the LiveKit room after the grace window. Called on Connected (covers a
|
|
1132
|
+
// machine woken but then abandoned before the user joined) and on
|
|
1133
|
+
// ParticipantDisconnected (covers a finished session). Cancelled the moment a
|
|
1134
|
+
// user joins. Net invariant: the agent never holds an empty room beyond
|
|
1135
|
+
// ALONE_GRACE_MS, in any scenario — the root cause of the 25h burn.
|
|
1136
|
+
const armAloneTimer = () => {
|
|
1137
|
+
if (aloneTimer)
|
|
1138
|
+
clearTimeout(aloneTimer);
|
|
1139
|
+
aloneTimer = null;
|
|
1140
|
+
if (room.remoteParticipants.size > 0)
|
|
1141
|
+
return;
|
|
1142
|
+
aloneTimer = setTimeout(() => {
|
|
1143
|
+
aloneTimer = null;
|
|
1144
|
+
if (room.remoteParticipants.size === 0 && livekitState.status === 'connected') {
|
|
1145
|
+
console.log(`🕊️ Alone in room ${ALONE_GRACE_MS / 1000}s — leaving LiveKit to stop connection-minute burn`);
|
|
1146
|
+
intentionalLeave = true;
|
|
1147
|
+
room.disconnect().catch((e) => console.error('alone-leave room.disconnect failed:', e));
|
|
1148
|
+
}
|
|
1149
|
+
}, ALONE_GRACE_MS);
|
|
1150
|
+
};
|
|
1072
1151
|
/**
|
|
1073
1152
|
* Hard-kill the in-flight Claude SDK query AND the persistent subprocess.
|
|
1074
1153
|
*
|
|
@@ -1126,6 +1205,26 @@ async function main() {
|
|
|
1126
1205
|
// Session-level always-allow list: paths the user has approved for this session without prompting
|
|
1127
1206
|
let sessionAlwaysAllowPaths = new Set();
|
|
1128
1207
|
let userState = 'listening'; // Track user speech state for queue safety
|
|
1208
|
+
// Leading-edge debounce for the TTS interrupt below — restores the same
|
|
1209
|
+
// anti-flap protection the removed ActiveSpeakersChanged handler had pre-0.9.39
|
|
1210
|
+
// (May 21 / c345c98). Wall-clock timestamp + ms compare; no setTimeout, no
|
|
1211
|
+
// promise, no new API. Suppresses repeat interrupts within the window so a
|
|
1212
|
+
// single user-input transition fires at most one interrupt() call per second.
|
|
1213
|
+
// Without it, TTS echo bleeding through the mic causes user_state to oscillate
|
|
1214
|
+
// speaking ↔ listening across rapid Deepgram frames, each transition firing a
|
|
1215
|
+
// fresh interrupt — and even after 1.4.x's stricter error classification, the
|
|
1216
|
+
// first one survives but the cascade kills the session.
|
|
1217
|
+
let lastInterruptAt = 0;
|
|
1218
|
+
// Self-echo guard for the TTS interrupt below. Updated by the
|
|
1219
|
+
// ActiveSpeakersChanged listener registered near the other room.on(...) handlers.
|
|
1220
|
+
// user_state_changed carries NO speaker identity (verified against the SDK type
|
|
1221
|
+
// — UserStateChangedEvent has only oldState/newState/createdAt), so a separate
|
|
1222
|
+
// remote-speaker timestamp is the only way to distinguish "real user spoke" from
|
|
1223
|
+
// "agent's own TTS echoed through the mic". Independent producer: rtc-node
|
|
1224
|
+
// emits activeSpeakersChanged from server WebRTC audio-level reports
|
|
1225
|
+
// (room.js:213), with NO reference to AgentSession or STT — so there's no
|
|
1226
|
+
// dependency loop with user_state_changed's STT-driven producer.
|
|
1227
|
+
let lastRemoteSpeakerAt = 0;
|
|
1129
1228
|
let currentVoiceMode = voiceMode; // Track active voice mode for data handlers
|
|
1130
1229
|
let currentProvider = realtimeConfig.provider; // Track active realtime provider
|
|
1131
1230
|
// Authenticated Supabase userId from participant metadata. Used to scope
|
|
@@ -2505,6 +2604,24 @@ async function main() {
|
|
|
2505
2604
|
room.on(RoomEvent.Connected, () => {
|
|
2506
2605
|
console.log('✅ Connected to room:', roomName);
|
|
2507
2606
|
localParticipant = room.localParticipant;
|
|
2607
|
+
// Arm the alone timer: if we connected but no user joins within the grace
|
|
2608
|
+
// window (e.g. machine woken then abandoned mid-handshake), leave the room
|
|
2609
|
+
// rather than hold it indefinitely. Cancelled in ParticipantConnected.
|
|
2610
|
+
armAloneTimer();
|
|
2611
|
+
});
|
|
2612
|
+
// Self-echo guard producer. Server WebRTC audio-level reports drive this
|
|
2613
|
+
// (rtc-node room.js:213, ~50-100ms latency from mic onset — faster than
|
|
2614
|
+
// Deepgram STT classification, so by the time user_state_changed fires
|
|
2615
|
+
// lastRemoteSpeakerAt is already current). Filter speakers to RemoteParticipant
|
|
2616
|
+
// — LocalParticipant is the agent itself and including it would defeat the
|
|
2617
|
+
// whole point (the echo we're guarding against IS the agent's local audio).
|
|
2618
|
+
// This is the speaker-identity filter the removed ActiveSpeakersChanged
|
|
2619
|
+
// handler had (May 21 / c345c98) — minus the interrupt() call, since the
|
|
2620
|
+
// user_state_changed handler now owns interrupt firing.
|
|
2621
|
+
room.on(RoomEvent.ActiveSpeakersChanged, (speakers) => {
|
|
2622
|
+
if (speakers.some((s) => s instanceof RemoteParticipant)) {
|
|
2623
|
+
lastRemoteSpeakerAt = Date.now();
|
|
2624
|
+
}
|
|
2508
2625
|
});
|
|
2509
2626
|
// NOTE: previously this section also had a RoomEvent.ActiveSpeakersChanged
|
|
2510
2627
|
// handler that interrupted TTS on any sustained audio activity (~50ms after
|
|
@@ -2559,6 +2676,20 @@ async function main() {
|
|
|
2559
2676
|
//
|
|
2560
2677
|
// Note: we mark status='retrying' immediately so /health reflects the real
|
|
2561
2678
|
// state — closing the lie window between Disconnected and the next attempt.
|
|
2679
|
+
// ── Voluntary-leave guard (2026-06-09) ──
|
|
2680
|
+
// If we left the room ON PURPOSE (user clicked leave → /leave-room, or the
|
|
2681
|
+
// agent-side alone timer fired), do NOT auto-rejoin — rejoining would
|
|
2682
|
+
// recreate the connection-minute burn we just stopped. Mark the connection
|
|
2683
|
+
// 'idle' (machine stays warm, /health still 200) and wait for the next
|
|
2684
|
+
// /connect-room. Reset the flag so a later involuntary drop still rejoins.
|
|
2685
|
+
if (intentionalLeave) {
|
|
2686
|
+
intentionalLeave = false;
|
|
2687
|
+
livekitState.status = 'idle';
|
|
2688
|
+
livekitState.error = null;
|
|
2689
|
+
livekitState.errorCode = null;
|
|
2690
|
+
console.log('🕊️ Left LiveKit room intentionally — idle, awaiting /connect-room (no auto-rejoin)');
|
|
2691
|
+
return;
|
|
2692
|
+
}
|
|
2562
2693
|
livekitState.status = 'retrying';
|
|
2563
2694
|
livekitState.error = 'LiveKit room disconnected; attempting to rejoin';
|
|
2564
2695
|
livekitState.errorCode = 'disconnected';
|
|
@@ -2569,6 +2700,11 @@ async function main() {
|
|
|
2569
2700
|
});
|
|
2570
2701
|
room.on(RoomEvent.ParticipantConnected, async (participant) => {
|
|
2571
2702
|
console.log(`\n👤 User joined: ${participant.identity}`);
|
|
2703
|
+
// A user is present — cancel any pending agent-side "alone" leave.
|
|
2704
|
+
if (aloneTimer) {
|
|
2705
|
+
clearTimeout(aloneTimer);
|
|
2706
|
+
aloneTimer = null;
|
|
2707
|
+
}
|
|
2572
2708
|
// Wait for previous session's byte stream handler to fully deregister.
|
|
2573
2709
|
// Quick reconnects (< ~6s) crash with "byte stream handler already set" without this.
|
|
2574
2710
|
if (pendingSessionClose) {
|
|
@@ -2852,12 +2988,34 @@ async function main() {
|
|
|
2852
2988
|
userState = ev.newState;
|
|
2853
2989
|
console.log(`👤 User state: ${prev} → ${ev.newState} (agent: ${agentState})`);
|
|
2854
2990
|
if (ev.newState === 'speaking' && agentState === 'speaking' && sessionVoiceMode !== 'realtime') {
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2991
|
+
const now = Date.now();
|
|
2992
|
+
// Self-echo guard FIRST. Reject this trigger entirely if no remote
|
|
2993
|
+
// participant has been heard speaking in the last 500ms — at that
|
|
2994
|
+
// point user_state=speaking is almost certainly TTS bleeding through
|
|
2995
|
+
// the mic (Deepgram correctly identifies it as "speech", we add the
|
|
2996
|
+
// identity filter the high-level event lacks). 500ms is wider than
|
|
2997
|
+
// the ~50-300ms gap between ActiveSpeakersChanged and user_state_changed
|
|
2998
|
+
// firing, so a real user is comfortably inside the window.
|
|
2999
|
+
if (now - lastRemoteSpeakerAt > 500) {
|
|
3000
|
+
console.log('🔇 Skipping interrupt — no recent remote-speaker activity (self-echo guard)');
|
|
3001
|
+
return;
|
|
2858
3002
|
}
|
|
2859
|
-
|
|
2860
|
-
|
|
3003
|
+
// Leading-edge 1s debounce — verbatim shape of the removed
|
|
3004
|
+
// ActiveSpeakersChanged handler's anti-flap (see lastInterruptAt
|
|
3005
|
+
// declaration). Belt + suspenders with the self-echo guard above.
|
|
3006
|
+
const debounced = now - lastInterruptAt < 1000;
|
|
3007
|
+
lastInterruptAt = now;
|
|
3008
|
+
if (debounced) {
|
|
3009
|
+
console.log('🔇 user-state interrupt debounced (< 1s since last)');
|
|
3010
|
+
}
|
|
3011
|
+
else {
|
|
3012
|
+
try {
|
|
3013
|
+
console.log('🎤 user_state_changed=speaking + agent speaking + remote-speaker confirmed → interrupting TTS');
|
|
3014
|
+
currentSession?.interrupt();
|
|
3015
|
+
}
|
|
3016
|
+
catch (err) {
|
|
3017
|
+
console.warn('⚠️ user-state interrupt failed:', err instanceof Error ? err.message : err);
|
|
3018
|
+
}
|
|
2861
3019
|
}
|
|
2862
3020
|
}
|
|
2863
3021
|
// When user stops speaking, retry voice queue — items may be waiting
|
|
@@ -3291,6 +3449,10 @@ async function main() {
|
|
|
3291
3449
|
activeMeetingBotId = null;
|
|
3292
3450
|
}
|
|
3293
3451
|
}
|
|
3452
|
+
// Arm the agent-side "alone" leave timer (tab-close-proof — runs on the
|
|
3453
|
+
// agent, not the frontend, so it fires even if the user closed the tab
|
|
3454
|
+
// without clicking leave).
|
|
3455
|
+
armAloneTimer();
|
|
3294
3456
|
console.log('⏳ Waiting for new user...\n');
|
|
3295
3457
|
});
|
|
3296
3458
|
room.on(RoomEvent.DataReceived, async (payload, participant, kind, topic) => {
|
|
@@ -4092,6 +4254,36 @@ async function main() {
|
|
|
4092
4254
|
}
|
|
4093
4255
|
}
|
|
4094
4256
|
};
|
|
4257
|
+
// Wire the module-level HTTP control hooks now that `room` and the
|
|
4258
|
+
// connect-with-retry loop exist (see Room-presence lifecycle note up top).
|
|
4259
|
+
// The /connect-room and /leave-room endpoints in startApiServer call these.
|
|
4260
|
+
connectRoomHook = async () => {
|
|
4261
|
+
intentionalLeave = false;
|
|
4262
|
+
if (aloneTimer) {
|
|
4263
|
+
clearTimeout(aloneTimer);
|
|
4264
|
+
aloneTimer = null;
|
|
4265
|
+
}
|
|
4266
|
+
if (livekitState.status === 'connected')
|
|
4267
|
+
return; // already in the room — no-op
|
|
4268
|
+
console.log('🔌 /connect-room — joining LiveKit for incoming user');
|
|
4269
|
+
await connectWithRetry();
|
|
4270
|
+
};
|
|
4271
|
+
leaveRoomHook = async (reason) => {
|
|
4272
|
+
if (aloneTimer) {
|
|
4273
|
+
clearTimeout(aloneTimer);
|
|
4274
|
+
aloneTimer = null;
|
|
4275
|
+
}
|
|
4276
|
+
if (livekitState.status !== 'connected')
|
|
4277
|
+
return; // already out — no-op
|
|
4278
|
+
intentionalLeave = true;
|
|
4279
|
+
console.log(`🚪 Leaving LiveKit room (${reason}) — stops connection-minute burn`);
|
|
4280
|
+
try {
|
|
4281
|
+
await room.disconnect();
|
|
4282
|
+
}
|
|
4283
|
+
catch (e) {
|
|
4284
|
+
console.error('leave-room room.disconnect failed:', e);
|
|
4285
|
+
}
|
|
4286
|
+
};
|
|
4095
4287
|
// Fire and forget; the retry loop keeps the process alive on its own (so
|
|
4096
4288
|
// we don't need the explicit `new Promise(() => {})` keepalive anymore).
|
|
4097
4289
|
// Errors that escape the retry loop should never happen, but if they do,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "osborn",
|
|
3
|
-
"version": "0.9.51",
|
|
3
|
+
"version": "0.9.53",
|
|
4
4
|
"description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -33,14 +33,14 @@
|
|
|
33
33
|
"@anthropic-ai/claude-agent-sdk": "^0.2.91",
|
|
34
34
|
"@anthropic-ai/sdk": "^0.80.0",
|
|
35
35
|
"@google/genai": "^1.0.0",
|
|
36
|
-
"@livekit/agents": "
|
|
37
|
-
"@livekit/agents-plugin-deepgram": "
|
|
38
|
-
"@livekit/agents-plugin-elevenlabs": "
|
|
39
|
-
"@livekit/agents-plugin-google": "
|
|
40
|
-
"@livekit/agents-plugin-livekit": "
|
|
41
|
-
"@livekit/agents-plugin-openai": "
|
|
42
|
-
"@livekit/agents-plugin-silero": "
|
|
43
|
-
"@livekit/rtc-node": "
|
|
36
|
+
"@livekit/agents": "1.2.1",
|
|
37
|
+
"@livekit/agents-plugin-deepgram": "1.2.1",
|
|
38
|
+
"@livekit/agents-plugin-elevenlabs": "1.2.1",
|
|
39
|
+
"@livekit/agents-plugin-google": "1.2.1",
|
|
40
|
+
"@livekit/agents-plugin-livekit": "1.2.1",
|
|
41
|
+
"@livekit/agents-plugin-openai": "1.2.1",
|
|
42
|
+
"@livekit/agents-plugin-silero": "1.2.1",
|
|
43
|
+
"@livekit/rtc-node": "0.13.24",
|
|
44
44
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
45
45
|
"@openai/codex-sdk": "^0.77.0",
|
|
46
46
|
"@smithery/api": "^0.48.0",
|