osborn 0.9.50 → 0.9.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +151 -0
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -165,6 +165,30 @@ const livekitState = {
165
165
  lastAttemptAt: null,
166
166
  attemptCount: 0,
167
167
  };
168
+ // ── Room-presence lifecycle (2026-06-09) ──────────────────────────────────────
169
+ // The agent used to eager-connect to LiveKit on boot and hold the room for the
170
+ // machine's entire life. With 1 participant (the agent itself), LiveKit never
171
+ // considers the room empty, so it never closes — a single forgotten session
172
+ // burned 25h of connection-minutes (room osborn-jzs94j) before we caught it.
173
+ //
174
+ // Fix: the agent now LEAVES the LiveKit room when no user is present, and only
175
+ // rejoins when a user actually connects. Two triggers, both feeding room.disconnect():
176
+ // 1. Agent-side "alone" timer — armed on Connected and again in
177
+ // ParticipantDisconnected; if no user is present after ALONE_GRACE_MS, the agent
178
+ // leaves on its own. This is tab-close-proof (does not depend on the
179
+ // frontend's JS still running — the exact gap that let the 25h room linger).
180
+ // 2. POST /leave-room — the frontend's explicit "leave" button leaves instantly.
181
+ // Rejoin happens via POST /connect-room (frontend connect flow) which re-runs the
182
+ // connect-with-retry loop.
183
+ //
184
+ // `intentionalLeave` distinguishes a voluntary leave from an involuntary LiveKit
185
+ // eviction. The ghost-agent fix in RoomEvent.Disconnected auto-rejoins on drop;
186
+ // that must NOT fire after a voluntary leave (it would recreate the burn we just
187
+ // stopped). The hooks below are populated by main() (which owns `room` and the
188
+ // connect-with-retry loop) so the module-level HTTP server can drive them.
189
+ let intentionalLeave = false;
190
+ let connectRoomHook = null;
191
+ let leaveRoomHook = null;
168
192
  function startApiServer(workingDir, port) {
169
193
  const server = createServer(async (req, res) => {
170
194
  // CORS headers for cloud frontend
@@ -282,6 +306,32 @@ function startApiServer(workingDir, port) {
282
306
  res.end(JSON.stringify({ roomCode: currentRoomCode }));
283
307
  return;
284
308
  }
309
+ // POST /connect-room — the frontend connect flow calls this so the agent
310
+ // joins LiveKit for an incoming user. Idempotent: if already connected it's
311
+ // a no-op. Must complete (agent in room) BEFORE the user joins, because the
312
+ // voice session is created from the ParticipantConnected event, which only
313
+ // fires for participants who join AFTER the agent. The frontend polls
314
+ // /health for livekit.status==='connected' before minting its token.
315
+ if (req.method === 'POST' && url.pathname === '/connect-room') {
316
+ res.writeHead(200, { 'Content-Type': 'application/json' });
317
+ res.end(JSON.stringify({ ok: true, status: livekitState.status }));
318
+ if (connectRoomHook) {
319
+ connectRoomHook().catch((e) => console.error('❌ /connect-room hook failed:', e));
320
+ }
321
+ return;
322
+ }
323
+ // POST /leave-room — the frontend's explicit "leave"/disconnect leaves the
324
+ // LiveKit room immediately so connection-minute burn stops the instant the
325
+ // user is done (no waiting for the agent-side alone timer). Sets
326
+ // intentionalLeave so the Disconnected handler does NOT auto-rejoin.
327
+ if (req.method === 'POST' && url.pathname === '/leave-room') {
328
+ res.writeHead(200, { 'Content-Type': 'application/json' });
329
+ res.end(JSON.stringify({ ok: true }));
330
+ if (leaveRoomHook) {
331
+ leaveRoomHook('frontend_leave').catch((e) => console.error('❌ /leave-room hook failed:', e));
332
+ }
333
+ return;
334
+ }
285
335
  // POST /restart — graceful process restart (process manager will restart)
286
336
  if (req.method === 'POST' && url.pathname === '/restart') {
287
337
  res.writeHead(200, { 'Content-Type': 'application/json' });
@@ -1069,6 +1119,35 @@ async function main() {
1069
1119
  let currentSession = null;
1070
1120
  let currentAgent = null; // For updateChatCtx() context injection
1071
1121
  let currentLLM = null;
1122
+ // Agent-side "alone in room" leave timer (see Room-presence lifecycle note up
1123
+ // top). Armed in ParticipantDisconnected once a user has left; if no one
1124
+ // rejoins within the grace window the agent leaves LiveKit on its own.
1125
+ // Cancelled in ParticipantConnected. 3 min: long enough to ride out a brief
1126
+ // reconnect (page refresh, network blip), short enough that a forgotten
1127
+ // session costs ~3 min of connection-minutes instead of hours.
1128
+ let aloneTimer = null;
1129
+ const ALONE_GRACE_MS = 3 * 60 * 1000;
1130
/**
 * Arm (or re-arm) the agent-side "alone in room" leave timer.
 *
 * Any pending timer is cancelled first. If a remote participant is already
 * present, nothing is scheduled. Otherwise, once ALONE_GRACE_MS has elapsed,
 * the callback re-checks that the room is still empty and that
 * livekitState.status is 'connected'; only then does it flag the leave as
 * intentional and disconnect from the room.
 *
 * Called from the Connected handler and from the ParticipantDisconnected
 * handler; the pending timer is cancelled when a participant connects.
 */
const armAloneTimer = () => {
    if (aloneTimer) {
        clearTimeout(aloneTimer);
    }
    aloneTimer = null;
    if (room.remoteParticipants.size > 0) {
        return;
    }
    aloneTimer = setTimeout(() => {
        aloneTimer = null;
        // Re-check at fire time: a user may have joined, or the connection may
        // already be gone, since the timer was armed.
        if (room.remoteParticipants.size !== 0 || livekitState.status !== 'connected') {
            return;
        }
        console.log(`🕊️ Alone in room ${ALONE_GRACE_MS / 1000}s — leaving LiveKit to stop connection-minute burn`);
        intentionalLeave = true;
        room.disconnect().catch((e) => {
            // The voluntary leave failed, so do not leave the flag set: a stale
            // `true` would make the next involuntary drop look intentional and
            // suppress the auto-rejoin.
            intentionalLeave = false;
            console.error('alone-leave room.disconnect failed:', e);
        });
    }, ALONE_GRACE_MS);
};
1072
1151
  /**
1073
1152
  * Hard-kill the in-flight Claude SDK query AND the persistent subprocess.
1074
1153
  *
@@ -1092,6 +1171,21 @@ async function main() {
1092
1171
  return;
1093
1172
  try {
1094
1173
  const llm = currentLLM;
1174
+ // Heap-OOM fix (2026-06-02): stop the PipelineDirectLLM summary-index
1175
+ // watcher BEFORE we abort + drop the reference. The watcher is a 10s
1176
+ // setInterval whose closure retains the entire PipelineDirectLLM →
1177
+ // ClaudeLLM object graph. killCurrentLLM is the single chokepoint all
1178
+ // three cleanup sites (Disconnected, previous-session-cleanup,
1179
+ // ParticipantDisconnected) call, but it previously only aborted the
1180
+ // SDK subprocess — leaving the interval (and the whole graph) alive and
1181
+ // uncollectable on every disconnect/reconnect. A reconnect-heavy session
1182
+ // (e.g. 15 reconnects from a frontend redeploy) leaked 15 timers + 15
1183
+ // retained graphs, each re-reading JSONL every 10s, until the node heap
1184
+ // OOM'd (~980MB) and the process crashed. Stopping the watcher here lets
1185
+ // the abandoned graph be GC'd. Duck-typed: only PipelineDirectLLM has it.
1186
+ if (typeof llm.stopIndexWatcher === 'function') {
1187
+ llm.stopIndexWatcher();
1188
+ }
1095
1189
  if (typeof llm.abortQuery === 'function') {
1096
1190
  llm.abortQuery();
1097
1191
  }
@@ -2490,6 +2584,10 @@ async function main() {
2490
2584
  room.on(RoomEvent.Connected, () => {
2491
2585
  console.log('✅ Connected to room:', roomName);
2492
2586
  localParticipant = room.localParticipant;
2587
+ // Arm the alone timer: if we connected but no user joins within the grace
2588
+ // window (e.g. machine woken then abandoned mid-handshake), leave the room
2589
+ // rather than hold it indefinitely. Cancelled in ParticipantConnected.
2590
+ armAloneTimer();
2493
2591
  });
2494
2592
  // NOTE: previously this section also had a RoomEvent.ActiveSpeakersChanged
2495
2593
  // handler that interrupted TTS on any sustained audio activity (~50ms after
@@ -2544,6 +2642,20 @@ async function main() {
2544
2642
  //
2545
2643
  // Note: we mark status='retrying' immediately so /health reflects the real
2546
2644
  // state — closing the lie window between Disconnected and the next attempt.
2645
+ // ── Voluntary-leave guard (2026-06-09) ──
2646
+ // If we left the room ON PURPOSE (user clicked leave → /leave-room, or the
2647
+ // agent-side alone timer fired), do NOT auto-rejoin — rejoining would
2648
+ // recreate the connection-minute burn we just stopped. Mark the connection
2649
+ // 'idle' (machine stays warm, /health still 200) and wait for the next
2650
+ // /connect-room. Reset the flag so a later involuntary drop still rejoins.
2651
+ if (intentionalLeave) {
2652
+ intentionalLeave = false;
2653
+ livekitState.status = 'idle';
2654
+ livekitState.error = null;
2655
+ livekitState.errorCode = null;
2656
+ console.log('🕊️ Left LiveKit room intentionally — idle, awaiting /connect-room (no auto-rejoin)');
2657
+ return;
2658
+ }
2547
2659
  livekitState.status = 'retrying';
2548
2660
  livekitState.error = 'LiveKit room disconnected; attempting to rejoin';
2549
2661
  livekitState.errorCode = 'disconnected';
@@ -2554,6 +2666,11 @@ async function main() {
2554
2666
  });
2555
2667
  room.on(RoomEvent.ParticipantConnected, async (participant) => {
2556
2668
  console.log(`\n👤 User joined: ${participant.identity}`);
2669
+ // A user is present — cancel any pending agent-side "alone" leave.
2670
+ if (aloneTimer) {
2671
+ clearTimeout(aloneTimer);
2672
+ aloneTimer = null;
2673
+ }
2557
2674
  // Wait for previous session's byte stream handler to fully deregister.
2558
2675
  // Quick reconnects (< ~6s) crash with "byte stream handler already set" without this.
2559
2676
  if (pendingSessionClose) {
@@ -3276,6 +3393,10 @@ async function main() {
3276
3393
  activeMeetingBotId = null;
3277
3394
  }
3278
3395
  }
3396
+ // Arm the agent-side "alone" leave timer (tab-close-proof — runs on the
3397
+ // agent, not the frontend, so it fires even if the user closed the tab
3398
+ // without clicking leave).
3399
+ armAloneTimer();
3279
3400
  console.log('⏳ Waiting for new user...\n');
3280
3401
  });
3281
3402
  room.on(RoomEvent.DataReceived, async (payload, participant, kind, topic) => {
@@ -4077,6 +4198,36 @@ async function main() {
4077
4198
  }
4078
4199
  }
4079
4200
  };
4201
+ // Wire the module-level HTTP control hooks now that `room` and the
4202
+ // connect-with-retry loop exist (see Room-presence lifecycle note up top).
4203
+ // The /connect-room and /leave-room endpoints in startApiServer call these.
4204
/**
 * Control hook behind POST /connect-room: make sure the agent is in the
 * LiveKit room for an incoming user.
 *
 * Always clears `intentionalLeave`. If the agent is already connected, no new
 * connection is started; the alone timer is restarted instead so that a user
 * who never shows up cannot leave the agent holding an empty room with no
 * timer pending. Otherwise any pending alone timer is cancelled and the
 * connect-with-retry loop is run.
 *
 * @returns {Promise<void>} settles when connectWithRetry() settles (or
 *   immediately when already connected).
 */
connectRoomHook = async () => {
    intentionalLeave = false;
    if (livekitState.status === 'connected') {
        // Already in the room — no reconnect. Restart the grace window rather
        // than just cancelling it: armAloneTimer() clears the pending timer and
        // only schedules a new one while no remote participant is present.
        armAloneTimer();
        return;
    }
    if (aloneTimer) {
        clearTimeout(aloneTimer);
        aloneTimer = null;
    }
    console.log('🔌 /connect-room — joining LiveKit for incoming user');
    await connectWithRetry();
};
4215
/**
 * Control hook behind POST /leave-room: leave the LiveKit room on request.
 *
 * Cancels any pending alone timer. If the agent is not connected this is a
 * no-op. Otherwise it flags the leave as intentional (so the Disconnected
 * handler does not auto-rejoin) and disconnects from the room.
 *
 * @param {string} reason - label for the log line (e.g. 'frontend_leave').
 * @returns {Promise<void>} never rejects on a failed disconnect; the failure
 *   is logged instead.
 */
leaveRoomHook = async (reason) => {
    if (aloneTimer) {
        clearTimeout(aloneTimer);
        aloneTimer = null;
    }
    if (livekitState.status !== 'connected') {
        return; // already out — no-op
    }
    intentionalLeave = true;
    console.log(`🚪 Leaving LiveKit room (${reason}) — stops connection-minute burn`);
    try {
        await room.disconnect();
    }
    catch (e) {
        // The leave did not go through, so clear the flag: leaving it set would
        // make the next involuntary drop look voluntary and skip the auto-rejoin.
        intentionalLeave = false;
        console.error('leave-room room.disconnect failed:', e);
    }
};
4080
4231
  // Fire and forget; the retry loop keeps the process alive on its own (so
4081
4232
  // we don't need the explicit `new Promise(() => {})` keepalive anymore).
4082
4233
  // Errors that escape the retry loop should never happen, but if they do,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "osborn",
3
- "version": "0.9.50",
3
+ "version": "0.9.52",
4
4
  "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
5
5
  "type": "module",
6
6
  "bin": {