bloby-bot 0.53.1 → 0.53.3

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.53.1",
3
+ "version": "0.53.3",
4
4
  "releaseNotes": [
5
5
  "1. New Morphy animation system: config-driven sprites loaded from /morphy/*.json",
6
6
  "2. Swapped teleporting (splash) and headphones (bubble + chat) to the new format",
@@ -131,21 +131,52 @@ export function stopBackend(): Promise<void> {
131
131
  const dying = child;
132
132
  child = null;
133
133
 
134
- stopPromise = new Promise<void>((resolve) => {
135
- dying.once('exit', () => {
136
- stopPromise = null;
134
+ const promise = new Promise<void>((resolve) => {
135
+ let killTimer: ReturnType<typeof setTimeout> | null = null;
136
+ let finished = false;
137
+ const done = () => {
138
+ if (finished) return; // exit + SIGKILL paths can both fire; run once
139
+ finished = true;
140
+ if (killTimer) clearTimeout(killTimer);
141
+ // Only release the shared guard if it still points at THIS stop. A later stopBackend()
142
+ // may already have installed its own promise; the 3s safety timer (or a late exit) must
143
+ // never null a *different* stop's guard — that would make isBackendStopping() lie and let
144
+ // a concurrent spawn race the in-flight kill for the port.
145
+ if (stopPromise === promise) stopPromise = null;
137
146
  resolve();
138
- });
147
+ };
148
+ dying.once('exit', done);
139
149
  dying.kill();
140
- // Safety: force kill after 3s if SIGTERM doesn't work
141
- setTimeout(() => {
142
- try { dying.kill('SIGKILL'); } catch {}
143
- stopPromise = null;
144
- resolve();
145
- }, 3000);
150
+ // Safety: force kill after 3s if SIGTERM doesn't land.
151
+ killTimer = setTimeout(() => { try { dying.kill('SIGKILL'); } catch {} done(); }, 3000);
146
152
  });
153
+ stopPromise = promise;
154
+
155
+ return promise;
156
+ }
147
157
 
148
- return stopPromise;
158
+ let restartInFlight: Promise<void> | null = null;
159
+ let rerunRequested = false;
160
+
161
+ /** Serialized + coalescing backend restart — the single funnel for every deliberate restart
162
+ * (file watcher, turn-complete, scheduler pulse, channel manager). Concurrent callers share
163
+ * one in-flight restart; a request that arrives mid-restart triggers exactly one more
164
+ * stop→spawn cycle afterward, so the final backend was spawned after the latest request. This
165
+ * removes the double-spawn-onto-contended-port race of independent stopBackend().then(spawn) chains. */
166
+ export function restartBackend(port: number): Promise<void> {
167
+ if (restartInFlight) {
168
+ rerunRequested = true;
169
+ return restartInFlight;
170
+ }
171
+ restartInFlight = (async () => {
172
+ do {
173
+ rerunRequested = false;
174
+ resetBackendRestarts();
175
+ await stopBackend();
176
+ spawnBackend(port);
177
+ } while (rerunRequested);
178
+ })().finally(() => { restartInFlight = null; });
179
+ return restartInFlight;
149
180
  }
150
181
 
151
182
  export function isBackendAlive(): boolean {
@@ -86,6 +86,12 @@ export function isConversationBusy(conversationId: string): boolean {
86
86
  return Object.values(HARNESSES).some((h) => h.isConversationBusy(conversationId));
87
87
  }
88
88
 
89
+ /** True if ANY conversation in ANY harness is mid-turn. Lets the supervisor defer backend
90
+ * restarts during channel/Alexa turns, which don't set the dashboard's agentQueryActive flag. */
91
+ export function anyConversationBusy(): boolean {
92
+ return Object.values(HARNESSES).some((h) => h.anyConversationBusy());
93
+ }
94
+
89
95
  export async function stopSubAgentTask(conversationId: string, taskId: string): Promise<void> {
90
96
  for (const h of Object.values(HARNESSES)) {
91
97
  if (h.hasConversation(conversationId)) {
@@ -538,6 +538,13 @@ export function isConversationBusy(conversationId: string): boolean {
538
538
  return liveConversations.get(conversationId)?.busy || false;
539
539
  }
540
540
 
541
+ /** True if ANY live conversation in this harness is mid-turn. Used by the supervisor to defer
542
+ * backend restarts during channel/Alexa turns (which don't set the dashboard's agentQueryActive). */
543
+ export function anyConversationBusy(): boolean {
544
+ for (const c of liveConversations.values()) if (c.busy) return true;
545
+ return false;
546
+ }
547
+
541
548
  /** Stop a specific background sub-agent task */
542
549
  export async function stopSubAgentTask(conversationId: string, taskId: string): Promise<void> {
543
550
  const conv = liveConversations.get(conversationId);
@@ -365,7 +365,14 @@ async function startTurn(conv: CodexConversation, content: string, savedFiles?:
365
365
  await conv.rpc.request('turn/start', params);
366
366
  } catch (err: any) {
367
367
  conv.busy = false;
368
+ conv.currentTurnId = null;
368
369
  conv.onMessage('bot:error', { conversationId: conv.id, error: `turn/start failed: ${err.message}` });
370
+ // turn/start produced no turn, so no turn/completed will arrive to clear the supervisor's
371
+ // agentQueryActive (set on bot:typing above). Left as-is, that wedges true forever:
372
+ // backend auto-heal is deferred indefinitely and chat is stuck showing "typing". Tear the
373
+ // conversation down so bot:conversation-ended fires (which, unlike bot:turn-complete, does
374
+ // NOT trigger a backend restart) — the next user message cold-starts a fresh thread.
375
+ teardownConversation(conv.id);
369
376
  }
370
377
  }
371
378
 
@@ -633,6 +640,13 @@ export function isConversationBusy(conversationId: string): boolean {
633
640
  return conversations.get(conversationId)?.busy ?? false;
634
641
  }
635
642
 
643
+ /** True if ANY live conversation in this harness is mid-turn. Used by the supervisor to defer
644
+ * backend restarts during channel/Alexa turns (which don't set the dashboard's agentQueryActive). */
645
+ export function anyConversationBusy(): boolean {
646
+ for (const c of conversations.values()) if (c.busy) return true;
647
+ return false;
648
+ }
649
+
636
650
  export async function startConversation(
637
651
  conversationId: string,
638
652
  model: string,
@@ -320,6 +320,13 @@ export function isConversationBusy(conversationId: string): boolean {
320
320
  return liveConversations.get(conversationId)?.busy || false;
321
321
  }
322
322
 
323
+ /** True if ANY live conversation in this harness is mid-turn. Used by the supervisor to defer
324
+ * backend restarts during channel/Alexa turns (which don't set the dashboard's agentQueryActive). */
325
+ export function anyConversationBusy(): boolean {
326
+ for (const c of liveConversations.values()) if (c.busy) return true;
327
+ return false;
328
+ }
329
+
323
330
  /** Pi has no sub-agents yet; provided for interface compatibility. */
324
331
  export async function stopSubAgentTask(_conversationId: string, _taskId: string): Promise<void> {
325
332
  // no-op for Phase 1
@@ -220,8 +220,14 @@ export function createPiSession(init: PiSessionInit): PiSession {
220
220
  if (toolUses.length === 0 && !pendingInterleave) break;
221
221
  }
222
222
 
223
- if (!turnErrored) {
224
- if (accumulatedText) {
223
+ // Emit text_end only on a clean turn (don't persist a half-baked answer from an errored
224
+ // turn). But ALWAYS emit turn_complete on a non-aborted turn — including the errored path
225
+ // — so the supervisor clears agentQueryActive (set on turn_started). Skipping it on error
226
+ // wedged the flag true: backend auto-heal stayed deferred and chat stuck in "typing" until
227
+ // the next successful turn. The 'error' event was already emitted by runOneRound, so the
228
+ // user still sees the failure. Aborted turns are torn down via bot:conversation-ended.
229
+ if (!init.abortController.signal.aborted) {
230
+ if (!turnErrored && accumulatedText) {
225
231
  init.onEvent({ type: 'text_end', text: accumulatedText });
226
232
  }
227
233
  const usedFileTools = Array.from(usedTools).some((t) => FILE_TOOL_NAMES.has(t));
@@ -238,6 +244,12 @@ export function createPiSession(init: PiSessionInit): PiSession {
238
244
  } catch (err: any) {
239
245
  log.warn(`[pi/session] Turn failed: ${err?.message || err}`);
240
246
  init.onEvent({ type: 'error', error: err?.message || String(err) });
247
+ // A thrown turn emitted no turn_complete either — clear agentQueryActive so auto-heal
248
+ // and chat aren't wedged. Skip when aborting (teardown emits conversation-ended).
249
+ // usedFileTools=false is the safe default (it only governs whether to auto-restart now).
250
+ if (!init.abortController.signal.aborted) {
251
+ init.onEvent({ type: 'turn_complete', usedFileTools: false });
252
+ }
241
253
  }
242
254
  }
243
255
  },
@@ -57,6 +57,8 @@ export interface Harness {
57
57
  endConversation(conversationId: string): void;
58
58
  endAllConversations(): void;
59
59
  isConversationBusy(conversationId: string): boolean;
60
+ /** True if ANY conversation in this harness is mid-turn (no id — used to defer backend restarts). */
61
+ anyConversationBusy(): boolean;
60
62
  stopSubAgentTask(conversationId: string, taskId: string): Promise<void>;
61
63
  warmUpForLiveConversation(
62
64
  model: string,
@@ -11,12 +11,12 @@ import { log } from '../shared/logger.js';
11
11
  import { startTunnel, stopTunnel, isTunnelAlive, restartTunnel, startNamedTunnel, restartNamedTunnel } from './tunnel.js';
12
12
  import { createWorkerApp } from '../worker/index.js';
13
13
  import { closeDb, getSession, getSetting } from '../worker/db.js';
14
- import { spawnBackend, stopBackend, getBackendPort, isBackendAlive, isBackendStopping, resetBackendRestarts, setBackendEnv } from './backend.js';
14
+ import { spawnBackend, stopBackend, restartBackend, getBackendPort, isBackendAlive, isBackendStopping, setBackendEnv } from './backend.js';
15
15
  import { handleAgentQuery, type AgentQueryRequest } from './agent-api.js';
16
16
  import { updateTunnelUrl, startHeartbeat, stopHeartbeat, disconnect } from '../shared/relay.js';
17
17
  import {
18
18
  startConversation, hasConversation, endConversation, endAllConversations,
19
- isConversationBusy, stopSubAgentTask,
19
+ isConversationBusy, anyConversationBusy, stopSubAgentTask,
20
20
  startBlobyAgentQuery, stopBlobyAgentQuery,
21
21
  warmUpForLiveConversation,
22
22
  type RecentMessage,
@@ -1271,8 +1271,7 @@ mint();
1271
1271
  const result = await handleAgentQuery(parsed);
1272
1272
 
1273
1273
  if (result.usedFileTools) {
1274
- resetBackendRestarts();
1275
- stopBackend().then(() => spawnBackend(backendPort));
1274
+ void doRestart();
1276
1275
  broadcastBloby('app:hmr-update', {});
1277
1276
  }
1278
1277
 
@@ -1931,11 +1930,8 @@ mint();
1931
1930
  currentStreamBuffer = '';
1932
1931
 
1933
1932
  if (eventData.usedFileTools || pendingBackendRestart) {
1934
- log.info('[orchestrator] Restarting backend (file tools used)');
1935
- pendingBackendRestart = false;
1936
- if (backendRestartTimer) { clearTimeout(backendRestartTimer); backendRestartTimer = null; }
1937
- resetBackendRestarts();
1938
- stopBackend().then(() => spawnBackend(backendPort));
1933
+ log.info('[orchestrator] Restarting backend (file tools used / pending watcher change)');
1934
+ void doRestart();
1939
1935
  }
1940
1936
  if (pendingUpdate) {
1941
1937
  pendingUpdate = false;
@@ -2536,11 +2532,7 @@ mint();
2536
2532
  startScheduler({
2537
2533
  broadcastBloby,
2538
2534
  workerApi,
2539
- restartBackend: async () => {
2540
- resetBackendRestarts();
2541
- await stopBackend();
2542
- spawnBackend(backendPort);
2543
- },
2535
+ restartBackend: () => doRestart(),
2544
2536
  getModel: () => loadConfig().ai.model,
2545
2537
  });
2546
2538
 
@@ -2548,11 +2540,7 @@ mint();
2548
2540
  const channelManager = new ChannelManager({
2549
2541
  broadcastBloby,
2550
2542
  workerApi,
2551
- restartBackend: async () => {
2552
- resetBackendRestarts();
2553
- await stopBackend();
2554
- spawnBackend(backendPort);
2555
- },
2543
+ restartBackend: () => doRestart(),
2556
2544
  getModel: () => loadConfig().ai.model,
2557
2545
  });
2558
2546
 
@@ -2586,21 +2574,39 @@ mint();
2586
2574
  const backendDir = path.join(workspaceDir, 'backend');
2587
2575
  let backendRestartTimer: ReturnType<typeof setTimeout> | null = null;
2588
2576
 
2577
+ /** Single funnel for every DELIBERATE backend restart (file watcher, turn-complete, agent-api
2578
+ * one-shot, scheduler pulse, channel manager). Clears the deferred-restart flag and the
2579
+ * debounce timer, then delegates to backend.ts's serialized + coalescing restartBackend so
2580
+ * concurrent triggers can never double-spawn onto the contended port. */
2581
+ function doRestart(): Promise<void> {
2582
+ pendingBackendRestart = false;
2583
+ if (backendRestartTimer) { clearTimeout(backendRestartTimer); backendRestartTimer = null; }
2584
+ return restartBackend(backendPort);
2585
+ }
2586
+
2587
+ /** True while any surface is mid-turn. Dashboard chat sets agentQueryActive; WhatsApp/Alexa
2588
+ * turns instead set the harness conv.busy (they don't touch agentQueryActive), so we must
2589
+ * check both — otherwise an agent editing the backend over a channel would get the backend
2590
+ * restarted out from under it mid-turn. */
2591
+ const aTurnIsActive = () => agentQueryActive || anyConversationBusy();
2592
+
2589
2593
  function scheduleBackendRestart(reason: string) {
2590
- if (agentQueryActive) {
2591
- // Agent is working — don't restart now, flag it for bot:done
2594
+ if (aTurnIsActive()) {
2595
+ // A turn is working — don't restart now; flush at turn-complete (createSharedChatOnMessage)
2596
+ // or via the channel manager's own post-turn restart.
2592
2597
  pendingBackendRestart = true;
2593
2598
  return;
2594
2599
  }
2595
- // Skip if a stop/restart is already in progress (bot:done handler owns the restart)
2600
+ // Skip if a stop/restart is already in progress (that restart owns the spawn).
2596
2601
  if (isBackendStopping()) return;
2597
2602
  if (backendRestartTimer) clearTimeout(backendRestartTimer);
2598
- backendRestartTimer = setTimeout(async () => {
2599
- if (isBackendStopping()) return; // re-check after delay
2603
+ backendRestartTimer = setTimeout(() => {
2604
+ backendRestartTimer = null;
2605
+ // Re-check at fire time: a turn may have started during the 1s debounce window.
2606
+ if (aTurnIsActive()) { pendingBackendRestart = true; return; }
2607
+ if (isBackendStopping()) return;
2600
2608
  log.info(`[watcher] ${reason} — restarting backend...`);
2601
- resetBackendRestarts();
2602
- await stopBackend();
2603
- spawnBackend(backendPort);
2609
+ void doRestart();
2604
2610
  }, 1000);
2605
2611
  }
2606
2612
 
@@ -2610,12 +2616,22 @@ mint();
2610
2616
  scheduleBackendRestart(`Backend file changed: ${filename}`);
2611
2617
  });
2612
2618
 
2613
- // Watch workspace root for .env changes and .restart trigger
2619
+ // Watch workspace root for .env, dependency, and .restart/.update changes
2614
2620
  const workspaceWatcher = fs.watch(workspaceDir, (_event, filename) => {
2615
2621
  if (!filename) return;
2616
2622
  if (filename === '.env') {
2617
2623
  scheduleBackendRestart('.env changed');
2618
2624
  }
2625
+ if (filename === 'package.json' || filename === 'package-lock.json') {
2626
+ // The agent ran `npm install` to add/fix a backend dependency. Neither watcher otherwise
2627
+ // covers workspace-root deps (backendWatcher only watches backend/; node_modules is huge
2628
+ // and intentionally unwatched). Without this, an install done to fix an ENOENT crash — where
2629
+ // the import already exists so no Write tool fires and usedFileTools stays false — never
2630
+ // restarts the backend, leaving it broken until some unrelated edit. npm install runs inside
2631
+ // the agent's turn, so this defers (like every trigger) and lands at turn-complete, after
2632
+ // the install has fully written package.json + node_modules.
2633
+ scheduleBackendRestart(`workspace dependencies changed (${filename})`);
2634
+ }
2619
2635
  if (filename === '.restart') {
2620
2636
  // Consume the trigger file
2621
2637
  try { fs.unlinkSync(path.join(workspaceDir, '.restart')); } catch {}