bloby-bot 0.53.2 → 0.53.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/supervisor/backend.ts +42 -11
- package/supervisor/bloby-agent.ts +6 -0
- package/supervisor/harnesses/claude.ts +7 -0
- package/supervisor/harnesses/codex.ts +14 -0
- package/supervisor/harnesses/pi/index.ts +7 -0
- package/supervisor/harnesses/pi/session.ts +14 -2
- package/supervisor/harnesses/types.ts +2 -0
- package/supervisor/index.ts +44 -28
package/package.json
CHANGED
package/supervisor/backend.ts
CHANGED
|
@@ -131,21 +131,52 @@ export function stopBackend(): Promise<void> {
|
|
|
131
131
|
const dying = child;
|
|
132
132
|
child = null;
|
|
133
133
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
134
|
+
const promise = new Promise<void>((resolve) => {
|
|
135
|
+
let killTimer: ReturnType<typeof setTimeout> | null = null;
|
|
136
|
+
let finished = false;
|
|
137
|
+
const done = () => {
|
|
138
|
+
if (finished) return; // exit + SIGKILL paths can both fire; run once
|
|
139
|
+
finished = true;
|
|
140
|
+
if (killTimer) clearTimeout(killTimer);
|
|
141
|
+
// Only release the shared guard if it still points at THIS stop. A later stopBackend()
|
|
142
|
+
// may already have installed its own promise; the 3s safety timer (or a late exit) must
|
|
143
|
+
// never null a *different* stop's guard — that would make isBackendStopping() lie and let
|
|
144
|
+
// a concurrent spawn race the in-flight kill for the port.
|
|
145
|
+
if (stopPromise === promise) stopPromise = null;
|
|
137
146
|
resolve();
|
|
138
|
-
}
|
|
147
|
+
};
|
|
148
|
+
dying.once('exit', done);
|
|
139
149
|
dying.kill();
|
|
140
|
-
// Safety: force kill after 3s if SIGTERM doesn't
|
|
141
|
-
setTimeout(() => {
|
|
142
|
-
try { dying.kill('SIGKILL'); } catch {}
|
|
143
|
-
stopPromise = null;
|
|
144
|
-
resolve();
|
|
145
|
-
}, 3000);
|
|
150
|
+
// Safety: force kill after 3s if SIGTERM doesn't land.
|
|
151
|
+
killTimer = setTimeout(() => { try { dying.kill('SIGKILL'); } catch {} done(); }, 3000);
|
|
146
152
|
});
|
|
153
|
+
stopPromise = promise;
|
|
154
|
+
|
|
155
|
+
return promise;
|
|
156
|
+
}
|
|
147
157
|
|
|
148
|
-
|
|
158
|
+
let restartInFlight: Promise<void> | null = null;
|
|
159
|
+
let rerunRequested = false;
|
|
160
|
+
|
|
161
|
+
/** Serialized + coalescing backend restart — the single funnel for every deliberate restart
|
|
162
|
+
* (file watcher, turn-complete, scheduler pulse, channel manager). Concurrent callers share
|
|
163
|
+
* one in-flight restart; a request that arrives mid-restart triggers exactly one more
|
|
164
|
+
* stop→spawn cycle afterward, so the final backend was spawned after the latest request. This
|
|
165
|
+
* removes the double-spawn-onto-contended-port race of independent stopBackend().then(spawn) chains. */
|
|
166
|
+
export function restartBackend(port: number): Promise<void> {
|
|
167
|
+
if (restartInFlight) {
|
|
168
|
+
rerunRequested = true;
|
|
169
|
+
return restartInFlight;
|
|
170
|
+
}
|
|
171
|
+
restartInFlight = (async () => {
|
|
172
|
+
do {
|
|
173
|
+
rerunRequested = false;
|
|
174
|
+
resetBackendRestarts();
|
|
175
|
+
await stopBackend();
|
|
176
|
+
spawnBackend(port);
|
|
177
|
+
} while (rerunRequested);
|
|
178
|
+
})().finally(() => { restartInFlight = null; });
|
|
179
|
+
return restartInFlight;
|
|
149
180
|
}
|
|
150
181
|
|
|
151
182
|
export function isBackendAlive(): boolean {
|
|
@@ -86,6 +86,12 @@ export function isConversationBusy(conversationId: string): boolean {
|
|
|
86
86
|
return Object.values(HARNESSES).some((h) => h.isConversationBusy(conversationId));
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
+
/** True if ANY conversation in ANY harness is mid-turn. Lets the supervisor defer backend
|
|
90
|
+
* restarts during channel/Alexa turns, which don't set the dashboard's agentQueryActive flag. */
|
|
91
|
+
export function anyConversationBusy(): boolean {
|
|
92
|
+
return Object.values(HARNESSES).some((h) => h.anyConversationBusy());
|
|
93
|
+
}
|
|
94
|
+
|
|
89
95
|
export async function stopSubAgentTask(conversationId: string, taskId: string): Promise<void> {
|
|
90
96
|
for (const h of Object.values(HARNESSES)) {
|
|
91
97
|
if (h.hasConversation(conversationId)) {
|
|
@@ -538,6 +538,13 @@ export function isConversationBusy(conversationId: string): boolean {
|
|
|
538
538
|
return liveConversations.get(conversationId)?.busy || false;
|
|
539
539
|
}
|
|
540
540
|
|
|
541
|
+
/** True if ANY live conversation in this harness is mid-turn. Used by the supervisor to defer
|
|
542
|
+
* backend restarts during channel/Alexa turns (which don't set the dashboard's agentQueryActive). */
|
|
543
|
+
export function anyConversationBusy(): boolean {
|
|
544
|
+
for (const c of liveConversations.values()) if (c.busy) return true;
|
|
545
|
+
return false;
|
|
546
|
+
}
|
|
547
|
+
|
|
541
548
|
/** Stop a specific background sub-agent task */
|
|
542
549
|
export async function stopSubAgentTask(conversationId: string, taskId: string): Promise<void> {
|
|
543
550
|
const conv = liveConversations.get(conversationId);
|
|
@@ -365,7 +365,14 @@ async function startTurn(conv: CodexConversation, content: string, savedFiles?:
|
|
|
365
365
|
await conv.rpc.request('turn/start', params);
|
|
366
366
|
} catch (err: any) {
|
|
367
367
|
conv.busy = false;
|
|
368
|
+
conv.currentTurnId = null;
|
|
368
369
|
conv.onMessage('bot:error', { conversationId: conv.id, error: `turn/start failed: ${err.message}` });
|
|
370
|
+
// turn/start produced no turn, so no turn/completed will arrive to clear the supervisor's
|
|
371
|
+
// agentQueryActive (set on bot:typing above). Left as-is, that wedges true forever:
|
|
372
|
+
// backend auto-heal is deferred indefinitely and chat is stuck showing "typing". Tear the
|
|
373
|
+
// conversation down so bot:conversation-ended fires (which, unlike bot:turn-complete, does
|
|
374
|
+
// NOT trigger a backend restart) — the next user message cold-starts a fresh thread.
|
|
375
|
+
teardownConversation(conv.id);
|
|
369
376
|
}
|
|
370
377
|
}
|
|
371
378
|
|
|
@@ -633,6 +640,13 @@ export function isConversationBusy(conversationId: string): boolean {
|
|
|
633
640
|
return conversations.get(conversationId)?.busy ?? false;
|
|
634
641
|
}
|
|
635
642
|
|
|
643
|
+
/** True if ANY live conversation in this harness is mid-turn. Used by the supervisor to defer
|
|
644
|
+
* backend restarts during channel/Alexa turns (which don't set the dashboard's agentQueryActive). */
|
|
645
|
+
export function anyConversationBusy(): boolean {
|
|
646
|
+
for (const c of conversations.values()) if (c.busy) return true;
|
|
647
|
+
return false;
|
|
648
|
+
}
|
|
649
|
+
|
|
636
650
|
export async function startConversation(
|
|
637
651
|
conversationId: string,
|
|
638
652
|
model: string,
|
|
@@ -320,6 +320,13 @@ export function isConversationBusy(conversationId: string): boolean {
|
|
|
320
320
|
return liveConversations.get(conversationId)?.busy || false;
|
|
321
321
|
}
|
|
322
322
|
|
|
323
|
+
/** True if ANY live conversation in this harness is mid-turn. Used by the supervisor to defer
|
|
324
|
+
* backend restarts during channel/Alexa turns (which don't set the dashboard's agentQueryActive). */
|
|
325
|
+
export function anyConversationBusy(): boolean {
|
|
326
|
+
for (const c of liveConversations.values()) if (c.busy) return true;
|
|
327
|
+
return false;
|
|
328
|
+
}
|
|
329
|
+
|
|
323
330
|
/** Pi has no sub-agents yet; provided for interface compatibility. */
|
|
324
331
|
export async function stopSubAgentTask(_conversationId: string, _taskId: string): Promise<void> {
|
|
325
332
|
// no-op for Phase 1
|
|
@@ -220,8 +220,14 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
220
220
|
if (toolUses.length === 0 && !pendingInterleave) break;
|
|
221
221
|
}
|
|
222
222
|
|
|
223
|
-
|
|
224
|
-
|
|
223
|
+
// Emit text_end only on a clean turn (don't persist a half-baked answer from an errored
|
|
224
|
+
// turn). But ALWAYS emit turn_complete on a non-aborted turn — including the errored path
|
|
225
|
+
// — so the supervisor clears agentQueryActive (set on turn_started). Skipping it on error
|
|
226
|
+
// wedged the flag true: backend auto-heal stayed deferred and chat stuck in "typing" until
|
|
227
|
+
// the next successful turn. The 'error' event was already emitted by runOneRound, so the
|
|
228
|
+
// user still sees the failure. Aborted turns are torn down via bot:conversation-ended.
|
|
229
|
+
if (!init.abortController.signal.aborted) {
|
|
230
|
+
if (!turnErrored && accumulatedText) {
|
|
225
231
|
init.onEvent({ type: 'text_end', text: accumulatedText });
|
|
226
232
|
}
|
|
227
233
|
const usedFileTools = Array.from(usedTools).some((t) => FILE_TOOL_NAMES.has(t));
|
|
@@ -238,6 +244,12 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
238
244
|
} catch (err: any) {
|
|
239
245
|
log.warn(`[pi/session] Turn failed: ${err?.message || err}`);
|
|
240
246
|
init.onEvent({ type: 'error', error: err?.message || String(err) });
|
|
247
|
+
// A thrown turn emitted no turn_complete either — clear agentQueryActive so auto-heal
|
|
248
|
+
// and chat aren't wedged. Skip when aborting (teardown emits conversation-ended).
|
|
249
|
+
// usedFileTools=false is the safe default (it only governs whether to auto-restart now).
|
|
250
|
+
if (!init.abortController.signal.aborted) {
|
|
251
|
+
init.onEvent({ type: 'turn_complete', usedFileTools: false });
|
|
252
|
+
}
|
|
241
253
|
}
|
|
242
254
|
}
|
|
243
255
|
},
|
|
@@ -57,6 +57,8 @@ export interface Harness {
|
|
|
57
57
|
endConversation(conversationId: string): void;
|
|
58
58
|
endAllConversations(): void;
|
|
59
59
|
isConversationBusy(conversationId: string): boolean;
|
|
60
|
+
/** True if ANY conversation in this harness is mid-turn (no id — used to defer backend restarts). */
|
|
61
|
+
anyConversationBusy(): boolean;
|
|
60
62
|
stopSubAgentTask(conversationId: string, taskId: string): Promise<void>;
|
|
61
63
|
warmUpForLiveConversation(
|
|
62
64
|
model: string,
|
package/supervisor/index.ts
CHANGED
|
@@ -11,12 +11,12 @@ import { log } from '../shared/logger.js';
|
|
|
11
11
|
import { startTunnel, stopTunnel, isTunnelAlive, restartTunnel, startNamedTunnel, restartNamedTunnel } from './tunnel.js';
|
|
12
12
|
import { createWorkerApp } from '../worker/index.js';
|
|
13
13
|
import { closeDb, getSession, getSetting } from '../worker/db.js';
|
|
14
|
-
import { spawnBackend, stopBackend, getBackendPort, isBackendAlive, isBackendStopping,
|
|
14
|
+
import { spawnBackend, stopBackend, restartBackend, getBackendPort, isBackendAlive, isBackendStopping, setBackendEnv } from './backend.js';
|
|
15
15
|
import { handleAgentQuery, type AgentQueryRequest } from './agent-api.js';
|
|
16
16
|
import { updateTunnelUrl, startHeartbeat, stopHeartbeat, disconnect } from '../shared/relay.js';
|
|
17
17
|
import {
|
|
18
18
|
startConversation, hasConversation, endConversation, endAllConversations,
|
|
19
|
-
isConversationBusy, stopSubAgentTask,
|
|
19
|
+
isConversationBusy, anyConversationBusy, stopSubAgentTask,
|
|
20
20
|
startBlobyAgentQuery, stopBlobyAgentQuery,
|
|
21
21
|
warmUpForLiveConversation,
|
|
22
22
|
type RecentMessage,
|
|
@@ -1271,8 +1271,7 @@ mint();
|
|
|
1271
1271
|
const result = await handleAgentQuery(parsed);
|
|
1272
1272
|
|
|
1273
1273
|
if (result.usedFileTools) {
|
|
1274
|
-
|
|
1275
|
-
stopBackend().then(() => spawnBackend(backendPort));
|
|
1274
|
+
void doRestart();
|
|
1276
1275
|
broadcastBloby('app:hmr-update', {});
|
|
1277
1276
|
}
|
|
1278
1277
|
|
|
@@ -1931,11 +1930,8 @@ mint();
|
|
|
1931
1930
|
currentStreamBuffer = '';
|
|
1932
1931
|
|
|
1933
1932
|
if (eventData.usedFileTools || pendingBackendRestart) {
|
|
1934
|
-
log.info('[orchestrator] Restarting backend (file tools used)');
|
|
1935
|
-
|
|
1936
|
-
if (backendRestartTimer) { clearTimeout(backendRestartTimer); backendRestartTimer = null; }
|
|
1937
|
-
resetBackendRestarts();
|
|
1938
|
-
stopBackend().then(() => spawnBackend(backendPort));
|
|
1933
|
+
log.info('[orchestrator] Restarting backend (file tools used / pending watcher change)');
|
|
1934
|
+
void doRestart();
|
|
1939
1935
|
}
|
|
1940
1936
|
if (pendingUpdate) {
|
|
1941
1937
|
pendingUpdate = false;
|
|
@@ -2536,11 +2532,7 @@ mint();
|
|
|
2536
2532
|
startScheduler({
|
|
2537
2533
|
broadcastBloby,
|
|
2538
2534
|
workerApi,
|
|
2539
|
-
restartBackend:
|
|
2540
|
-
resetBackendRestarts();
|
|
2541
|
-
await stopBackend();
|
|
2542
|
-
spawnBackend(backendPort);
|
|
2543
|
-
},
|
|
2535
|
+
restartBackend: () => doRestart(),
|
|
2544
2536
|
getModel: () => loadConfig().ai.model,
|
|
2545
2537
|
});
|
|
2546
2538
|
|
|
@@ -2548,11 +2540,7 @@ mint();
|
|
|
2548
2540
|
const channelManager = new ChannelManager({
|
|
2549
2541
|
broadcastBloby,
|
|
2550
2542
|
workerApi,
|
|
2551
|
-
restartBackend:
|
|
2552
|
-
resetBackendRestarts();
|
|
2553
|
-
await stopBackend();
|
|
2554
|
-
spawnBackend(backendPort);
|
|
2555
|
-
},
|
|
2543
|
+
restartBackend: () => doRestart(),
|
|
2556
2544
|
getModel: () => loadConfig().ai.model,
|
|
2557
2545
|
});
|
|
2558
2546
|
|
|
@@ -2586,21 +2574,39 @@ mint();
|
|
|
2586
2574
|
const backendDir = path.join(workspaceDir, 'backend');
|
|
2587
2575
|
let backendRestartTimer: ReturnType<typeof setTimeout> | null = null;
|
|
2588
2576
|
|
|
2577
|
+
/** Single funnel for every DELIBERATE backend restart (file watcher, turn-complete, agent-api
|
|
2578
|
+
* one-shot, scheduler pulse, channel manager). Clears the deferred-restart flag and the
|
|
2579
|
+
* debounce timer, then delegates to backend.ts's serialized + coalescing restartBackend so
|
|
2580
|
+
* concurrent triggers can never double-spawn onto the contended port. */
|
|
2581
|
+
function doRestart(): Promise<void> {
|
|
2582
|
+
pendingBackendRestart = false;
|
|
2583
|
+
if (backendRestartTimer) { clearTimeout(backendRestartTimer); backendRestartTimer = null; }
|
|
2584
|
+
return restartBackend(backendPort);
|
|
2585
|
+
}
|
|
2586
|
+
|
|
2587
|
+
/** True while any surface is mid-turn. Dashboard chat sets agentQueryActive; WhatsApp/Alexa
|
|
2588
|
+
* turns instead set the harness conv.busy (they don't touch agentQueryActive), so we must
|
|
2589
|
+
* check both — otherwise an agent editing the backend over a channel would get the backend
|
|
2590
|
+
* restarted out from under it mid-turn. */
|
|
2591
|
+
const aTurnIsActive = () => agentQueryActive || anyConversationBusy();
|
|
2592
|
+
|
|
2589
2593
|
function scheduleBackendRestart(reason: string) {
|
|
2590
|
-
if (
|
|
2591
|
-
//
|
|
2594
|
+
if (aTurnIsActive()) {
|
|
2595
|
+
// A turn is working — don't restart now; flush at turn-complete (createSharedChatOnMessage)
|
|
2596
|
+
// or via the channel manager's own post-turn restart.
|
|
2592
2597
|
pendingBackendRestart = true;
|
|
2593
2598
|
return;
|
|
2594
2599
|
}
|
|
2595
|
-
// Skip if a stop/restart is already in progress (
|
|
2600
|
+
// Skip if a stop/restart is already in progress (that restart owns the spawn).
|
|
2596
2601
|
if (isBackendStopping()) return;
|
|
2597
2602
|
if (backendRestartTimer) clearTimeout(backendRestartTimer);
|
|
2598
|
-
backendRestartTimer = setTimeout(
|
|
2599
|
-
|
|
2603
|
+
backendRestartTimer = setTimeout(() => {
|
|
2604
|
+
backendRestartTimer = null;
|
|
2605
|
+
// Re-check at fire time: a turn may have started during the 1s debounce window.
|
|
2606
|
+
if (aTurnIsActive()) { pendingBackendRestart = true; return; }
|
|
2607
|
+
if (isBackendStopping()) return;
|
|
2600
2608
|
log.info(`[watcher] ${reason} — restarting backend...`);
|
|
2601
|
-
|
|
2602
|
-
await stopBackend();
|
|
2603
|
-
spawnBackend(backendPort);
|
|
2609
|
+
void doRestart();
|
|
2604
2610
|
}, 1000);
|
|
2605
2611
|
}
|
|
2606
2612
|
|
|
@@ -2610,12 +2616,22 @@ mint();
|
|
|
2610
2616
|
scheduleBackendRestart(`Backend file changed: ${filename}`);
|
|
2611
2617
|
});
|
|
2612
2618
|
|
|
2613
|
-
// Watch workspace root for .env
|
|
2619
|
+
// Watch workspace root for .env, dependency, and .restart/.update changes
|
|
2614
2620
|
const workspaceWatcher = fs.watch(workspaceDir, (_event, filename) => {
|
|
2615
2621
|
if (!filename) return;
|
|
2616
2622
|
if (filename === '.env') {
|
|
2617
2623
|
scheduleBackendRestart('.env changed');
|
|
2618
2624
|
}
|
|
2625
|
+
if (filename === 'package.json' || filename === 'package-lock.json') {
|
|
2626
|
+
// The agent ran `npm install` to add/fix a backend dependency. Neither watcher otherwise
|
|
2627
|
+
// covers workspace-root deps (backendWatcher only watches backend/; node_modules is huge
|
|
2628
|
+
// and intentionally unwatched). Without this, an install done to fix an ENOENT crash — where
|
|
2629
|
+
// the import already exists so no Write tool fires and usedFileTools stays false — never
|
|
2630
|
+
// restarts the backend, leaving it broken until some unrelated edit. npm install runs inside
|
|
2631
|
+
// the agent's turn, so this defers (like every trigger) and lands at turn-complete, after
|
|
2632
|
+
// the install has fully written package.json + node_modules.
|
|
2633
|
+
scheduleBackendRestart(`workspace dependencies changed (${filename})`);
|
|
2634
|
+
}
|
|
2619
2635
|
if (filename === '.restart') {
|
|
2620
2636
|
// Consume the trigger file
|
|
2621
2637
|
try { fs.unlinkSync(path.join(workspaceDir, '.restart')); } catch {}
|