bloby-bot 0.53.2 → 0.53.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/supervisor/backend.ts +65 -11
- package/supervisor/bloby-agent.ts +6 -0
- package/supervisor/harnesses/claude.ts +7 -0
- package/supervisor/harnesses/codex.ts +14 -0
- package/supervisor/harnesses/pi/index.ts +7 -0
- package/supervisor/harnesses/pi/session.ts +14 -2
- package/supervisor/harnesses/types.ts +2 -0
- package/supervisor/index.ts +144 -28
- package/supervisor/public/what-happened.mp4 +0 -0
- package/supervisor/public/what-happened.webm +0 -0
package/package.json
CHANGED
package/supervisor/backend.ts
CHANGED
|
@@ -8,6 +8,10 @@ let child: ChildProcess | null = null;
|
|
|
8
8
|
let restarts = 0;
|
|
9
9
|
let lastSpawnTime = 0;
|
|
10
10
|
let intentionallyStopped = false;
|
|
11
|
+
// True once the backend has crash-looped past MAX_RESTARTS and given up — i.e. it's down and
|
|
12
|
+
// will NOT come back without the user fixing the code. The supervisor shows the "backend down"
|
|
13
|
+
// interstitial in this state. Cleared on every spawn attempt (a deliberate restart is "trying again").
|
|
14
|
+
let gaveUp = false;
|
|
11
15
|
const MAX_RESTARTS = 3;
|
|
12
16
|
const STABLE_THRESHOLD = 30_000; // 30s — if backend ran this long, it wasn't a crash loop
|
|
13
17
|
|
|
@@ -39,6 +43,7 @@ export function spawnBackend(port: number): ChildProcess {
|
|
|
39
43
|
const backendPath = path.join(WORKSPACE_DIR, 'backend', 'index.ts');
|
|
40
44
|
lastSpawnTime = Date.now();
|
|
41
45
|
intentionallyStopped = false;
|
|
46
|
+
gaveUp = false;
|
|
42
47
|
|
|
43
48
|
// Clear log file on each restart — only keeps current run
|
|
44
49
|
try { fs.writeFileSync(LOG_FILE, ''); } catch {}
|
|
@@ -106,6 +111,7 @@ export function spawnBackend(port: number): ChildProcess {
|
|
|
106
111
|
log.info(`Restarting backend (${restarts}/${MAX_RESTARTS}, delay ${delay}ms)...`);
|
|
107
112
|
setTimeout(() => spawnBackend(port), delay);
|
|
108
113
|
} else {
|
|
114
|
+
gaveUp = true;
|
|
109
115
|
log.error('Backend failed too many times. Use Bloby chat to debug.');
|
|
110
116
|
}
|
|
111
117
|
});
|
|
@@ -131,27 +137,75 @@ export function stopBackend(): Promise<void> {
|
|
|
131
137
|
const dying = child;
|
|
132
138
|
child = null;
|
|
133
139
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
140
|
+
const promise = new Promise<void>((resolve) => {
|
|
141
|
+
let killTimer: ReturnType<typeof setTimeout> | null = null;
|
|
142
|
+
let finished = false;
|
|
143
|
+
const done = () => {
|
|
144
|
+
if (finished) return; // exit + SIGKILL paths can both fire; run once
|
|
145
|
+
finished = true;
|
|
146
|
+
if (killTimer) clearTimeout(killTimer);
|
|
147
|
+
// Only release the shared guard if it still points at THIS stop. A later stopBackend()
|
|
148
|
+
// may already have installed its own promise; the 3s safety timer (or a late exit) must
|
|
149
|
+
// never null a *different* stop's guard — that would make isBackendStopping() lie and let
|
|
150
|
+
// a concurrent spawn race the in-flight kill for the port.
|
|
151
|
+
if (stopPromise === promise) stopPromise = null;
|
|
137
152
|
resolve();
|
|
138
|
-
}
|
|
153
|
+
};
|
|
154
|
+
dying.once('exit', done);
|
|
139
155
|
dying.kill();
|
|
140
|
-
// Safety: force kill after 3s if SIGTERM doesn't
|
|
141
|
-
setTimeout(() => {
|
|
142
|
-
try { dying.kill('SIGKILL'); } catch {}
|
|
143
|
-
stopPromise = null;
|
|
144
|
-
resolve();
|
|
145
|
-
}, 3000);
|
|
156
|
+
// Safety: force kill after 3s if SIGTERM doesn't land.
|
|
157
|
+
killTimer = setTimeout(() => { try { dying.kill('SIGKILL'); } catch {} done(); }, 3000);
|
|
146
158
|
});
|
|
159
|
+
stopPromise = promise;
|
|
147
160
|
|
|
148
|
-
return
|
|
161
|
+
return promise;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
let restartInFlight: Promise<void> | null = null;
|
|
165
|
+
let rerunRequested = false;
|
|
166
|
+
|
|
167
|
+
/**
 * Serialized + coalescing backend restart — the single funnel for every deliberate restart
 * (file watcher, turn-complete, scheduler pulse, channel manager).
 *
 * Concurrent callers share one in-flight restart. A request that arrives while a restart is
 * running triggers exactly one more stop→spawn cycle afterward, so the final backend was
 * spawned after the latest request. This removes the double-spawn-onto-contended-port race of
 * independent stopBackend().then(spawn) chains.
 *
 * @param port Port handed to spawnBackend() for each spawn.
 * @returns A promise that settles when the (possibly repeated) stop→spawn cycle has finished.
 *          It rejects if stopping or spawning throws; callers that fire-and-forget must be
 *          prepared for that.
 */
export function restartBackend(port: number): Promise<void> {
  if (restartInFlight) {
    // A cycle is already running: ask it to loop once more and share its promise.
    rerunRequested = true;
    return restartInFlight;
  }

  // Set if the async body finishes before we get the chance to publish its promise
  // (only possible when the first statement throws synchronously).
  let settled = false;
  const run = (async () => {
    try {
      do {
        rerunRequested = false;
        resetBackendRestarts();
        await stopBackend();
        spawnBackend(port);
      } while (rerunRequested);
    } finally {
      // Release the guard in the SAME synchronous step as the final rerunRequested check.
      // Clearing it from a chained .finally() runs at least one microtask later; a caller
      // landing in that gap would set rerunRequested and be handed the already-finishing
      // promise, so its restart request would be silently dropped.
      settled = true;
      restartInFlight = null;
    }
  })();

  // Never publish a promise whose body has already finished — it would wedge the guard.
  if (!settled) restartInFlight = run;
  return run;
}
|
|
150
187
|
|
|
151
188
|
/** Reports whether a backend child process is currently tracked and has not exited. */
export function isBackendAlive(): boolean {
  if (child === null) return false;
  // A null exitCode means the tracked process has not exited yet.
  return child.exitCode === null;
}
|
|
154
191
|
|
|
192
|
+
/**
 * Reports whether the backend crash-looped past MAX_RESTARTS and the supervisor stopped
 * trying — i.e. it is down and will not return without a code fix. The supervisor uses this
 * to decide whether to show its "backend down" interstitial.
 */
export function isBackendDead(): boolean {
  const crashLoopGaveUp = gaveUp;
  return crashLoopGaveUp;
}
|
|
197
|
+
|
|
198
|
+
/**
 * Read the tail of the backend log for the "copy logs" debug helper.
 *
 * @param maxLines Maximum number of trailing log lines to return (default 100). Zero,
 *                 negative, or NaN yields an empty string.
 * @returns The last `maxLines` lines joined with '\n' and trimmed, or '' when the log
 *          cannot be read (best-effort: a missing or unreadable file is not an error here).
 */
export function readBackendLogTail(maxLines = 100): string {
  // slice(-0) is slice(0) and would return the WHOLE log; a negative count would drop
  // leading lines instead of tailing. Treat every non-positive request as "no lines".
  if (!(maxLines > 0)) return '';
  try {
    // Strip trailing newlines before splitting: otherwise the empty element after the final
    // '\n' is counted as a line and the caller receives one line fewer than requested.
    const text = fs.readFileSync(LOG_FILE, 'utf-8').trimEnd();
    return text.split('\n').slice(-maxLines).join('\n').trim();
  } catch {
    // Deliberately swallowed: the helper is purely diagnostic.
    return '';
  }
}
|
|
208
|
+
|
|
155
209
|
/** Reports whether a stopBackend() call is still in flight (its guard promise is set). */
export function isBackendStopping(): boolean {
  if (stopPromise === null) return false;
  return true;
}
|
|
@@ -86,6 +86,12 @@ export function isConversationBusy(conversationId: string): boolean {
|
|
|
86
86
|
return Object.values(HARNESSES).some((h) => h.isConversationBusy(conversationId));
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
+
/**
 * Reports whether any conversation in any registered harness is mid-turn. The supervisor
 * uses it to defer backend restarts during channel/Alexa turns, which don't set the
 * dashboard's agentQueryActive flag.
 */
export function anyConversationBusy(): boolean {
  for (const harness of Object.values(HARNESSES)) {
    if (harness.anyConversationBusy()) return true;
  }
  return false;
}
|
|
94
|
+
|
|
89
95
|
export async function stopSubAgentTask(conversationId: string, taskId: string): Promise<void> {
|
|
90
96
|
for (const h of Object.values(HARNESSES)) {
|
|
91
97
|
if (h.hasConversation(conversationId)) {
|
|
@@ -538,6 +538,13 @@ export function isConversationBusy(conversationId: string): boolean {
|
|
|
538
538
|
return liveConversations.get(conversationId)?.busy || false;
|
|
539
539
|
}
|
|
540
540
|
|
|
541
|
+
/**
 * Reports whether at least one live conversation in this harness is mid-turn. The supervisor
 * uses it to defer backend restarts during channel/Alexa turns (which don't set the
 * dashboard's agentQueryActive).
 */
export function anyConversationBusy(): boolean {
  return Array.from(liveConversations.values()).some((conv) => Boolean(conv.busy));
}
|
|
547
|
+
|
|
541
548
|
/** Stop a specific background sub-agent task */
|
|
542
549
|
export async function stopSubAgentTask(conversationId: string, taskId: string): Promise<void> {
|
|
543
550
|
const conv = liveConversations.get(conversationId);
|
|
@@ -365,7 +365,14 @@ async function startTurn(conv: CodexConversation, content: string, savedFiles?:
|
|
|
365
365
|
await conv.rpc.request('turn/start', params);
|
|
366
366
|
} catch (err: any) {
|
|
367
367
|
conv.busy = false;
|
|
368
|
+
conv.currentTurnId = null;
|
|
368
369
|
conv.onMessage('bot:error', { conversationId: conv.id, error: `turn/start failed: ${err.message}` });
|
|
370
|
+
// turn/start produced no turn, so no turn/completed will arrive to clear the supervisor's
|
|
371
|
+
// agentQueryActive (set on bot:typing above). Left as-is, that wedges true forever:
|
|
372
|
+
// backend auto-heal is deferred indefinitely and chat is stuck showing "typing". Tear the
|
|
373
|
+
// conversation down so bot:conversation-ended fires (which, unlike bot:turn-complete, does
|
|
374
|
+
// NOT trigger a backend restart) — the next user message cold-starts a fresh thread.
|
|
375
|
+
teardownConversation(conv.id);
|
|
369
376
|
}
|
|
370
377
|
}
|
|
371
378
|
|
|
@@ -633,6 +640,13 @@ export function isConversationBusy(conversationId: string): boolean {
|
|
|
633
640
|
return conversations.get(conversationId)?.busy ?? false;
|
|
634
641
|
}
|
|
635
642
|
|
|
643
|
+
/**
 * Reports whether at least one live conversation in this harness is mid-turn. The supervisor
 * uses it to defer backend restarts during channel/Alexa turns (which don't set the
 * dashboard's agentQueryActive).
 */
export function anyConversationBusy(): boolean {
  const cursor = conversations.values();
  for (let step = cursor.next(); !step.done; step = cursor.next()) {
    if (step.value.busy) return true;
  }
  return false;
}
|
|
649
|
+
|
|
636
650
|
export async function startConversation(
|
|
637
651
|
conversationId: string,
|
|
638
652
|
model: string,
|
|
@@ -320,6 +320,13 @@ export function isConversationBusy(conversationId: string): boolean {
|
|
|
320
320
|
return liveConversations.get(conversationId)?.busy || false;
|
|
321
321
|
}
|
|
322
322
|
|
|
323
|
+
/**
 * Reports whether at least one live conversation in this harness is mid-turn. The supervisor
 * uses it to defer backend restarts during channel/Alexa turns (which don't set the
 * dashboard's agentQueryActive).
 */
export function anyConversationBusy(): boolean {
  let midTurn = false;
  for (const conv of liveConversations.values()) {
    if (conv.busy) {
      midTurn = true;
      break;
    }
  }
  return midTurn;
}
|
|
329
|
+
|
|
323
330
|
/** Pi has no sub-agents yet; provided for interface compatibility. */
|
|
324
331
|
export async function stopSubAgentTask(_conversationId: string, _taskId: string): Promise<void> {
|
|
325
332
|
// no-op for Phase 1
|
|
@@ -220,8 +220,14 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
220
220
|
if (toolUses.length === 0 && !pendingInterleave) break;
|
|
221
221
|
}
|
|
222
222
|
|
|
223
|
-
|
|
224
|
-
|
|
223
|
+
// Emit text_end only on a clean turn (don't persist a half-baked answer from an errored
|
|
224
|
+
// turn). But ALWAYS emit turn_complete on a non-aborted turn — including the errored path
|
|
225
|
+
// — so the supervisor clears agentQueryActive (set on turn_started). Skipping it on error
|
|
226
|
+
// wedged the flag true: backend auto-heal stayed deferred and chat stuck in "typing" until
|
|
227
|
+
// the next successful turn. The 'error' event was already emitted by runOneRound, so the
|
|
228
|
+
// user still sees the failure. Aborted turns are torn down via bot:conversation-ended.
|
|
229
|
+
if (!init.abortController.signal.aborted) {
|
|
230
|
+
if (!turnErrored && accumulatedText) {
|
|
225
231
|
init.onEvent({ type: 'text_end', text: accumulatedText });
|
|
226
232
|
}
|
|
227
233
|
const usedFileTools = Array.from(usedTools).some((t) => FILE_TOOL_NAMES.has(t));
|
|
@@ -238,6 +244,12 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
238
244
|
} catch (err: any) {
|
|
239
245
|
log.warn(`[pi/session] Turn failed: ${err?.message || err}`);
|
|
240
246
|
init.onEvent({ type: 'error', error: err?.message || String(err) });
|
|
247
|
+
// A thrown turn emitted no turn_complete either — clear agentQueryActive so auto-heal
|
|
248
|
+
// and chat aren't wedged. Skip when aborting (teardown emits conversation-ended).
|
|
249
|
+
// usedFileTools=false is the safe default (it only governs whether to auto-restart now).
|
|
250
|
+
if (!init.abortController.signal.aborted) {
|
|
251
|
+
init.onEvent({ type: 'turn_complete', usedFileTools: false });
|
|
252
|
+
}
|
|
241
253
|
}
|
|
242
254
|
}
|
|
243
255
|
},
|
|
@@ -57,6 +57,8 @@ export interface Harness {
|
|
|
57
57
|
endConversation(conversationId: string): void;
|
|
58
58
|
endAllConversations(): void;
|
|
59
59
|
isConversationBusy(conversationId: string): boolean;
|
|
60
|
+
/** True if ANY conversation in this harness is mid-turn (no id — used to defer backend restarts). */
|
|
61
|
+
anyConversationBusy(): boolean;
|
|
60
62
|
stopSubAgentTask(conversationId: string, taskId: string): Promise<void>;
|
|
61
63
|
warmUpForLiveConversation(
|
|
62
64
|
model: string,
|
package/supervisor/index.ts
CHANGED
|
@@ -11,12 +11,12 @@ import { log } from '../shared/logger.js';
|
|
|
11
11
|
import { startTunnel, stopTunnel, isTunnelAlive, restartTunnel, startNamedTunnel, restartNamedTunnel } from './tunnel.js';
|
|
12
12
|
import { createWorkerApp } from '../worker/index.js';
|
|
13
13
|
import { closeDb, getSession, getSetting } from '../worker/db.js';
|
|
14
|
-
import { spawnBackend, stopBackend, getBackendPort, isBackendAlive, isBackendStopping,
|
|
14
|
+
import { spawnBackend, stopBackend, restartBackend, getBackendPort, isBackendAlive, isBackendStopping, isBackendDead, readBackendLogTail, setBackendEnv } from './backend.js';
|
|
15
15
|
import { handleAgentQuery, type AgentQueryRequest } from './agent-api.js';
|
|
16
16
|
import { updateTunnelUrl, startHeartbeat, stopHeartbeat, disconnect } from '../shared/relay.js';
|
|
17
17
|
import {
|
|
18
18
|
startConversation, hasConversation, endConversation, endAllConversations,
|
|
19
|
-
isConversationBusy, stopSubAgentTask,
|
|
19
|
+
isConversationBusy, anyConversationBusy, stopSubAgentTask,
|
|
20
20
|
startBlobyAgentQuery, stopBlobyAgentQuery,
|
|
21
21
|
warmUpForLiveConversation,
|
|
22
22
|
type RecentMessage,
|
|
@@ -69,6 +69,8 @@ const PLATFORM_ASSETS = new Set([
|
|
|
69
69
|
'/pi-logo.svg',
|
|
70
70
|
'/codex.svg',
|
|
71
71
|
'/manifest.json',
|
|
72
|
+
'/what-happened.webm',
|
|
73
|
+
'/what-happened.mp4',
|
|
72
74
|
]);
|
|
73
75
|
|
|
74
76
|
// Directory-prefix platform assets — anything under these is served from supervisor/public/.
|
|
@@ -250,6 +252,77 @@ const RECOVERING_HTML = `<!DOCTYPE html><html style="background:#222122"><head><
|
|
|
250
252
|
</div><script>setTimeout(function(){location.reload()},3000)</script>
|
|
251
253
|
<script src="/bloby/widget.js"></script></body></html>`;
|
|
252
254
|
|
|
255
|
+
/**
 * Build the interstitial HTML the supervisor (not the workspace) serves when the workspace
 * backend has crash-looped and given up. It replaces proxying the dashboard SPA to Vite —
 * which would 503 on every /app/api call and, for the common workspace-lock template, misread
 * "no backend" as "no password set" and show the lock-setup screen.
 *
 * The page embeds the Bloby chat widget so the user can ask the agent to fix things inline,
 * a "copy logs" button (the log tail is baked in at render time), and a poll of
 * /__bloby/backend-status that reloads into the real dashboard once the backend is alive.
 *
 * @param logTail Backend log text to embed; an empty value is replaced by a placeholder.
 * @returns A complete HTML document as a string.
 */
function backendDownPage(logTail: string): string {
  // Embed logs as a JS string literal; escape `<` so a stray `</script>` in the logs can't break out.
  const logsLiteral = JSON.stringify(logTail || '(no backend logs were captured)').replace(/</g, '\\u003c');
  // In the inline script, document.execCommand('copy') reports failure by RETURNING false
  // rather than throwing, so the clipboard fallback checks the return value before
  // announcing success.
  return `<!DOCTYPE html>
<html lang="en"><head>
<meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>Backend down · Bloby</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}
body{font-family:system-ui,-apple-system,'Segoe UI',sans-serif;background:#0a0a0b;color:#e4e4e7;display:flex;align-items:center;justify-content:center;min-height:100dvh;padding:1.5rem;overflow:hidden}
.c{text-align:center;max-width:480px;width:100%;animation:fade-up .6s ease-out both}
.video-wrap{position:relative;width:200px;height:200px;margin:0 auto 1.4rem;display:flex;align-items:center;justify-content:center}
.video-wrap::before{content:'';position:absolute;inset:-20px;background:radial-gradient(circle,rgba(1,102,255,0.18) 0%,transparent 60%);filter:blur(20px);animation:glow 3s ease-in-out infinite}
.video-wrap video{position:relative;width:100%;height:100%;object-fit:contain;pointer-events:none;border-radius:50%}
h1{font-size:1.55rem;font-weight:700;margin-bottom:.6rem;background:linear-gradient(135deg,#0166FF,#009AFE,#4AEEFF);-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text}
p{color:#a1a1aa;line-height:1.6;margin-bottom:.5rem;font-size:.95rem}
.lead{color:#e4e4e7;font-size:1rem}
.actions{margin-top:1.3rem}
button{font:inherit;cursor:pointer;border-radius:10px;padding:.65rem 1.2rem;font-size:.9rem;font-weight:600;border:none;background:linear-gradient(135deg,#0166FF,#0069FE);color:#fff;transition:filter .15s}
button:hover{filter:brightness(1.12)}
.sub{font-size:.82rem;color:#71717a;display:inline-flex;align-items:center;gap:.5rem;background:#18181b;border:1px solid #27272a;border-radius:9999px;padding:.35rem .9rem;margin-top:1.1rem}
.sub .dot{width:8px;height:8px;border-radius:50%;background:linear-gradient(135deg,#0166FF,#009AFE);box-shadow:0 0 8px rgba(1,102,255,.6);animation:pulse 1.6s ease-in-out infinite}
.badge{display:block;font-size:.7rem;color:#52525b;margin-top:1.3rem}
@keyframes pulse{0%,100%{opacity:1;transform:scale(1)}50%{opacity:.45;transform:scale(.85)}}
@keyframes glow{0%,100%{opacity:.55;transform:scale(1)}50%{opacity:1;transform:scale(1.08)}}
@keyframes fade-up{0%{opacity:0;transform:translateY(12px)}100%{opacity:1;transform:translateY(0)}}
</style></head>
<body><div class="c">
<div class="video-wrap"><video autoplay loop muted playsinline>
<source src="/what-happened.webm" type="video/webm">
<source src="/what-happened.mp4" type="video/mp4">
</video></div>
<h1>Your app's backend is down</h1>
<p class="lead">The workspace server crashed and couldn't restart on its own.</p>
<p>Ask your agent to fix it — the chat is right here in the corner. Tap below to copy the logs so it can debug faster.</p>
<div class="actions"><button id="copyBtn">Copy logs for your agent</button></div>
<div class="sub"><span class="dot"></span><span id="statusText">Watching for recovery…</span></div>
<span class="badge">Powered by Bloby</span>
</div>
<script>
(function(){
var LOGS = ${logsLiteral};
var btn = document.getElementById('copyBtn'), statusEl = document.getElementById('statusText');
btn.addEventListener('click', function(){
var text = 'My workspace backend crashed and will not start. Find and fix the root cause. Last backend logs:\\n\\n' + LOGS;
function ok(){ btn.textContent = '✓ Copied — paste it to your agent'; setTimeout(function(){ btn.textContent = 'Copy logs for your agent'; }, 2600); }
function fail(){ btn.textContent='Copy failed — open the logs manually'; }
function fallback(){ var ta=document.createElement('textarea'); ta.value=text; ta.style.position='fixed'; ta.style.opacity='0'; document.body.appendChild(ta); ta.select(); try{ if (document.execCommand('copy')) { ok(); } else { fail(); } }catch(e){ fail(); } document.body.removeChild(ta); }
if (navigator.clipboard && navigator.clipboard.writeText) { navigator.clipboard.writeText(text).then(ok).catch(fallback); } else { fallback(); }
});
var attempt = 0;
function retry(){
attempt++;
fetch('/__bloby/backend-status', { cache:'no-store' })
.then(function(r){ return r.json(); })
.then(function(s){ if (s && s.alive) { location.reload(); } else { schedule(); } })
.catch(schedule);
}
function schedule(){ statusEl.textContent = 'Watching for recovery… (checked ' + attempt + 'x)'; setTimeout(retry, Math.min(4000, 1500 + attempt*250)); }
setTimeout(retry, 2500);
})();
</script>
<script src="/bloby/widget.js"></script>
</body></html>`;
}
|
|
325
|
+
|
|
253
326
|
/** Kill any stale process holding a port. Ensures clean startup after crashes/updates. */
|
|
254
327
|
function killPort(port: number): void {
|
|
255
328
|
try {
|
|
@@ -464,6 +537,15 @@ export async function startSupervisor() {
|
|
|
464
537
|
return;
|
|
465
538
|
}
|
|
466
539
|
|
|
540
|
+
// Backend liveness for the "backend down" interstitial's recovery poll. Supervisor-served
|
|
541
|
+
// (not proxied) so it answers even when the workspace backend is dead, and independent of
|
|
542
|
+
// whatever routes the user's backend happens to define.
|
|
543
|
+
if (req.url === '/__bloby/backend-status') {
|
|
544
|
+
res.writeHead(200, { 'Content-Type': 'application/json', 'Cache-Control': 'no-store' });
|
|
545
|
+
res.end(JSON.stringify({ alive: isBackendAlive(), dead: isBackendDead() }));
|
|
546
|
+
return;
|
|
547
|
+
}
|
|
548
|
+
|
|
467
549
|
// App API routes → proxy to user's backend server
|
|
468
550
|
if (req.url?.startsWith('/app/api')) {
|
|
469
551
|
const backendPath = req.url.replace(/^\/app/, '');
|
|
@@ -1271,8 +1353,7 @@ mint();
|
|
|
1271
1353
|
const result = await handleAgentQuery(parsed);
|
|
1272
1354
|
|
|
1273
1355
|
if (result.usedFileTools) {
|
|
1274
|
-
|
|
1275
|
-
stopBackend().then(() => spawnBackend(backendPort));
|
|
1356
|
+
void doRestart();
|
|
1276
1357
|
broadcastBloby('app:hmr-update', {});
|
|
1277
1358
|
}
|
|
1278
1359
|
|
|
@@ -1697,6 +1778,24 @@ mint();
|
|
|
1697
1778
|
} catch { /* fall through to Vite */ }
|
|
1698
1779
|
}
|
|
1699
1780
|
|
|
1781
|
+
// Workspace backend has crash-looped and given up → serve the "backend down" interstitial
|
|
1782
|
+
// for dashboard DOCUMENT navigations, instead of proxying to Vite (which serves the user's
|
|
1783
|
+
// SPA that then 503s on every /app/api call and, for the common workspace-lock template,
|
|
1784
|
+
// misreads the dead backend as "no password set" and shows the lock-setup screen). Scoped to
|
|
1785
|
+
// top-level navigations only (not assets/HMR/XHR) and only when the backend has truly given
|
|
1786
|
+
// up — never during a normal 1–2s restart. The chat PWA (/bloby/*) is served earlier and is
|
|
1787
|
+
// unaffected.
|
|
1788
|
+
const wantsHtml = req.method === 'GET' && (
|
|
1789
|
+
req.headers['sec-fetch-dest'] === 'document' ||
|
|
1790
|
+
req.headers['sec-fetch-mode'] === 'navigate' ||
|
|
1791
|
+
(!req.headers['sec-fetch-dest'] && String(req.headers['accept'] || '').includes('text/html'))
|
|
1792
|
+
);
|
|
1793
|
+
if (wantsHtml && isBackendDead()) {
|
|
1794
|
+
res.writeHead(503, { 'Content-Type': 'text/html', 'Cache-Control': 'no-store, no-cache, must-revalidate' });
|
|
1795
|
+
res.end(backendDownPage(readBackendLogTail(100)));
|
|
1796
|
+
return;
|
|
1797
|
+
}
|
|
1798
|
+
|
|
1700
1799
|
// Everything else → proxy to dashboard Vite dev server
|
|
1701
1800
|
console.log(`[supervisor] → dashboard Vite :${vitePorts.dashboard} | ${req.method} ${(req.url || '').split('?')[0]}`);
|
|
1702
1801
|
const proxy = http.request(
|
|
@@ -1931,11 +2030,8 @@ mint();
|
|
|
1931
2030
|
currentStreamBuffer = '';
|
|
1932
2031
|
|
|
1933
2032
|
if (eventData.usedFileTools || pendingBackendRestart) {
|
|
1934
|
-
log.info('[orchestrator] Restarting backend (file tools used)');
|
|
1935
|
-
|
|
1936
|
-
if (backendRestartTimer) { clearTimeout(backendRestartTimer); backendRestartTimer = null; }
|
|
1937
|
-
resetBackendRestarts();
|
|
1938
|
-
stopBackend().then(() => spawnBackend(backendPort));
|
|
2033
|
+
log.info('[orchestrator] Restarting backend (file tools used / pending watcher change)');
|
|
2034
|
+
void doRestart();
|
|
1939
2035
|
}
|
|
1940
2036
|
if (pendingUpdate) {
|
|
1941
2037
|
pendingUpdate = false;
|
|
@@ -2536,11 +2632,7 @@ mint();
|
|
|
2536
2632
|
startScheduler({
|
|
2537
2633
|
broadcastBloby,
|
|
2538
2634
|
workerApi,
|
|
2539
|
-
restartBackend:
|
|
2540
|
-
resetBackendRestarts();
|
|
2541
|
-
await stopBackend();
|
|
2542
|
-
spawnBackend(backendPort);
|
|
2543
|
-
},
|
|
2635
|
+
restartBackend: () => doRestart(),
|
|
2544
2636
|
getModel: () => loadConfig().ai.model,
|
|
2545
2637
|
});
|
|
2546
2638
|
|
|
@@ -2548,11 +2640,7 @@ mint();
|
|
|
2548
2640
|
const channelManager = new ChannelManager({
|
|
2549
2641
|
broadcastBloby,
|
|
2550
2642
|
workerApi,
|
|
2551
|
-
restartBackend:
|
|
2552
|
-
resetBackendRestarts();
|
|
2553
|
-
await stopBackend();
|
|
2554
|
-
spawnBackend(backendPort);
|
|
2555
|
-
},
|
|
2643
|
+
restartBackend: () => doRestart(),
|
|
2556
2644
|
getModel: () => loadConfig().ai.model,
|
|
2557
2645
|
});
|
|
2558
2646
|
|
|
@@ -2586,21 +2674,39 @@ mint();
|
|
|
2586
2674
|
const backendDir = path.join(workspaceDir, 'backend');
|
|
2587
2675
|
let backendRestartTimer: ReturnType<typeof setTimeout> | null = null;
|
|
2588
2676
|
|
|
2677
|
+
/**
 * Single entry point for every DELIBERATE backend restart (file watcher, turn-complete,
 * agent-api one-shot, scheduler pulse, channel manager). It drops the deferred-restart flag,
 * cancels any pending debounce timer, and hands off to backend.ts's serialized + coalescing
 * restartBackend so concurrent triggers can never double-spawn onto the contended port.
 *
 * @returns The promise returned by restartBackend() for the configured backend port.
 */
function doRestart(): Promise<void> {
  const pendingTimer = backendRestartTimer;
  if (pendingTimer) {
    // A debounced watcher restart is queued — this restart supersedes it.
    clearTimeout(pendingTimer);
    backendRestartTimer = null;
  }
  pendingBackendRestart = false;
  return restartBackend(backendPort);
}
|
|
2686
|
+
|
|
2687
|
+
/**
 * Reports whether any surface is mid-turn. Dashboard chat sets agentQueryActive, whereas
 * WhatsApp/Alexa turns set the harness conv.busy instead (they don't touch
 * agentQueryActive), so both are consulted — otherwise an agent editing the backend over a
 * channel would have the backend restarted out from under it mid-turn.
 */
const aTurnIsActive = () => {
  if (agentQueryActive) return agentQueryActive;
  return anyConversationBusy();
};
|
|
2692
|
+
|
|
2589
2693
|
function scheduleBackendRestart(reason: string) {
|
|
2590
|
-
if (
|
|
2591
|
-
//
|
|
2694
|
+
if (aTurnIsActive()) {
|
|
2695
|
+
// A turn is working — don't restart now; flush at turn-complete (createSharedChatOnMessage)
|
|
2696
|
+
// or via the channel manager's own post-turn restart.
|
|
2592
2697
|
pendingBackendRestart = true;
|
|
2593
2698
|
return;
|
|
2594
2699
|
}
|
|
2595
|
-
// Skip if a stop/restart is already in progress (
|
|
2700
|
+
// Skip if a stop/restart is already in progress (that restart owns the spawn).
|
|
2596
2701
|
if (isBackendStopping()) return;
|
|
2597
2702
|
if (backendRestartTimer) clearTimeout(backendRestartTimer);
|
|
2598
|
-
backendRestartTimer = setTimeout(
|
|
2599
|
-
|
|
2703
|
+
backendRestartTimer = setTimeout(() => {
|
|
2704
|
+
backendRestartTimer = null;
|
|
2705
|
+
// Re-check at fire time: a turn may have started during the 1s debounce window.
|
|
2706
|
+
if (aTurnIsActive()) { pendingBackendRestart = true; return; }
|
|
2707
|
+
if (isBackendStopping()) return;
|
|
2600
2708
|
log.info(`[watcher] ${reason} — restarting backend...`);
|
|
2601
|
-
|
|
2602
|
-
await stopBackend();
|
|
2603
|
-
spawnBackend(backendPort);
|
|
2709
|
+
void doRestart();
|
|
2604
2710
|
}, 1000);
|
|
2605
2711
|
}
|
|
2606
2712
|
|
|
@@ -2610,12 +2716,22 @@ mint();
|
|
|
2610
2716
|
scheduleBackendRestart(`Backend file changed: ${filename}`);
|
|
2611
2717
|
});
|
|
2612
2718
|
|
|
2613
|
-
// Watch workspace root for .env
|
|
2719
|
+
// Watch workspace root for .env, dependency, and .restart/.update changes
|
|
2614
2720
|
const workspaceWatcher = fs.watch(workspaceDir, (_event, filename) => {
|
|
2615
2721
|
if (!filename) return;
|
|
2616
2722
|
if (filename === '.env') {
|
|
2617
2723
|
scheduleBackendRestart('.env changed');
|
|
2618
2724
|
}
|
|
2725
|
+
if (filename === 'package.json' || filename === 'package-lock.json') {
|
|
2726
|
+
// The agent ran `npm install` to add/fix a backend dependency. Neither watcher otherwise
|
|
2727
|
+
// covers workspace-root deps (backendWatcher only watches backend/; node_modules is huge
|
|
2728
|
+
// and intentionally unwatched). Without this, an install done to fix an ENOENT crash — where
|
|
2729
|
+
// the import already exists so no Write tool fires and usedFileTools stays false — never
|
|
2730
|
+
// restarts the backend, leaving it broken until some unrelated edit. npm install runs inside
|
|
2731
|
+
// the agent's turn, so this defers (like every trigger) and lands at turn-complete, after
|
|
2732
|
+
// the install has fully written package.json + node_modules.
|
|
2733
|
+
scheduleBackendRestart(`workspace dependencies changed (${filename})`);
|
|
2734
|
+
}
|
|
2619
2735
|
if (filename === '.restart') {
|
|
2620
2736
|
// Consume the trigger file
|
|
2621
2737
|
try { fs.unlinkSync(path.join(workspaceDir, '.restart')); } catch {}
|
|
Binary file
|
|
Binary file
|