bloby-bot 0.65.3 → 0.66.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/supervisor/agents/prompts/coder.txt +3 -1
- package/supervisor/backend.ts +46 -3
- package/supervisor/bloby-agent.ts +4 -0
- package/supervisor/channels/manager.ts +7 -0
- package/supervisor/frontend-log.ts +80 -0
- package/supervisor/harnesses/claude.ts +7 -0
- package/supervisor/harnesses/codex.ts +6 -0
- package/supervisor/harnesses/pi/index.ts +6 -0
- package/supervisor/harnesses/types.ts +4 -0
- package/supervisor/index.ts +308 -36
- package/supervisor/scheduler.ts +4 -1
- package/supervisor/vite-dev.ts +30 -2
- package/supervisor/workspace-guard.js +89 -3
- package/worker/prompts/bloby-system-prompt-codex.txt +13 -8
- package/worker/prompts/bloby-system-prompt-pi.txt +13 -8
- package/worker/prompts/bloby-system-prompt.txt +13 -8
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bloby-bot",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.66.0",
|
|
4
4
|
"releaseNotes": [
|
|
5
5
|
"1. Fix: image (and audio) attachments now render in chat again — /api/files is fetched with the auth token instead of a raw <img> src that 401'd after the endpoint hardening",
|
|
6
6
|
"2. Affects chat thumbnails, the image lightbox, voice-note playback, and agent image cards",
|
|
@@ -105,7 +105,9 @@ The supervisor manages the backend process:
|
|
|
105
105
|
- Editing `.ts`, `.js`, or `.json` files in `backend/` → auto-restart
|
|
106
106
|
- Editing `.env` → auto-restart
|
|
107
107
|
- After your turn ends, if you used Write or Edit tools → auto-restart
|
|
108
|
-
- The backend does NOT restart mid-turn — edits are batched
|
|
108
|
+
- The backend does NOT auto-restart mid-turn — edits are batched (multi-file changes apply atomically)
|
|
109
|
+
- To restart and verify a fix WITHIN your turn (after edits are saved): `curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/restart-backend -d '{"wait":true}'` — it waits for the backend to be healthy and returns `{"healthy":...,"logs":"..."}`, so you can then curl the backend to confirm the fix
|
|
110
|
+
- Read backend logs: `curl -s http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/logs/backend?lines=200`
|
|
109
111
|
|
|
110
112
|
**NEVER** kill processes, run `bloby start`, or run `npm start` directly.
|
|
111
113
|
|
package/supervisor/backend.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { spawn, type ChildProcess } from 'child_process';
|
|
2
|
+
import http from 'http';
|
|
2
3
|
import fs from 'fs';
|
|
3
4
|
import path from 'path';
|
|
4
5
|
import { PKG_DIR, WORKSPACE_DIR } from '../shared/paths.js';
|
|
@@ -39,6 +40,10 @@ export function setBackendGiveUpHandler(fn: () => void): void {
|
|
|
39
40
|
}
|
|
40
41
|
|
|
41
42
|
const LOG_FILE = path.join(WORKSPACE_DIR, '.backend.log');
|
|
43
|
+
// Holds the LAST crashed run's output. spawnBackend truncates LOG_FILE on every (re)spawn, so an
|
|
44
|
+
// agent reading .backend.log right after an auto-restart would otherwise see only the fresh (often
|
|
45
|
+
// empty) run and lose the originating error. The crash exit handler copies LOG_FILE here first.
|
|
46
|
+
const LOG_FILE_PREV = LOG_FILE + '.prev';
|
|
42
47
|
|
|
43
48
|
export function getBackendPort(basePort: number): number {
|
|
44
49
|
return basePort + 4;
|
|
@@ -114,6 +119,11 @@ export function spawnBackend(port: number): ChildProcess {
|
|
|
114
119
|
// Supervisor called stopBackend() — don't auto-restart
|
|
115
120
|
if (intentionallyStopped) return;
|
|
116
121
|
|
|
122
|
+
// Preserve the just-crashed run's output before the next spawnBackend truncates LOG_FILE, so a
|
|
123
|
+
// post-bounce read (agent or interstitial) can still fetch the originating error via ?prev=1.
|
|
124
|
+
// Only crashes reach here (intentional stops returned above), so .prev always holds the last crash.
|
|
125
|
+
try { fs.copyFileSync(LOG_FILE, LOG_FILE_PREV); } catch {}
|
|
126
|
+
|
|
117
127
|
// Any unexpected exit (crash, SIGTERM, OOM, null code) — restart
|
|
118
128
|
log.warn(`Backend exited unexpectedly (code ${code})`);
|
|
119
129
|
// Track crashes in a rolling window (backstop for the 30s-reset crash-loop hole).
|
|
@@ -216,10 +226,12 @@ export function isBackendDead(): boolean {
|
|
|
216
226
|
return gaveUp;
|
|
217
227
|
}
|
|
218
228
|
|
|
219
|
-
/** Read the tail of the backend log (default 100 lines) for the "copy logs" debug helper
|
|
220
|
-
|
|
229
|
+
/** Read the tail of the backend log (default 100 lines) for the "copy logs" debug helper and the
|
|
230
|
+
* agent's GET /__bloby/control/logs/backend endpoint. Pass prev=true to read the last CRASHED run
|
|
231
|
+
* (.backend.log.prev) — useful right after an auto-restart, when the live log is a fresh run. */
|
|
232
|
+
export function readBackendLogTail(maxLines = 100, prev = false): string {
|
|
221
233
|
try {
|
|
222
|
-
const text = fs.readFileSync(LOG_FILE, 'utf-8');
|
|
234
|
+
const text = fs.readFileSync(prev ? LOG_FILE_PREV : LOG_FILE, 'utf-8');
|
|
223
235
|
const lines = text.split('\n');
|
|
224
236
|
return lines.slice(-maxLines).join('\n').trim();
|
|
225
237
|
} catch {
|
|
@@ -227,6 +239,37 @@ export function readBackendLogTail(maxLines = 100): string {
|
|
|
227
239
|
}
|
|
228
240
|
}
|
|
229
241
|
|
|
242
|
+
/**
 * Reports whether the most recent backend (re)spawn happened less than ~2 seconds ago.
 *
 * Callers use this to tell the agent that a near-empty log tail is simply a fresh run
 * starting up, not proof that no error occurred.
 *
 * @returns true when fewer than 2000 ms have elapsed since `lastSpawnTime`.
 */
export function backendJustSpawned(): boolean {
  const msSinceSpawn = Date.now() - lastSpawnTime;
  return msSinceSpawn < 2000;
}
|
|
247
|
+
|
|
248
|
+
/**
 * Resolve true as soon as the backend's HTTP port is ACCEPTING connections (any response — even a
 * 404 — means the port is bound and serving), false if it never comes up within timeoutMs or the
 * backend gives up first. This is the readiness signal the restart-and-verify endpoint returns to
 * the agent: isBackendAlive() only means the child process was spawned, not that it has bound its
 * port, so it reports "alive" during the startup window when requests still 503.
 *
 * The overall deadline is honoured: each attempt's socket timeout and each retry delay are clamped
 * to the time remaining, so the promise settles at (approximately) `timeoutMs` instead of up to
 * one full per-attempt timeout later.
 *
 * @param port       Loopback port the backend is expected to listen on.
 * @param timeoutMs  Total time budget for the probe, in milliseconds (default 15000).
 * @returns Promise resolving to true once any HTTP response is received, otherwise false.
 *          The promise never rejects.
 */
export function probeBackendReady(port: number, timeoutMs = 15000): Promise<boolean> {
  const deadline = Date.now() + timeoutMs;
  return new Promise((resolve) => {
    let settled = false;
    // Single exit point so a late event from an old request can never settle twice.
    const finish = (ready: boolean): void => {
      if (settled) return;
      settled = true;
      resolve(ready);
    };

    const attempt = (): void => {
      if (settled) return;
      if (gaveUp) return finish(false); // crash-looped past the limit — it's not coming up

      // Per-attempt guard: 'timeout' destroys the request, which may then also emit 'error';
      // only the first of the two is allowed to schedule the next step.
      let attemptOver = false;
      const retryOrFail = (): void => {
        if (attemptOver) return;
        attemptOver = true;
        const left = deadline - Date.now();
        if (left <= 0) return finish(false);
        setTimeout(attempt, Math.min(250, left));
      };

      // Never wait on a hung socket longer than the time we have left (minimum 1 ms so the
      // option stays a positive timeout).
      const socketTimeout = Math.max(1, Math.min(2000, deadline - Date.now()));
      const req = http.request(
        { host: '127.0.0.1', port, path: '/', method: 'GET', timeout: socketTimeout },
        (res) => {
          attemptOver = true; // a response arrived — ignore any later error on this request
          res.resume();
          finish(true); // any HTTP response = port is listening
        },
      );
      req.on('error', retryOrFail);
      req.on('timeout', () => {
        try { req.destroy(); } catch {}
        retryOrFail();
      });
      req.end();
    };

    attempt();
  });
}
|
|
272
|
+
|
|
230
273
|
export function isBackendStopping(): boolean {
|
|
231
274
|
return stopPromise !== null;
|
|
232
275
|
}
|
|
@@ -88,6 +88,10 @@ export function isConversationBusy(conversationId: string): boolean {
|
|
|
88
88
|
|
|
89
89
|
/** True if ANY harness reports a one-shot query in flight (delegates to each harness's
 *  anyOneShotActive()). Complements anyConversationBusy(), which only reflects conversations
 *  that are mid-turn. */
export function anyOneShotActive(): boolean {
  return Object.values(HARNESSES).some((h) => h.anyOneShotActive());
}

/** True if ANY conversation in ANY harness is mid-turn. Lets the supervisor defer backend
 *  restarts during channel/Alexa turns, which don't set the dashboard's agentQueryActive flag. */
export function anyConversationBusy(): boolean {
  return Object.values(HARNESSES).some((h) => h.anyConversationBusy());
}
|
|
@@ -54,6 +54,8 @@ interface ChannelManagerOpts {
|
|
|
54
54
|
workerApi: (path: string, method?: string, body?: any) => Promise<any>;
|
|
55
55
|
restartBackend: () => void;
|
|
56
56
|
getModel: () => string;
|
|
57
|
+
/** Fired after a channel turn ends — the supervisor uses it to flush a queued self-update. */
|
|
58
|
+
onTurnComplete?: () => void;
|
|
57
59
|
}
|
|
58
60
|
|
|
59
61
|
interface ActiveAgentQuery {
|
|
@@ -1063,6 +1065,7 @@ export class ChannelManager {
|
|
|
1063
1065
|
// the dashboard's typing indicator would stay on forever.
|
|
1064
1066
|
if (type === 'bot:turn-complete') {
|
|
1065
1067
|
if (eventData.usedFileTools) this.opts.restartBackend();
|
|
1068
|
+
this.opts.onTurnComplete?.(); // flush a queued self-update after a channel turn
|
|
1066
1069
|
broadcastBloby('bot:idle', { conversationId: convId });
|
|
1067
1070
|
return;
|
|
1068
1071
|
}
|
|
@@ -1071,6 +1074,7 @@ export class ChannelManager {
|
|
|
1071
1074
|
// conversation under the same convId starts clean.
|
|
1072
1075
|
if (type === 'bot:conversation-ended') {
|
|
1073
1076
|
this.clearRoutes(convId);
|
|
1077
|
+
this.opts.onTurnComplete?.(); // flush a queued self-update if this turn ended by exception
|
|
1074
1078
|
return;
|
|
1075
1079
|
}
|
|
1076
1080
|
|
|
@@ -1216,12 +1220,14 @@ export class ChannelManager {
|
|
|
1216
1220
|
|
|
1217
1221
|
if (type === 'bot:turn-complete') {
|
|
1218
1222
|
if (eventData.usedFileTools) this.opts.restartBackend();
|
|
1223
|
+
this.opts.onTurnComplete?.(); // flush a queued self-update after a channel turn
|
|
1219
1224
|
broadcastBloby('bot:idle', { conversationId: convId });
|
|
1220
1225
|
return;
|
|
1221
1226
|
}
|
|
1222
1227
|
|
|
1223
1228
|
if (type === 'bot:conversation-ended') {
|
|
1224
1229
|
this.clearRoutes(convId);
|
|
1230
|
+
this.opts.onTurnComplete?.(); // flush a queued self-update if this turn ended by exception
|
|
1225
1231
|
return;
|
|
1226
1232
|
}
|
|
1227
1233
|
|
|
@@ -1375,6 +1381,7 @@ export class ChannelManager {
|
|
|
1375
1381
|
if (type === 'bot:done') {
|
|
1376
1382
|
this.activeAgents.delete(agentKey);
|
|
1377
1383
|
if (eventData.usedFileTools) this.opts.restartBackend();
|
|
1384
|
+
this.opts.onTurnComplete?.(); // flush a queued self-update after a channel turn
|
|
1378
1385
|
this.processQueue();
|
|
1379
1386
|
}
|
|
1380
1387
|
},
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Server-side frontend log ring — the data source behind GET /__bloby/control/logs/frontend
|
|
3
|
+
* (the agent's "tail frontend / devtools logs") and the friendly "Copy error" flow.
|
|
4
|
+
*
|
|
5
|
+
* Two independent producers feed ONE in-memory ring, so the tail is never empty regardless of
|
|
6
|
+
* how the frontend broke:
|
|
7
|
+
* 1. The Vite dev server's customLogger (supervisor/vite-dev.ts) — COMPILE/transform errors,
|
|
8
|
+
* captured even when the browser never ran a line of JS (hard compile failure / blank page).
|
|
9
|
+
* 2. The browser (supervisor/workspace-guard.js) POSTing window.onerror / unhandledrejection /
|
|
10
|
+
* console.error / console.warn / Vite-overlay text to POST /__bloby/control/fe-log — RUNTIME
|
|
11
|
+
* errors, which Vite never sees.
|
|
12
|
+
*
|
|
13
|
+
* Memory-only by design: the agent reads it over the loopback endpoint (no workspace file to grow
|
|
14
|
+
* unbounded, pollute the dir, or self-trigger Vite's watcher). It is the current session's frontend
|
|
15
|
+
* error trail; a supervisor restart clears it (frontend errors are transient by nature).
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
/** Category of a frontend log entry: browser runtime kinds plus Vite dev-server kinds. */
export type FrontendLogKind =
  | 'error'
  | 'unhandledrejection'
  | 'console.error'
  | 'console.warn'
  | 'vite-error'
  | 'vite-warn'
  | 'vite-overlay';

/** One buffered frontend log record. */
export interface FrontendLogEntry {
  /** Epoch milliseconds at which the entry was appended. */
  t: number;
  kind: FrontendLogKind;
  /** Single-line message (line breaks are collapsed to a marker on append). */
  text: string;
  /** Optional stack trace; may span several lines. */
  stack?: string;
}

const RING_MAX = 500;
const TEXT_CAP = 4000; // per-field clamp so one giant stack can't blow the ring's memory
const DEDUPE_WINDOW_MS = 4000;
const STACK_INDENT = ' ';

const ring: FrontendLogEntry[] = [];

// Collapse the same message arriving repeatedly in a short window. The guard re-evaluates the Vite
// overlay on a 1.5s tick, and a crash loop can spam identical errors — without this the ring fills
// with one repeated line and pushes out the useful history.
let lastKey = '';
let lastAt = 0;

/**
 * Append one frontend log entry to the ring. Best-effort, never throws, drops empty text.
 *
 * `text` is reduced to a single line: the browser-facing POST /__bloby/control/fe-log endpoint is
 * unauthenticated, and tailFrontendLog renders one header per entry — an embedded line break would
 * let a remote caller forge a fake `<ts> [kind] ...` line. Line breaks are replaced by a marker,
 * and the result is clamped to TEXT_CAP again afterwards because the marker is longer than the
 * break it replaces. `stack` keeps its line breaks; the renderer indents every stack line.
 *
 * An entry identical (same kind + text) to the previous one is dropped when it arrives within
 * DEDUPE_WINDOW_MS of it; the window is extended each time a duplicate is seen.
 *
 * @param kind  Category of the entry.
 * @param text  Message; non-string values are stringified, null/undefined is treated as empty.
 * @param stack Optional stack trace, clamped to TEXT_CAP characters.
 */
export function appendFrontendLog(kind: FrontendLogKind, text: string, stack?: string): void {
  const clean = (text == null ? '' : String(text))
    .slice(0, TEXT_CAP) // bound the work done by the replace below
    .replace(/[\r\n]+/g, ' ⏎ ')
    .slice(0, TEXT_CAP) // the marker can grow the string — enforce the cap on the stored value
    .trim();
  if (!clean) return;
  const stk = stack ? String(stack).slice(0, TEXT_CAP) : undefined;

  const key = kind + '|' + clean;
  const now = Date.now();
  if (key === lastKey && now - lastAt < DEDUPE_WINDOW_MS) {
    lastAt = now;
    return;
  }
  lastKey = key;
  lastAt = now;

  ring.push({ t: now, kind, text: clean, stack: stk });
  if (ring.length > RING_MAX) ring.splice(0, ring.length - RING_MAX);
}

/**
 * Render the tail of the ring as text (newest last). Each entry contributes one header unit
 * (`<ISO ts> [kind] text`) and, when it has a stack, one stack unit in which every line is
 * indented. `maxLines` limits the number of such units returned.
 *
 * A limit that is zero, negative, or not a number yields an empty string (previously these
 * returned the entire ring, because `slice(-0)` is `slice(0)`).
 *
 * Stack lines are split on `\r\n`, `\r` and `\n`, so a bare carriage return cannot be used to
 * start an un-indented line that looks like a log header.
 *
 * @param maxLines Maximum number of header/stack units to return (default 100).
 * @returns The rendered tail, units joined by `\n`; empty string when nothing qualifies.
 */
export function tailFrontendLog(maxLines = 100): string {
  const limit = Math.floor(maxLines);
  if (!(limit > 0)) return '';

  const lines: string[] = [];
  for (const e of ring) {
    lines.push(`${new Date(e.t).toISOString()} [${e.kind}] ${e.text}`);
    if (e.stack) {
      const indented = e.stack
        .split(/\r\n|\r|\n/)
        .map((stackLine) => STACK_INDENT + stackLine)
        .join('\n');
      lines.push(indented);
    }
  }
  return lines.slice(-limit).join('\n');
}

/** Number of entries currently buffered in the ring. */
export function getFrontendLogCount(): number {
  return ring.length;
}
|
|
@@ -550,6 +550,13 @@ export function anyConversationBusy(): boolean {
|
|
|
550
550
|
return false;
|
|
551
551
|
}
|
|
552
552
|
|
|
553
|
+
/**
 * Reports whether at least one one-shot startBlobyAgentQuery (pulse/cron, customer WhatsApp)
 * is currently in flight.
 *
 * Such queries are tracked only in `activeQueries` (noted as being cleared in a `finally`),
 * not in liveConversations, which is why anyConversationBusy() cannot observe them.
 */
export function anyOneShotActive(): boolean {
  const inFlight = activeQueries.size;
  return inFlight > 0;
}
|
|
559
|
+
|
|
553
560
|
/** Stop a specific background sub-agent task */
|
|
554
561
|
export async function stopSubAgentTask(conversationId: string, taskId: string): Promise<void> {
|
|
555
562
|
const conv = liveConversations.get(conversationId);
|
|
@@ -887,6 +887,12 @@ export function isConversationBusy(conversationId: string): boolean {
|
|
|
887
887
|
|
|
888
888
|
/** Codex one-shot queries (startBlobyAgentQuery) reuse the conversations map and set conv.busy via
 *  startTurn, so anyConversationBusy() already covers them — nothing extra to track here.
 *  Always returns false. */
export function anyOneShotActive(): boolean {
  return false;
}

/** True if ANY live conversation in this harness is mid-turn. Used by the supervisor to defer
 *  backend restarts during channel/Alexa turns (which don't set the dashboard's agentQueryActive). */
export function anyConversationBusy(): boolean {
|
|
891
897
|
for (const c of conversations.values()) if (c.busy) return true;
|
|
892
898
|
return false;
|
|
@@ -344,6 +344,12 @@ export async function warmUpForLiveConversation(
|
|
|
344
344
|
|
|
345
345
|
const activeQueries = new Map<string, AbortController>();

/**
 * Reports whether any one-shot startBlobyAgentQuery is currently in flight.
 *
 * One-shot queries live only in `activeQueries` (noted as being cleared in a `finally`); they do
 * not register as live conversations, so anyConversationBusy() cannot see them.
 */
export function anyOneShotActive(): boolean {
  const inFlight = activeQueries.size;
  return inFlight > 0;
}
|
|
352
|
+
|
|
347
353
|
/**
|
|
348
354
|
* One-shot text query — used by customer WhatsApp + scheduler. Uses the
|
|
349
355
|
* provider stream directly (no async queue), drains it, emits the same
|
|
@@ -59,6 +59,10 @@ export interface Harness {
|
|
|
59
59
|
isConversationBusy(conversationId: string): boolean;
|
|
60
60
|
/** True if ANY conversation in this harness is mid-turn (no id — used to defer backend restarts). */
|
|
61
61
|
anyConversationBusy(): boolean;
|
|
62
|
+
/** True if ANY one-shot query (startBlobyAgentQuery: pulse/cron, customer WhatsApp) is in flight.
|
|
63
|
+
* These do NOT register as live conversations, so anyConversationBusy() can't see them — the
|
|
64
|
+
* supervisor ORs this in so a queued self-update / backend restart defers past one-shot turns too. */
|
|
65
|
+
anyOneShotActive(): boolean;
|
|
62
66
|
stopSubAgentTask(conversationId: string, taskId: string): Promise<void>;
|
|
63
67
|
warmUpForLiveConversation(
|
|
64
68
|
model: string,
|
package/supervisor/index.ts
CHANGED
|
@@ -11,12 +11,13 @@ import { log } from '../shared/logger.js';
|
|
|
11
11
|
import { startTunnel, stopTunnel, isTunnelAlive, restartTunnel, startNamedTunnel, restartNamedTunnel } from './tunnel.js';
|
|
12
12
|
import { createWorkerApp } from '../worker/index.js';
|
|
13
13
|
import { closeDb, getSession, getSetting } from '../worker/db.js';
|
|
14
|
-
import { spawnBackend, stopBackend, restartBackend, getBackendPort, isBackendAlive, isBackendStopping, isBackendDead, readBackendLogTail, setBackendEnv, setBackendGiveUpHandler } from './backend.js';
|
|
14
|
+
import { spawnBackend, stopBackend, restartBackend, getBackendPort, isBackendAlive, isBackendStopping, isBackendDead, readBackendLogTail, setBackendEnv, setBackendGiveUpHandler, probeBackendReady, backendJustSpawned } from './backend.js';
|
|
15
|
+
import { appendFrontendLog, tailFrontendLog, getFrontendLogCount, type FrontendLogKind } from './frontend-log.js';
|
|
15
16
|
import { handleAgentQuery, type AgentQueryRequest } from './agent-api.js';
|
|
16
17
|
import { updateTunnelUrl, startHeartbeat, stopHeartbeat, disconnect } from '../shared/relay.js';
|
|
17
18
|
import {
|
|
18
19
|
startConversation, hasConversation, endConversation, endAllConversations,
|
|
19
|
-
isConversationBusy, anyConversationBusy, stopSubAgentTask,
|
|
20
|
+
isConversationBusy, anyConversationBusy, anyOneShotActive, stopSubAgentTask,
|
|
20
21
|
startBlobyAgentQuery, stopBlobyAgentQuery,
|
|
21
22
|
warmUpForLiveConversation,
|
|
22
23
|
type RecentMessage,
|
|
@@ -44,6 +45,24 @@ process.on('unhandledRejection', (reason) => {
|
|
|
44
45
|
const DIST_BLOBY = path.join(PKG_DIR, 'dist-bloby');
|
|
45
46
|
const SUPERVISOR_PUBLIC = path.join(PKG_DIR, 'supervisor', 'public');
|
|
46
47
|
|
|
48
|
+
// Self-update coordination. The marker persists a queued update across a supervisor restart that
|
|
49
|
+
// happens between the agent's request and the turn-complete flush (in-memory pendingUpdate alone
|
|
50
|
+
// would be lost). attempts + a TTL bound the boot-resume retry so a persistently-failing update
|
|
51
|
+
// can't loop on every boot. See queueUpdate/flushPendingUpdate/runDeferredUpdate.
|
|
52
|
+
const UPDATE_MARKER = path.join(DATA_DIR, '.update-pending');
|
|
53
|
+
const UPDATE_MAX_ATTEMPTS = 2;
|
|
54
|
+
const UPDATE_MARKER_TTL_MS = 30 * 60_000; // 30 min — a marker older than this is cleared, not retried
|
|
55
|
+
|
|
56
|
+
/**
 * Decides whether a request is a direct loopback call of the kind the local agent makes to the
 * /__bloby/control/* and channel-mutation endpoints.
 *
 * Same trust model as the channel mutation guard: cloudflared forwards over loopback, so the IP
 * check alone is a no-op behind the relay. A request is therefore accepted only when its socket
 * address is a loopback address AND it carries neither of cloudflared's tunnel-origin headers
 * (cf-connecting-ip, cf-ray).
 *
 * @param req Incoming HTTP request to classify.
 * @returns true for a loopback request without tunnel headers, false otherwise.
 */
function isLoopbackAgentReq(req: http.IncomingMessage): boolean {
  const remote = req.socket.remoteAddress || '';
  const loopbackAddrs = ['127.0.0.1', '::1', '::ffff:127.0.0.1'];
  if (!loopbackAddrs.includes(remote)) return false;

  const cameThroughTunnel =
    Boolean(req.headers['cf-connecting-ip']) || Boolean(req.headers['cf-ray']);
  return !cameThroughTunnel;
}
|
|
65
|
+
|
|
47
66
|
// Proactive context recycling. The chat runs as one long-lived agent session per
|
|
48
67
|
// conversation (so the user can keep talking while the agent works). That session's
|
|
49
68
|
// context grows every turn and would eventually hit the wall. But continuity does NOT
|
|
@@ -390,6 +409,12 @@ export async function startSupervisor() {
|
|
|
390
409
|
const internalSecret = crypto.randomBytes(16).toString('hex');
|
|
391
410
|
const agentSecret = crypto.randomBytes(32).toString('hex');
|
|
392
411
|
|
|
412
|
+
// Expose the supervisor's own HTTP port to EVERY child subprocess via our own process.env —
|
|
413
|
+
// notably the agent harness (claude/codex/pi all spread ...process.env), whose Bash tool curls
|
|
414
|
+
// the /__bloby/control/* surface as http://127.0.0.1:$SUPERVISOR_PORT/... . Previously this was
|
|
415
|
+
// injected ONLY into the backend (setBackendEnv below), so the agent had no reliable port var.
|
|
416
|
+
process.env.SUPERVISOR_PORT = String(config.port);
|
|
417
|
+
|
|
393
418
|
// Inject agent secret + supervisor port into workspace backend env
|
|
394
419
|
setBackendEnv({
|
|
395
420
|
BLOBY_AGENT_SECRET: agentSecret,
|
|
@@ -620,6 +645,161 @@ export async function startSupervisor() {
|
|
|
620
645
|
return;
|
|
621
646
|
}
|
|
622
647
|
|
|
648
|
+
// ── Agent control surface (/__bloby/control/*) ──────────────────────────────────────────────
|
|
649
|
+
// The Bloby agent drives backend restarts, self-update, and log tails through these endpoints
|
|
650
|
+
// instead of the old lossy fs.watch trigger files (.restart/.update). Every call returns a
|
|
651
|
+
// SYNCHRONOUS JSON ack — that explicit acknowledgment is the reliability fix (no silent drops).
|
|
652
|
+
// The agent curls http://127.0.0.1:$SUPERVISOR_PORT/__bloby/control/... (SUPERVISOR_PORT is
|
|
653
|
+
// injected into its env). All routes are loopback-only (same cf-reject guard as the channel
|
|
654
|
+
// mutations) so they are NEVER reachable over the public tunnel — EXCEPT fe-log, which the
|
|
655
|
+
// user's browser posts to (write-only, capped). Served here, before auth and the Vite catch-all,
|
|
656
|
+
// so they answer even when the backend/Vite are down.
|
|
657
|
+
if (req.url?.startsWith('/__bloby/control/')) {
|
|
658
|
+
const ctlPath = req.url.split('?')[0];
|
|
659
|
+
const ctlQuery = new URLSearchParams(req.url.split('?')[1] || '');
|
|
660
|
+
|
|
661
|
+
// POST /__bloby/control/fe-log — browser → supervisor frontend-error ingest. NOT loopback-
|
|
662
|
+
// gated (the workspace-guard posts from the user's browser, possibly over the tunnel).
|
|
663
|
+
// Write-only, size-capped, no read-back, no side effects → worst case is capped log spam.
|
|
664
|
+
if (ctlPath === '/__bloby/control/fe-log' && req.method === 'POST') {
|
|
665
|
+
let feBody = '';
|
|
666
|
+
let feTooBig = false;
|
|
667
|
+
// Cap by DROPPING the payload once oversize (not req.destroy(), which fires neither 'end' nor
|
|
668
|
+
// 'error' → the response would never be sent). Keep reading to a clean 'end' and 204 always.
|
|
669
|
+
req.on('data', (chunk: Buffer) => {
|
|
670
|
+
if (feTooBig) return;
|
|
671
|
+
feBody += chunk.toString();
|
|
672
|
+
if (feBody.length > 16_384) { feTooBig = true; feBody = ''; }
|
|
673
|
+
});
|
|
674
|
+
req.on('end', () => {
|
|
675
|
+
if (!feTooBig) {
|
|
676
|
+
try {
|
|
677
|
+
const parsed = JSON.parse(feBody);
|
|
678
|
+
const entries = Array.isArray(parsed?.entries) ? parsed.entries.slice(-40) : [];
|
|
679
|
+
const allowed = ['error', 'unhandledrejection', 'console.error', 'console.warn', 'vite-overlay'];
|
|
680
|
+
for (const e of entries) {
|
|
681
|
+
if (e && typeof e.text === 'string') {
|
|
682
|
+
const kind = (allowed.includes(e.kind) ? e.kind : 'error') as FrontendLogKind;
|
|
683
|
+
appendFrontendLog(kind, e.text, typeof e.stack === 'string' ? e.stack : undefined);
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
} catch {}
|
|
687
|
+
}
|
|
688
|
+
try { res.writeHead(204); res.end(); } catch {}
|
|
689
|
+
});
|
|
690
|
+
req.on('error', () => { try { res.writeHead(204); res.end(); } catch {} });
|
|
691
|
+
return;
|
|
692
|
+
}
|
|
693
|
+
|
|
694
|
+
// Every other control route is loopback-only (agent-driven, can restart/update the instance).
|
|
695
|
+
if (!isLoopbackAgentReq(req)) {
|
|
696
|
+
res.writeHead(403, { 'Content-Type': 'application/json' });
|
|
697
|
+
res.end(JSON.stringify({ ok: false, error: 'This control endpoint is localhost-only.' }));
|
|
698
|
+
return;
|
|
699
|
+
}
|
|
700
|
+
res.setHeader('Cache-Control', 'no-store');
|
|
701
|
+
|
|
702
|
+
// GET /__bloby/control/logs/backend?lines=N[&prev=1] — tail the current (or last-crashed) run.
|
|
703
|
+
if (ctlPath === '/__bloby/control/logs/backend' && req.method === 'GET') {
|
|
704
|
+
const lines = Math.max(1, Math.min(1000, parseInt(ctlQuery.get('lines') || '100', 10) || 100));
|
|
705
|
+
const prev = ctlQuery.get('prev') === '1';
|
|
706
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
707
|
+
res.end(JSON.stringify({ ok: true, lines, prev, justRestarted: backendJustSpawned(), log: readBackendLogTail(lines, prev) }));
|
|
708
|
+
return;
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
// GET /__bloby/control/logs/frontend?lines=N — runtime + console + Vite-compile frontend errors.
|
|
712
|
+
if (ctlPath === '/__bloby/control/logs/frontend' && req.method === 'GET') {
|
|
713
|
+
const lines = Math.max(1, Math.min(1000, parseInt(ctlQuery.get('lines') || '100', 10) || 100));
|
|
714
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
715
|
+
res.end(JSON.stringify({ ok: true, lines, entries: getFrontendLogCount(), log: tailFrontendLog(lines) }));
|
|
716
|
+
return;
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
// GET /__bloby/control/update-status — is a queued update running / did it fail?
|
|
720
|
+
if (ctlPath === '/__bloby/control/update-status' && req.method === 'GET') {
|
|
721
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
722
|
+
res.end(JSON.stringify({ ok: true, ...getUpdateStatus() }));
|
|
723
|
+
return;
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
// POST /__bloby/control/update — queue a self-update (acknowledged, idempotent, deferred).
|
|
727
|
+
if (ctlPath === '/__bloby/control/update' && req.method === 'POST') {
|
|
728
|
+
const r = queueUpdate();
|
|
729
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
730
|
+
res.end(JSON.stringify({
|
|
731
|
+
ok: true,
|
|
732
|
+
queued: r.queued || r.alreadyQueued,
|
|
733
|
+
alreadyQueued: r.alreadyQueued,
|
|
734
|
+
retrying: r.retrying,
|
|
735
|
+
deferred: r.deferred,
|
|
736
|
+
message: r.alreadyQueued
|
|
737
|
+
? 'An update is already queued or running.'
|
|
738
|
+
: r.retrying
|
|
739
|
+
? 'A previous update attempt failed — re-queued; it retries after your turn ends. Check update-status (state:failed exposes the prior error in logTail).'
|
|
740
|
+
: 'Update queued — it runs after your turn ends. You will NOT die mid-turn; finish your turn normally. The page is unresponsive ~1–2 min while Bloby restarts on the new version. Check update-status after.',
|
|
741
|
+
}));
|
|
742
|
+
return;
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
// POST /__bloby/control/restart-backend { wait?:bool=true, timeoutMs?:num=15000, logLines?:num=60 }
|
|
746
|
+
// Restarts the backend through the existing serialized doRestart() funnel and (when wait) blocks
|
|
747
|
+
// until the backend's PORT is listening — so the agent can restart-and-verify WITHIN its turn.
|
|
748
|
+
if (ctlPath === '/__bloby/control/restart-backend' && req.method === 'POST') {
|
|
749
|
+
let rbBody = '';
|
|
750
|
+
let rbTooBig = false;
|
|
751
|
+
// Cap by dropping the payload (not req.destroy(), which would fire neither 'end' nor 'error'
|
|
752
|
+
// → no JSON ack ever sent, violating the endpoint contract). Always answer from 'end'.
|
|
753
|
+
req.on('data', (chunk: Buffer) => {
|
|
754
|
+
if (rbTooBig) return;
|
|
755
|
+
rbBody += chunk.toString();
|
|
756
|
+
if (rbBody.length > 4096) { rbTooBig = true; rbBody = ''; }
|
|
757
|
+
});
|
|
758
|
+
req.on('end', async () => {
|
|
759
|
+
if (rbTooBig) {
|
|
760
|
+
try { res.writeHead(413, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ ok: false, error: 'Request body too large.' })); } catch {}
|
|
761
|
+
return;
|
|
762
|
+
}
|
|
763
|
+
let rbOpts: any = {};
|
|
764
|
+
try { rbOpts = rbBody ? JSON.parse(rbBody) : {}; } catch {}
|
|
765
|
+
const wait = rbOpts.wait !== false; // default true
|
|
766
|
+
const timeoutMs = Math.max(1000, Math.min(30_000, Number(rbOpts.timeoutMs) || 15_000));
|
|
767
|
+
const logLines = Math.max(0, Math.min(400, Number(rbOpts.logLines) || 60));
|
|
768
|
+
const wasDead = isBackendDead(); // had crash-looped & given up BEFORE this explicit restart
|
|
769
|
+
const started = Date.now();
|
|
770
|
+
try {
|
|
771
|
+
await doRestart(); // resetBackendRestarts + serialized stop→spawn; preserves all invariants
|
|
772
|
+
} catch (err: any) {
|
|
773
|
+
try { res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ ok: false, restarted: false, error: String(err?.message || err) })); } catch {}
|
|
774
|
+
return;
|
|
775
|
+
}
|
|
776
|
+
const listening = wait ? await probeBackendReady(backendPort, timeoutMs) : isBackendAlive();
|
|
777
|
+
const gaveUp = isBackendDead();
|
|
778
|
+
try {
|
|
779
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
780
|
+
res.end(JSON.stringify({
|
|
781
|
+
ok: true,
|
|
782
|
+
restarted: true,
|
|
783
|
+
healthy: listening && !gaveUp,
|
|
784
|
+
listening,
|
|
785
|
+
gaveUp,
|
|
786
|
+
wasDead,
|
|
787
|
+
// If it gave up AGAIN, restarting won't help — tell the agent to fix the code, not re-restart.
|
|
788
|
+
hint: gaveUp ? 'Backend crash-looped and gave up again — restarting will not fix it. Read the logs and fix the code.' : undefined,
|
|
789
|
+
waitedMs: Date.now() - started,
|
|
790
|
+
logs: logLines ? readBackendLogTail(logLines) : undefined,
|
|
791
|
+
}));
|
|
792
|
+
} catch {}
|
|
793
|
+
});
|
|
794
|
+
req.on('error', () => { try { if (!res.headersSent) { res.writeHead(500); res.end(); } } catch {} });
|
|
795
|
+
return;
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
res.writeHead(404, { 'Content-Type': 'application/json' });
|
|
799
|
+
res.end(JSON.stringify({ ok: false, error: 'Unknown control endpoint.' }));
|
|
800
|
+
return;
|
|
801
|
+
}
|
|
802
|
+
|
|
623
803
|
// App API routes → proxy to user's backend server
|
|
624
804
|
if (req.url?.startsWith('/app/api')) {
|
|
625
805
|
const backendPath = req.url.replace(/^\/app/, '');
|
|
@@ -2851,12 +3031,7 @@ ${alreadyLinked ? '' : `
|
|
|
2851
3031
|
log.info('[orchestrator] Restarting backend (file tools used / pending watcher change)');
|
|
2852
3032
|
void doRestart();
|
|
2853
3033
|
}
|
|
2854
|
-
|
|
2855
|
-
pendingUpdate = false;
|
|
2856
|
-
log.info('[orchestrator] Ending conversation before update...');
|
|
2857
|
-
endConversation(convId);
|
|
2858
|
-
runDeferredUpdate();
|
|
2859
|
-
}
|
|
3034
|
+
flushPendingUpdate(); // run a queued self-update now that this dashboard turn has ended
|
|
2860
3035
|
|
|
2861
3036
|
// Proactive session recycling (see CONTEXT_RECYCLE_TOKENS). Only when the
|
|
2862
3037
|
// harness reports the session idle (no queued message) — and this handler runs
|
|
@@ -2883,6 +3058,9 @@ ${alreadyLinked ? '' : `
|
|
|
2883
3058
|
currentStreamConvId = null;
|
|
2884
3059
|
currentStreamBuffer = '';
|
|
2885
3060
|
channelManager.clearRoutes(convId);
|
|
3061
|
+
// A turn that ended by exception/recycle (not a clean bot:turn-complete) must still flush a
|
|
3062
|
+
// queued self-update — otherwise it'd wait for the next turn/reboot. Self-defers + idempotent.
|
|
3063
|
+
flushPendingUpdate();
|
|
2886
3064
|
return;
|
|
2887
3065
|
}
|
|
2888
3066
|
|
|
@@ -3408,16 +3586,90 @@ ${alreadyLinked ? '' : `
|
|
|
3408
3586
|
// Track whether an agent is actively processing — file watcher defers restarts during active turns
|
|
3409
3587
|
let agentQueryActive = false;
|
|
3410
3588
|
let pendingBackendRestart = false; // Set when file watcher fires during agent turn
|
|
3411
|
-
let pendingUpdate = false; //
|
|
3589
|
+
let pendingUpdate = false; // An update is queued; runs at the next turn-complete (flushPendingUpdate)
|
|
3590
|
+
let updateInProgress = false; // The update child has actually been spawned — idempotency guard
|
|
3412
3591
|
|
|
3413
3592
|
// Note: with live conversations, agentQueryActive is true while the agent processes a message
|
|
3414
3593
|
// and false when it's idle (waiting for next message). The live conversation stays alive between messages.
|
|
3415
3594
|
|
|
3416
|
-
//
|
|
3417
|
-
//
|
|
3418
|
-
|
|
3419
|
-
|
|
3595
|
+
// ── Self-update marker (persists a queued update across a supervisor restart in the request→flush
|
|
3596
|
+
// window) ──────────────────────────────────────────────────────────────────────────────────
|
|
3597
|
+
/**
 * Load the persisted self-update marker from UPDATE_MARKER.
 *
 * @returns `{ queuedAt, attempts }` when the file parses as JSON and has a numeric `queuedAt`
 *   (`attempts` falls back to 0 when missing or not numeric); `null` when the file is absent,
 *   unreadable, malformed, or lacks a numeric `queuedAt`.
 */
function readUpdateMarker(): { queuedAt: number; attempts: number } | null {
  try {
    const raw = fs.readFileSync(UPDATE_MARKER, 'utf-8');
    const parsed = JSON.parse(raw);
    if (parsed && typeof parsed.queuedAt === 'number') {
      const attempts = Number(parsed.attempts) || 0;
      return { queuedAt: parsed.queuedAt, attempts };
    }
  } catch {
    // Missing or malformed marker is treated the same as "no marker".
  }
  return null;
}
|
|
3604
|
+
/**
 * Persist the self-update marker as JSON at UPDATE_MARKER.
 * Best-effort: any serialization or write error is swallowed.
 *
 * @param m Marker to store (`queuedAt` timestamp and `attempts` counter).
 */
function writeUpdateMarker(m: { queuedAt: number; attempts: number }): void {
  try {
    const payload = JSON.stringify(m);
    fs.writeFileSync(UPDATE_MARKER, payload);
  } catch {
    // Best-effort — a failed write simply leaves no (or the previous) marker on disk.
  }
}
|
|
3607
|
+
/** Delete the self-update marker file. Best-effort: errors (e.g. file already gone) are ignored. */
function clearUpdateMarker(): void {
  try {
    fs.unlinkSync(UPDATE_MARKER);
  } catch {
    // Nothing to remove, or removal failed — either way there is nothing further to do here.
  }
}
|
|
3610
|
+
|
|
3611
|
+
/**
 * Queue a self-update and report what happened.
 *
 * The update is not started here directly: `pendingUpdate` is set, a marker is written if none
 * exists yet, and `flushPendingUpdate()` is asked to run it (which defers itself while a turn
 * is active).
 *
 * @returns
 *  - `queued`: false only when the update child is already running (`updateInProgress`).
 *  - `alreadyQueued`: an update was already pending (or running) before this call.
 *  - `deferred`: a turn is active right now, per `aTurnIsActive()`.
 *  - `retrying`: nothing was pending, but a marker with `attempts > 0` exists from an earlier try.
 */
function queueUpdate(): { queued: boolean; alreadyQueued: boolean; deferred: boolean; retrying: boolean } {
  if (updateInProgress) {
    return { queued: false, alreadyQueued: true, deferred: false, retrying: false };
  }

  const existing = readUpdateMarker();
  const wasPending = pendingUpdate; // genuinely already waiting to run
  const retrying = !wasPending && existing !== null && existing.attempts > 0;

  pendingUpdate = true;
  if (existing === null) {
    writeUpdateMarker({ queuedAt: Date.now(), attempts: 0 });
  }

  flushPendingUpdate(); // self-defers — runs now only if nothing is mid-turn

  const deferred = aTurnIsActive();
  return { queued: true, alreadyQueued: wasPending, deferred, retrying };
}
|
|
3624
|
+
|
|
3625
|
+
/**
 * Start the queued self-update, but only when no turn is active.
 *
 * The real check happens one tick later (`setImmediate`), so flags belonging to a turn that is
 * just finishing have a chance to clear first. If an update is no longer pending, is already
 * running, or a turn is still active at that point, nothing happens and `pendingUpdate` is left
 * as it was. Otherwise every conversation in `clientConvs` that `hasConversation()` confirms is
 * ended (errors ignored) and `runDeferredUpdate()` is invoked.
 */
function flushPendingUpdate(): void {
  const nothingToRun = () => !pendingUpdate || updateInProgress;
  if (nothingToRun()) return;

  setImmediate(() => {
    if (nothingToRun() || aTurnIsActive()) return;

    pendingUpdate = false;
    try {
      for (const convId of Array.from(clientConvs.values())) {
        if (hasConversation(convId)) endConversation(convId);
      }
    } catch {
      // Failing to end a conversation must not block the update.
    }
    runDeferredUpdate();
  });
}
|
|
3639
|
+
|
|
3640
|
+
/**
 * Snapshot of the self-update state, served by GET /__bloby/control/update-status.
 *
 * @returns
 *  - `state`: `running` while the update child is in progress; `queued` when an update is pending
 *    or a marker with zero attempts exists; `failed` when a marker records at least one attempt
 *    and nothing is pending; `idle` otherwise.
 *  - `attempts`: the marker's attempt counter (0 without a marker).
 *  - `logTail`: the last 60 lines of `update.log` in DATA_DIR, trimmed; empty if unreadable.
 */
function getUpdateStatus(): { state: 'idle' | 'queued' | 'running' | 'failed'; attempts: number; logTail: string } {
  const marker = readUpdateMarker();

  const resolveState = (): 'idle' | 'queued' | 'running' | 'failed' => {
    if (updateInProgress) return 'running';
    if (pendingUpdate) return 'queued';
    if (!marker) return 'idle';
    // A marker with recorded attempts but nothing pending means a prior attempt failed.
    return marker.attempts > 0 ? 'failed' : 'queued';
  };
  const state = resolveState();

  let logTail = '';
  try {
    const logLines = fs.readFileSync(path.join(DATA_DIR, 'update.log'), 'utf-8').split('\n');
    logTail = logLines.slice(-60).join('\n').trim();
  } catch {
    // No log yet (or unreadable) — report an empty tail.
  }

  return { state, attempts: marker?.attempts ?? 0, logTail };
}
|
|
3655
|
+
|
|
3656
|
+
// Run bloby update as a child process. BLOBY_SELF_UPDATE=1 tells bin/cli.js to skip daemon
|
|
3657
|
+
// stop/restart — the supervisor exits after the update finishes, and systemd (Restart=on-failure)
|
|
3658
|
+
// or launchd (KeepAlive.SuccessfulExit=false) restarts us with the new code. The marker's attempts
|
|
3659
|
+
// counter bounds retries (the TTL is enforced only on the boot-resume path so it can't strand a
|
|
3660
|
+
// legit update queued early in a >TTL-long turn).
|
|
3420
3661
|
function runDeferredUpdate() {
|
|
3662
|
+
if (updateInProgress) { log.info('Update already in progress — skipping duplicate trigger'); return; }
|
|
3663
|
+
const marker = readUpdateMarker() || { queuedAt: Date.now(), attempts: 0 };
|
|
3664
|
+
if (marker.attempts >= UPDATE_MAX_ATTEMPTS) {
|
|
3665
|
+
log.error(`Self-update failed ${marker.attempts}× — giving up. Run \`bloby update\` manually or check ${path.join(DATA_DIR, 'update.log')}`);
|
|
3666
|
+
clearUpdateMarker();
|
|
3667
|
+
try { broadcastBloby('backend:failed', { message: 'Self-update failed repeatedly. Ask your human to run `bloby update`.' }); } catch {}
|
|
3668
|
+
return;
|
|
3669
|
+
}
|
|
3670
|
+
updateInProgress = true;
|
|
3671
|
+
writeUpdateMarker({ queuedAt: marker.queuedAt, attempts: marker.attempts + 1 });
|
|
3672
|
+
|
|
3421
3673
|
const cliPath = path.join(PKG_DIR, 'bin', 'cli.js');
|
|
3422
3674
|
const updateLog = path.join(DATA_DIR, 'update.log');
|
|
3423
3675
|
log.info('Deferred update triggered — running bloby update...');
|
|
@@ -3430,17 +3682,38 @@ ${alreadyLinked ? '' : `
|
|
|
3430
3682
|
child.on('exit', (code) => {
|
|
3431
3683
|
try { fs.closeSync(logFd); } catch {}
|
|
3432
3684
|
if (code === 0) {
|
|
3685
|
+
clearUpdateMarker(); // success (updated or already-latest) — don't re-run on the next boot
|
|
3433
3686
|
log.ok('Update completed — restarting with new version...');
|
|
3434
|
-
process.exit(1); // non-zero triggers daemon manager to restart us
|
|
3687
|
+
process.exit(1); // non-zero triggers daemon manager to restart us onto the new code
|
|
3435
3688
|
} else {
|
|
3436
|
-
|
|
3689
|
+
// Leave the marker so the next boot retries (bounded by attempts); allow another flush now.
|
|
3690
|
+
updateInProgress = false;
|
|
3691
|
+
log.error(`Update process exited with code ${code} — see ${updateLog}. Will retry on next restart (attempt ${marker.attempts + 1}/${UPDATE_MAX_ATTEMPTS}).`);
|
|
3437
3692
|
}
|
|
3438
3693
|
});
|
|
3694
|
+
child.on('error', (err) => {
|
|
3695
|
+
try { fs.closeSync(logFd); } catch {}
|
|
3696
|
+
updateInProgress = false;
|
|
3697
|
+
log.error(`Update process failed to start: ${err.message}`);
|
|
3698
|
+
});
|
|
3439
3699
|
} catch (err) {
|
|
3700
|
+
updateInProgress = false;
|
|
3440
3701
|
log.error(`Deferred update failed: ${err instanceof Error ? err.message : err}`);
|
|
3441
3702
|
}
|
|
3442
3703
|
}
|
|
3443
3704
|
|
|
3705
|
+
/**
 * Boot-time hook: if an update marker is still on disk, re-queue that update.
 *
 * A marker older than UPDATE_MARKER_TTL_MS is discarded instead of resumed. Otherwise
 * `pendingUpdate` is set and `flushPendingUpdate()` is called to start it.
 */
function resumePendingUpdateOnBoot(): void {
  const marker = readUpdateMarker();
  if (marker === null) return;

  const ageMs = Date.now() - marker.queuedAt;
  if (ageMs > UPDATE_MARKER_TTL_MS) {
    // Stale request — drop it rather than updating unexpectedly.
    clearUpdateMarker();
    return;
  }

  log.info('Found a pending update from before restart — resuming...');
  pendingUpdate = true;
  flushPendingUpdate(); // no active turn at boot
}
|
|
3716
|
+
|
|
3444
3717
|
// Tell the live chat when the backend gives up — the dashboard interstitial covers page loads,
|
|
3445
3718
|
// but an already-open chat client gets an explicit event it can surface ("ask me to fix the backend").
|
|
3446
3719
|
setBackendGiveUpHandler(() => {
|
|
@@ -3456,6 +3729,7 @@ ${alreadyLinked ? '' : `
|
|
|
3456
3729
|
workerApi,
|
|
3457
3730
|
restartBackend: () => doRestart(),
|
|
3458
3731
|
getModel: () => loadConfig().ai.model,
|
|
3732
|
+
onTurnComplete: () => { if (pendingBackendRestart) void doRestart(); flushPendingUpdate(); }, // flush a deferred backend restart + queued self-update after a pulse/cron turn
|
|
3459
3733
|
});
|
|
3460
3734
|
|
|
3461
3735
|
// Initialize channel manager (WhatsApp, Telegram, etc.)
|
|
@@ -3464,6 +3738,7 @@ ${alreadyLinked ? '' : `
|
|
|
3464
3738
|
workerApi,
|
|
3465
3739
|
restartBackend: () => doRestart(),
|
|
3466
3740
|
getModel: () => loadConfig().ai.model,
|
|
3741
|
+
onTurnComplete: () => { if (pendingBackendRestart) void doRestart(); flushPendingUpdate(); }, // flush a deferred backend restart + queued self-update after a channel turn
|
|
3467
3742
|
});
|
|
3468
3743
|
|
|
3469
3744
|
// Broadcast channel status changes to all connected chat clients
|
|
@@ -3506,11 +3781,12 @@ ${alreadyLinked ? '' : `
|
|
|
3506
3781
|
return restartBackend(backendPort);
|
|
3507
3782
|
}
|
|
3508
3783
|
|
|
3509
|
-
/** True while any surface is mid-turn. Dashboard chat sets agentQueryActive; WhatsApp/Alexa
|
|
3510
|
-
* turns
|
|
3511
|
-
*
|
|
3512
|
-
*
|
|
3513
|
-
|
|
3784
|
+
/**
 * Reports whether any surface is currently mid-turn.
 *
 * Three independent signals are consulted, in order: the `agentQueryActive` flag,
 * `anyConversationBusy()`, and `anyOneShotActive()`. The first truthy one wins. Callers use this
 * to avoid restarting the backend or exiting the supervisor underneath a running turn.
 */
const aTurnIsActive = () => {
  if (agentQueryActive) return agentQueryActive;
  const conversationBusy = anyConversationBusy();
  if (conversationBusy) return conversationBusy;
  return anyOneShotActive();
};
|
|
3514
3790
|
|
|
3515
3791
|
function scheduleBackendRestart(reason: string) {
|
|
3516
3792
|
if (aTurnIsActive()) {
|
|
@@ -3577,26 +3853,19 @@ ${alreadyLinked ? '' : `
|
|
|
3577
3853
|
scheduleBackendRestart(`workspace dependencies changed (${filename})`);
|
|
3578
3854
|
}
|
|
3579
3855
|
if (filename === '.restart') {
|
|
3580
|
-
//
|
|
3856
|
+
// DEPRECATED fallback — agents now use POST /__bloby/control/restart-backend (synchronous ack,
|
|
3857
|
+
// no lossy fs.watch). Kept so a human/external script touching .restart still works.
|
|
3581
3858
|
try { fs.unlinkSync(path.join(workspaceDir, '.restart')); } catch {}
|
|
3582
|
-
scheduleBackendRestart('.restart trigger');
|
|
3859
|
+
scheduleBackendRestart('.restart trigger (deprecated)');
|
|
3583
3860
|
}
|
|
3584
3861
|
if (filename === '.update') {
|
|
3585
|
-
//
|
|
3862
|
+
// DEPRECATED fallback — agents now use POST /__bloby/control/update (acknowledged + idempotent).
|
|
3863
|
+
// Route through queueUpdate(), which carries every fix the old inline path lacked: the
|
|
3864
|
+
// idempotency guard (the watcher's own unlink re-fires this event → double-spawn), the
|
|
3865
|
+
// aTurnIsActive() gate (was agentQueryActive-only → fired mid-turn on pulse/channel turns), the
|
|
3866
|
+
// persisted marker, and the all-surface turn-complete flush.
|
|
3586
3867
|
try { fs.unlinkSync(path.join(workspaceDir, '.update')); } catch {}
|
|
3587
|
-
|
|
3588
|
-
pendingUpdate = true;
|
|
3589
|
-
log.info('Update requested — deferring until agent turn ends');
|
|
3590
|
-
} else {
|
|
3591
|
-
// End any live conversations before updating
|
|
3592
|
-
for (const cid of Array.from(clientConvs.values())) {
|
|
3593
|
-
if (hasConversation(cid)) {
|
|
3594
|
-
log.info(`[update] Ending conversation ${cid} before update`);
|
|
3595
|
-
endConversation(cid);
|
|
3596
|
-
}
|
|
3597
|
-
}
|
|
3598
|
-
runDeferredUpdate();
|
|
3599
|
-
}
|
|
3868
|
+
queueUpdate();
|
|
3600
3869
|
}
|
|
3601
3870
|
}
|
|
3602
3871
|
|
|
@@ -3619,6 +3888,9 @@ ${alreadyLinked ? '' : `
|
|
|
3619
3888
|
armBackendWatcher();
|
|
3620
3889
|
armWorkspaceWatcher();
|
|
3621
3890
|
|
|
3891
|
+
// Resume a self-update that was queued but never ran (supervisor died in the request→flush window).
|
|
3892
|
+
resumePendingUpdateOnBoot();
|
|
3893
|
+
|
|
3622
3894
|
// WebSocket liveness heartbeat — ping the app + chat WS clients every 30s and terminate any
|
|
3623
3895
|
// that missed the previous pong (half-open sockets that never fired 'close'). Terminating fires
|
|
3624
3896
|
// 'close', which runs the existing map/subscription cleanup. Scoped to our two WSS only (Vite's
|
package/supervisor/scheduler.ts
CHANGED
|
@@ -34,6 +34,8 @@ interface SchedulerOpts {
|
|
|
34
34
|
workerApi: (path: string, method?: string, body?: any) => Promise<any>;
|
|
35
35
|
restartBackend: () => void;
|
|
36
36
|
getModel: () => string;
|
|
37
|
+
/** Fired after a pulse/cron turn ends — the supervisor uses it to flush a queued self-update. */
|
|
38
|
+
onTurnComplete?: () => void;
|
|
37
39
|
}
|
|
38
40
|
|
|
39
41
|
// State
|
|
@@ -120,7 +122,7 @@ function cronMatchesNow(schedule: string): boolean {
|
|
|
120
122
|
|
|
121
123
|
function triggerAgent(prompt: string, label: string, onComplete?: () => void) {
|
|
122
124
|
if (!schedulerOpts) return;
|
|
123
|
-
const { broadcastBloby, workerApi, restartBackend, getModel } = schedulerOpts;
|
|
125
|
+
const { broadcastBloby, workerApi, restartBackend, getModel, onTurnComplete } = schedulerOpts;
|
|
124
126
|
const timestamp = Date.now();
|
|
125
127
|
const convId = label.startsWith('pulse') ? `pulse-${timestamp}` : `cron-${label}-${timestamp}`;
|
|
126
128
|
const model = getModel();
|
|
@@ -230,6 +232,7 @@ function triggerAgent(prompt: string, label: string, onComplete?: () => void) {
|
|
|
230
232
|
log.info(`[scheduler] File tools used — restarting backend`);
|
|
231
233
|
restartBackend();
|
|
232
234
|
}
|
|
235
|
+
onTurnComplete?.(); // flush a queued self-update now this pulse/cron turn has ended
|
|
233
236
|
onComplete?.();
|
|
234
237
|
}
|
|
235
238
|
|
package/supervisor/vite-dev.ts
CHANGED
|
@@ -1,11 +1,37 @@
|
|
|
1
|
-
import { createServer as createViteServer, type ViteDevServer } from 'vite';
|
|
1
|
+
import { createServer as createViteServer, createLogger, type ViteDevServer } from 'vite';
|
|
2
2
|
import type http from 'http';
|
|
3
3
|
import path from 'path';
|
|
4
4
|
import { PKG_DIR } from '../shared/paths.js';
|
|
5
5
|
import { log } from '../shared/logger.js';
|
|
6
|
+
import { appendFrontendLog } from './frontend-log.js';
|
|
6
7
|
|
|
7
8
|
let dashboardVite: ViteDevServer | null = null;
|
|
8
9
|
|
|
10
|
+
/** Coerce `s` to a string and remove ANSI SGR sequences (ESC `[`, digits/semicolons, `m`). */
const stripAnsi = (s: string) => {
  const sgrSequence = /\x1b\[[0-9;]*m/g;
  return String(s).replace(sgrSequence, '');
};
|
|
11
|
+
|
|
12
|
+
/**
 * Build a Vite logger that still prints error/warn output through Vite's default logger AND
 * records each message (ANSI colors stripped) via `appendFrontendLog`, so compile/transform
 * errors are readable from the server side even when the browser never executed any JS.
 *
 * Capture is best-effort: a failure inside `appendFrontendLog` never prevents the message from
 * being printed.
 *
 * `warnOnce` captures are de-duplicated per message. Vite's own `warnOnce` prints a given
 * message only once, so recording every call would fill the frontend log with repeats that
 * never appeared on stdout.
 *
 * @returns The patched Vite logger, suitable for the `customLogger` server option.
 */
function makeCaptureLogger() {
  const logger = createLogger('info');
  const origError = logger.error.bind(logger);
  const origWarn = logger.warn.bind(logger);
  const origWarnOnce = logger.warnOnce.bind(logger);

  // Messages already recorded through warnOnce (mirrors Vite's print-once behavior).
  const capturedOnce = new Set<string>();

  logger.error = (msg, opts) => {
    try {
      const stack = opts?.error?.stack ? stripAnsi(opts.error.stack) : undefined;
      appendFrontendLog('vite-error', stripAnsi(msg), stack);
    } catch {}
    origError(msg, opts);
  };

  logger.warn = (msg, opts) => {
    try { appendFrontendLog('vite-warn', stripAnsi(msg)); } catch {}
    origWarn(msg, opts);
  };

  logger.warnOnce = (msg, opts) => {
    if (!capturedOnce.has(msg)) {
      capturedOnce.add(msg);
      try { appendFrontendLog('vite-warn', stripAnsi(msg)); } catch {}
    }
    origWarnOnce(msg, opts);
  };

  return logger;
}
|
|
34
|
+
|
|
9
35
|
export async function startViteDevServers(supervisorPort: number, hmrServer: http.Server): Promise<{ dashboard: number }> {
|
|
10
36
|
const ports = {
|
|
11
37
|
dashboard: supervisorPort + 2,
|
|
@@ -25,7 +51,9 @@ export async function startViteDevServers(supervisorPort: number, hmrServer: htt
|
|
|
25
51
|
// so it works both locally (localhost:3000) and through the relay (riven.bloby.bot:443).
|
|
26
52
|
hmr: { server: hmrServer },
|
|
27
53
|
},
|
|
28
|
-
|
|
54
|
+
// customLogger captures compile/transform errors into the frontend ring (and still prints
|
|
55
|
+
// them); it supersedes logLevel, which Vite ignores when a customLogger is provided.
|
|
56
|
+
customLogger: makeCaptureLogger(),
|
|
29
57
|
});
|
|
30
58
|
await dashboardVite.listen();
|
|
31
59
|
} catch (err) {
|
|
@@ -321,6 +321,71 @@
|
|
|
321
321
|
hideStyle.textContent = 'vite-error-overlay{display:none!important}';
|
|
322
322
|
(document.head || document.documentElement).appendChild(hideStyle);
|
|
323
323
|
|
|
324
|
+
/* ── 2a. Frontend error capture → supervisor ──────────────────────────────
|
|
325
|
+
Captures EVERY break class — runtime window.onerror, unhandledrejection, console.error/warn,
|
|
326
|
+
and the Vite compile overlay — into a ring. The ring backs (1) the "Copy error" button, so it
|
|
327
|
+
is never empty even for runtime/black-screen breaks (no Vite overlay), and (2) a debounced POST
|
|
328
|
+
to /__bloby/control/fe-log, so the agent's `…/control/logs/frontend` tail is never empty either.
|
|
329
|
+
Before this, only Vite's compile overlay was readable and the listeners captured nothing. */
|
|
330
|
+
var feRing = []; // last ~120 entries, for the Copy button
|
|
331
|
+
var feUnsent = []; // not-yet-POSTed entries
|
|
332
|
+
var feFlushTimer = null;
|
|
333
|
+
function safeStringify(o) { try { return JSON.stringify(o); } catch (e) { return String(o); } }
|
|
334
|
+
/**
 * Record one captured frontend event.
 *
 * The text is stringified, clipped to 4000 characters and trimmed. Empty text, text containing
 * VITE_SUPPRESS_MARK, and an exact repeat of the most recent entry (same kind and text) are
 * dropped. Accepted entries go into the `feRing` buffer (oldest evicted beyond 120) and the
 * `feUnsent` queue, and a flush is scheduled one second out unless one is already pending.
 *
 * @param kind  Source label, e.g. 'error' or 'console.warn'.
 * @param text  Message text; null/undefined is ignored.
 * @param stack Optional stack trace, clipped to 4000 characters.
 */
function pushFe(kind, text, stack) {
  if (text == null) return;

  var body = String(text).slice(0, 4000).trim();
  if (!body) return;
  // Benign HMR-reconnect marker, not an app error.
  if (body.indexOf(VITE_SUPPRESS_MARK) !== -1) return;

  // Collapse consecutive repeats (the overlay is re-read on a 1.5s tick).
  var previous = feRing[feRing.length - 1];
  var isRepeat = !!previous && previous.kind === kind && previous.text === body;
  if (isRepeat) return;

  var clippedStack = stack ? String(stack).slice(0, 4000) : undefined;
  var entry = { kind: kind, text: body, stack: clippedStack, t: Date.now() };

  feRing.push(entry);
  if (feRing.length > 120) {
    feRing.shift();
  }
  feUnsent.push(entry);

  if (!feFlushTimer) {
    feFlushTimer = setTimeout(flushFe, 1000);
  }
}
|
|
346
|
+
/**
 * POST all not-yet-sent captured entries to /__bloby/control/fe-log and empty the queue.
 *
 * Called by the debounce timer and directly from the page-hide listeners. Any debounce timer
 * that is still pending is cancelled, so a direct call does not leave a stale timer behind.
 *
 * `keepalive` lets the request outlive page unload, but the Fetch standard rejects keepalive
 * requests whose bodies exceed 64 KiB. A burst of large entries would therefore fail and lose
 * the whole batch. Keepalive is only requested when the payload is comfortably below that
 * limit; larger payloads are sent as a normal request (which may be cut off by an unload, but
 * would otherwise have been rejected outright).
 *
 * Delivery is best-effort: network or serialization failures are ignored and entries are not
 * re-queued.
 */
function flushFe() {
  if (feFlushTimer) clearTimeout(feFlushTimer);
  feFlushTimer = null;
  if (!feUnsent.length) return;

  var batch = feUnsent.splice(0, feUnsent.length);
  try {
    var payload = JSON.stringify({ entries: batch });
    // 20000 UTF-16 units encode to at most ~60000 UTF-8 bytes, i.e. under the 64 KiB cap.
    var fitsKeepalive = payload.length <= 20000;
    fetch('/__bloby/control/fe-log', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload,
      keepalive: fitsKeepalive,
      cache: 'no-store',
    }).catch(function () {});
  } catch (e) {}
}
|
|
357
|
+
/**
 * Render the newest captured entries as text.
 *
 * @param n How many entries to include, counted from the end of `feRing` (6 when falsy).
 * @returns Entries formatted as "[kind] text", each followed by its stack on a new line when
 *   present, separated by blank lines.
 */
function feTail(n) {
  var count = n || 6;
  var recent = feRing.slice(-count);
  var rendered = [];
  for (var i = 0; i < recent.length; i++) {
    var item = recent[i];
    var block = '[' + item.kind + '] ' + item.text;
    if (item.stack) block += '\n' + item.stack;
    rendered.push(block);
  }
  return rendered.join('\n\n');
}
|
|
362
|
+
// Wrap console.error and console.warn (once each) so every call is also recorded via pushFe
// before being forwarded to the original method. A method already carrying the __blobyPatched
// flag, or one that is not a function, is left alone.
['error', 'warn'].forEach(function (level) {
  var orig = console[level];
  if (typeof orig !== 'function' || orig.__blobyPatched) return;

  // Text form of one console argument: Errors as message + stack, other objects as JSON,
  // everything else via String().
  function describeArg(a) {
    if (a instanceof Error) return a.message + (a.stack ? '\n' + a.stack : '');
    if (a && typeof a === 'object') return safeStringify(a);
    return String(a);
  }

  function patched() {
    try {
      var parts = Array.prototype.map.call(arguments, function (a) { return describeArg(a); });
      pushFe('console.' + level, parts.join(' '));
    } catch (e) {}
    // Capture must never interfere with the real console output.
    return orig.apply(console, arguments);
  }

  patched.__blobyPatched = true;
  console[level] = patched;
});
|
|
381
|
+
|
|
382
|
+
// Send any buffered entries before the page goes away. An error often comes right before a
// reload, so waiting for the debounce timer could lose it. Two triggers are used: the
// `pagehide` event, and the document becoming hidden.
window.addEventListener('pagehide', flushFe);
document.addEventListener('visibilitychange', function () {
  if (document.visibilityState !== 'hidden') return;
  flushFe();
});
|
|
388
|
+
|
|
324
389
|
var overlay = null;
|
|
325
390
|
var dismissed = false;
|
|
326
391
|
var lastErr = '';
|
|
@@ -359,7 +424,11 @@
|
|
|
359
424
|
|
|
360
425
|
var copyBtn = d.querySelector('#__bloby_fe_copy');
|
|
361
426
|
copyBtn.addEventListener('click', function () {
|
|
362
|
-
|
|
427
|
+
// Re-scrape Vite's overlay at click time (it may have populated since the overlay was built),
|
|
428
|
+
// then fall back to the captured ring — so runtime errors / black screens (no Vite overlay)
|
|
429
|
+
// still copy real text instead of "(no details captured)".
|
|
430
|
+
var detail = readViteError() || lastErr || feTail(6);
|
|
431
|
+
var text = 'A screen in my app has a frontend error. Find and fix the root cause. Error:\n\n' + (detail || '(no error text was captured — read the frontend logs to investigate)');
|
|
363
432
|
function ok() { copyBtn.textContent = '✓ Copied — paste it to your agent'; setTimeout(function () { copyBtn.textContent = 'Copy error for your agent'; }, 2600); }
|
|
364
433
|
function fb() { var ta = document.createElement('textarea'); ta.value = text; ta.style.position = 'fixed'; ta.style.opacity = '0'; document.body.appendChild(ta); ta.select(); try { document.execCommand('copy'); ok(); } catch (e) {} document.body.removeChild(ta); }
|
|
365
434
|
if (navigator.clipboard && navigator.clipboard.writeText) navigator.clipboard.writeText(text).then(ok).catch(fb); else fb();
|
|
@@ -392,7 +461,7 @@
|
|
|
392
461
|
function evaluate() {
|
|
393
462
|
if (appLooksBroken()) {
|
|
394
463
|
var err = readViteError();
|
|
395
|
-
if (err) lastErr = err;
|
|
464
|
+
if (err) { lastErr = err; pushFe('vite-overlay', err); }
|
|
396
465
|
if (!overlay && !dismissed) overlay = buildOverlay();
|
|
397
466
|
} else {
|
|
398
467
|
// App healthy (or recovered) — drop our overlay and re-arm for the next episode.
|
|
@@ -406,7 +475,19 @@
|
|
|
406
475
|
// recovery; window errors flip sawError so real load failures surface fast.
|
|
407
476
|
new MutationObserver(evaluate).observe(document.body, { childList: true });
|
|
408
477
|
setInterval(evaluate, 1500);
|
|
409
|
-
window.addEventListener('error', function () {
|
|
478
|
+
window.addEventListener('error', function (e) {
  // Record the error text and stack when the event carries either an error object or a
  // message. Events with neither (e.g. a failed resource load on an element) are not recorded,
  // but still mark the page as having seen an error.
  try {
    var hasDetail = e && (e.error || e.message);
    if (hasDetail) {
      var emsg = e.message || (e.error && e.error.message) || 'Error';
      if (e.filename) {
        emsg += ' @ ' + e.filename + ':' + (e.lineno || 0) + ':' + (e.colno || 0);
      }
      var stack = e.error && e.error.stack;
      pushFe('error', emsg, stack);
    }
  } catch (err) {}

  sawError = true;
  evaluate();
});
|
|
410
491
|
window.addEventListener('unhandledrejection', function (e) {
|
|
411
492
|
// The reconnect-reload suppressor (job 5) throws a marker error inside Vite's HMR
|
|
412
493
|
// dispatch; it surfaces here on every tunnel blip. Not an app error — swallow it.
|
|
@@ -414,6 +495,11 @@
|
|
|
414
495
|
var msg = e && e.reason && e.reason.message;
|
|
415
496
|
if (msg && String(msg).indexOf(VITE_SUPPRESS_MARK) !== -1) { e.preventDefault(); return; }
|
|
416
497
|
} catch (err) {}
|
|
498
|
+
try {
|
|
499
|
+
var reason = e && e.reason;
|
|
500
|
+
var rtext = reason && (reason.message || reason);
|
|
501
|
+
pushFe('unhandledrejection', rtext != null ? String(rtext) : 'Unhandled promise rejection', reason && reason.stack);
|
|
502
|
+
} catch (err2) {}
|
|
417
503
|
sawError = true; evaluate();
|
|
418
504
|
});
|
|
419
505
|
evaluate();
|
|
@@ -167,17 +167,16 @@ Notify your human only if importance is 7+ — otherwise log results silently.
|
|
|
167
167
|
|
|
168
168
|
**Check version:** `cat ~/.bloby/VERSION` (current) vs `npm view bloby-bot version` (latest).
|
|
169
169
|
|
|
170
|
-
**To update:**
|
|
171
|
-
NOT die. Finish your turn normally.
|
|
170
|
+
**To update:** Run `curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update`. It returns `{"ok":true,"queued":true}` — that ack confirms the update is queued and WILL run after your turn ends. You will NOT die mid-turn. Finish your turn normally. Verify afterwards with `curl -s http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update-status`.
|
|
172
171
|
|
|
173
172
|
**On PULSE:** Occasionally check for updates (not every pulse — once every few hours). If a new version exists:
|
|
174
173
|
1. Read release notes: `npm view bloby-bot releaseNotes --json`
|
|
175
|
-
2. `
|
|
174
|
+
2. `curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update`
|
|
176
175
|
3. Save to daily notes: "Updated from vX to vY" + notable changes to talk with your human later "Btw I updated myself this night"
|
|
177
176
|
|
|
178
177
|
**When user asks to update:**
|
|
179
178
|
1. Check version, if already latest say so
|
|
180
|
-
2. Otherwise read release notes, `
|
|
179
|
+
2. Otherwise read release notes, then `curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update`
|
|
181
180
|
3. Tell your human what is new and that the page will be unresponsive for up to 2 minutes while this happens.
|
|
182
181
|
|
|
183
182
|
## Task Files — `tasks/`
|
|
@@ -584,7 +583,8 @@ When the dashboard shows a black screen or Vite logs an error, READ the error be
|
|
|
584
583
|
- **`Error: ENOENT: no such file or directory, open '.../node_modules/<pkg>/...'`** → The package is **physically missing on disk**. The fix is `npm install <pkg>` — full stop. Clearing `.vite/deps/`, touching files, adding dummy deps, or restarting will NOT recreate the missing file.
|
|
585
584
|
- **`Failed to resolve import "<pkg>"`** with no path in the error → Same diagnosis: package isn't installed. Run `npm install <pkg>`.
|
|
586
585
|
- **Pre-bundling / optimizer errors that don't reference a missing path, OR errors that persist after dependency changes** → Vite's dep cache is stale. Clear it: `rm -rf workspace/node_modules/.vite` and reload. (This is the ONLY case where clearing the cache helps.)
|
|
587
|
-
- **Backend crash loop** →
|
|
586
|
+
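- **Backend crash loop** → `curl -s "http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/logs/backend?lines=200"` (race-free; append `&prev=1` inside the quotes for the last crashed run). Keep the URL quoted — an unquoted `?` or `&` is interpreted by the shell instead of being sent. Don't guess.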
|
|
587
|
+
- **Black screen / frontend runtime error with no obvious Vite message** → `curl -s "http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/logs/frontend?lines=100"` (keep the URL quoted) — captured window errors, unhandled rejections, console.error/warn, and Vite overlay text (the same data behind the "Copy error for your agent" button).
|
|
588
588
|
|
|
589
589
|
If you've tried a fix and the same error recurs, do NOT try a variation of the same fix. Re-diagnose from the error message, or stop and ask your human to restart bloby. See "Stop looping" below.
|
|
590
590
|
|
|
@@ -595,12 +595,17 @@ The supervisor manages the backend process. You don't need to manage it yourself
|
|
|
595
595
|
**Auto-restart triggers (you don't need to do anything):**
|
|
596
596
|
- Editing `.ts`, `.js`, or `.json` files in `backend/` → auto-restart
|
|
597
597
|
- Editing `.env` → auto-restart with the new values
|
|
598
|
-
- Creating a `.restart` file → force restart: `touch .restart` (file is auto-deleted)
|
|
599
598
|
- After your turn ends, if you used Write or Edit tools → auto-restart
|
|
600
599
|
|
|
601
|
-
**During your turn:**
|
|
600
|
+
**During your turn (batched, atomic):** By default the backend does NOT auto-restart mid-turn — your edits are batched and applied together when the turn ends, so a multi-file change is never served half-written.
|
|
602
601
|
|
|
603
|
-
**
|
|
602
|
+
**Restart and verify WITHIN your turn (preferred when you need to test a fix):** After your backend edits are fully saved, run:
|
|
603
|
+
```
|
|
604
|
+
curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/restart-backend -d '{"wait":true}'
|
|
605
|
+
```
|
|
606
|
+
It restarts the backend and BLOCKS until the port is healthy, then returns `{"ok":true,"healthy":true,"listening":true,"gaveUp":false,"logs":"..."}`. Now curl your own backend (e.g. `curl -s http://127.0.0.1:${SUPERVISOR_PORT:-7400}/app/api/...`) to confirm the fix — all in this turn. If `healthy:false` or `gaveUp:true`, read the returned `logs` and fix the code. Only restart AFTER your edits are saved.
|
|
607
|
+
|
|
608
|
+
**If the backend crashes:** It auto-restarts up to 3 times, then gives up. To see the error, run `curl -s "http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/logs/backend?lines=200"` (race-free — even right after a bounce; append `&prev=1` inside the quotes to read the last *crashed* run if the current tail looks empty). Always quote the URL: an unquoted `&` would background the command and drop the parameter. Then fix the code.
|
|
604
609
|
|
|
605
610
|
**NEVER do these:**
|
|
606
611
|
- Never `kill` processes or run `pkill`/`killall` — you don't manage the supervisor or its children
|
|
@@ -167,17 +167,16 @@ Notify your human only if importance is 7+ — otherwise log results silently.
|
|
|
167
167
|
|
|
168
168
|
**Check version:** `cat ~/.bloby/VERSION` (current) vs `npm view bloby-bot version` (latest).
|
|
169
169
|
|
|
170
|
-
**To update:**
|
|
171
|
-
NOT die. Finish your turn normally.
|
|
170
|
+
**To update:** Run `curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update`. It returns `{"ok":true,"queued":true}` — that ack confirms the update is queued and WILL run after your turn ends. You will NOT die mid-turn. Finish your turn normally. Verify afterwards with `curl -s http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update-status`.
|
|
172
171
|
|
|
173
172
|
**On PULSE:** Occasionally check for updates (not every pulse — once every few hours). If a new version exists:
|
|
174
173
|
1. Read release notes: `npm view bloby-bot releaseNotes --json`
|
|
175
|
-
2. `
|
|
174
|
+
2. `curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update`
|
|
176
175
|
3. Save to daily notes: "Updated from vX to vY" plus any notable changes, so you can mention it to your human later (e.g. "Btw, I updated myself last night").
|
|
177
176
|
|
|
178
177
|
**When user asks to update:**
|
|
179
178
|
1. Check version, if already latest say so
|
|
180
|
-
2. Otherwise read release notes, `
|
|
179
|
+
2. Otherwise read release notes, then `curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update`
|
|
181
180
|
3. Tell your human what is new and that the page will be unresponsive for up to 2 minutes while this happens.
|
|
182
181
|
|
|
183
182
|
## Task Files — `tasks/`
|
|
@@ -584,7 +583,8 @@ When the dashboard shows a black screen or Vite logs an error, READ the error be
|
|
|
584
583
|
- **`Error: ENOENT: no such file or directory, open '.../node_modules/<pkg>/...'`** → The package is **physically missing on disk**. The fix is `npm install <pkg>` — full stop. Clearing `.vite/deps/`, touching files, adding dummy deps, or restarting will NOT recreate the missing file.
|
|
585
584
|
- **`Failed to resolve import "<pkg>"`** with no path in the error → Same diagnosis: package isn't installed. Run `npm install <pkg>`.
|
|
586
585
|
- **Pre-bundling / optimizer errors that don't reference a missing path, OR errors that persist after dependency changes** → Vite's dep cache is stale. Clear it: `rm -rf workspace/node_modules/.vite` and reload. (This is the ONLY case where clearing the cache helps.)
|
|
587
|
-
- **Backend crash loop** →
|
|
586
|
+
- **Backend crash loop** → `curl -s "http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/logs/backend?lines=200"` (race-free; append `&prev=1` inside the quotes for the last crashed run — an unquoted `&` would background the command). Don't guess.
|
|
587
|
+
- **Black screen / frontend runtime error with no obvious Vite message** → `curl -s "http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/logs/frontend?lines=100"` — captured window errors, unhandled rejections, console.error/warn, and Vite overlay text (the same data behind the "Copy error for your agent" button).
|
|
588
588
|
|
|
589
589
|
If you've tried a fix and the same error recurs, do NOT try a variation of the same fix. Re-diagnose from the error message, or stop and ask your human to restart bloby. See "Stop looping" below.
|
|
590
590
|
|
|
@@ -595,12 +595,17 @@ The supervisor manages the backend process. You don't need to manage it yourself
|
|
|
595
595
|
**Auto-restart triggers (you don't need to do anything):**
|
|
596
596
|
- Editing `.ts`, `.js`, or `.json` files in `backend/` → auto-restart
|
|
597
597
|
- Editing `.env` → auto-restart with the new values
|
|
598
|
-
- Creating a `.restart` file → force restart: `touch .restart` (file is auto-deleted)
|
|
599
598
|
- After your turn ends, if you used Write or Edit tools → auto-restart
|
|
600
599
|
|
|
601
|
-
**During your turn:**
|
|
600
|
+
**During your turn (batched, atomic):** By default the backend does NOT auto-restart mid-turn — your edits are batched and applied together when the turn ends, so a multi-file change is never served half-written.
|
|
602
601
|
|
|
603
|
-
**
|
|
602
|
+
**Restart and verify WITHIN your turn (preferred when you need to test a fix):** After your backend edits are fully saved, run:
|
|
603
|
+
```
|
|
604
|
+
curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/restart-backend -d '{"wait":true}'
|
|
605
|
+
```
|
|
606
|
+
It restarts the backend and BLOCKS until the port is healthy, then returns `{"ok":true,"healthy":true,"listening":true,"gaveUp":false,"logs":"..."}`. Now curl your own backend (e.g. `curl -s http://127.0.0.1:${SUPERVISOR_PORT:-7400}/app/api/...`) to confirm the fix — all in this turn. If `healthy:false` or `gaveUp:true`, read the returned `logs` and fix the code. Only restart AFTER your edits are saved.
|
|
607
|
+
|
|
608
|
+
**If the backend crashes:** It auto-restarts up to 3 times, then gives up. To see the error, run `curl -s "http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/logs/backend?lines=200"` (race-free — even right after a bounce; append `&prev=1` inside the quotes to read the last *crashed* run if the current tail looks empty). Keep the URL quoted so the shell does not interpret `?` or `&`. Then fix the code.
|
|
604
609
|
|
|
605
610
|
**NEVER do these:**
|
|
606
611
|
- Never `kill` processes or run `pkill`/`killall` — you don't manage the supervisor or its children
|
|
@@ -167,17 +167,16 @@ Notify your human only if importance is 7+ — otherwise log results silently.
|
|
|
167
167
|
|
|
168
168
|
**Check version:** `cat ~/.bloby/VERSION` (current) vs `npm view bloby-bot version` (latest).
|
|
169
169
|
|
|
170
|
-
**To update:**
|
|
171
|
-
NOT die. Finish your turn normally.
|
|
170
|
+
**To update:** Run `curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update`. It returns `{"ok":true,"queued":true}` — that ack confirms the update is queued and WILL run after your turn ends. You will NOT die mid-turn. Finish your turn normally. Verify afterwards with `curl -s http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update-status`.
|
|
172
171
|
|
|
173
172
|
**On PULSE:** Occasionally check for updates (not every pulse — once every few hours). If a new version exists:
|
|
174
173
|
1. Read release notes: `npm view bloby-bot releaseNotes --json`
|
|
175
|
-
2. `
|
|
174
|
+
2. `curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update`
|
|
176
175
|
3. Save to daily notes: "Updated from vX to vY" plus any notable changes, so you can mention it to your human later (e.g. "Btw, I updated myself last night").
|
|
177
176
|
|
|
178
177
|
**When user asks to update:**
|
|
179
178
|
1. Check version, if already latest say so
|
|
180
|
-
2. Otherwise read release notes, `
|
|
179
|
+
2. Otherwise read release notes, then `curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/update`
|
|
181
180
|
3. Tell your human what is new and that the page will be unresponsive for up to 2 minutes while this happens.
|
|
182
181
|
|
|
183
182
|
## Task Files — `tasks/`
|
|
@@ -584,7 +583,8 @@ When the dashboard shows a black screen or Vite logs an error, READ the error be
|
|
|
584
583
|
- **`Error: ENOENT: no such file or directory, open '.../node_modules/<pkg>/...'`** → The package is **physically missing on disk**. The fix is `npm install <pkg>` — full stop. Clearing `.vite/deps/`, touching files, adding dummy deps, or restarting will NOT recreate the missing file.
|
|
585
584
|
- **`Failed to resolve import "<pkg>"`** with no path in the error → Same diagnosis: package isn't installed. Run `npm install <pkg>`.
|
|
586
585
|
- **Pre-bundling / optimizer errors that don't reference a missing path, OR errors that persist after dependency changes** → Vite's dep cache is stale. Clear it: `rm -rf workspace/node_modules/.vite` and reload. (This is the ONLY case where clearing the cache helps.)
|
|
587
|
-
- **Backend crash loop** →
|
|
586
|
+
- **Backend crash loop** → `curl -s "http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/logs/backend?lines=200"` (race-free; append `&prev=1` inside the quotes for the last crashed run — an unquoted `&` would background the command). Don't guess.
|
|
587
|
+
- **Black screen / frontend runtime error with no obvious Vite message** → `curl -s "http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/logs/frontend?lines=100"` — captured window errors, unhandled rejections, console.error/warn, and Vite overlay text (the same data behind the "Copy error for your agent" button).
|
|
588
588
|
|
|
589
589
|
If you've tried a fix and the same error recurs, do NOT try a variation of the same fix. Re-diagnose from the error message, or stop and ask your human to restart bloby. See "Stop looping" below.
|
|
590
590
|
|
|
@@ -595,12 +595,17 @@ The supervisor manages the backend process. You don't need to manage it yourself
|
|
|
595
595
|
**Auto-restart triggers (you don't need to do anything):**
|
|
596
596
|
- Editing `.ts`, `.js`, or `.json` files in `backend/` → auto-restart
|
|
597
597
|
- Editing `.env` → auto-restart with the new values
|
|
598
|
-
- Creating a `.restart` file → force restart: `touch .restart` (file is auto-deleted)
|
|
599
598
|
- After your turn ends, if you used Write or Edit tools → auto-restart
|
|
600
599
|
|
|
601
|
-
**During your turn:**
|
|
600
|
+
**During your turn (batched, atomic):** By default the backend does NOT auto-restart mid-turn — your edits are batched and applied together when the turn ends, so a multi-file change is never served half-written.
|
|
602
601
|
|
|
603
|
-
**
|
|
602
|
+
**Restart and verify WITHIN your turn (preferred when you need to test a fix):** After your backend edits are fully saved, run:
|
|
603
|
+
```
|
|
604
|
+
curl -s -X POST http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/restart-backend -d '{"wait":true}'
|
|
605
|
+
```
|
|
606
|
+
It restarts the backend and BLOCKS until the port is healthy, then returns `{"ok":true,"healthy":true,"listening":true,"gaveUp":false,"logs":"..."}`. Now curl your own backend (e.g. `curl -s http://127.0.0.1:${SUPERVISOR_PORT:-7400}/app/api/...`) to confirm the fix — all in this turn. If `healthy:false` or `gaveUp:true`, read the returned `logs` and fix the code. Only restart AFTER your edits are saved.
|
|
607
|
+
|
|
608
|
+
**If the backend crashes:** It auto-restarts up to 3 times, then gives up. To see the error, run `curl -s "http://127.0.0.1:${SUPERVISOR_PORT:-7400}/__bloby/control/logs/backend?lines=200"` (race-free — even right after a bounce; append `&prev=1` inside the quotes to read the last *crashed* run if the current tail looks empty). Keep the URL quoted so the shell does not interpret `?` or `&`. Then fix the code.
|
|
604
609
|
|
|
605
610
|
**NEVER do these:**
|
|
606
611
|
- Never `kill` processes or run `pkill`/`killall` — you don't manage the supervisor or its children
|