@venturewild/workspace 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@venturewild/workspace",
3
- "version": "0.2.3",
3
+ "version": "0.3.0",
4
4
  "description": "Claude Code Web — Replit/Lovable-style chat-first browser UI that wraps the AI agent already installed on your machine.",
5
5
  "license": "MIT",
6
6
  "bin": {
@@ -42,10 +42,10 @@
42
42
  },
43
43
  "optionalDependencies": {
44
44
  "@homebridge/node-pty-prebuilt-multiarch": "0.13.1",
45
- "@venturewild/workspace-daemon-darwin-arm64": "0.1.2",
46
- "@venturewild/workspace-daemon-darwin-x64": "0.1.2",
47
- "@venturewild/workspace-daemon-linux-x64": "0.1.2",
48
- "@venturewild/workspace-daemon-win32-x64": "0.1.2"
45
+ "@venturewild/workspace-daemon-darwin-arm64": "0.1.3",
46
+ "@venturewild/workspace-daemon-darwin-x64": "0.1.3",
47
+ "@venturewild/workspace-daemon-linux-x64": "0.1.3",
48
+ "@venturewild/workspace-daemon-win32-x64": "0.1.3"
49
49
  },
50
50
  "devDependencies": {
51
51
  "@testing-library/jest-dom": "^6.9.1",
@@ -23,6 +23,9 @@ import { appendLine, listLogs, tailFile } from '../src/logpaths.mjs';
23
23
  import { runDoctor, renderDoctor, writeDoctorBundle } from '../src/doctor.mjs';
24
24
  import { enableOperator, disableOperator, operatorStatus } from '../src/operator.mjs';
25
25
  import { loadObservabilityConsent, setObservabilityConsent } from '../src/observability.mjs';
26
+ import {
27
+ grantConsent, revokeConsent, consentStatus, readAudit, MAX_TIER, MAX_GRANT_MINUTES, OPERATE_TIER,
28
+ } from '../src/support-consent.mjs';
26
29
  import {
27
30
  AutoUpdater, PACKAGE_NAME, npmInstall, recordUpdate,
28
31
  loadUpdateSettings, setUpdateEnabled, setUpdateChannel,
@@ -59,6 +62,10 @@ Usage:
59
62
  wild-workspace operator enable let the wild-workspace team help with your install (mints a token)
60
63
  wild-workspace operator disable revoke the support token
61
64
  wild-workspace operator status is the support channel on?
65
+ wild-workspace support allow --tier 1 --minutes 60 allow time-boxed support (works even when offline)
66
+ wild-workspace support revoke revoke support access now
67
+ wild-workspace support status is support access on, and for how long?
68
+ wild-workspace support audit show what support has done (the audit feed)
62
69
  wild-workspace observability [on|off|status] share session + install health so we can help (default on; never chat content)
63
70
  wild-workspace update [apply] check for / install a newer version (auto by default)
64
71
  wild-workspace update on|off toggle background auto-update
@@ -98,6 +105,8 @@ function parseArgs(argv) {
98
105
  else if (arg === '--yes' || arg === '-y') { opts.yes = true; }
99
106
  else if (arg === '--kind') { opts.kind = argv[++i]; }
100
107
  else if (arg === '--limit') { opts.limit = Number(argv[++i]); }
108
+ else if (arg === '--tier') { opts.tier = Number(argv[++i]); }
109
+ else if (arg === '--minutes') { opts.minutes = Number(argv[++i]); }
101
110
  else if (arg.startsWith('--')) {
102
111
  // ignore unknown flags
103
112
  } else {
@@ -667,6 +676,91 @@ async function runUpdateCommand(opts) {
667
676
  }
668
677
  }
669
678
 
679
+ // `wild-workspace support [allow|revoke|status|audit]` — Phase 3 consent for the
680
+ // daemon-hosted, out-of-band support channel (reachable even when your workspace
681
+ // is offline). OFF by default + time-boxed; the bmo-sync daemon enforces it
682
+ // locally and revoke is instant (this writes/deletes ~/.wild-workspace/
683
+ // support-consent.json, which the daemon re-reads). Runs as its own process so it
684
+ // works when the server is down.
685
+ function fmtRemaining(ms) {
686
+ const m = Math.round(ms / 60000);
687
+ if (m < 60) return `${m} min`;
688
+ return `${Math.floor(m / 60)}h ${m % 60}m`;
689
+ }
690
+ async function runSupportCommand(action = 'status', opts = {}) {
691
+ const gdir = globalDir();
692
+
693
+ if (action === 'allow') {
694
+ const tier = opts.tier || 1;
695
+ const minutes = opts.minutes || 60;
696
+ if (tier < 1 || tier > MAX_TIER) {
697
+ console.error(
698
+ `tier must be 1..${MAX_TIER} (1 = read-only diagnostics/logs, 2 = curated fixes, ` +
699
+ `3 = let support operate your agent, 4 = raw shell).`,
700
+ );
701
+ process.exitCode = 1;
702
+ return;
703
+ }
704
+ // Tiers 3–4 let support OPERATE the machine (drive the agent / run commands).
705
+ // Require an explicit --yes so it's never a slip of the keyboard.
706
+ if (tier >= OPERATE_TIER && !opts.yes) {
707
+ console.error(
708
+ `tier ${tier} lets VentureWild support ${tier >= 4 ? 'run shell commands on' : 'operate the agent on'} ` +
709
+ `your machine (time-boxed, audited, revocable). Re-run with --yes to confirm:\n` +
710
+ ` wild-workspace support allow --tier ${tier} --minutes ${minutes} --yes`,
711
+ );
712
+ process.exitCode = 1;
713
+ return;
714
+ }
715
+ const rec = grantConsent(gdir, { tier, minutes });
716
+ if (!rec) {
717
+ console.error(`Could not write support consent to ${gdir}.`);
718
+ process.exitCode = 1;
719
+ return;
720
+ }
721
+ const tierDesc =
722
+ rec.tier >= 4
723
+ ? 'read diagnostics/logs + curated fixes + operate your agent + run raw shell commands'
724
+ : rec.tier >= 3
725
+ ? 'read diagnostics/logs + curated fixes + operate your agent on a task'
726
+ : rec.tier >= 2
727
+ ? 'read diagnostics/logs + run curated fixes (restart sync, relink, reinstall)'
728
+ : 'read diagnostics + logs only';
729
+ console.log(`✓ VentureWild support allowed at tier ${rec.tier} for ${fmtRemaining(rec.expiresAt - rec.grantedAt)} (max ${MAX_GRANT_MINUTES / 60}h).`);
730
+ console.log(` tier ${rec.tier} = ${tierDesc}.`);
731
+ console.log(' It auto-expires; revoke anytime with: wild-workspace support revoke');
732
+ return;
733
+ }
734
+ if (action === 'revoke') {
735
+ const removed = revokeConsent(gdir);
736
+ console.log(removed ? '✓ support access revoked.' : 'support access was not enabled.');
737
+ return;
738
+ }
739
+ if (action === 'audit') {
740
+ const entries = readAudit(gdir, { limit: opts.limit || 20 });
741
+ if (!entries.length) {
742
+ console.log('no support actions recorded yet.');
743
+ return;
744
+ }
745
+ console.log('recent support actions (newest first):');
746
+ for (const e of entries) {
747
+ const when = e.ts ? new Date(e.ts).toISOString() : '?';
748
+ console.log(` ${when} ${e.ok ? '✓' : '✗'} ${e.action} (tier ${e.tier ?? '?'})`);
749
+ }
750
+ return;
751
+ }
752
+ // status (default)
753
+ const s = consentStatus(gdir);
754
+ if (s.enabled) {
755
+ console.log(`support access: ON (tier ${s.tier}) — ${fmtRemaining(s.remainingMs)} remaining.`);
756
+ console.log(' revoke : wild-workspace support revoke');
757
+ } else {
758
+ console.log('support access: OFF (default).');
759
+ console.log(' allow : wild-workspace support allow --tier 1 --minutes 60');
760
+ }
761
+ console.log(` file : ${s.file}`);
762
+ }
763
+
670
764
  // `wild-workspace operator [enable|disable|status]` — the consented support
671
765
  // channel (docs/SECURITY.md). OFF by default; `enable` mints a token to hand to
672
766
  // the wild-workspace team so they can diagnose + run a fixed set of safe fixes.
@@ -805,6 +899,9 @@ async function main() {
805
899
  if (opts.positional[0] === 'update') {
806
900
  return runUpdateCommand(opts);
807
901
  }
902
+ if (opts.positional[0] === 'support') {
903
+ return runSupportCommand(opts.positional[1], opts);
904
+ }
808
905
  if (opts.positional[0] === 'operator') {
809
906
  return runOperatorCommand(opts.positional[1], opts);
810
907
  }
@@ -54,7 +54,13 @@ function readSecretsFile(file) {
54
54
  try {
55
55
  const parsed = JSON.parse(fs.readFileSync(file, 'utf8'));
56
56
  if (parsed && parsed.partnerToken && parsed.shareSecret) {
57
- return { partnerToken: parsed.partnerToken, shareSecret: parsed.shareSecret };
57
+ return {
58
+ partnerToken: parsed.partnerToken,
59
+ shareSecret: parsed.shareSecret,
60
+ // Phase 4: the loopback support-channel secret (may be absent on installs
61
+ // created before Phase 4 — loadOrCreateSecrets backfills it).
62
+ supportChannelSecret: parsed.supportChannelSecret,
63
+ };
58
64
  }
59
65
  } catch {
60
66
  // missing / unreadable / malformed
@@ -68,7 +74,22 @@ function loadOrCreateSecrets(dataDir, env = process.env) {
68
74
 
69
75
  // 1. The stable per-install location wins.
70
76
  const stable = readSecretsFile(stablePath);
71
- if (stable) return stable;
77
+ if (stable) {
78
+ // Backfill the Phase-4 support-channel secret on installs created earlier,
79
+ // WITHOUT rotating the login-signing tokens (which would log everyone out).
80
+ // Re-read + rewrite the raw file so any other fields are preserved.
81
+ if (!stable.supportChannelSecret) {
82
+ stable.supportChannelSecret = crypto.randomBytes(32).toString('base64url');
83
+ try {
84
+ const raw = JSON.parse(fs.readFileSync(stablePath, 'utf8'));
85
+ raw.supportChannelSecret = stable.supportChannelSecret;
86
+ fs.writeFileSync(stablePath, JSON.stringify(raw, null, 2), { mode: 0o600 });
87
+ } catch {
88
+ // can't persist (read-only fs?) — still usable for this run
89
+ }
90
+ }
91
+ return stable;
92
+ }
72
93
 
73
94
  // 2. Migrate a legacy in-workspace secrets file (preserve its tokens so
74
95
  // pre-existing login cookies keep validating); otherwise generate fresh.
@@ -77,6 +98,11 @@ function loadOrCreateSecrets(dataDir, env = process.env) {
77
98
  partnerToken: crypto.randomBytes(24).toString('base64url'),
78
99
  shareSecret: crypto.randomBytes(32).toString('base64url'),
79
100
  };
101
+ // The support-channel secret can be (re)generated freely — it's not used to
102
+ // sign anything durable, just to authenticate the local daemon → server hop.
103
+ if (!secrets.supportChannelSecret) {
104
+ secrets.supportChannelSecret = crypto.randomBytes(32).toString('base64url');
105
+ }
80
106
 
81
107
  // 3. Persist to the stable location.
82
108
  try {
@@ -305,6 +331,13 @@ export function buildConfig(overrides = {}) {
305
331
  overrides.shareSecret ||
306
332
  env.WILD_WORKSPACE_SHARE_SECRET ||
307
333
  secrets().shareSecret,
334
+ // Phase 4: authenticates the local bmo-sync daemon's loopback call to
335
+ // POST /api/support/agent-task (the daemon reads it from ~/.wild-workspace/
336
+ // secrets.json by the same convention). Never sent to the browser.
337
+ supportChannelSecret:
338
+ overrides.supportChannelSecret ||
339
+ env.WILD_WORKSPACE_SUPPORT_SECRET ||
340
+ secrets().supportChannelSecret,
308
341
  // The operator-channel token — null unless the user explicitly enabled the
309
342
  // channel (`wild-workspace operator enable`). Off by default. Server-side
310
343
  // only; never broadcast to the browser.
@@ -8,7 +8,7 @@ import { Hono } from 'hono';
8
8
  import { serveStatic } from '@hono/node-server/serve-static';
9
9
  import { serve } from '@hono/node-server';
10
10
  import { WebSocketServer } from 'ws';
11
- import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync } from 'node:fs';
11
+ import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync, unlinkSync } from 'node:fs';
12
12
  import path from 'node:path';
13
13
  import url from 'node:url';
14
14
  import {
@@ -36,6 +36,7 @@ import { ActivityBus } from './activity.mjs';
36
36
  import { loadIdentity, saveIdentity, markOnboarded, TONES } from './agent-identity.mjs';
37
37
  import { probeAgentReadiness } from './agent-readiness.mjs';
38
38
  import { AutoUpdater, npmInstall, recordUpdate, loadUpdateSettings, PACKAGE_NAME } from './auto-update.mjs';
39
+ import { consentStatus, revokeConsent, readAudit, OPERATE_TIER } from './support-consent.mjs';
39
40
  import { ClaudeLoginSession } from './agent-login.mjs';
40
41
  import { ErrorReporter } from './error-reporter.mjs';
41
42
  import { DaemonBridge } from './daemon.mjs';
@@ -1036,6 +1037,195 @@ export async function createServer(overrides = {}) {
1036
1037
  });
1037
1038
  });
1038
1039
 
1040
+ // Support consent + audit (Phase 3, Pillar E) — the user's view of the
1041
+ // out-of-band support channel: is support allowed (tier/expiry), was it acting
1042
+ // recently, and a feed of everything it did (the daemon writes the audit; this
1043
+ // is the read side the UI banner polls + the one-tap revoke). The daemon is the
1044
+ // enforcement gate; these are the owner-facing surfaces.
1045
+ const SUPPORT_ACTIVE_WINDOW_MS = 120_000; // a recent action = "support acting"
1046
+
1047
+ // Phase 4, Pillar E — the LIVE operate state for the agent-task channel: while
1048
+ // support is driving the agent, the owner must SEE it happening and be able to
1049
+ // stop it mid-flight (the screen-share promise). `agent-task` populates this; the
1050
+ // banner polls it via /api/support/status and can hit /operate/cancel.
1051
+ const OPERATE_LINE_CAP = 40;
1052
+ const OPERATE_LINGER_MS = 12_000; // keep the finished transcript briefly visible
1053
+ let supportOperate = null; // { taskId, startedAt, endedAt, done, lines, session }
1054
+ const operateLineFor = (chunk) => {
1055
+ if (!chunk || typeof chunk !== 'object') return null;
1056
+ if (chunk.type === 'text' && chunk.text) return chunk.text.trim().slice(0, 160);
1057
+ if (chunk.type === 'tool-use' && chunk.name) return `⚙ ${chunk.name}`;
1058
+ if (chunk.type === 'error' && chunk.message) return `⚠ ${String(chunk.message).slice(0, 160)}`;
1059
+ return null;
1060
+ };
1061
+
1062
+ app.get('/api/support/status', (c) => {
1063
+ const forbidden = require(c, 'chat');
1064
+ if (forbidden) return forbidden;
1065
+ const s = consentStatus(globalDir());
1066
+ const audit = readAudit(globalDir(), { limit: 1 });
1067
+ const last = audit[0] || null;
1068
+ const active = Boolean(last && Date.now() - (last.ts || 0) < SUPPORT_ACTIVE_WINDOW_MS);
1069
+ const operate = supportOperate
1070
+ ? {
1071
+ active: !supportOperate.done,
1072
+ taskId: supportOperate.taskId,
1073
+ startedAt: supportOperate.startedAt,
1074
+ lines: supportOperate.lines.slice(-12),
1075
+ }
1076
+ : null;
1077
+ return c.json({
1078
+ enabled: s.enabled,
1079
+ tier: s.tier,
1080
+ expiresAt: s.expiresAt,
1081
+ remainingMs: s.remainingMs,
1082
+ active,
1083
+ operate,
1084
+ lastAction: last ? { action: last.action, ts: last.ts, ok: last.ok } : null,
1085
+ });
1086
+ });
1087
+
1088
+ app.get('/api/support/audit', (c) => {
1089
+ const forbidden = require(c, 'chat');
1090
+ if (forbidden) return forbidden;
1091
+ const limit = Math.min(200, Math.max(1, Number(c.req.query('limit')) || 50));
1092
+ return c.json({ audit: readAudit(globalDir(), { limit }) });
1093
+ });
1094
+
1095
+ app.post('/api/support/revoke', (c) => {
1096
+ const forbidden = require(c, 'chatWrite');
1097
+ if (forbidden) return forbidden;
1098
+ const removed = revokeConsent(globalDir());
1099
+ appendLine('operator', `support consent revoked via UI removed=${removed}`);
1100
+ activityBus.publish({ type: 'support-revoked', at: Date.now() });
1101
+ return c.json({ revoked: removed });
1102
+ });
1103
+
1104
+ // Phase 4 (Pillar E): stop a running support agent-task mid-flight (the banner's
1105
+ // "Stop" button). Owner-gated like revoke. Kills the local AgentSession; the
1106
+ // daemon's streaming relay ends, and the operator's SSE closes.
1107
+ app.post('/api/support/operate/cancel', (c) => {
1108
+ const forbidden = require(c, 'chatWrite');
1109
+ if (forbidden) return forbidden;
1110
+ const had = Boolean(supportOperate && !supportOperate.done);
1111
+ try {
1112
+ supportOperate?.session?.cancel();
1113
+ } catch {
1114
+ /* best-effort */
1115
+ }
1116
+ if (had) appendLine('operator', `agent-task ${supportOperate?.taskId} stopped by owner`);
1117
+ return c.json({ cancelled: had });
1118
+ });
1119
+
1120
+ // Phase 4 (agent-mediated operation, server-up path). The bmo-sync daemon's
1121
+ // tier-3 `agent-task` action calls this over GENUINE LOOPBACK when :5173 is up,
1122
+ // so the support task runs with the user's real agent + workspace context
1123
+ // (the daemon-spawn fallback for when :5173 is down is PR 4.2). Triple-gated:
1124
+ // 1. loopback-only — a tunneled visitor 404s (never reachable publicly);
1125
+ // 2. the daemon-shared support-channel secret (config.supportChannelSecret);
1126
+ // 3. a LIVE tier-3 consent grant — defense in depth (the daemon already gates,
1127
+ // but we re-check so a direct hit can't operate un-consented).
1128
+ // Streams the agent's chunks back as NDJSON (one JSON object per line) for the
1129
+ // daemon to relay upward as `control_progress` frames.
1130
+ app.post('/api/support/agent-task', async (c) => {
1131
+ if (!isGenuineLoopback(c)) return c.json({ error: 'not_found' }, 404);
1132
+ const secret = c.req.header('x-support-secret');
1133
+ if (!config.supportChannelSecret || secret !== config.supportChannelSecret) {
1134
+ return c.json({ error: 'forbidden' }, 403);
1135
+ }
1136
+ const cs = consentStatus(globalDir());
1137
+ if (!cs.enabled || (cs.tier || 0) < OPERATE_TIER) {
1138
+ return c.json({ error: cs.enabled ? 'tier-insufficient' : 'no-consent' }, 403);
1139
+ }
1140
+ if (!activeAgent) return c.json({ error: 'no-agent' }, 503);
1141
+
1142
+ let body;
1143
+ try {
1144
+ body = await c.req.json();
1145
+ } catch {
1146
+ body = {};
1147
+ }
1148
+ const prompt = typeof body.prompt === 'string' ? body.prompt.trim() : '';
1149
+ if (!prompt) return c.json({ error: 'no-prompt' }, 400);
1150
+ const taskId = typeof body.taskId === 'string' ? body.taskId.slice(0, 64) : 'task';
1151
+ const mode = body.mode === 'plan' ? 'plan' : 'build';
1152
+
1153
+ appendLine('operator', `agent-task ${taskId} start mode=${mode} agent=${activeAgent.id}`);
1154
+ activityBus.publish({ type: 'support-agent-task', taskId, at: Date.now() });
1155
+
1156
+ const enc = new TextEncoder();
1157
+ const session = new AgentSession(activeAgent);
1158
+ // Pillar E: publish the LIVE operate state so the owner's banner shows
1159
+ // "support is operating your agent" + a transcript + a Stop button.
1160
+ supportOperate = { taskId, startedAt: Date.now(), endedAt: null, done: false, lines: [], session };
1161
+ const recordLine = (chunk) => {
1162
+ const line = operateLineFor(chunk);
1163
+ if (!line || !supportOperate) return;
1164
+ supportOperate.lines.push({ ts: Date.now(), text: line });
1165
+ if (supportOperate.lines.length > OPERATE_LINE_CAP) supportOperate.lines.shift();
1166
+ };
1167
+ const SUPPORT_SYS =
1168
+ "You are VentureWild support, operating this machine with the user's explicit, " +
1169
+ 'time-boxed consent. Work the given task end to end, narrate what you do in plain ' +
1170
+ 'language, and stop when it is resolved.';
1171
+
1172
+ const stream = new ReadableStream({
1173
+ start(controller) {
1174
+ let closed = false;
1175
+ const push = (obj) => {
1176
+ if (closed) return;
1177
+ try {
1178
+ controller.enqueue(enc.encode(JSON.stringify(obj) + '\n'));
1179
+ } catch {
1180
+ closed = true;
1181
+ }
1182
+ };
1183
+ session.on('chunk', (chunk) => {
1184
+ recordLine(chunk);
1185
+ push(chunk);
1186
+ });
1187
+ session.on('stderr', (text) => push({ type: 'stderr', text }));
1188
+ session.on('error', (err) => push({ type: 'error', message: String(err?.message || err) }));
1189
+ session.on('end', ({ code }) => {
1190
+ push({ type: 'end', code });
1191
+ appendLine('operator', `agent-task ${taskId} end code=${code}`);
1192
+ // Mark the operate session finished; keep the transcript briefly visible,
1193
+ // then clear it so the banner returns to the resting state.
1194
+ if (supportOperate && supportOperate.taskId === taskId) {
1195
+ supportOperate.done = true;
1196
+ supportOperate.endedAt = Date.now();
1197
+ const finished = supportOperate;
1198
+ setTimeout(() => {
1199
+ if (supportOperate === finished) supportOperate = null;
1200
+ }, OPERATE_LINGER_MS).unref?.();
1201
+ }
1202
+ if (!closed) {
1203
+ closed = true;
1204
+ try {
1205
+ controller.close();
1206
+ } catch {}
1207
+ }
1208
+ });
1209
+ session.send(prompt, {
1210
+ cwd: config.workspaceDir,
1211
+ mode,
1212
+ appendSystemPrompt: SUPPORT_SYS,
1213
+ });
1214
+ },
1215
+ cancel() {
1216
+ // Operator disconnected mid-task (the daemon dropped the relay) — kill it.
1217
+ try {
1218
+ session.cancel();
1219
+ } catch {}
1220
+ },
1221
+ });
1222
+
1223
+ return c.body(stream, 200, {
1224
+ 'Content-Type': 'application/x-ndjson',
1225
+ 'Cache-Control': 'no-cache',
1226
+ });
1227
+ });
1228
+
1039
1229
  // In-app "Sign in to Claude" — drives `claude auth login` in a real PTY so the
1040
1230
  // browser OAuth callback auto-completes and the user never touches a terminal.
1041
1231
  // (See agent-login.mjs.) Claude opens the OAuth URL in the user's browser itself
@@ -1809,6 +1999,22 @@ export async function createServer(overrides = {}) {
1809
1999
  httpServer.once('error', reject);
1810
2000
  });
1811
2001
 
2002
+ // Phase 3: record our pid so the bmo-sync daemon's out-of-band `restart-server`
2003
+ // support action can KILL us (the always-on supervisor then respawns) — even
2004
+ // when :5173 is unreachable from outside. {pid, startedAt, port} lets the killer
2005
+ // validate it's really us before signalling (see the daemon's restart-server).
2006
+ // Skipped under tests (many parallel servers would clobber the shared file).
2007
+ if (process.env.VITEST !== 'true' && config.nodeEnv !== 'test') {
2008
+ try {
2009
+ mkdirSync(globalDir(), { recursive: true });
2010
+ writeFileSync(
2011
+ path.join(globalDir(), 'server.pid'),
2012
+ JSON.stringify({ pid: process.pid, startedAt: Date.now(), port: config.port }),
2013
+ { mode: 0o600 },
2014
+ );
2015
+ } catch { /* best-effort — restart-server falls back to a health probe */ }
2016
+ }
2017
+
1812
2018
  // --- websocket bridge ---
1813
2019
  const wss = new WebSocketServer({ noServer: true });
1814
2020
  httpServer.on('upgrade', async (req, socket, head) => {
@@ -1971,6 +2177,15 @@ export async function createServer(overrides = {}) {
1971
2177
  try { inboxWatcher.stop(); } catch {}
1972
2178
  try { daemonBridge?.stop(); } catch {}
1973
2179
  try { tunnelWatchdog?.stop(); } catch {}
2180
+ // Drop our server.pid marker if it's still ours (best-effort; a crash
2181
+ // leaves a stale one, which restart-server validates pid-alive before using).
2182
+ if (process.env.VITEST !== 'true' && config.nodeEnv !== 'test') {
2183
+ try {
2184
+ const pf = path.join(globalDir(), 'server.pid');
2185
+ const rec = JSON.parse(readFileSync(pf, 'utf8'));
2186
+ if (rec?.pid === process.pid) unlinkSync(pf);
2187
+ } catch { /* gone or not ours */ }
2188
+ }
1974
2189
  // The daemon is deliberately NOT stopped here — it is detached so sync
1975
2190
  // keeps running after wild-workspace closes. `wild-workspace daemon
1976
2191
  // stop` is the explicit off-switch.
@@ -115,6 +115,16 @@ export class WorkspaceSupervisor {
115
115
  autoUpdate = env.WILD_WORKSPACE_NO_AUTOUPDATE !== '1',
116
116
  updatePollMs = 60 * 60 * 1000, // wake hourly; AutoUpdater gates real checks
117
117
  autoUpdaterFactory = null, // test seam: (supervisor) => AutoUpdater-like
118
+ // Phase 3 (Pillar A prerequisite): the always-on supervisor keeps the bmo-sync
119
+ // DAEMON alive too, independent of the workspace server. The daemon hosts the
120
+ // out-of-band support channel (reachable when :5173 is down), so it must not
121
+ // depend on the server being up. The server still ensureRunning()s the daemon
122
+ // at boot (idempotent); this is the keep-alive owner. On by default; kill switch
123
+ // WILD_WORKSPACE_NO_DAEMON_SUPERVISION=1. Only wired in start() (not the unit
124
+ // -test path, which calls daemonTick() directly with an injected factory).
125
+ superviseDaemon = env.WILD_WORKSPACE_NO_DAEMON_SUPERVISION !== '1',
126
+ daemonPollMs = 10000, // probe the daemon every 10s
127
+ daemonSupervisorFactory = null, // test seam: (supervisor) => DaemonSupervisor-like
118
128
  } = {}) {
119
129
  Object.assign(this, {
120
130
  serverEntry, workspaceDir, port, globalDir, node, pollMs,
@@ -122,12 +132,21 @@ export class WorkspaceSupervisor {
122
132
  crashLoopThreshold, diagnosticsImpl,
123
133
  autoRestartOnVersionDrift, versionImpl, installedVersionImpl,
124
134
  autoUpdate, updatePollMs, autoUpdaterFactory,
135
+ superviseDaemon, daemonPollMs, daemonSupervisorFactory,
125
136
  });
126
137
  this.autoUpdater = null;
127
138
  this.updateTimer = null;
139
+ this.daemonSupervisor = null;
140
+ this.daemonTimer = null;
141
+ this._daemonTicking = false;
128
142
  this.logFile = path.join(globalDir, 'supervisor.log');
129
143
  this.serverLogFile = path.join(globalDir, 'server.out.log');
130
144
  this.lockFile = path.join(globalDir, 'supervisor.lock');
145
+ // Phase 3.2: the bmo-sync daemon drops this file (a consented support
146
+ // `restart-server` action) for us to action — so a restart can be triggered
147
+ // out-of-band even when :5173 is wedged. We kill the child; the next tick
148
+ // respawns it from disk (new code loads). Safe: absent file = no-op.
149
+ this.restartRequestFile = path.join(globalDir, 'restart-request.json');
131
150
  this.child = null;
132
151
  this.backoff = backoffStartMs;
133
152
  this.lastSpawn = 0;
@@ -190,8 +209,30 @@ export class WorkspaceSupervisor {
190
209
  return this.child;
191
210
  }
192
211
 
212
+ /**
213
+ * Consume a pending support `restart-server` request (Phase 3.2). Returns true
214
+ * iff a request file was present (and removes it). Reading-then-deleting makes
215
+ * "present" mean "unhandled" — idempotent across ticks.
216
+ */
217
+ consumeRestartRequest() {
218
+ try {
219
+ fs.readFileSync(this.restartRequestFile); // throws if absent
220
+ } catch {
221
+ return false;
222
+ }
223
+ try { fs.unlinkSync(this.restartRequestFile); } catch { /* best-effort */ }
224
+ return true;
225
+ }
226
+
193
227
  /** One supervision step. Returns its decision (exposed for tests). */
194
228
  async tick() {
229
+ // Phase 3.2: a consented support restart request takes priority — kill the
230
+ // child so the next tick respawns it from disk (picks up any new code).
231
+ if (this.consumeRestartRequest()) {
232
+ this.log('restart-server requested (support channel) — restarting');
233
+ this.restartChild();
234
+ return 'restart-requested';
235
+ }
195
236
  if (await this.probeImpl(this.port, this.probeTimeoutMs)) {
196
237
  this.backoff = this.backoffStartMs; // healthy → reset backoff
197
238
  this.spawnCount = 0; // healthy → not a crash loop
@@ -322,6 +363,52 @@ export class WorkspaceSupervisor {
322
363
  .catch((e) => this.log(`auto-update error: ${e?.message || e}`));
323
364
  }
324
365
 
366
+ /**
367
+ * Build the DaemonSupervisor the always-on layer owns. Reads a FRESH config
368
+ * (not the stale module constant) so the account token / relay in effect when
369
+ * always-on starts are used. Lazy import keeps the unit-test path (which never
370
+ * calls start()) free of config + daemon-supervisor. Test seam: factory.
371
+ */
372
+ async buildDaemonSupervisor() {
373
+ if (this.daemonSupervisorFactory) return this.daemonSupervisorFactory(this);
374
+ const [{ buildConfig }, { DaemonSupervisor }] = await Promise.all([
375
+ import('./config.mjs'),
376
+ import('./daemon-supervisor.mjs'),
377
+ ]);
378
+ const config = buildConfig({ workspaceDir: this.workspaceDir, port: this.port });
379
+ return new DaemonSupervisor({
380
+ httpBase: config.daemonHttpUrl,
381
+ globalDir: this.globalDir,
382
+ accountToken: config.accountToken,
383
+ serverUrl: config.bmoSyncServerUrl,
384
+ });
385
+ }
386
+
387
+ /**
388
+ * One daemon-supervision step: if the daemon isn't answering /health, (re)start
389
+ * it. Deliberately INDEPENDENT of server health — the daemon (and its support
390
+ * channel) must stay up even when the server is crashed/mid-upgrade. Re-entrancy
391
+ * guarded so a slow spawn can't overlap the next tick. Never throws.
392
+ */
393
+ async daemonTick() {
394
+ if (!this.daemonSupervisor || this._daemonTicking) return 'skip';
395
+ this._daemonTicking = true;
396
+ try {
397
+ const h = await this.daemonSupervisor.health();
398
+ if (h && h.running) return 'healthy';
399
+ const r = await this.daemonSupervisor.ensureRunning();
400
+ if (r && r.started) { this.log(`daemon respawned (pid=${r.pid})`); return 'respawned'; }
401
+ if (r && r.alreadyRunning) return 'healthy';
402
+ this.log(`daemon down, respawn not started: ${r?.error || 'unknown'}`);
403
+ return 'failed';
404
+ } catch (e) {
405
+ this.log(`daemon-tick error: ${e?.message || e}`);
406
+ return 'error';
407
+ } finally {
408
+ this._daemonTicking = false;
409
+ }
410
+ }
411
+
325
412
  /** Acquire the lock and start the supervision loop. Idempotent across processes. */
326
413
  start() {
327
414
  if (!this.acquireLock()) return { started: false, reason: 'already-running' };
@@ -343,12 +430,24 @@ export class WorkspaceSupervisor {
343
430
  if (kick.unref) kick.unref();
344
431
  }).catch((e) => this.log(`auto-update init error: ${e?.message || e}`));
345
432
  }
433
+
434
+ // Phase 3: keep the bmo-sync daemon alive independent of the server, so the
435
+ // out-of-band support channel survives the server being down.
436
+ if (this.superviseDaemon && this.env.VITEST !== 'true' && this.env.NODE_ENV !== 'test') {
437
+ this.buildDaemonSupervisor().then((d) => {
438
+ this.daemonSupervisor = d;
439
+ this.daemonTimer = setInterval(() => { this.daemonTick().catch((e) => this.log(`daemon-tick error: ${e?.message || e}`)); }, this.daemonPollMs);
440
+ if (this.daemonTimer.unref) this.daemonTimer.unref();
441
+ this.daemonTick().catch(() => {}); // first probe now
442
+ }).catch((e) => this.log(`daemon supervision init error: ${e?.message || e}`));
443
+ }
346
444
  return { started: true };
347
445
  }
348
446
 
349
447
  stop() {
350
448
  if (this.timer) { clearInterval(this.timer); this.timer = null; }
351
449
  if (this.updateTimer) { clearInterval(this.updateTimer); this.updateTimer = null; }
450
+ if (this.daemonTimer) { clearInterval(this.daemonTimer); this.daemonTimer = null; }
352
451
  this.releaseLock();
353
452
  }
354
453
  }