@venturewild/workspace 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,7 +21,7 @@ import { resolveDaemonBinary } from './daemon-bin.mjs';
21
21
  import { checkPort } from './preview.mjs';
22
22
  import { loadAccount } from './account.mjs';
23
23
  import { serviceStatus } from './service.mjs';
24
- import { probeHealth } from './supervisor.mjs';
24
+ import { probeHealth, probeHealthVersion } from './supervisor.mjs';
25
25
  import { listLogs, diagnosticsDir } from './logpaths.mjs';
26
26
 
27
27
  const STATUS_ICON = { ok: '✅', warn: '⚠️', fail: '❌', info: 'ℹ️' };
@@ -36,6 +36,27 @@ function nodeMajor(version = process.version) {
36
36
  return m ? Number(m[1]) : 0;
37
37
  }
38
38
 
39
// RC3: probe the LIVE public tunnel end-to-end — out to Cloudflare, through the
// relay, down the daemon's tunnel, back to this server. This is the check the old
// `doctor` lacked: it only resolved the slug in the registry (claimed in the DB),
// which stays green even when `<slug>.venturewild.llc` is 502. A 200 here proves
// the whole chain works; a 5xx/timeout is the exact RC2 "linked but unreachable".
//
// Parameters:
//   slug       - account slug; URI-encoded into the `<slug>.venturewild.llc` host.
//   fetchImpl  - fetch-compatible function `(url, init) => Promise<Response-like>`.
//   timeoutMs  - budget for the whole probe (headers AND body read); default 8000.
// Resolves (never rejects) with either
//   { reachable: true, status, version, url }   — `version` is null when the body
//                                                  is not JSON or has no version
//   { reachable: false, error, url }            — `error` is a human-readable reason
async function probeTunnel(slug, fetchImpl, timeoutMs = 8000) {
  const url = `https://${encodeURIComponent(slug)}.venturewild.llc/api/health`;
  const ctrl = new AbortController();
  // Remember whether WE aborted, so the report can say "timed out" instead of
  // the runtime's generic "This operation was aborted".
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    ctrl.abort();
  }, timeoutMs);
  try {
    const res = await fetchImpl(url, { signal: ctrl.signal, headers: { 'cache-control': 'no-cache' } });
    let version = null;
    try { version = (await res.json())?.version || null; } catch { /* non-JSON */ }
    return { reachable: true, status: res.status, version, url };
  } catch (e) {
    if (timedOut) {
      return { reachable: false, error: `timed out after ${timeoutMs}ms`, url };
    }
    const message = String(e?.message || e);
    // Node's fetch rejects with a bare "fetch failed" and puts the real reason
    // (ENOTFOUND, ECONNREFUSED, …) on `cause`; surface it when present.
    const cause = e?.cause?.code || e?.cause?.message;
    return { reachable: false, error: cause ? `${message} (${cause})` : message, url };
  } finally {
    clearTimeout(timer);
  }
}
59
+
39
60
  // Reach the bmo-sync registry: resolve the user's slug if linked, else /health.
40
61
  async function probeRegistry(config, fetchImpl) {
41
62
  const base = String(config.bmoSyncServerUrl || '').replace(/\/$/, '');
@@ -71,6 +92,7 @@ export async function runDoctor(opts = {}, deps = {}) {
71
92
  serviceStatus: deps.serviceStatus || serviceStatus,
72
93
  listLogs: deps.listLogs || listLogs,
73
94
  fetchImpl: deps.fetchImpl || ((...a) => globalThis.fetch(...a)),
95
+ probeRunningVersion: deps.probeRunningVersion || probeHealthVersion,
74
96
  };
75
97
  const checks = [];
76
98
  const add = (c) => checks.push(c);
@@ -155,6 +177,26 @@ export async function runDoctor(opts = {}, deps = {}) {
155
177
  : { status: 'ok', detail: 'free', hint: null };
156
178
  });
157
179
 
180
+ // 5b. Running server version vs installed (RC3). `doctor` runs as the
181
+ // freshly-invoked CLI, so APP_VERSION here == the version installed on disk.
182
+ // If a server is answering :port with a DIFFERENT version, it's running stale
183
+ // code from before the last upgrade — the "kept running 0.1.14 after 0.2.1"
184
+ // failure. Surface it so the fix (restart) is obvious instead of invisible.
185
+ await guarded('runningVersion', 'Running version', async () => {
186
+ const running = await d.probeRunningVersion(config.port);
187
+ if (!running) {
188
+ return { status: 'info', detail: `no server answering :${config.port} (not started yet)`, hint: null };
189
+ }
190
+ if (running === APP_VERSION) {
191
+ return { status: 'ok', detail: `v${running} (matches installed)`, hint: null };
192
+ }
193
+ return {
194
+ status: 'warn',
195
+ detail: `running v${running}, but v${APP_VERSION} is installed`,
196
+ hint: 'A workspace server is running older code than what is installed. Restart it (close the app — always-on restarts it clean) to finish the upgrade.',
197
+ };
198
+ });
199
+
158
200
  // 6. Account linked (slug)
159
201
  let account = null;
160
202
  await guarded('account', 'Workspace account linked', async () => {
@@ -181,6 +223,38 @@ export async function runDoctor(opts = {}, deps = {}) {
181
223
  : { status: 'warn', detail: `server returned HTTP ${r.status}`, hint: null };
182
224
  });
183
225
 
226
+ // 7b. Public URL reachable end-to-end (RC3). Only meaningful once linked. This
227
+ // is the half the old doctor was blind to — the registry check above can be
228
+ // green (slug claimed) while this is red (tunnel down). Together they tell the
229
+ // two apart: claimed-but-unreachable ⟹ the daemon link is broken (RC2), the
230
+ // operator/auto-relink path is the fix.
231
+ await guarded('tunnel', 'Public URL reachable', async () => {
232
+ const slug = account?.slug || config.account?.slug || null;
233
+ if (!slug) {
234
+ return { status: 'info', detail: 'not linked — no public URL yet', hint: null };
235
+ }
236
+ const r = await probeTunnel(slug, d.fetchImpl);
237
+ if (!r.reachable) {
238
+ return {
239
+ status: 'fail',
240
+ detail: `${r.url} unreachable: ${r.error}`,
241
+ hint: 'The public link is down. Restart sync (`wild-workspace daemon stop` then `wild-workspace`), or the operator `relink-account` fix.',
242
+ };
243
+ }
244
+ if (r.status >= 500) {
245
+ return {
246
+ status: 'fail',
247
+ detail: `${r.url} returned HTTP ${r.status} (tunnel down — slug claimed but not linked)`,
248
+ hint: 'The daemon is not linked to the relay. Restart sync (`wild-workspace daemon stop` then `wild-workspace`).',
249
+ };
250
+ }
251
+ if (r.status >= 400) {
252
+ // 401/403/404 = the chain works; auth/slug is the nuance, not a tunnel fault.
253
+ return { status: 'warn', detail: `reachable but HTTP ${r.status} (auth/slug check)`, hint: null };
254
+ }
255
+ return { status: 'ok', detail: `live (HTTP ${r.status}${r.version ? `, v${r.version}` : ''})`, hint: null };
256
+ });
257
+
184
258
  // 8. Always-on / autostart
185
259
  await guarded('service', 'Always-on (autostart)', async () => {
186
260
  const s = await d.serviceStatus({ port: config.port }, { probeImpl: (p) => probeHealth(p) });
@@ -35,10 +35,12 @@ import { InboxWatcher } from './inbox.mjs';
35
35
  import { ActivityBus } from './activity.mjs';
36
36
  import { loadIdentity, saveIdentity, markOnboarded, TONES } from './agent-identity.mjs';
37
37
  import { probeAgentReadiness } from './agent-readiness.mjs';
38
+ import { AutoUpdater, npmInstall, recordUpdate, loadUpdateSettings, PACKAGE_NAME } from './auto-update.mjs';
38
39
  import { ClaudeLoginSession } from './agent-login.mjs';
39
40
  import { ErrorReporter } from './error-reporter.mjs';
40
41
  import { DaemonBridge } from './daemon.mjs';
41
42
  import { DaemonSupervisor } from './daemon-supervisor.mjs';
43
+ import { TunnelWatchdog } from './tunnel-watchdog.mjs';
42
44
  import { SyncControl } from './sync.mjs';
43
45
  import { detectPreviewPorts, checkPort } from './preview.mjs';
44
46
  import { createBazaar } from './bazaar/core.mjs';
@@ -47,6 +49,7 @@ import { matchCandidates } from './bazaar/mock-tickup.mjs';
47
49
  import { servePreviewFile, confineBuildDir } from './bazaar/preview-server.mjs';
48
50
  import { TURN_SYSTEM_PROMPT, writeTurnMcpConfig } from './turn-mcp.mjs';
49
51
  import { loadAccount } from './account.mjs';
52
+ import { getOperatorToken } from './operator.mjs';
50
53
  import { runDoctor } from './doctor.mjs';
51
54
  import { appendLine, tailFile, logFile, TAILABLE, globalDir } from './logpaths.mjs';
52
55
  import { SessionReporter } from './session-reporter.mjs';
@@ -172,6 +175,29 @@ export async function createServer(overrides = {}) {
172
175
  .catch((e) => ({ started: false, error: String(e?.message || e) }))
173
176
  : Promise.resolve({ started: false, skipped: true });
174
177
 
178
+ // RC2 tunnel self-heal: when this install is slug-linked (so it's SUPPOSED to be
179
+ // reachable at <slug>.venturewild.llc), watch the public URL end-to-end and
180
+ // relink the daemon if it goes dead while we're locally healthy. Off without a
181
+ // daemon supervisor, without a slug, or under tests. `overrides.tunnelWatchdog`
182
+ // is a test seam (false disables; an object injects options).
183
+ const relinkDaemon = async () => {
184
+ if (!daemonSupervisor) return;
185
+ await daemonSupervisor.stop().catch(() => {});
186
+ await daemonSupervisor.ensureRunning().catch(() => {});
187
+ };
188
+ const tunnelWatchdog =
189
+ overrides.tunnelWatchdog === false ||
190
+ !daemonSupervisor ||
191
+ !config.account?.slug ||
192
+ !config.daemonAutostart
193
+ ? null
194
+ : new TunnelWatchdog({
195
+ publicBaseUrl: `https://${config.account.slug}.venturewild.llc`,
196
+ relink: relinkDaemon,
197
+ log: (m) => log('[tunnel]', m),
198
+ ...(typeof overrides.tunnelWatchdog === 'object' ? overrides.tunnelWatchdog : {}),
199
+ }).start();
200
+
175
201
  // Control plane for bmo-sync folder sharing (pair / detach / invite).
176
202
  // `overrides.syncControl` is a test seam.
177
203
  const syncControl =
@@ -488,6 +514,14 @@ export async function createServer(overrides = {}) {
488
514
  const app = new Hono();
489
515
 
490
516
  // --- auth helpers ---------------------------------------------------------
517
+ // RC1 hot-reload: resolve the operator token LIVE per request. An explicit
518
+ // override/env token (tests, pinned deployments) stays authoritative; otherwise
519
+ // the token file is re-read (TTL-cached) so `operator enable`/`disable` take
520
+ // effect with no server restart — the literal 401 from the first external
521
+ // install. `overrides.operatorDataDir` is unused; the file lives in dataDir.
522
+ const liveOperatorToken = () =>
523
+ config.operatorTokenExplicit ?? getOperatorToken(config.dataDir);
524
+
491
525
  // Classify one raw token into a role. Shared by the Authorization header, the
492
526
  // HttpOnly auth cookie, and the `?t=` query so all three stay consistent.
493
527
  // `allowOperator` is true ONLY for the header path — the operator (support)
@@ -498,7 +532,8 @@ export async function createServer(overrides = {}) {
498
532
  if (token === config.partnerToken) {
499
533
  return { role: ROLES.PARTNER, sub: 'partner', source };
500
534
  }
501
- if (allowOperator && config.operatorToken && token === config.operatorToken) {
535
+ const opToken = allowOperator ? liveOperatorToken() : null;
536
+ if (opToken && token === opToken) {
502
537
  return { role: ROLES.OPERATOR, sub: 'operator', source: source || 'operator-token' };
503
538
  }
504
539
  const payload = await verifyShareToken(token, config.shareSecret);
@@ -985,6 +1020,22 @@ export async function createServer(overrides = {}) {
985
1020
  return c.json({ agent: agentTag(activeAgent), ...verdict });
986
1021
  });
987
1022
 
1023
+ // Auto-update status (Phase 2) — what's running, the channel, on/off, and the
1024
+ // last update outcome (the "updated to vX" note the UI can surface). Read-only;
1025
+ // the toggle/apply levers are the CLI + the operator channel.
1026
+ app.get('/api/update/status', (c) => {
1027
+ const forbidden = require(c, 'chat');
1028
+ if (forbidden) return forbidden;
1029
+ const s = loadUpdateSettings(globalDir());
1030
+ return c.json({
1031
+ current: APP_VERSION,
1032
+ enabled: s.enabled,
1033
+ channel: s.channel,
1034
+ lastCheckAt: s.lastCheckAt || null,
1035
+ lastUpdate: s.lastUpdate || null,
1036
+ });
1037
+ });
1038
+
988
1039
  // In-app "Sign in to Claude" — drives `claude auth login` in a real PTY so the
989
1040
  // browser OAuth callback auto-completes and the user never touches a terminal.
990
1041
  // (See agent-login.mjs.) Claude opens the OAuth URL in the user's browser itself
@@ -1220,7 +1271,9 @@ export async function createServer(overrides = {}) {
1220
1271
  spawn,
1221
1272
  ...(overrides.operatorDeps || {}),
1222
1273
  };
1223
- const operatorEnabled = () => Boolean(config.operatorToken);
1274
+ // Live so `operator enable` (run in a separate CLI process) lights the channel
1275
+ // up without a server restart, and `operator disable` takes it dark (RC1).
1276
+ const operatorEnabled = () => Boolean(liveOperatorToken());
1224
1277
  function auditOperator(c, action, detail) {
1225
1278
  const s = c.get('session') || {};
1226
1279
  appendLine('operator', `${action} by=${s.sub || 'operator'} src=${s.source || '-'} ${detail || ''}`.trim());
@@ -1267,6 +1320,24 @@ export async function createServer(overrides = {}) {
1267
1320
  child?.on?.('exit', (code) => appendLine('operator', `reinstall-daemon exited code=${code}`));
1268
1321
  return { started: true, pid: child?.pid || null, command: `${cmd} i -g @venturewild/workspace` };
1269
1322
  },
1323
+ // Phase 2: check the user's channel and install a newer version if one exists.
1324
+ // The always-on supervisor's version-drift auto-restart (RC1b) then loads it;
1325
+ // the supervisor also owns autonomous health-gated rollback. This is the
1326
+ // remote-support trigger for the same flow (Phase 3 capability).
1327
+ 'update-now': async () => {
1328
+ const gdir = globalDir();
1329
+ const check = await (operatorDeps.checkUpdate
1330
+ ? operatorDeps.checkUpdate()
1331
+ : new AutoUpdater({ globalDir: gdir }).check());
1332
+ if (!check.latest) return { ok: false, reason: 'registry-unreachable', current: check.current };
1333
+ if (!check.available) return { ok: true, updated: false, current: check.current, latest: check.latest };
1334
+ appendLine('operator', `update-now installing ${check.current} → ${check.latest} (${check.channel})`);
1335
+ const res = await (operatorDeps.npmInstall || npmInstall)(`${PACKAGE_NAME}@${check.latest}`);
1336
+ const ok = res.code === 0;
1337
+ if (ok) recordUpdate(gdir, { from: check.current, to: check.latest, at: Date.now(), status: 'installed' });
1338
+ appendLine('operator', `update-now ${ok ? 'installed' : `failed code=${res.code}`}`);
1339
+ return { ok, updated: ok, from: check.current, to: check.latest, code: res.code };
1340
+ },
1270
1341
  };
1271
1342
 
1272
1343
  app.get('/api/operator/diag', async (c) => {
@@ -1887,6 +1958,7 @@ export async function createServer(overrides = {}) {
1887
1958
  daemonBridge,
1888
1959
  daemonSupervisor,
1889
1960
  daemonReady,
1961
+ tunnelWatchdog,
1890
1962
  syncControl,
1891
1963
  sessionReporter,
1892
1964
  detectedAgents,
@@ -1898,6 +1970,7 @@ export async function createServer(overrides = {}) {
1898
1970
  try { transcriptRecorder.stop(); } catch {}
1899
1971
  try { inboxWatcher.stop(); } catch {}
1900
1972
  try { daemonBridge?.stop(); } catch {}
1973
+ try { tunnelWatchdog?.stop(); } catch {}
1901
1974
  // The daemon is deliberately NOT stopped here — it is detached so sync
1902
1975
  // keeps running after wild-workspace closes. `wild-workspace daemon
1903
1976
  // stop` is the explicit off-switch.
@@ -29,6 +29,33 @@ export function loadOperatorToken(dataDir) {
29
29
  }
30
30
  }
31
31
 
32
// RC1 hot-reload: read the operator token LIVE (with a tiny TTL cache) instead of
// the value the server snapshotted at boot. Today `operator enable` writes the
// token to disk but a long-running server keeps serving its cached "disabled"
// state, so the channel 401s until a manual restart (the exact bug from the first
// external install). A short TTL keeps this off the hot auth path — every request
// reads from cache, and `enable`/`disable` take effect within `ttlMs`.
//
// The cache is keyed by dataDir so two servers (tests, multiple installs) in one
// process don't read each other's tokens. `now` is injectable for tests.
const _tokenCache = new Map(); // dataDir -> { token, at }

// Return the operator token for `dataDir`, re-reading it through
// loadOperatorToken(dataDir) when the cached entry is missing or older than
// `ttlMs` milliseconds. Whatever loadOperatorToken returns (including a falsy
// "no token" value) is cached and returned as-is.
//   dataDir - cache key, also passed to loadOperatorToken.
//   ttlMs   - how long a cached read stays fresh (default 2000).
//   now     - clock returning milliseconds (default Date.now).
export function getOperatorToken(dataDir, { ttlMs = 2000, now = Date.now } = {}) {
  const t = now();
  const hit = _tokenCache.get(dataDir);
  if (hit) {
    const age = t - hit.at;
    // A negative age means the clock stepped backwards since the entry was
    // stored. Treat that as expired; otherwise a stale token (e.g. one that
    // `operator disable` already removed) would be served until the clock
    // caught up again.
    if (age >= 0 && age < ttlMs) return hit.token;
  }
  const token = loadOperatorToken(dataDir);
  _tokenCache.set(dataDir, { token, at: t });
  return token;
}

// Drop the cached token for a dataDir (or all of them). `enable`/`disable` run in
// a separate CLI process from the server, so they don't need this — it exists so
// in-process callers (and tests) can force a re-read without waiting out the TTL.
// Called with no argument (dataDir === undefined) it clears every entry.
export function invalidateOperatorTokenCache(dataDir) {
  if (dataDir === undefined) _tokenCache.clear();
  else _tokenCache.delete(dataDir);
}
58
+
32
59
  // Turn the channel on. Idempotent by default — returns the existing token if one
33
60
  // is already set (so a re-run doesn't invalidate the code the user already
34
61
  // shared). Pass { rotate:true } to force a fresh token. Returns the token, or
@@ -0,0 +1,84 @@
1
+ // First-run browser orchestration for the LOCAL owner (B1). Extracted from the
2
+ // CLI so it's importable + unit-testable without the bin's shebang. The CLI
3
+ // (bin/wild-workspace.mjs) imports openOwnerBrowser; tests import the helpers.
4
+
5
// Turn the configured bind host into something a browser can navigate to:
// wildcard binds ('0.0.0.0', '::') become the matching loopback address, and a
// bare IPv6 literal is wrapped in brackets as URL syntax requires.
function browsableHost(host) {
  if (host === '0.0.0.0') return '127.0.0.1';
  if (host === '::') return '[::1]';
  const text = String(host);
  return text.includes(':') && !text.startsWith('[') ? `[${text}]` : host;
}

// The URL to open for the LOCAL owner. A slug-linked install runs in public
// mode (the server denies anon — C1), so the owner must authenticate: append
// the partner token, which the SPA immediately exchanges for an HttpOnly cookie
// and strips from the address bar (S1). A localhost-only install needs no token.
// The token is only ever placed in the URL we hand the browser — never printed.
//
// Reads config.host, config.port, config.publicMode and config.partnerToken.
// When public mode is on but no partner token is set, the plain base URL is
// returned rather than a link carrying the literal text "undefined".
export function localBrowserUrl(config) {
  const base = `http://${browsableHost(config.host)}:${config.port}`;
  if (!config.publicMode || config.partnerToken == null || config.partnerToken === '') {
    return base;
  }
  return `${base}/?t=${encodeURIComponent(config.partnerToken)}`;
}
15
+
16
// Ask the running local server (over genuine loopback) for a one-time sign-in
// link to the PUBLIC url. Returns the URL or null (no slug / older server).
//
// POSTs to /api/auth/bootstrap on config.host:config.port (a '0.0.0.0' bind is
// dialled as 127.0.0.1). Any failure — network error, non-2xx status, non-JSON
// body, missing/non-string `url`, or exceeding the 4 s budget — yields null;
// this function never throws.
export async function fetchPublicBootstrapUrl(config) {
  const host = config.host === '0.0.0.0' ? '127.0.0.1' : config.host;
  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), 4000);
  try {
    const r = await fetch(`http://${host}:${config.port}/api/auth/bootstrap`, {
      method: 'POST',
      signal: ac.signal,
    });
    if (!r.ok) return null;
    // The timer is still armed here, so a stalled body read is bounded too.
    const body = await r.json().catch(() => null);
    return typeof body?.url === 'string' ? body.url : null;
  } catch {
    return null;
  } finally {
    // Always disarm: a pending timer would otherwise outlive a failed request
    // and keep the CLI process alive for the rest of the 4 s.
    clearTimeout(timer);
  }
}
35
+
36
// Poll the public url's /api/health until the tunnel forwards (200) or we give
// up — so we never open the owner onto a 502 "warming up" page.
//
//   shareBaseUrl - public base URL; one trailing slash is tolerated. Anything
//                  that is not http(s) returns false without any request.
//   tries        - maximum number of attempts (default 6).
//   gapMs        - pause between attempts in milliseconds (default 1300).
// Each attempt is aborted after 2.5 s. Resolves true on the first 2xx response,
// false once every attempt has failed. Never throws.
export async function publicTunnelReady(shareBaseUrl, { tries = 6, gapMs = 1300 } = {}) {
  const base = String(shareBaseUrl || '').replace(/\/$/, '');
  if (!/^https?:\/\//.test(base)) return false;
  for (let i = 0; i < tries; i += 1) {
    const ac = new AbortController();
    const timer = setTimeout(() => ac.abort(), 2500);
    try {
      const r = await fetch(`${base}/api/health`, { signal: ac.signal });
      if (r.ok) return true;
    } catch {
      /* not up yet */
    } finally {
      // Disarm on every path; otherwise each failed attempt leaves a live
      // 2.5 s timer behind that delays process exit.
      clearTimeout(timer);
    }
    if (i < tries - 1) await new Promise((res) => setTimeout(res, gapMs));
  }
  return false;
}
53
+
54
// Launch the owner's browser on the best landing page for THIS install.
//   - slug-linked + public: sign them in on <slug>.venturewild.llc (the real,
//     bookmarkable home) through a one-time bootstrap link, but only after the
//     tunnel has been confirmed up. Otherwise open localhost (which always works
//     locally) and say the public url is still warming up.
//   - localhost-only: open localhost.
// Tokens travel to the browser exclusively through open() and are never written
// to stdout (B1/S1). `opts.open` and `opts.ready` are test seams; without
// `opts.open` the `open` package is imported on demand, and `opts.ready` is
// handed to publicTunnelReady as its options.
// Resolves to 'public', 'fallback-local' or 'local'; resolves to undefined when
// no opener was injected and the `open` package cannot be imported.
export async function openOwnerBrowser(config, opts = {}) {
  let launch = opts.open;
  if (!launch) {
    try {
      launch = (await import('open')).default;
    } catch {
      return;
    }
  }

  // Best-effort open: the target is computed inside the try so that a failure
  // building the URL is swallowed exactly like a failure launching the browser.
  const launchQuietly = async (makeTarget) => {
    try {
      await launch(makeTarget());
    } catch {
      /* best-effort */
    }
  };

  const slugLinked = config.publicMode && config.account?.slug && config.shareBaseUrl;
  if (!slugLinked) {
    await launchQuietly(() => localBrowserUrl(config));
    return 'local';
  }

  const link = await fetchPublicBootstrapUrl(config);
  const tunnelUp = link ? await publicTunnelReady(config.shareBaseUrl, opts.ready) : false;
  if (tunnelUp) {
    console.log(`  opening your workspace at ${config.shareBaseUrl} …`);
    await launchQuietly(() => link);
    return 'public';
  }

  // Tunnel not up yet (or older server) — open locally so first run is never a
  // dead page; the public url comes alive on its own as the daemon links.
  await launchQuietly(() => localBrowserUrl(config));
  console.log(`  your workspace will be live at ${config.shareBaseUrl} shortly (warming up the tunnel)…`);
  return 'fallback-local';
}
@@ -0,0 +1,78 @@
1
+ // `wild-workspace reset` — take an install back to the beginning so it can be
2
+ // re-onboarded clean. UNLINKS the account (slug/email/computer), RESETS
3
+ // onboarding, and FLUSHES local config/state (device secrets, token registry,
4
+ // chat thread, canvas + bazaar local state).
5
+ //
6
+ // It deliberately NEVER touches the user's workspace files (CLAUDE.md rule #1),
7
+ // nor the always-on registration / consent choices — those are install plumbing,
8
+ // not "the beginning". See RESET_KEEPS for the honest list of what survives.
9
+
10
+ import fs from 'node:fs';
11
+ import path from 'node:path';
12
+
13
// What survives a reset — documented so the command stays honest about scope.
// Human-readable descriptions, one per kept item. Frozen because this is a
// shared exported constant: an importer must not be able to alter the list
// that other importers read.
export const RESET_KEEPS = Object.freeze([
  "the user's workspace files (everything outside .wild-workspace) — never touched",
  'service.json — always-on registration stays armed',
  'observability.json — your consent choice is preserved',
  'operator.json — the support-channel token',
  'logs/ + diagnostics/ — kept for debugging',
]);
21
+
22
// Build the list of targets to remove. `dataDirs` are the (possibly several)
// cwd-keyed `.wild-workspace` dirs that hold the account/onboarding/chat; the
// stable `globalDir` (~/.wild-workspace) holds device secrets + canvas/bazaar
// state. Each target is annotated with whether it currently exists.
//
//   dataDirs           - per-workspace data directories (default: none).
//   globalDir          - the stable global directory; skipped when falsy.
//   includeMarketplace - also plan removal of the `canvas` and `bazaar`
//                        directories under globalDir (default true).
// Returns an array of { path, kind: 'file' | 'dir', name, exists }, with each
// joined path listed at most once. Nothing is deleted here. Calling with no
// argument returns an empty plan.
export function planReset({ dataDirs = [], globalDir, includeMarketplace = true } = {}) {
  const targets = [];
  const seen = new Set();
  const add = (root, name, kind) => {
    if (!root) return;
    const p = path.join(root, name);
    if (seen.has(p)) return; // same dir passed twice, or a data dir equal to globalDir
    seen.add(p);
    // existsSync reports false instead of throwing, so no guard is needed.
    targets.push({ path: p, kind, name, exists: fs.existsSync(p) });
  };
  // Per-workspace data dirs: the account binding + onboarding + chat thread,
  // plus legacy secret/registry locations.
  for (const dir of dataDirs || []) {
    add(dir, 'account.json', 'file'); // unlink slug / email / this computer
    add(dir, 'agent-identity.json', 'file'); // re-trigger onboarding
    add(dir, 'chat-session.json', 'file'); // fresh chat thread
    add(dir, 'secrets.json', 'file'); // legacy location (now in globalDir)
    add(dir, 'revoked.json', 'file'); // legacy location
  }
  // Stable global dir: device secrets (regenerated fresh on next start) + the
  // token registry + local UI/marketplace state.
  add(globalDir, 'secrets.json', 'file');
  add(globalDir, 'revoked.json', 'file');
  if (includeMarketplace) {
    add(globalDir, 'canvas', 'dir'); // agent-made blocks + theme.json
    add(globalDir, 'bazaar', 'dir'); // local shelf + ledger
  }
  return targets;
}
62
+
63
// Delete each planned target whose `exists` flag is set and report the outcome.
// Targets flagged as absent are skipped, so running it again is harmless.
// Directories (kind === 'dir') are removed recursively; everything else is
// removed non-recursively. Returns { removed: string[], failed: [{ path, error }] }
// where `error` is the failure's message text.
export function applyReset(targets) {
  const outcome = { removed: [], failed: [] };
  for (const target of targets) {
    if (target.exists) {
      const isDirectory = target.kind === 'dir';
      try {
        fs.rmSync(target.path, { recursive: isDirectory, force: true });
        outcome.removed.push(target.path);
      } catch (err) {
        const reason = err?.message || String(err);
        outcome.failed.push({ path: target.path, error: reason });
      }
    }
  }
  return outcome;
}
@@ -41,6 +41,47 @@ export function probeHealth(port, timeoutMs = 2500) {
41
41
  });
42
42
  }
43
43
 
44
/**
 * Ask the running server its version via /api/health. Returns the version string
 * or null (server down / no version field / parse error). Never throws. Used by
 * the version-drift check (RC1) — a stale server keeps running its OLD code after
 * an upgrade, so we compare what's RUNNING to what's INSTALLED on disk.
 *
 * The promise always settles: a body larger than 4096 characters, a response
 * that errors or closes before completing, a request error and a socket
 * timeout all resolve null.
 *
 * @param {number} port - loopback port to query on 127.0.0.1.
 * @param {number} [timeoutMs=2500] - socket timeout passed to http.get.
 * @returns {Promise<string|null>} the reported version, or null.
 */
export function probeHealthVersion(port, timeoutMs = 2500) {
  return new Promise((resolve) => {
    // resolve() is idempotent, so every terminal event may call it; whichever
    // fires first wins and the rest are no-ops.
    let req;
    try {
      req = http.get(
        { host: '127.0.0.1', port, path: '/api/health', timeout: timeoutMs },
        (res) => {
          let body = '';
          // Decode as UTF-8 so a multi-byte character split across two chunks
          // is not corrupted by per-chunk string conversion.
          res.setEncoding('utf8');
          res.on('data', (d) => {
            body += d;
            if (body.length > 4096) {
              // After destroy() neither 'end' nor the request's 'error' is
              // guaranteed to fire, so settle here rather than hang the caller.
              resolve(null);
              req.destroy();
            }
          });
          res.on('end', () => {
            try { resolve(JSON.parse(body).version || null); } catch { resolve(null); }
          });
          // Response aborted mid-body (server died while answering).
          res.on('error', () => resolve(null));
          res.on('close', () => resolve(null));
        },
      );
    } catch {
      // http.get can throw synchronously on invalid options (e.g. a bad port).
      resolve(null);
      return;
    }
    req.on('error', () => resolve(null));
    req.on('timeout', () => { req.destroy(); resolve(null); });
  });
}
66
+
67
/**
 * Report the version that is on disk at this moment by reading package.json
 * afresh on every call, rather than trusting the in-memory APP_VERSION constant.
 * The supervisor is long-lived: once `npm i -g` (or the operator
 * `reinstall-daemon`) has swapped the package, its own constant is stale too, so
 * only a new disk read sees the new version. Respawning the server child reloads
 * index.mjs from this same path, so the restart actually picks up the new code.
 *
 * @param {string} [entry=DEFAULT_SERVER_ENTRY] - path of the server entry file;
 *   package.json is looked up two directories above the one containing it.
 * @returns {string|null} the `version` field, or null when the file cannot be
 *   read or parsed, or the field is missing/empty.
 */
export function installedVersion(entry = DEFAULT_SERVER_ENTRY) {
  try {
    // index.mjs lives at <pkg>/server/src/index.mjs → the package root is ../../.
    const packageRoot = path.resolve(path.dirname(entry), '..', '..');
    const manifestText = fs.readFileSync(path.join(packageRoot, 'package.json'), 'utf8');
    const { version } = JSON.parse(manifestText);
    return version || null;
  } catch {
    return null;
  }
}
84
+
44
85
  export class WorkspaceSupervisor {
45
86
  constructor({
46
87
  serverEntry = DEFAULT_SERVER_ENTRY,
@@ -58,12 +99,32 @@ export class WorkspaceSupervisor {
58
99
  env = process.env,
59
100
  crashLoopThreshold = 3,
60
101
  diagnosticsImpl = null,
102
+ // RC1 version-drift auto-restart: when the RUNNING server reports an older
103
+ // version than what's INSTALLED on disk, restart it so it picks up the new
104
+ // code. On by default; seams injected for tests. WILD_WORKSPACE_NO_AUTORESTART=1
105
+ // disables it (e.g. a developer running an intentionally-older server).
106
+ autoRestartOnVersionDrift = env.WILD_WORKSPACE_NO_AUTORESTART !== '1',
107
+ versionImpl = probeHealthVersion,
108
+ installedVersionImpl = () => installedVersion(serverEntry),
109
+ // Phase 2 auto-update (Pillar B): the always-on supervisor self-updates the
110
+ // whole stack on the user's channel, with health-gated rollback. On by
111
+ // default; the env kill switch + the persisted off switch both disable it.
112
+ // Only wired up in start() (not in the unit-test path, which calls tick()
113
+ // directly) — see start(). updatePollMs is the *wake* cadence; the actual
114
+ // check interval lives inside AutoUpdater (6h) and self-rate-limits.
115
+ autoUpdate = env.WILD_WORKSPACE_NO_AUTOUPDATE !== '1',
116
+ updatePollMs = 60 * 60 * 1000, // wake hourly; AutoUpdater gates real checks
117
+ autoUpdaterFactory = null, // test seam: (supervisor) => AutoUpdater-like
61
118
  } = {}) {
62
119
  Object.assign(this, {
63
120
  serverEntry, workspaceDir, port, globalDir, node, pollMs,
64
121
  backoffStartMs, backoffMaxMs, probeTimeoutMs, spawnImpl, probeImpl, nowImpl, env,
65
122
  crashLoopThreshold, diagnosticsImpl,
123
+ autoRestartOnVersionDrift, versionImpl, installedVersionImpl,
124
+ autoUpdate, updatePollMs, autoUpdaterFactory,
66
125
  });
126
+ this.autoUpdater = null;
127
+ this.updateTimer = null;
67
128
  this.logFile = path.join(globalDir, 'supervisor.log');
68
129
  this.serverLogFile = path.join(globalDir, 'server.out.log');
69
130
  this.lockFile = path.join(globalDir, 'supervisor.lock');
@@ -135,6 +196,28 @@ export class WorkspaceSupervisor {
135
196
  this.backoff = this.backoffStartMs; // healthy → reset backoff
136
197
  this.spawnCount = 0; // healthy → not a crash loop
137
198
  this.pushedThisEpisode = false;
199
+ // RC1 version drift: a healthy-but-STALE server (running older code than
200
+ // what's installed) should be restarted so the upgrade actually lands.
201
+ // Only when WE own the child — we restart by killing it and letting the
202
+ // next tick respawn (which reloads index.mjs from disk). A server started
203
+ // by someone else (foreground `wild-workspace`) we leave alone; we have no
204
+ // handle on it. The restarted server reports the installed version, so the
205
+ // drift clears and this won't loop.
206
+ if (this.autoRestartOnVersionDrift && this.child) {
207
+ try {
208
+ const running = await this.versionImpl(this.port, this.probeTimeoutMs);
209
+ const installed = this.installedVersionImpl();
210
+ if (running && installed && running !== installed) {
211
+ this.log(`version drift: running=${running} installed=${installed} — restarting server`);
212
+ try { this.child.kill(); } catch { /* exit handler clears child */ }
213
+ this.child = null;
214
+ this.backoff = this.backoffStartMs; // upgrade is intentional, not a crash
215
+ return 'version-drift-restart';
216
+ }
217
+ } catch (e) {
218
+ this.log(`version-drift check error: ${e?.message || e}`);
219
+ }
220
+ }
138
221
  return 'healthy';
139
222
  }
140
223
  if (this.child) return 'booting'; // spawned, still coming up
@@ -198,6 +281,47 @@ export class WorkspaceSupervisor {
198
281
  }
199
282
  }
200
283
 
284
+ /**
285
+ * Restart the supervised server child so freshly installed code is loaded.
286
+ * Kills it and lets the next tick respawn (which reloads index.mjs from disk) —
287
+ * the same mechanism as the version-drift restart, exposed for the AutoUpdater.
288
+ * No-op (returns false) when we don't own a child (foreground server).
289
+ */
290
+ restartChild() {
291
+ if (!this.child) return false;
292
+ this.log('restartChild: killing server to load new code');
293
+ try { this.child.kill(); } catch { /* exit handler clears child */ }
294
+ this.child = null;
295
+ this.backoff = this.backoffStartMs; // an intentional restart, not a crash
296
+ return true;
297
+ }
298
+
299
+ /** Build the AutoUpdater bound to this supervisor. Separated for the test seam. */
300
+ async buildAutoUpdater() {
301
+ if (this.autoUpdaterFactory) return this.autoUpdaterFactory(this);
302
+ // Lazy import keeps the unit-test path (which never calls start()) free of the
303
+ // auto-update module + its registry/npm seams.
304
+ const { AutoUpdater } = await import('./auto-update.mjs');
305
+ return new AutoUpdater({
306
+ globalDir: this.globalDir,
307
+ port: this.port,
308
+ installedVersionImpl: this.installedVersionImpl,
309
+ healthVersionImpl: (port) => this.versionImpl(port, this.probeTimeoutMs),
310
+ restartImpl: async () => { this.restartChild(); },
311
+ nowImpl: this.nowImpl,
312
+ env: this.env,
313
+ logImpl: (m) => this.log(m),
314
+ onUpdate: (rec) => this.log(`auto-update result: ${rec.from || '?'}→${rec.to} ${rec.status}`),
315
+ });
316
+ }
317
+
318
+ runUpdateTick() {
319
+ if (!this.autoUpdater) return;
320
+ this.autoUpdater.tick()
321
+ .then((r) => { if (r && !['not-due', 'disabled', 'up-to-date', 'busy'].includes(r)) this.log(`auto-update tick: ${r}`); })
322
+ .catch((e) => this.log(`auto-update error: ${e?.message || e}`));
323
+ }
324
+
201
325
  /** Acquire the lock and start the supervision loop. Idempotent across processes. */
202
326
  start() {
203
327
  if (!this.acquireLock()) return { started: false, reason: 'already-running' };
@@ -207,11 +331,24 @@ export class WorkspaceSupervisor {
207
331
  this.log(`supervisor start pid=${process.pid} watching http://127.0.0.1:${this.port}/api/health (workspace=${this.workspaceDir})`);
208
332
  this.timer = setInterval(() => { this.tick().catch((e) => this.log(`tick error: ${e?.message || e}`)); }, this.pollMs);
209
333
  this.tick().catch((e) => this.log(`tick error: ${e?.message || e}`));
334
+
335
+ // Phase 2 auto-update: wake on a slow timer; the first check fires shortly
336
+ // after start so the server has time to come up (verify reads its /health).
337
+ if (this.autoUpdate && this.env.VITEST !== 'true' && this.env.NODE_ENV !== 'test') {
338
+ this.buildAutoUpdater().then((u) => {
339
+ this.autoUpdater = u;
340
+ this.updateTimer = setInterval(() => this.runUpdateTick(), this.updatePollMs);
341
+ if (this.updateTimer.unref) this.updateTimer.unref();
342
+ const kick = setTimeout(() => this.runUpdateTick(), 60_000);
343
+ if (kick.unref) kick.unref();
344
+ }).catch((e) => this.log(`auto-update init error: ${e?.message || e}`));
345
+ }
210
346
  return { started: true };
211
347
  }
212
348
 
213
349
  stop() {
214
350
  if (this.timer) { clearInterval(this.timer); this.timer = null; }
351
+ if (this.updateTimer) { clearInterval(this.updateTimer); this.updateTimer = null; }
215
352
  this.releaseLock();
216
353
  }
217
354
  }