@venturewild/workspace 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@venturewild/workspace",
3
- "version": "0.3.2",
3
+ "version": "0.3.3",
4
4
  "description": "Claude Code Web — Replit/Lovable-style chat-first browser UI that wraps the AI agent already installed on your machine.",
5
5
  "license": "MIT",
6
6
  "bin": {
@@ -86,3 +86,25 @@ export function resolveDaemonVersion({ env = process.env, requireResolve } = {})
86
86
  return null;
87
87
  }
88
88
  }
89
+
90
/**
 * The daemon version the INSTALLED meta package PINS for this platform — read
 * from the meta `package.json`'s optionalDependencies. This is the version
 * `npm i -g @venturewild/workspace@<v>` is SUPPOSED to have pulled onto disk.
 * Comparing it to resolveDaemonVersion() catches the case where the meta
 * package updated but its daemon optionalDependency on disk lagged behind.
 *
 * The pin is normalised to a bare version so it can be compared with `===`
 * against an installed version string: surrounding whitespace, one leading
 * `^` / `~` / `=` operator and a leading `v` are stripped. Specs that do not
 * reduce to one concrete `MAJOR.MINOR.PATCH[-pre][+build]` version (`*`,
 * `>=0.3.0 <0.4.0`, `0.3.x`, tags, URLs) yield null, because a string
 * comparison against them could only ever report a false mismatch.
 *
 * @param {object} [opts]
 * @param {string} [opts.metaPkgPath] Path of the meta package.json to read.
 *   Defaults to the package.json two directories above this file.
 * @returns {string|null} The pinned concrete version, or null when the file
 *   can't be read/parsed, has no pin for this platform, or the pin is not a
 *   single concrete version.
 */
export function expectedDaemonVersion({ metaPkgPath } = {}) {
  const tag = platformTag();
  try {
    const pkg = metaPkgPath || path.resolve(__dirname, '..', '..', 'package.json');
    const parsed = JSON.parse(readFileSync(pkg, 'utf8'));
    const spec = parsed?.optionalDependencies?.[`@venturewild/workspace-daemon-${tag}`];
    if (typeof spec !== 'string') return null;
    // "^0.3.3", "~0.3.3", "=0.3.3", "v0.3.3" and " 0.3.3 " all pin 0.3.3.
    const version = spec.trim().replace(/^(?:[~^=]\s*)?v?/, '');
    const isConcrete = /^\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?(?:\+[0-9A-Za-z.-]+)?$/.test(version);
    return isConcrete ? version : null;
  } catch {
    // Unreadable / unparsable package.json → "can't tell", never an error.
    return null;
  }
}
@@ -17,12 +17,25 @@ import path from 'node:path';
17
17
  import { buildConfig, APP_VERSION } from './config.mjs';
18
18
  import { detectAgents, pickDefaultAgent } from './agent.mjs';
19
19
  import { probeAgentReadiness } from './agent-readiness.mjs';
20
- import { resolveDaemonBinary } from './daemon-bin.mjs';
20
+ import { resolveDaemonBinary, resolveDaemonVersion, expectedDaemonVersion } from './daemon-bin.mjs';
21
21
  import { checkPort } from './preview.mjs';
22
22
  import { loadAccount } from './account.mjs';
23
23
  import { serviceStatus } from './service.mjs';
24
24
  import { probeHealth, probeHealthVersion } from './supervisor.mjs';
25
- import { listLogs, diagnosticsDir } from './logpaths.mjs';
25
+ import { listLogs, diagnosticsDir, globalDir } from './logpaths.mjs';
26
+
27
// Version the currently-RUNNING daemon was spawned under, taken from the
// `daemonVersion` field of `daemon-runtime.json` inside globalDir(env). That
// marker is the only source for it (the daemon's own /health reports no
// version). Yields null whenever the marker is missing, unreadable, not valid
// JSON, or holds a non-string value.
function readRunningDaemonVersion(env = process.env) {
  let recorded;
  try {
    const markerPath = path.join(globalDir(env), 'daemon-runtime.json');
    const marker = JSON.parse(fs.readFileSync(markerPath, 'utf8'));
    recorded = marker?.daemonVersion;
  } catch {
    return null;
  }
  if (typeof recorded !== 'string') {
    return null;
  }
  return recorded;
}
26
39
 
27
40
  const STATUS_ICON = { ok: '✅', warn: '⚠️', fail: '❌', info: 'ℹ️' };
28
41
 
@@ -93,6 +106,9 @@ export async function runDoctor(opts = {}, deps = {}) {
93
106
  listLogs: deps.listLogs || listLogs,
94
107
  fetchImpl: deps.fetchImpl || ((...a) => globalThis.fetch(...a)),
95
108
  probeRunningVersion: deps.probeRunningVersion || probeHealthVersion,
109
+ daemonInstalledVersion: deps.daemonInstalledVersion || (() => resolveDaemonVersion({ env })),
110
+ daemonExpectedVersion: deps.daemonExpectedVersion || (() => expectedDaemonVersion()),
111
+ daemonRunningVersion: deps.daemonRunningVersion || (() => readRunningDaemonVersion(env)),
96
112
  };
97
113
  const checks = [];
98
114
  const add = (c) => checks.push(c);
@@ -169,6 +185,45 @@ export async function runDoctor(opts = {}, deps = {}) {
169
185
  return { status: 'ok', detail: `${r.path} (${r.source})`, hint: null };
170
186
  });
171
187
 
188
+ // 4b. Daemon version drift (the go-live stale-process finding, Part 8). Three
189
+ // versions should agree: what the meta package PINS (expected), what's actually
190
+ // on disk (installed subpackage), and what the live daemon was spawned under
191
+ // (running marker). A mismatch is the exact "support channel silently 504s after
192
+ // an update" chain — the meta package updated but the daemon binary on disk
193
+ // lagged, or the daemon kept running old code. Surfaced so the fix (reinstall /
194
+ // restart) is obvious instead of invisible.
195
+ await guarded('daemonVersion', 'Sync daemon version', async () => {
196
+ const expected = d.daemonExpectedVersion();
197
+ const installed = d.daemonInstalledVersion();
198
+ const running = d.daemonRunningVersion();
199
+ const bits = [`pinned=${expected || '?'}`, `installed=${installed || 'PATH/vendor'}`, `running=${running || 'not started'}`];
200
+ const detail = bits.join(' ');
201
+ // Meta pins a version but the on-disk daemon subpackage is a DIFFERENT one →
202
+ // `npm i -g` didn't refresh the optionalDependency (the Windows dev box lag).
203
+ if (expected && installed && expected !== installed) {
204
+ return {
205
+ status: 'warn',
206
+ detail,
207
+ hint: `The daemon on disk (${installed}) does not match what this version pins (${expected}). Reinstall to refresh it: npm i -g @venturewild/workspace@latest`,
208
+ };
209
+ }
210
+ // The live daemon is running an older binary than what's installed → it needs
211
+ // a recycle (the always-on supervisor does this on its next tick).
212
+ if (installed && running && installed !== running) {
213
+ return {
214
+ status: 'warn',
215
+ detail,
216
+ hint: `The running daemon (${running}) is older than installed (${installed}). Always-on recycles it automatically; or restart sync (\`wild-workspace daemon stop\` then \`wild-workspace\`).`,
217
+ };
218
+ }
219
+ if (!installed) {
220
+ // PATH/vendor resolution — can't compare versions; the daemonBinary check
221
+ // above already warns about the missing bundled binary.
222
+ return { status: 'info', detail, hint: null };
223
+ }
224
+ return { status: 'ok', detail, hint: null };
225
+ });
226
+
172
227
  // 5. Workspace port
173
228
  await guarded('port', `Workspace port :${config.port}`, async () => {
174
229
  const inUse = await d.checkPort(config.port);
@@ -24,7 +24,7 @@
24
24
  // synced workspace (locked principle #1). Every external touch-point (reg.exe,
25
25
  // launchctl, kill) is an injected seam for testability.
26
26
 
27
- import { execFile } from 'node:child_process';
27
+ import { execFile, spawn } from 'node:child_process';
28
28
  import { promisify } from 'node:util';
29
29
  import fs from 'node:fs';
30
30
  import os from 'node:os';
@@ -327,6 +327,102 @@ async function linuxStatus({ dir, systemdUserDir, execFileImpl, probeImpl, port
327
327
  return { installed, runValue: installed ? unit : null, supervisorPid, supervisorAlive, serverUp, enabled, active };
328
328
  }
329
329
 
330
+ // --- self-restart: re-exec the supervisor to load freshly-installed code -----
331
+ //
332
+ // After an auto-update installs new code, the long-lived SUPERVISOR keeps running
333
+ // the OLD code until it restarts — RC1b restarts the server CHILD, never the
334
+ // supervisor parent. That's the go-live "stale-process-after-update chain"
335
+ // (remote-support-and-self-healing-design.md Part 8): the supervisor's daemon-
336
+ // drift recycle logic can't run, so the daemon stays on the old binary and the
337
+ // support channel silently 504s. restartSelf() restarts the supervisor itself,
338
+ // per-OS, so the whole stack lands new code with NO reboot:
339
+ // - macOS: launchctl kickstart -k gui/<uid>/<label> (launchd kills + relaunches us)
340
+ // - Linux: systemctl --user restart <unit> (only when systemd-managed)
341
+ // - Windows: re-spawn the hidden VBS launcher; the caller then exits so the
342
+ // successor takes the singleton lock (no service manager to do it).
343
+ //
344
+ // SAFE BY CONSTRUCTION — never kill the only supervisor on a non-managed run:
345
+ // - mac: kickstart errors when the job isn't loaded (manual `service run`) →
346
+ // reported not-restarted, supervisor keeps running (old code, same as
347
+ // before this feature) rather than dying.
348
+ // - linux: gated on INVOCATION_ID (systemd sets it for its own services); a
349
+ // manual run has none → no-op (a `restart` would otherwise spawn a
350
+ // SECOND supervisor that collides on the singleton lock).
351
+ // - win: only re-spawns when the installed launcher exists.
352
+ // On mac/Linux the service manager kills+sequences the restart (no lock race). On
353
+ // Windows the caller exits AFTER we've spawned the successor; the successor's node
354
+ // boot (~hundreds of ms) outlasts the caller's lock release, so it takes over
355
+ // cleanly — and a lost race merely falls back to the next-login launch (no user
356
+ // downtime: the server + daemon are independent processes that keep serving).
357
+
358
/**
 * macOS self-restart: ask launchd to kill and relaunch the job via
 * `launchctl kickstart -k gui/<uid>/<label>`.
 *
 * @param {object} args
 * @param {Function} args.execFileImpl Promise-returning execFile(cmd, argv).
 * @param {number|string} args.uid User id used to build the launchd target.
 * @param {string} args.label launchd job label.
 * @returns {Promise<object>} `{ restarted: true, method, target }` on success,
 *   or `{ restarted: false, method, target, error }` when the command failed.
 *   Never rejects.
 */
async function macRestartSelf({ execFileImpl, uid, label }) {
  const target = `gui/${uid}/${label}`;
  try {
    await execFileImpl('launchctl', ['kickstart', '-k', target]);
    return { restarted: true, method: 'launchctl-kickstart', target };
  } catch (e) {
    const headline = String(e?.message || e).split('\n')[0];
    // A promisified execFile rejects with "Command failed: <cmd>\n<stderr>", so
    // the first message line alone never says WHY it failed. Append the first
    // non-empty stderr line when the error carries one.
    const stderrLine = String(e?.stderr ?? '')
      .split('\n')
      .map((line) => line.trim())
      .find(Boolean);
    const error = stderrLine && !headline.includes(stderrLine) ? `${headline}: ${stderrLine}` : headline;
    return { restarted: false, method: 'launchctl-kickstart', target, error };
  }
}
367
+
368
/**
 * Linux self-restart: `systemctl --user restart <unit>`, attempted only when
 * `env.INVOCATION_ID` is set; without it no command is run and the result
 * carries reason 'not-systemd-managed'.
 *
 * @param {object} args
 * @param {Function} args.execFileImpl Promise-returning execFile(cmd, argv).
 * @param {object} args.env Environment to inspect for INVOCATION_ID.
 * @param {string} args.unit systemd user unit name.
 * @returns {Promise<object>} `{ restarted: true, method, unit }` on success;
 *   `{ restarted: false, method, unit, reason }` when not systemd-managed;
 *   `{ restarted: false, method, unit, error }` when the command failed.
 *   Never rejects for a failing command.
 */
async function linuxRestartSelf({ execFileImpl, env, unit }) {
  if (!env.INVOCATION_ID) {
    return { restarted: false, method: 'systemctl', unit, reason: 'not-systemd-managed' };
  }
  try {
    await execFileImpl('systemctl', ['--user', 'restart', unit]);
    return { restarted: true, method: 'systemctl', unit };
  } catch (e) {
    const headline = String(e?.message || e).split('\n')[0];
    // A promisified execFile rejects with "Command failed: <cmd>\n<stderr>", so
    // the first message line alone never says WHY it failed. Append the first
    // non-empty stderr line when the error carries one.
    const stderrLine = String(e?.stderr ?? '')
      .split('\n')
      .map((line) => line.trim())
      .find(Boolean);
    const error = stderrLine && !headline.includes(stderrLine) ? `${headline}: ${stderrLine}` : headline;
    return { restarted: false, method: 'systemctl', unit, error };
  }
}
379
+
380
/**
 * Windows self-restart: re-spawn the hidden VBS launcher (`launch-hidden.vbs`
 * inside `dir`) as a detached `wscript.exe` process.
 *
 * @param {object} args
 * @param {string} args.dir Directory expected to contain `launch-hidden.vbs`.
 * @param {Function} args.spawnImpl spawn(cmd, argv, options) implementation.
 * @returns {object} `{ restarted: true, method, launcher, willExit: true }`
 *   when the successor was spawned (the caller MUST then process.exit() so the
 *   successor can take the lock); `{ restarted: false, method, reason }` when
 *   the launcher file is absent; `{ restarted: false, method, launcher, error }`
 *   when spawning failed. Never throws for a failing spawn.
 */
function winRestartSelf({ dir, spawnImpl }) {
  const vbs = path.join(dir, 'launch-hidden.vbs');
  if (!fs.existsSync(vbs)) {
    return { restarted: false, method: 'win-relaunch', reason: 'launcher-absent' };
  }
  try {
    const child = spawnImpl('wscript.exe', [vbs], { detached: true, windowsHide: true, stdio: 'ignore' });
    const isEmitter = typeof child?.on === 'function';
    if (isEmitter) {
      // A spawn failure (e.g. ENOENT) is reported asynchronously as an 'error'
      // event; with no listener that event becomes an uncaught exception.
      child.on('error', () => {});
    }
    if (isEmitter && child.pid === undefined) {
      // A child process that failed to spawn has no pid. Reporting success here
      // would make the caller exit with no successor to take over.
      return { restarted: false, method: 'win-relaunch', launcher: vbs, error: 'spawn failed (no pid)' };
    }
    child?.unref?.();
    // willExit: the caller MUST process.exit() so the successor can take the lock.
    return { restarted: true, method: 'win-relaunch', launcher: vbs, willExit: true };
  } catch (e) {
    return { restarted: false, method: 'win-relaunch', launcher: vbs, error: String(e?.message || e).split('\n')[0] };
  }
}
394
+
395
/**
 * Restart the always-on supervisor process so freshly-installed supervisor code
 * loads. Dispatches on platform: darwin → macRestartSelf, linux →
 * linuxRestartSelf, win32 → winRestartSelf; any other platform is reported as
 * unsupported.
 *
 * @param {object} [opts] Operational options. `opts.dir` is the directory that
 *   holds the Windows launcher.
 * @param {object} [deps] Injection seams: `platform`, `dir`, `execFileImpl`,
 *   `uid`, `label`, `env`, `unit`, `spawnImpl`. Each falls back to the real
 *   implementation/constant when omitted.
 * @returns {Promise<object>} `{ restarted, method, ... }` from the per-OS
 *   helper; `{ restarted: false, supported: false, platform }` on an
 *   unsupported platform; `{ restarted: false, platform, error }` if resolving
 *   a default or running a helper failed. A `willExit: true` (Windows) tells
 *   the caller to process.exit() after we return so the just-spawned successor
 *   can take the singleton lock. Never rejects.
 */
export async function restartSelf(opts = {}, deps = {}) {
  const platform = deps.platform || process.platform;
  try {
    if (platform === 'darwin') {
      return await macRestartSelf({
        execFileImpl: deps.execFileImpl || execFileP,
        uid: deps.uid ?? currentUid(),
        label: deps.label || LAUNCHD_LABEL,
      });
    }
    if (platform === 'linux') {
      return await linuxRestartSelf({
        execFileImpl: deps.execFileImpl || execFileP,
        env: deps.env || process.env,
        unit: deps.unit || SYSTEMD_UNIT,
      });
    }
    if (platform === 'win32') {
      // dir (where the Windows launcher lives) may come from the operational
      // opts or the test deps. Resolved only here, the one branch that uses it,
      // so a globalDir() failure cannot affect the other platforms.
      const dir = opts.dir || deps.dir || globalDir();
      return winRestartSelf({ dir, spawnImpl: deps.spawnImpl || spawn });
    }
    return { restarted: false, supported: false, platform };
  } catch (e) {
    // Keep the "never throws" contract even when a default resolver or a
    // helper fails unexpectedly.
    return { restarted: false, platform, error: String(e?.message || e).split('\n')[0] };
  }
}
425
+
330
426
  // --- public API (platform dispatch) ----------------------------------------
331
427
 
332
428
  const unsupported = (platform, key) => ({
@@ -26,6 +26,7 @@ import os from 'node:os';
26
26
  import path from 'node:path';
27
27
  import { fileURLToPath } from 'node:url';
28
28
  import { resolveDaemonVersion } from './daemon-bin.mjs';
29
+ import { restartSelf } from './service.mjs';
29
30
 
30
31
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
31
32
  const DEFAULT_SERVER_ENTRY = path.join(__dirname, 'index.mjs');
@@ -83,6 +84,13 @@ export function installedVersion(entry = DEFAULT_SERVER_ENTRY) {
83
84
  }
84
85
  }
85
86
 
87
+ // Captured ONCE at module load = the version of the code THIS supervisor process
88
+ // is running. A fresh installedVersion() reads disk, which moves ahead after an
89
+ // in-place `npm i -g`; the difference is the supervisor's OWN staleness (the
90
+ // Part-8 gap). Distinct from APP_VERSION only in that we read the same file the
91
+ // drift check reads, so they're guaranteed equal at startup (no false drift).
92
+ export const SUPERVISOR_VERSION = installedVersion();
93
+
86
94
  export class WorkspaceSupervisor {
87
95
  constructor({
88
96
  serverEntry = DEFAULT_SERVER_ENTRY,
@@ -134,6 +142,26 @@ export class WorkspaceSupervisor {
134
142
  // under (tracked in `daemon-runtime.json`, since the daemon's /health reports
135
143
  // no version). Test seam: inject a version function.
136
144
  daemonVersionImpl = () => resolveDaemonVersion({ env }),
145
+ // Supervisor self-restart after auto-update (the Part-8 stale-process fix):
146
+ // once an update installs new code and the server child restarts + verifies
147
+ // healthy, the supervisor must restart ITSELF so its own new code (e.g. the
148
+ // daemon-drift recycle) loads — RC1b only restarts the child. Per-OS re-exec
149
+ // lives in service.mjs::restartSelf. On by default; kill switch
150
+ // WILD_WORKSPACE_NO_SELF_RESTART=1. A cooldown + a once-per-process guard
151
+ // prevent any restart loop; the delay lets the triggering update tick unwind
152
+ // and logs flush first. All seams injected (no real exit/spawn in tests).
153
+ selfRestart = env.WILD_WORKSPACE_NO_SELF_RESTART !== '1',
154
+ selfRestartCooldownMs = 10 * 60 * 1000,
155
+ selfRestartDelayMs = 3000,
156
+ restartSelfImpl = restartSelf,
157
+ exitImpl = (code = 0) => process.exit(code),
158
+ scheduleImpl = (fn, ms) => { const t = setTimeout(fn, ms); if (t.unref) t.unref(); return t; },
159
+ // The version THIS supervisor process is running (captured at module load).
160
+ // The self-drift backstop self-restarts when the installed-on-disk version
161
+ // moves ahead of this — covering EVERY update path (our auto-updater, the
162
+ // operator `update-now`, the CLI `update apply`, a manual `npm i -g`), not
163
+ // just our own. null disables the backstop (tests default to null).
164
+ selfVersion = SUPERVISOR_VERSION,
137
165
  } = {}) {
138
166
  Object.assign(this, {
139
167
  serverEntry, workspaceDir, port, globalDir, node, pollMs,
@@ -142,6 +170,8 @@ export class WorkspaceSupervisor {
142
170
  autoRestartOnVersionDrift, versionImpl, installedVersionImpl,
143
171
  autoUpdate, updatePollMs, autoUpdaterFactory,
144
172
  superviseDaemon, daemonPollMs, daemonSupervisorFactory, daemonVersionImpl,
173
+ selfRestart, selfRestartCooldownMs, selfRestartDelayMs, restartSelfImpl, exitImpl, scheduleImpl,
174
+ selfVersion,
145
175
  });
146
176
  this.autoUpdater = null;
147
177
  this.updateTimer = null;
@@ -149,6 +179,10 @@ export class WorkspaceSupervisor {
149
179
  this.daemonTimer = null;
150
180
  this._daemonTicking = false;
151
181
  this.daemonRuntimeFile = path.join(globalDir, 'daemon-runtime.json');
182
+ // Persists the last self-restart time so a fresh post-re-exec supervisor
183
+ // honours the cooldown too (belt-and-suspenders against a restart loop).
184
+ this.selfRestartFile = path.join(globalDir, 'self-restart.json');
185
+ this._selfRestartScheduled = false;
152
186
  this.logFile = path.join(globalDir, 'supervisor.log');
153
187
  this.serverLogFile = path.join(globalDir, 'server.out.log');
154
188
  this.lockFile = path.join(globalDir, 'supervisor.lock');
@@ -243,6 +277,10 @@ export class WorkspaceSupervisor {
243
277
  this.restartChild();
244
278
  return 'restart-requested';
245
279
  }
280
+ // Part-8 backstop: if disk moved ahead of our own code (any update path),
281
+ // schedule a supervisor self-restart. Side-effect only — never changes the
282
+ // tick decision below (server/daemon healing proceeds as usual meanwhile).
283
+ this.maybeSelfRestartOnDrift();
246
284
  if (await this.probeImpl(this.port, this.probeTimeoutMs)) {
247
285
  this.backoff = this.backoffStartMs; // healthy → reset backoff
248
286
  this.spawnCount = 0; // healthy → not a crash loop
@@ -347,6 +385,96 @@ export class WorkspaceSupervisor {
347
385
  return true;
348
386
  }
349
387
 
388
+ /** The last self-restart time (epoch ms), or 0. Used for the loop-guard cooldown. */
389
+ readLastSelfRestart() {
390
+ try { return Number(JSON.parse(fs.readFileSync(this.selfRestartFile, 'utf8')).at) || 0; }
391
+ catch { return 0; }
392
+ }
393
+
394
+ writeLastSelfRestart(at) {
395
+ try {
396
+ fs.mkdirSync(this.globalDir, { recursive: true });
397
+ fs.writeFileSync(this.selfRestartFile, JSON.stringify({ at }));
398
+ } catch { /* best-effort */ }
399
+ }
400
+
401
+ /**
402
+ * Schedule a supervisor self-restart so freshly-installed SUPERVISOR code loads
403
+ * (the Part-8 stale-process fix). Called from the AutoUpdater's onUpdate hook
404
+ * AFTER an update installed + restarted the server child + verified it healthy —
405
+ * so a bad release has already rolled back before we re-exec ourselves. Guarded
406
+ * three ways against a restart loop: the kill switch, a once-per-process flag,
407
+ * and a persisted cooldown (survives the re-exec). Returns a status string
408
+ * ('scheduled' | 'disabled' | 'already' | 'cooldown') for tests/logging. The
409
+ * actual restart runs on a short delay so the triggering tick unwinds first.
410
+ */
411
+ scheduleSelfRestart(reason) {
412
+ if (!this.selfRestart) return 'disabled';
413
+ if (this._selfRestartScheduled) return 'already';
414
+ const now = this.nowImpl();
415
+ const last = this.readLastSelfRestart();
416
+ if (last && now - last < this.selfRestartCooldownMs) {
417
+ this.log(`self-restart skipped (cooldown, last ${Math.round((now - last) / 1000)}s ago) — ${reason}`);
418
+ return 'cooldown';
419
+ }
420
+ this._selfRestartScheduled = true;
421
+ this.writeLastSelfRestart(now);
422
+ this.log(`self-restart scheduled in ${this.selfRestartDelayMs}ms — ${reason}`);
423
+ this.scheduleImpl(() => {
424
+ this._performSelfRestart(reason).catch((e) => this.log(`self-restart error: ${e?.message || e}`));
425
+ }, this.selfRestartDelayMs);
426
+ return 'scheduled';
427
+ }
428
+
429
+ /**
430
+ * Carry out the self-restart. On mac/Linux the service manager kills+relaunches
431
+ * us (we just issue the command and get SIGTERM'd → our exit handler releases the
432
+ * lock). On Windows restartSelf spawned a hidden successor and returns
433
+ * willExit:true — we then release the lock (via stop()) and exit so the successor
434
+ * can take it. A non-managed run reports restarted:false and we stay up on the
435
+ * old code (no worse than before this feature). Never throws.
436
+ */
437
+ async _performSelfRestart(reason) {
438
+ this.log(`self-restart now — ${reason}`);
439
+ let r;
440
+ try {
441
+ r = await this.restartSelfImpl({ dir: this.globalDir, port: this.port });
442
+ } catch (e) {
443
+ this.log(`self-restart impl error: ${e?.message || e}`);
444
+ return { restarted: false, error: e?.message || String(e) };
445
+ }
446
+ this.log(`self-restart result: ${JSON.stringify(r)}`);
447
+ if (r && r.willExit) {
448
+ this.stop(); // clears timers + releases the lock so the successor can take it
449
+ this.exitImpl(0);
450
+ }
451
+ return r;
452
+ }
453
+
454
+ /**
455
+ * Backstop for the Part-8 gap on EVERY update path, not just our own auto-
456
+ * updater: when the version installed on disk no longer matches the code THIS
457
+ * supervisor is running, the supervisor is stale → schedule a self-restart.
458
+ * RC1b already restarts the stale server child and daemonTick recycles the
459
+ * stale daemon; this is the missing third leg (the supervisor itself), so an
460
+ * operator `update-now` / CLI `update apply` / manual `npm i -g` also lands new
461
+ * supervisor code with no reboot. Skipped while OUR auto-updater is mid-flight
462
+ * so the rollback window is respected (that path self-restarts via the onUpdate
463
+ * hook, only after verify succeeds). Cheap (an in-memory compare guarding a disk
464
+ * read) and idempotent (scheduleSelfRestart de-dupes). Never throws.
465
+ */
466
+ maybeSelfRestartOnDrift() {
467
+ if (!this.selfRestart || !this.selfVersion) return false;
468
+ if (this._selfRestartScheduled) return false;
469
+ if (this.autoUpdater && this.autoUpdater.inProgress) return false; // respect rollback window
470
+ let installed = null;
471
+ try { installed = this.installedVersionImpl(); } catch { return false; }
472
+ if (!installed || installed === this.selfVersion) return false;
473
+ this.log(`supervisor version drift: running=${this.selfVersion} installed=${installed} — self-restarting`);
474
+ this.scheduleSelfRestart(`supervisor drift ${this.selfVersion}→${installed}`);
475
+ return true;
476
+ }
477
+
350
478
  /** Build the AutoUpdater bound to this supervisor. Separated for the test seam. */
351
479
  async buildAutoUpdater() {
352
480
  if (this.autoUpdaterFactory) return this.autoUpdaterFactory(this);
@@ -362,7 +490,16 @@ export class WorkspaceSupervisor {
362
490
  nowImpl: this.nowImpl,
363
491
  env: this.env,
364
492
  logImpl: (m) => this.log(m),
365
- onUpdate: (rec) => this.log(`auto-update result: ${rec.from || '?'}→${rec.to} ${rec.status}`),
493
+ onUpdate: (rec) => {
494
+ this.log(`auto-update result: ${rec.from || '?'}→${rec.to} ${rec.status}`);
495
+ // A genuine version change landed healthy → restart the supervisor itself
496
+ // so its own new code loads (Part-8 stale-process fix). Guarded against
497
+ // loops inside scheduleSelfRestart. Fires only on a real bump (to≠from),
498
+ // never on rollback/failure (those statuses aren't 'ok').
499
+ if (rec.status === 'ok' && rec.to && rec.from && rec.to !== rec.from) {
500
+ this.scheduleSelfRestart(`auto-update ${rec.from}→${rec.to}`);
501
+ }
502
+ },
366
503
  });
367
504
  }
368
505