@openparachute/hub 0.6.4-rc.9 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -341,6 +341,96 @@ describe("install", () => {
341
341
  }
342
342
  });
343
343
 
344
+ test("names the squatter holding the canonical port when the walk assigns a fallback (#590)", async () => {
345
+ // Field bug #590 item 2: a stale pre-supervisor vault zombie squats 1940;
346
+ // the install-time port walk silently routed to a fallback. Now it names the
347
+ // holder (pid + command line) + hints it may be a stale daemon. Detection
348
+ // only — never kills. Reuses the #581 pidOnPort / ownerOfPid seams.
349
+ const { path, configDir, cleanup } = makeTempPath();
350
+ try {
351
+ const logs: string[] = [];
352
+ const code = await install("vault", {
353
+ runner: async () => 0,
354
+ manifestPath: path,
355
+ configDir,
356
+ startService: async () => 0,
357
+ isLinked: () => false,
358
+ // Only vault's canonical 1940 is held → the walk picks a fallback in-range.
359
+ portProbe: async (p) => p === 1940,
360
+ // Inject the #581 seams: a foreign pid squats 1940.
361
+ pidOnPort: (p) => (p === 1940 ? 1234 : undefined),
362
+ ownerOfPid: (pid) => (pid === 1234 ? "bun /opt/vault/src/server.ts" : undefined),
363
+ log: (l) => logs.push(l),
364
+ });
365
+ expect(code).toBe(0);
366
+ const joined = logs.join("\n");
367
+ // The fallback warning still fires…
368
+ expect(joined).toMatch(/canonical port 1940 is in use; assigned/);
369
+ // …and now it NAMES the squatter + hints at a stale daemon.
370
+ expect(joined).toContain("pid 1234 (bun /opt/vault/src/server.ts)");
371
+ expect(joined).toMatch(/stale pre-supervisor daemon/);
372
+ expect(joined).toContain("kill 1234");
373
+ const entry = findService("parachute-vault", path);
374
+ expect(entry?.port).not.toBe(1940);
375
+ } finally {
376
+ cleanup();
377
+ }
378
+ });
379
+
380
+ test("squatter pid present but command line unreadable → names the pid alone (#590)", async () => {
381
+ const { path, configDir, cleanup } = makeTempPath();
382
+ try {
383
+ const logs: string[] = [];
384
+ const code = await install("vault", {
385
+ runner: async () => 0,
386
+ manifestPath: path,
387
+ configDir,
388
+ startService: async () => 0,
389
+ isLinked: () => false,
390
+ portProbe: async (p) => p === 1940,
391
+ pidOnPort: (p) => (p === 1940 ? 4321 : undefined),
392
+ ownerOfPid: () => undefined, // ps failed / pid gone
393
+ log: (l) => logs.push(l),
394
+ });
395
+ expect(code).toBe(0);
396
+ const joined = logs.join("\n");
397
+ expect(joined).toContain("held by pid 4321.");
398
+ expect(joined).not.toContain("(undefined)");
399
+ } finally {
400
+ cleanup();
401
+ }
402
+ });
403
+
404
+ test("no squatter naming when the canonical port is free (#590 — no false positive)", async () => {
405
+ const { path, configDir, cleanup } = makeTempPath();
406
+ try {
407
+ const logs: string[] = [];
408
+ let pidProbed = false;
409
+ const code = await install("vault", {
410
+ runner: async () => 0,
411
+ manifestPath: path,
412
+ configDir,
413
+ startService: async () => 0,
414
+ isLinked: () => false,
415
+ portProbe: async () => false, // canonical 1940 is free
416
+ pidOnPort: () => {
417
+ pidProbed = true;
418
+ return 9999;
419
+ },
420
+ ownerOfPid: () => "should-not-appear",
421
+ log: (l) => logs.push(l),
422
+ });
423
+ expect(code).toBe(0);
424
+ const joined = logs.join("\n");
425
+ // Canonical assigned → no fallback warning, no squatter probe at all.
426
+ expect(joined).not.toMatch(/is in use; assigned/);
427
+ expect(joined).not.toContain("should-not-appear");
428
+ expect(pidProbed).toBe(false);
429
+ } finally {
430
+ cleanup();
431
+ }
432
+ });
433
+
344
434
  test("`install lens` aliases to notes with a rename notice", async () => {
345
435
  // Transition alias for the brief Notes→Lens rename (Apr 19) that was
346
436
  // reverted on launch eve (Apr 22). Accepted for one release cycle so
@@ -67,6 +67,7 @@ function fakeHubUnitDeps(): HubUnitDeps {
67
67
  readFile: () => undefined,
68
68
  exists: () => false,
69
69
  probeHealth: async () => false,
70
+ probeHealthVersion: async () => null,
70
71
  portListening: async () => false,
71
72
  sleep: async () => {},
72
73
  };
@@ -16,15 +16,18 @@
16
16
  * `expose-cloudflare.ts` (cloudflared) use so the two paths can't drift.
17
17
  */
18
18
 
19
+ import pkg from "../../package.json" with { type: "json" };
19
20
  import { readHubPort } from "../hub-control.ts";
20
21
  import { hubDbPath, openHubDb } from "../hub-db.ts";
21
22
  import {
22
23
  type EnsureHubUnitOpts,
23
24
  type EnsureHubUnitResult,
25
+ type EnsureHubVersionMatchesResult,
24
26
  HUB_UNIT_DEFAULT_PORT,
25
27
  type HubUnitDeps,
26
28
  defaultHubUnitDeps,
27
29
  ensureHubUnit as ensureHubUnitImpl,
30
+ ensureHubVersionMatches as ensureHubVersionMatchesImpl,
28
31
  } from "../hub-unit.ts";
29
32
  import {
30
33
  type DriveModuleOpDeps,
@@ -54,6 +57,17 @@ export interface ExposeSupervisorOpts {
54
57
  hubUnitDeps?: HubUnitDeps;
55
58
  /** Ensure the hub unit is up before / during expose (§3.2 / §4.3a). */
56
59
  ensureHubUnit?: (opts: EnsureHubUnitOpts) => Promise<EnsureHubUnitResult>;
60
+ /**
61
+ * Version-check-and-restart at the expose adoption point (#590). After the
62
+ * hub unit is confirmed up, compare the RUNNING hub's `/health` version to the
63
+ * installed package version; restart the managed unit on mismatch so an expose
64
+ * never wires a tunnel to a stale zombie. Production wires
65
+ * `ensureHubVersionMatches`; tests inject a stub.
66
+ */
67
+ ensureHubVersion?: (ctx: {
68
+ port: number;
69
+ log: (line: string) => void;
70
+ }) => Promise<EnsureHubVersionMatchesResult>;
57
71
  /** Drive a per-module op against the running hub (reads operator.token). */
58
72
  driveModuleOp?: (short: string, op: ModuleOp, deps: DriveModuleOpDeps) => Promise<ModuleOpResult>;
59
73
  /**
@@ -83,6 +97,10 @@ export interface ExposeSupervisorOpts {
83
97
  export interface ResolvedExposeSupervisor {
84
98
  hubUnitDeps: HubUnitDeps;
85
99
  ensureHubUnit: (opts: EnsureHubUnitOpts) => Promise<EnsureHubUnitResult>;
100
+ ensureHubVersion: (ctx: {
101
+ port: number;
102
+ log: (line: string) => void;
103
+ }) => Promise<EnsureHubVersionMatchesResult>;
86
104
  driveModuleOp: (short: string, op: ModuleOp, deps: DriveModuleOpDeps) => Promise<ModuleOpResult>;
87
105
  openDb: (configDir: string) => import("bun:sqlite").Database;
88
106
  selfHealOperatorTokenIssuer: (
@@ -105,6 +123,15 @@ export function resolveExposeSupervisor(
105
123
  return {
106
124
  hubUnitDeps,
107
125
  ensureHubUnit: opts?.ensureHubUnit ?? ensureHubUnitImpl,
126
+ ensureHubVersion:
127
+ opts?.ensureHubVersion ??
128
+ ((ctx) =>
129
+ ensureHubVersionMatchesImpl({
130
+ installedVersion: pkg.version,
131
+ port: ctx.port,
132
+ deps: hubUnitDeps,
133
+ log: ctx.log,
134
+ })),
108
135
  driveModuleOp: opts?.driveModuleOp ?? driveModuleOpImpl,
109
136
  openDb: opts?.openDb ?? ((configDir) => openHubDb(hubDbPath(configDir))),
110
137
  selfHealOperatorTokenIssuer:
@@ -145,6 +172,24 @@ export async function ensureHubUnitForExpose(
145
172
  ): Promise<{ ok: boolean; port: number }> {
146
173
  const ensured = await sup.ensureHubUnit({ port, deps: sup.hubUnitDeps, log });
147
174
  if (ensured.outcome === "already-up" || ensured.outcome === "started") {
175
+ // #590: the hub is up — but is it the version we installed? A zombie that
176
+ // merely answers /health must not become the target of a fresh tunnel.
177
+ // Compare + restart-on-mismatch (once). A non-unit-managed mismatch is NOT
178
+ // killed: surface it + fail the expose so the operator resolves it (a failed
179
+ // restart fails the expose too); a still-mismatched-after-restart (bun-linked
+ // branch) warns + continues.
180
+ try {
181
+ const versionResult = await sup.ensureHubVersion({ port: ensured.port, log });
182
+ for (const m of versionResult.messages) log(m);
183
+ if (
184
+ versionResult.outcome === "not-unit-managed" ||
185
+ versionResult.outcome === "restart-failed"
186
+ ) {
187
+ return { ok: false, port: ensured.port };
188
+ }
189
+ } catch (err) {
190
+ // A version-check failure must never block expose — degrade to a note.
191
+ log(`note: hub version check skipped (${err instanceof Error ? err.message : String(err)})`);
192
+ }
148
193
  return { ok: true, port: ensured.port };
149
194
  }
150
195
  for (const m of ensured.messages) log(m);
@@ -35,12 +35,18 @@
35
35
  import { spawnSync } from "node:child_process";
36
36
  import { join } from "node:path";
37
37
  import { fileURLToPath } from "node:url";
38
+ import pkg from "../../package.json" with { type: "json" };
38
39
  import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
39
40
  import { type ExposeState, readExposeState } from "../expose-state.ts";
40
41
  import { type EnsureHubOpts, HUB_DEFAULT_PORT, HUB_SVC, readHubPort } from "../hub-control.ts";
41
42
  import { hubDbPath, openHubDb } from "../hub-db.ts";
42
43
  import { deriveHubOrigin } from "../hub-origin.ts";
43
- import { ensureHubUnit, installAndStartHubUnit } from "../hub-unit.ts";
44
+ import {
45
+ type EnsureHubVersionMatchesResult,
46
+ ensureHubUnit,
47
+ ensureHubVersionMatches,
48
+ installAndStartHubUnit,
49
+ } from "../hub-unit.ts";
44
50
  import { issueOperatorToken, readOperatorTokenFile } from "../operator-token.ts";
45
51
  import { type AliveFn, defaultAlive, processState } from "../process-state.ts";
46
52
  import { findService, readManifestLenient } from "../services-manifest.ts";
@@ -81,6 +87,18 @@ export interface InitOpts {
81
87
  * Design §3.3 (init row), §4.1/§4.2, appendix (c).
82
88
  */
83
89
  ensureHub?: (opts: EnsureHubOpts) => Promise<{ pid: number; port: number; started: boolean }>;
90
+ /**
91
+ * Test seam: version-check-and-restart at the hub adoption point (#590).
92
+ * After init confirms a hub is answering on the canonical port, it compares
93
+ * the RUNNING hub's `/health` version against this installed package version;
94
+ * on mismatch it restarts the managed unit (once) so a freshly-installed hub
95
+ * never adopts a stale zombie. Production wires `ensureHubVersionMatches`;
96
+ * tests stub it to assert the call without touching launchctl / the live hub.
97
+ */
98
+ ensureHubVersion?: (ctx: {
99
+ port: number;
100
+ log: (line: string) => void;
101
+ }) => Promise<EnsureHubVersionMatchesResult>;
84
102
  /**
85
103
  * Test seam: guarantee an operator token exists once the hub is up (design
86
104
  * §3.1 / §3.3). Production reads `operator.token`; if absent AND a hub user
@@ -582,6 +600,18 @@ export async function init(opts: InitOpts = {}): Promise<number> {
582
600
  // spawn). The `ensureHub` seam is preserved for tests (and the return shape is
583
601
  // unchanged); only the production default flipped.
584
602
  const ensureHub = opts.ensureHub ?? defaultEnsureHubViaUnit;
603
+ // #590: after the hub is confirmed up, compare its RUNNING version to the
604
+ // installed package version and restart the managed unit on mismatch, so a
605
+ // freshly-installed hub never adopts a stale zombie that merely answers
606
+ // /health. Injectable for tests.
607
+ const ensureHubVersion =
608
+ opts.ensureHubVersion ??
609
+ ((ctx) =>
610
+ ensureHubVersionMatches({
611
+ installedVersion: pkg.version,
612
+ port: ctx.port,
613
+ log: ctx.log,
614
+ }));
585
615
  const guaranteeOperatorToken = opts.guaranteeOperatorToken ?? defaultGuaranteeOperatorToken;
586
616
  const readExposeStateFn = opts.readExposeStateFn ?? (() => readExposeState());
587
617
  const isTty = opts.isTty ?? Boolean(process.stdin.isTTY && process.stdout.isTTY);
@@ -640,6 +670,38 @@ export async function init(opts: InitOpts = {}): Promise<number> {
640
670
  // overridden, so the fallback is almost always correct.
641
671
  if (hubPort === undefined) hubPort = HUB_DEFAULT_PORT;
642
672
 
673
+ // Step 1.25 (#590): the hub answered /health, but is it the version we just
674
+ // installed? A zombie LaunchAgent survives `rm -rf ~/.parachute`, so a brand-
675
+ // new install can adopt month-old code that merely keeps the port. Compare the
676
+ // RUNNING version to the installed package version; on mismatch, restart the
677
+ // managed unit (once) so the tunnel/wizard/vault-install downstream bind to the
678
+ // NEW code. A non-unit-managed hub (legacy detached pid / dev `bun run serve`)
679
+ // is NOT killed — we surface the mismatch + an actionable message and bail so
680
+ // the operator decides. A still-mismatched-after-restart (bun-linked branch)
681
+ // warns + continues rather than looping.
682
+ try {
683
+ const versionResult = await ensureHubVersion({ port: hubPort, log });
684
+ for (const m of versionResult.messages) log(m);
685
+ if (versionResult.outcome === "not-unit-managed") {
686
+ // We can't safely take over a hub we don't own. Stop here so init doesn't
687
+ // wire a fresh tunnel + credentials to a stale runtime (the #590 field bug).
688
+ log("");
689
+ log("Resolve the version mismatch above, then re-run `parachute init`.");
690
+ return 1;
691
+ }
692
+ if (versionResult.outcome === "restart-failed") {
693
+ log("");
694
+ log("The hub service manager rejected the restart command.");
695
+ log("Try checking the logs:");
696
+ log(" parachute logs hub");
697
+ return 1;
698
+ }
699
+ // `match` / `not-running` / `restarted` / `still-mismatched` → continue.
700
+ } catch (err) {
701
+ // A version-check failure must never block init — degrade to a note.
702
+ log(`note: hub version check skipped (${err instanceof Error ? err.message : String(err)})`);
703
+ }
704
+
643
705
  // Step 1.5: guarantee an operator token exists (design §3.1 / §3.3). Under
644
706
  // the unified model every per-module verb is an authenticated module-ops
645
707
  // call, so the steady-state operator needs an `operator.token` on disk — the
@@ -5,7 +5,12 @@ import { autoWireScribeAuth } from "../auto-wire.ts";
5
5
  import { bunGlobalPrefixes, isLinked as defaultIsLinkedShared } from "../bun-link.ts";
6
6
  import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
7
7
  import { type ExposeState, readExposeState } from "../expose-state.ts";
8
- import { HUB_DEFAULT_PORT, readHubPort } from "../hub-control.ts";
8
+ import {
9
+ HUB_DEFAULT_PORT,
10
+ type PidOnPortFn,
11
+ defaultPidOnPort,
12
+ readHubPort,
13
+ } from "../hub-control.ts";
9
14
  import { type HubUnitDeps, defaultHubUnitDeps, isHubUnitInstalled } from "../hub-unit.ts";
10
15
  import {
11
16
  type ModuleManifest,
@@ -34,6 +39,7 @@ import {
34
39
  type DisableStaleModuleUnitsResult,
35
40
  disableStaleModuleUnits as defaultDisableStaleModuleUnits,
36
41
  } from "../stale-module-units.ts";
42
+ import { type OwnerProbeFn, defaultOwnerOfPid } from "../supervisor.ts";
37
43
  import { WELL_KNOWN_PATH } from "../well-known.ts";
38
44
  import { type LifecycleOpts, start as lifecycleStart } from "./lifecycle.ts";
39
45
  import { migrateNotice } from "./migrate.ts";
@@ -301,6 +307,22 @@ export interface InstallOpts {
301
307
  * unless the test populates services.json directly.
302
308
  */
303
309
  portProbe?: (port: number) => Promise<boolean>;
310
+ /**
311
+ * Test seam for the install-time port-squatter naming (#590 item 2). When the
312
+ * canonical port walk has to assign a fallback port because the canonical one
313
+ * is held, this looks up the pid LISTENing on the canonical port so the
314
+ * warning can name the holder (`pid 1234 (bun .../vault/src/server.ts)`) — the
315
+ * same #581 `pidOnPort` / `ownerOfPid` seams the supervisor start-path uses,
316
+ * reused (not duplicated). Detection-only — never kills. Production wires
317
+ * `defaultPidOnPort` (`lsof -ti :<port>`); tests inject a deterministic stub.
318
+ */
319
+ pidOnPort?: PidOnPortFn;
320
+ /**
321
+ * Test seam for the install-time port-squatter naming (#590 item 2): the
322
+ * best-effort command line of the squatting pid. Production wires
323
+ * `defaultOwnerOfPid` (`ps -o command= -p <pid>`); tests inject a stub.
324
+ */
325
+ ownerOfPid?: OwnerProbeFn;
304
326
  /**
305
327
  * Test seam for reading `<packageDir>/.parachute/module.json`. Production
306
328
  * uses the real file reader; tests inject a map from package-dir → manifest
@@ -974,6 +996,25 @@ export async function install(input: string, opts: InstallOpts = {}): Promise<nu
974
996
  });
975
997
  if (portResult.warning) {
976
998
  log(`⚠ ${portResult.warning}`);
999
+ // #590 item 2: the canonical port was held, so we walked to a fallback. Name
1000
+ // the squatter — the supervisor start-path does this post-#581; do it here at
1001
+ // install-time too. Reuse the #581 pidOnPort / ownerOfPid seams (detection
1002
+ // only; never kill). The holder is usually a foreign pid (not one of OUR rows —
1003
+ // the common case when a stale pre-supervisor daemon is squatting); whoever it
1004
+ // is, surface its pid + command line + a hint.
1005
+ if (canonicalPort !== undefined && portResult.source !== "canonical") {
1006
+ const pidOnPort = opts.pidOnPort ?? defaultPidOnPort;
1007
+ const ownerOfPid = opts.ownerOfPid ?? defaultOwnerOfPid;
1008
+ const holder = pidOnPort(canonicalPort);
1009
+ if (holder !== undefined) {
1010
+ const cmdline = ownerOfPid(holder);
1011
+ const who = cmdline ? `pid ${holder} (${cmdline})` : `pid ${holder}`;
1012
+ log(` canonical port ${canonicalPort} is held by ${who}.`);
1013
+ log(
1014
+ ` This may be a stale pre-supervisor daemon. If so, stop it (kill ${holder}) and re-run \`parachute install ${entryName}\` to reclaim the canonical port.`,
1015
+ );
1016
+ }
1017
+ }
977
1018
  }
978
1019
 
979
1020
  // Find-or-seed the manifest entry. Re-read after the seed write so a silent
package/src/hub-unit.ts CHANGED
@@ -76,6 +76,16 @@ export interface HubUnitDeps extends ManagedUnitDeps {
76
76
  * uses a bounded `fetch`; tests inject a deterministic stub.
77
77
  */
78
78
  probeHealth: (port: number) => Promise<boolean>;
79
+ /**
80
+ * HTTP `/health` probe that ALSO reads the JSON `version` field of the
81
+ * running hub (#590). Resolves to `{ ok, version }` — `ok` mirrors
82
+ * {@link probeHealth} (2xx), `version` is the running hub's reported version
83
+ * (or `undefined` when the body has no `version` field — a very old hub that
84
+ * predates the field; the caller treats that as a mismatch). Resolves to
85
+ * `null` when the hub doesn't answer at all (connection-refused / timeout).
86
+ * Production uses a bounded `fetch`; tests inject a deterministic stub.
87
+ */
88
+ probeHealthVersion: (port: number) => Promise<{ ok: boolean; version?: string } | null>;
79
89
  /** TCP connect-probe for readiness polling (reuses `defaultPortListening`). */
80
90
  portListening: PortListeningFn;
81
91
  /** Sleep between readiness polls (tests pin to 0). */
@@ -98,9 +108,41 @@ async function defaultProbeHealth(port: number): Promise<boolean> {
98
108
  }
99
109
  }
100
110
 
111
+ /**
112
+ * Default version-aware `/health` probe (#590). Reads the JSON body and pulls
113
+ * out the `version` field. Returns `null` on any network error / timeout (the
114
+ * hub isn't answering); `{ ok, version }` otherwise — `version` is `undefined`
115
+ * when the body has no string `version` field (a very old hub, or a non-JSON
116
+ * body), which the caller treats as a mismatch. 1.5s timeout, mirroring
117
+ * {@link defaultProbeHealth}.
118
+ */
119
+ async function defaultProbeHealthVersion(
120
+ port: number,
121
+ ): Promise<{ ok: boolean; version?: string } | null> {
122
+ try {
123
+ const res = await fetch(`http://127.0.0.1:${port}/health`, {
124
+ signal: AbortSignal.timeout(1500),
125
+ });
126
+ let version: string | undefined;
127
+ try {
128
+ const body = (await res.json()) as unknown;
129
+ if (body && typeof body === "object" && "version" in body) {
130
+ const v = (body as { version?: unknown }).version;
131
+ if (typeof v === "string" && v.length > 0) version = v;
132
+ }
133
+ } catch {
134
+ // Non-JSON body → no version. Leave `version` undefined (→ mismatch).
135
+ }
136
+ return version !== undefined ? { ok: res.ok, version } : { ok: res.ok };
137
+ } catch {
138
+ return null;
139
+ }
140
+ }
141
+
101
142
  export const defaultHubUnitDeps: HubUnitDeps = {
102
143
  ...defaultManagedUnitDeps,
103
144
  probeHealth: defaultProbeHealth,
145
+ probeHealthVersion: defaultProbeHealthVersion,
104
146
  portListening: defaultPortListening,
105
147
  sleep: (ms) => new Promise((r) => setTimeout(r, ms)),
106
148
  };
@@ -158,6 +200,16 @@ export function isHubUnitInstalled(deps: HubUnitDeps): boolean {
158
200
  * Is a service manager (systemd / launchd) available on this platform at all?
159
201
  * macOS → launchctl; Linux → systemctl. A box with neither (a bare container,
160
202
  * an init-less host) has no manager — the foreground-`serve`-only path (R19/D1).
203
+ *
204
+ * NOTE: production `deps.which` is `Bun.which`, which resolves against the
205
+ * process PATH. This ASSUMES `launchctl` (`/bin/launchctl`) / `systemctl` are on
206
+ * the PATH — true on any normal macOS / systemd box. A deliberately stripped
207
+ * PATH (a `nix develop` shell that omits `/bin`, a minimal CI image) would make
208
+ * `which` return null and misclassify a genuinely launchd-managed hub as
209
+ * not-unit-managed. The #590 version-check then degrades to the "stop it
210
+ * yourself" path rather than restarting the unit — a safe (never-kill)
211
+ * degradation, but worth knowing if a dev sees not-unit-managed on a box that
212
+ * clearly runs the hub under launchd.
161
213
  */
162
214
  export function hasServiceManager(deps: HubUnitDeps): boolean {
163
215
  if (deps.platform === "darwin") return deps.which("launchctl") !== null;
@@ -342,6 +394,209 @@ export function restartHubUnit(deps: HubUnitDeps): HubUnitManagerOpResult {
342
394
  return { outcome: "ok", messages: [] };
343
395
  }
344
396
 
397
+ /**
398
+ * Outcome of {@link ensureHubVersionMatches} (#590).
399
+ */
400
+ export type HubVersionOutcome =
401
+ /** The running hub's version matched the installed version — no action. */
402
+ | "match"
403
+ /** Hub wasn't answering `/health` at all — nothing to compare (no-op). */
404
+ | "not-running"
405
+ /**
406
+ * Versions mismatched, the hub is unit-managed, the unit was restarted, and
407
+ * the running version now matches the installed version. The zombie was
408
+ * cleared.
409
+ */
410
+ | "restarted"
411
+ /**
412
+ * Versions mismatched and the hub is unit-managed, but after the (single)
413
+ * restart the running version STILL doesn't match — e.g. a bun-linked
414
+ * checkout on a feature branch whose package.json version trails the running
415
+ * code, or a restart that adopted yet-another stale build. We restart at most
416
+ * once and then continue rather than loop (the restart-loop guard).
417
+ */
418
+ | "still-mismatched"
419
+ /**
420
+ * Versions mismatched but the running hub is NOT unit-managed (a legacy
421
+ * detached pid, or a dev `bun run serve` in a terminal, or no service
422
+ * manager at all). We do NOT kill it blindly — we surface the mismatch +
423
+ * an actionable message and stop.
424
+ */
425
+ | "not-unit-managed"
426
+ /**
427
+ * Versions mismatched, the hub is unit-managed, but the restart command
428
+ * itself failed (the manager rejected it). Surface the manager's error.
429
+ */
430
+ | "restart-failed";
431
+
432
+ export interface EnsureHubVersionMatchesResult {
433
+ outcome: HubVersionOutcome;
434
+ /** The running hub's reported version (undefined when it had no version field / wasn't running). */
435
+ runningVersion?: string;
436
+ /** The installed package version we compared against. */
437
+ installedVersion: string;
438
+ /** Human-readable lines the caller should surface (mismatch notice, actionable hints). */
439
+ messages: string[];
440
+ }
441
+
442
+ export interface EnsureHubVersionMatchesOpts {
443
+ /** The installed package version (the caller reads its own `package.json`). */
444
+ installedVersion: string;
445
+ /** Hub port to probe (default 1939). */
446
+ port?: number;
447
+ /** Injectable deps (defaults to production). */
448
+ deps?: HubUnitDeps;
449
+ /** Readiness budget after a restart, in ms (default 15s). */
450
+ readyTimeoutMs?: number;
451
+ /** Poll interval for the post-restart re-probe, in ms (default 250). */
452
+ readyPollMs?: number;
453
+ log?: (line: string) => void;
454
+ }
455
+
456
+ /**
457
+ * Version-check-and-restart at a hub adoption point (#590).
458
+ *
459
+ * The field bug: a freshly-installed hub (e.g. 0.6.4-rc.9) adopts an
460
+ * arbitrarily-stale RUNNING hub (0.5.14-rc.4) merely because it answers
461
+ * `/health` on 1939 — a zombie LaunchAgent survives `rm -rf ~/.parachute`, and
462
+ * everything downstream (tunnel, wizard, vault install) then binds to month-old
463
+ * code running against a directory deleted out from under it.
464
+ *
465
+ * This helper closes that edge. Given the INSTALLED package version (the caller
466
+ * reads its own `package.json` at runtime), it:
467
+ * 1. Probes `/health` for the RUNNING version. Not answering → `not-running`
468
+ * (nothing to adopt; the caller's bringup path handles starting it).
469
+ * 2. Version matches → `match` (today's behavior, no extra restart).
470
+ * 3. Version mismatches (INCLUDING a hub with no `version` field — a very old
471
+ * hub — which reads as "undefined ≠ installed"):
472
+ * a. If the running hub is NOT unit-managed (no manager / no unit
473
+ * installed) → `not-unit-managed`. We do NOT kill it blindly: a
474
+ * detached legacy pid or a dev `bun run serve` may be the operator's,
475
+ * and KeepAlive-less processes aren't ours to reap. Surface an
476
+ * actionable message and stop.
477
+ * b. If it IS unit-managed → restart the unit ONCE
478
+ * ({@link restartHubUnit}), then re-probe `/health` until the version
479
+ * matches or the timeout elapses:
480
+ * - now matches → `restarted` (zombie cleared).
481
+ * - still mismatched → `still-mismatched` (restart-loop guard: we
482
+ * restart at most once; a bun-linked branch checkout whose
483
+ * package.json trails the code stays here — warn + continue, do
484
+ * not loop).
485
+ * - restart command failed → `restart-failed`.
486
+ *
487
+ * The CALLER decides whether a given outcome is fatal. `init` and the expose
488
+ * chains both want: `match`/`not-running`/`restarted` → continue quietly;
489
+ * `still-mismatched` → warn loudly + continue; `not-unit-managed`/`restart-failed`
490
+ * → warn loudly and stop, so a brand-new tunnel never wires to a zombie.
491
+ *
492
+ * Everything is behind the {@link HubUnitDeps} seam — no real launchctl /
493
+ * systemctl / HTTP call in tests.
494
+ */
495
+ export async function ensureHubVersionMatches(
496
+ opts: EnsureHubVersionMatchesOpts,
497
+ ): Promise<EnsureHubVersionMatchesResult> {
498
+ const deps = opts.deps ?? defaultHubUnitDeps;
499
+ const port = opts.port ?? HUB_UNIT_DEFAULT_PORT;
500
+ const installedVersion = opts.installedVersion;
501
+ const readyTimeoutMs = opts.readyTimeoutMs ?? 15_000;
502
+ const readyPollMs = opts.readyPollMs ?? 250;
503
+ const log = opts.log ?? (() => {});
504
+
505
+ const probe = await deps.probeHealthVersion(port);
506
+ if (probe === null) {
507
+ // Hub isn't answering — nothing to compare. The caller's bringup path owns
508
+ // starting it; this helper is a no-op here.
509
+ return { outcome: "not-running", installedVersion, messages: [] };
510
+ }
511
+
512
+ const runningVersion = probe.version;
513
+ if (runningVersion === installedVersion) {
514
+ // Exactly today's behavior — versions agree, no extra restart.
515
+ return { outcome: "match", runningVersion, installedVersion, messages: [] };
516
+ }
517
+
518
+ // Mismatch (includes the no-`version`-field very-old-hub case → undefined).
519
+ const runningLabel = runningVersion ?? "an older version (no version field)";
520
+
521
+ // Is this hub one we can restart through the manager? If there's no manager,
522
+ // or no unit installed, the running hub is a legacy detached pid / a dev
523
+ // foreground `serve` — NOT ours to reap. Surface + stop (do not kill blindly).
524
+ if (!hasServiceManager(deps) || !isHubUnitInstalled(deps)) {
525
+ return {
526
+ outcome: "not-unit-managed",
527
+ runningVersion,
528
+ installedVersion,
529
+ messages: [
530
+ `⚠ the running hub is ${runningLabel} but ${installedVersion} is installed.`,
531
+ " The running hub is NOT managed by a Parachute service unit (a detached process or a foreground `parachute serve` / `bun src/cli.ts serve`), so it won't be restarted automatically.",
532
+ ` Stop it yourself (find it with \`lsof -ti :${port}\` then \`kill <pid>\`, or quit the foreground \`parachute serve\` / \`bun src/cli.ts serve\` on a dev checkout), then re-run so the new code is adopted.`,
533
+ ],
534
+ };
535
+ }
536
+
537
+ // Unit-managed mismatch: restart the unit ONCE to pick up the new code.
538
+ log(
539
+ `⚠ the running hub is ${runningLabel} but ${installedVersion} is installed — restarting the hub unit to pick up the new code.`,
540
+ );
541
+ const restart = restartHubUnit(deps);
542
+ if (restart.outcome !== "ok") {
543
+ return {
544
+ outcome: "restart-failed",
545
+ runningVersion,
546
+ installedVersion,
547
+ messages: [
548
+ `⚠ the running hub is ${runningLabel} but ${installedVersion} is installed, and the hub unit restart failed.`,
549
+ ...restart.messages,
550
+ ],
551
+ };
552
+ }
553
+
554
+ // Builders for the two terminal outcomes of the post-restart re-probe loop.
555
+ const restartedResult = (v: string): EnsureHubVersionMatchesResult => ({
556
+ outcome: "restarted",
557
+ runningVersion: v,
558
+ installedVersion,
559
+ messages: [`✓ hub unit restarted; now running ${installedVersion}.`],
560
+ });
561
+ const stillMismatchedResult = (last: string | undefined): EnsureHubVersionMatchesResult => {
562
+ const reports = last ? ` (reports ${last})` : "";
563
+ return {
564
+ outcome: "still-mismatched",
565
+ ...(last !== undefined ? { runningVersion: last } : {}),
566
+ installedVersion,
567
+ messages: [
568
+ `⚠ restarted the hub unit, but it is still not reporting ${installedVersion}${reports}.`,
569
+ " This can happen with a bun-linked checkout on a feature branch whose package.json version trails the running code.",
570
+ ` Continuing — verify with \`parachute status\` / \`curl http://127.0.0.1:${port}/health\` if the hub should be on a specific version.`,
571
+ ],
572
+ };
573
+ };
574
+
575
+ // Re-probe `/health` until the running version matches the installed version
576
+ // or the readiness budget elapses. Restart-loop guard: we restart AT MOST
577
+ // once — if it still mismatches after this single restart (e.g. a bun-linked
578
+ // checkout on a branch), we warn + continue rather than looping.
579
+ const deadline = Date.now() + readyTimeoutMs;
580
+ for (;;) {
581
+ const after = await deps.probeHealthVersion(port);
582
+ if (after !== null && after.version === installedVersion) {
583
+ return restartedResult(installedVersion);
584
+ }
585
+ if (Date.now() >= deadline) {
586
+ // Report the last-observed (still-stale) version if the hub came back.
587
+ return stillMismatchedResult(after?.version ?? runningVersion);
588
+ }
589
+ if (readyPollMs > 0) await deps.sleep(readyPollMs);
590
+ else break;
591
+ }
592
+ // readyPollMs === 0 fast-path: one more probe, then settle.
593
+ const finalProbe = await deps.probeHealthVersion(port);
594
+ if (finalProbe !== null && finalProbe.version === installedVersion) {
595
+ return restartedResult(installedVersion);
596
+ }
597
+ return stillMismatchedResult(finalProbe?.version ?? runningVersion);
598
+ }
599
+
345
600
  /**
346
601
  * Run-state of the hub UNIT as reported by the platform manager (design §6.4).
347
602
  * This is the manager's view — NOT a liveness verdict. The hub answering