@openparachute/hub 0.6.4-rc.8 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/hub-unit.ts CHANGED
@@ -76,6 +76,16 @@ export interface HubUnitDeps extends ManagedUnitDeps {
76
76
  * uses a bounded `fetch`; tests inject a deterministic stub.
77
77
  */
78
78
  probeHealth: (port: number) => Promise<boolean>;
79
+ /**
80
+ * HTTP `/health` probe that ALSO reads the JSON `version` field of the
81
+ * running hub (#590). Resolves to `{ ok, version }` — `ok` mirrors
82
+ * {@link probeHealth} (2xx), `version` is the running hub's reported version
83
+ * (or `undefined` when the body has no `version` field — a very old hub that
84
+ * predates the field; the caller treats that as a mismatch). Resolves to
85
+ * `null` when the hub doesn't answer at all (connection-refused / timeout).
86
+ * Production uses a bounded `fetch`; tests inject a deterministic stub.
87
+ */
88
+ probeHealthVersion: (port: number) => Promise<{ ok: boolean; version?: string } | null>;
79
89
  /** TCP connect-probe for readiness polling (reuses `defaultPortListening`). */
80
90
  portListening: PortListeningFn;
81
91
  /** Sleep between readiness polls (tests pin to 0). */
@@ -98,9 +108,41 @@ async function defaultProbeHealth(port: number): Promise<boolean> {
98
108
  }
99
109
  }
100
110
 
111
/**
 * Default version-aware `/health` probe (#590).
 *
 * Issues a GET against `http://127.0.0.1:<port>/health` (bounded by a 1.5s
 * abort signal), then tries to read a string `version` field out of the JSON
 * body.
 *
 * @param port - Local TCP port the hub is expected to answer on.
 * @returns
 *  - `null` when the request or the body read fails for any reason
 *    (connection refused, timeout, aborted / errored body stream) — i.e. the
 *    hub is not answering;
 *  - `{ ok, version }` when the body is JSON carrying a non-empty string
 *    `version`; `ok` is the response's 2xx flag;
 *  - `{ ok }` (no `version` key at all) when the body arrived but is not
 *    JSON, or has no usable `version` field — the caller treats that as a
 *    mismatch.
 */
async function defaultProbeHealthVersion(
  port: number,
): Promise<{ ok: boolean; version?: string } | null> {
  try {
    const res = await fetch(`http://127.0.0.1:${port}/health`, {
      signal: AbortSignal.timeout(1500),
    });
    // Read the body in the OUTER try on purpose: a body that cannot be read
    // (the abort timer firing after the headers arrived, a reset connection)
    // means the hub is not answering, so it must surface as `null` — not be
    // mistaken for "answered without a version", which the caller would treat
    // as a version mismatch.
    const raw = await res.text();
    let version: string | undefined;
    try {
      const body: unknown = JSON.parse(raw);
      if (typeof body === "object" && body !== null && "version" in body) {
        const v = (body as { version?: unknown }).version;
        // Only a non-empty string counts as a reported version.
        if (typeof v === "string" && v.length > 0) version = v;
      }
    } catch {
      // Non-JSON body → no version. Leave `version` undefined (→ mismatch).
    }
    // Omit the `version` key entirely (rather than setting it to undefined)
    // when nothing usable was reported.
    return version !== undefined ? { ok: res.ok, version } : { ok: res.ok };
  } catch {
    return null;
  }
}
141
+
101
142
  export const defaultHubUnitDeps: HubUnitDeps = {
102
143
  ...defaultManagedUnitDeps,
103
144
  probeHealth: defaultProbeHealth,
145
+ probeHealthVersion: defaultProbeHealthVersion,
104
146
  portListening: defaultPortListening,
105
147
  sleep: (ms) => new Promise((r) => setTimeout(r, ms)),
106
148
  };
@@ -158,6 +200,16 @@ export function isHubUnitInstalled(deps: HubUnitDeps): boolean {
158
200
  * Is a service manager (systemd / launchd) available on this platform at all?
159
201
  * macOS → launchctl; Linux → systemctl. A box with neither (a bare container,
160
202
  * an init-less host) has no manager — the foreground-`serve`-only path (R19/D1).
203
+ *
204
+ * NOTE: production `deps.which` is `Bun.which`, which resolves against the
205
+ * process PATH. This ASSUMES `launchctl` (`/bin/launchctl`) / `systemctl` are on
206
+ * the PATH — true on any normal macOS / systemd box. A deliberately stripped
207
+ * PATH (a `nix develop` shell that omits `/bin`, a minimal CI image) would make
208
+ * `which` return null and misclassify a genuinely launchd-managed hub as
209
+ * not-unit-managed. The #590 version-check then degrades to the "stop it
210
+ * yourself" path rather than restarting the unit — a safe (never-kill)
211
+ * degradation, but worth knowing if a dev sees not-unit-managed on a box that
212
+ * clearly runs the hub under launchd.
161
213
  */
162
214
  export function hasServiceManager(deps: HubUnitDeps): boolean {
163
215
  if (deps.platform === "darwin") return deps.which("launchctl") !== null;
@@ -342,6 +394,209 @@ export function restartHubUnit(deps: HubUnitDeps): HubUnitManagerOpResult {
342
394
  return { outcome: "ok", messages: [] };
343
395
  }
344
396
 
397
/**
 * The terminal states {@link ensureHubVersionMatches} can report (#590).
 */
export type HubVersionOutcome =
  /** Running and installed versions are identical — nothing was done. */
  | "match"
  /** Nothing answered `/health`, so there was no version to compare (no-op). */
  | "not-running"
  /**
   * A mismatch was found on a unit-managed hub, the unit was restarted, and
   * the hub now reports the installed version — the zombie is gone.
   */
  | "restarted"
  /**
   * A mismatch was found on a unit-managed hub and the unit was restarted
   * (once), yet the hub STILL does not report the installed version — for
   * instance a bun-linked checkout on a feature branch whose package.json
   * version trails the running code, or a restart that picked up another
   * stale build. Only one restart is ever attempted; after that we carry on
   * instead of looping (the restart-loop guard).
   */
  | "still-mismatched"
  /**
   * A mismatch was found but the running hub is NOT unit-managed (a legacy
   * detached pid, a dev `bun run serve` in a terminal, or no service manager
   * at all). It is never killed blindly — the mismatch is surfaced together
   * with an actionable message, and that is all.
   */
  | "not-unit-managed"
  /**
   * A mismatch was found on a unit-managed hub, but the restart command
   * itself failed (the manager rejected it). The manager's error is surfaced.
   */
  | "restart-failed";
431
+
432
/** What {@link ensureHubVersionMatches} hands back to its caller. */
export interface EnsureHubVersionMatchesResult {
  /** Which terminal state the check ended in. */
  outcome: HubVersionOutcome;
  /**
   * Version the running hub reported. Absent / undefined when the hub had no
   * version field or was not running.
   */
  runningVersion?: string;
  /** Installed package version the running hub was compared against. */
  installedVersion: string;
  /** Human-readable lines for the caller to surface (mismatch notice, actionable hints). */
  messages: string[];
}
441
+
442
/** Inputs accepted by {@link ensureHubVersionMatches}. */
export interface EnsureHubVersionMatchesOpts {
  /** Installed package version; the caller reads it from its own `package.json`. */
  installedVersion: string;
  /** Port the hub is probed on (default 1939). */
  port?: number;
  /** Dependency seam; production defaults are used when omitted. */
  deps?: HubUnitDeps;
  /** How long to wait, in ms, for the hub to report the right version after a restart (default 15s). */
  readyTimeoutMs?: number;
  /** Delay between post-restart re-probes, in ms (default 250). */
  readyPollMs?: number;
  /** Optional sink for progress lines emitted while the check runs. */
  log?: (line: string) => void;
}
455
+
456
/**
 * Compare the RUNNING hub's version with the INSTALLED one at a hub adoption
 * point and, where it is safe, restart the hub unit so the new code is the one
 * that gets adopted (#590).
 *
 * Motivation: a freshly installed hub could adopt an arbitrarily stale running
 * hub just because it answered `/health` — e.g. a zombie LaunchAgent that
 * outlived `rm -rf ~/.parachute` — so everything downstream ended up wired to
 * old code.
 *
 * Decision ladder:
 *  1. `/health` does not answer → `not-running`; starting the hub is the
 *     caller's bringup path's job.
 *  2. Reported version equals `installedVersion` → `match`; no restart.
 *  3. Anything else is a mismatch — including a hub that reports no `version`
 *     at all (undefined never equals the installed version):
 *     - no service manager, or no hub unit installed → `not-unit-managed`.
 *       The process is never killed here; an actionable message is returned.
 *     - otherwise the unit is restarted exactly ONCE via
 *       {@link restartHubUnit}:
 *         - the restart command fails → `restart-failed` (manager messages
 *           are appended);
 *         - `/health` is re-probed until it reports `installedVersion`
 *           (→ `restarted`) or the readiness budget runs out
 *           (→ `still-mismatched`). There is never a second restart.
 *
 * Whether an outcome is fatal is the caller's call; this helper only reports.
 * All side effects go through {@link HubUnitDeps}, so tests can stub them.
 *
 * @param opts - Installed version plus optional port, deps, timing and logger.
 * @returns The outcome, the versions involved, and lines to show the user.
 */
export async function ensureHubVersionMatches(
  opts: EnsureHubVersionMatchesOpts,
): Promise<EnsureHubVersionMatchesResult> {
  const deps = opts.deps ?? defaultHubUnitDeps;
  const port = opts.port ?? HUB_UNIT_DEFAULT_PORT;
  const installedVersion = opts.installedVersion;
  const readyTimeoutMs = opts.readyTimeoutMs ?? 15000;
  const readyPollMs = opts.readyPollMs ?? 250;
  const noop = (): void => {};
  const log = opts.log ?? noop;

  // Step 1: is anything answering at all?
  const initial = await deps.probeHealthVersion(port);
  if (initial === null) {
    return { outcome: "not-running", installedVersion, messages: [] };
  }

  // Step 2: same version → keep today's behavior, no restart.
  const runningVersion = initial.version;
  if (runningVersion === installedVersion) {
    return { outcome: "match", runningVersion, installedVersion, messages: [] };
  }

  // Step 3: mismatch. A hub without a `version` field lands here as well.
  const runningLabel = runningVersion ?? "an older version (no version field)";
  // Shared opening of every "mismatch" line; each branch appends its own tail.
  const mismatch = `⚠ the running hub is ${runningLabel} but ${installedVersion} is installed`;

  // Only a hub that runs under an installed unit of an available service
  // manager may be restarted; anything else is not ours to reap.
  const unitManaged = hasServiceManager(deps) && isHubUnitInstalled(deps);
  if (!unitManaged) {
    return {
      outcome: "not-unit-managed",
      runningVersion,
      installedVersion,
      messages: [
        `${mismatch}.`,
        " The running hub is NOT managed by a Parachute service unit (a detached process or a foreground `parachute serve` / `bun src/cli.ts serve`), so it won't be restarted automatically.",
        ` Stop it yourself (find it with \`lsof -ti :${port}\` then \`kill <pid>\`, or quit the foreground \`parachute serve\` / \`bun src/cli.ts serve\` on a dev checkout), then re-run so the new code is adopted.`,
      ],
    };
  }

  // The one and only restart attempt.
  log(`${mismatch} — restarting the hub unit to pick up the new code.`);
  const restart = restartHubUnit(deps);
  if (restart.outcome !== "ok") {
    return {
      outcome: "restart-failed",
      runningVersion,
      installedVersion,
      messages: [`${mismatch}, and the hub unit restart failed.`, ...restart.messages],
    };
  }

  type VersionProbe = { version?: string } | null;

  // True once the hub answers AND reports exactly the installed version.
  const reportsInstalled = (seen: VersionProbe): boolean =>
    seen !== null && seen.version === installedVersion;

  // Terminal result: the restart took and the right version is now running.
  const restarted = (): EnsureHubVersionMatchesResult => ({
    outcome: "restarted",
    runningVersion: installedVersion,
    installedVersion,
    messages: [`✓ hub unit restarted; now running ${installedVersion}.`],
  });

  // Terminal result: budget exhausted. Reports the version seen on the last
  // probe, falling back to the pre-restart one when that probe had none.
  const stillMismatched = (seen: VersionProbe): EnsureHubVersionMatchesResult => {
    const last = seen?.version ?? runningVersion;
    const reports = last ? ` (reports ${last})` : "";
    return {
      outcome: "still-mismatched",
      ...(last !== undefined ? { runningVersion: last } : {}),
      installedVersion,
      messages: [
        `⚠ restarted the hub unit, but it is still not reporting ${installedVersion}${reports}.`,
        " This can happen with a bun-linked checkout on a feature branch whose package.json version trails the running code.",
        ` Continuing — verify with \`parachute status\` / \`curl http://127.0.0.1:${port}/health\` if the hub should be on a specific version.`,
      ],
    };
  };

  // Re-probe until the version matches or the readiness budget is spent.
  const giveUpAt = Date.now() + readyTimeoutMs;
  while (true) {
    const seen = await deps.probeHealthVersion(port);
    if (reportsInstalled(seen)) return restarted();
    if (Date.now() >= giveUpAt) return stillMismatched(seen);

    if (!(readyPollMs > 0)) {
      // Zero poll interval (tests): no waiting — one more probe, then settle.
      const lastChance = await deps.probeHealthVersion(port);
      return reportsInstalled(lastChance) ? restarted() : stillMismatched(lastChance);
    }
    await deps.sleep(readyPollMs);
  }
}
599
+
345
600
  /**
346
601
  * Run-state of the hub UNIT as reported by the platform manager (design §6.4).
347
602
  * This is the manager's view — NOT a liveness verdict. The hub answering