@openparachute/hub 0.6.2 → 0.6.3-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +87 -35
  2. package/package.json +1 -1
  3. package/src/__tests__/api-hub-upgrade.test.ts +690 -0
  4. package/src/__tests__/api-modules-ops.test.ts +359 -3
  5. package/src/__tests__/api-modules.test.ts +54 -0
  6. package/src/__tests__/expose-cloudflare.test.ts +163 -72
  7. package/src/__tests__/expose-off-auto.test.ts +26 -1
  8. package/src/__tests__/expose.test.ts +260 -240
  9. package/src/__tests__/hub-control.test.ts +1 -242
  10. package/src/__tests__/hub-server.test.ts +64 -0
  11. package/src/__tests__/hub-unit.test.ts +574 -0
  12. package/src/__tests__/init.test.ts +219 -2
  13. package/src/__tests__/lifecycle.test.ts +416 -1448
  14. package/src/__tests__/managed-unit.test.ts +575 -0
  15. package/src/__tests__/migrate-cutover.test.ts +840 -0
  16. package/src/__tests__/migrate-offer.test.ts +240 -0
  17. package/src/__tests__/migrate.test.ts +132 -0
  18. package/src/__tests__/module-ops-client.test.ts +556 -0
  19. package/src/__tests__/port-probe.test.ts +23 -0
  20. package/src/__tests__/setup-wizard.test.ts +130 -0
  21. package/src/__tests__/status-supervisor.test.ts +504 -0
  22. package/src/__tests__/status.test.ts +157 -708
  23. package/src/__tests__/supervisor.test.ts +471 -6
  24. package/src/__tests__/upgrade.test.ts +351 -5
  25. package/src/api-hub-upgrade.ts +384 -0
  26. package/src/api-hub.ts +2 -1
  27. package/src/api-modules-ops.ts +221 -0
  28. package/src/api-modules.ts +18 -2
  29. package/src/cli.ts +97 -12
  30. package/src/cloudflare/connector-service.ts +117 -322
  31. package/src/commands/expose-cloudflare.ts +63 -71
  32. package/src/commands/expose-supervisor.ts +247 -0
  33. package/src/commands/expose.ts +59 -48
  34. package/src/commands/init.ts +225 -12
  35. package/src/commands/lifecycle.ts +455 -816
  36. package/src/commands/migrate-cutover.ts +837 -0
  37. package/src/commands/migrate.ts +71 -2
  38. package/src/commands/serve-boot.ts +71 -25
  39. package/src/commands/status.ts +535 -235
  40. package/src/commands/upgrade.ts +100 -2
  41. package/src/help.ts +128 -68
  42. package/src/hub-control.ts +23 -162
  43. package/src/hub-server.ts +39 -0
  44. package/src/hub-unit.ts +735 -0
  45. package/src/hub-upgrade-helper.ts +306 -0
  46. package/src/hub-upgrade-mode.ts +209 -0
  47. package/src/hub-upgrade-status.ts +150 -0
  48. package/src/managed-unit.ts +692 -0
  49. package/src/migrate-offer.ts +186 -0
  50. package/src/module-ops-client.ts +457 -0
  51. package/src/port-probe.ts +50 -0
  52. package/src/process-state.ts +19 -3
  53. package/src/setup-wizard.ts +80 -1
  54. package/src/supervisor.ts +389 -38
  55. package/web/ui/dist/assets/index-D_6AFvZy.js +61 -0
  56. package/web/ui/dist/assets/{index-BiBlvEaj.css → index-mz8XcVPP.css} +1 -1
  57. package/web/ui/dist/index.html +2 -2
  58. package/web/ui/dist/assets/index-CIN3mnmf.js +0 -61
@@ -47,21 +47,21 @@ import {
47
47
  clearExposeState,
48
48
  writeExposeState,
49
49
  } from "../expose-state.ts";
50
- import {
51
- type EnsureHubOpts,
52
- HUB_DEFAULT_PORT,
53
- ensureHubRunning,
54
- readHubPort,
55
- } from "../hub-control.ts";
50
+ import { HUB_DEFAULT_PORT, readHubPort } from "../hub-control.ts";
56
51
  import { deriveHubOrigin } from "../hub-origin.ts";
57
- import { type AliveFn, defaultAlive, processState } from "../process-state.ts";
52
+ import { HUB_UNIT_DEFAULT_PORT } from "../hub-unit.ts";
53
+ import { type AliveFn, defaultAlive } from "../process-state.ts";
58
54
  import { readManifest } from "../services-manifest.ts";
59
55
  import { type Runner, defaultRunner } from "../tailscale/run.ts";
60
- import { persistVaultHubOrigin } from "../vault-hub-origin-env.ts";
61
56
  import type { VaultAuthStatus } from "../vault/auth-status.ts";
62
- import { WELL_KNOWN_DIR } from "../well-known.ts";
63
57
  import { printPublic2FAWarning } from "./expose-2fa-warning.ts";
64
- import { restart } from "./lifecycle.ts";
58
+ import {
59
+ type ExposeSupervisorOpts,
60
+ type ResolvedExposeSupervisor,
61
+ ensureHubUnitForExpose,
62
+ resolveExposeSupervisor,
63
+ restartHubDependentViaSupervisor,
64
+ } from "./expose-supervisor.ts";
65
65
 
66
66
  const AUTH_DOC_URL =
67
67
  "https://github.com/ParachuteComputer/parachute-vault/blob/main/docs/auth-model.md";
@@ -325,8 +325,8 @@ export interface ExposeCloudflareOpts {
325
325
  cloudflaredHome?: string;
326
326
  /**
327
327
  * Config root for hub PID / port / log files. Defaults to `~/.parachute`.
328
- * Threaded into `ensureHubRunning` so cloudflared's ingress target stays
329
- * in sync with where the hub actually bound.
328
+ * Threaded through so cloudflared's ingress target stays in sync with where
329
+ * the hub actually bound.
330
330
  */
331
331
  configDir?: string;
332
332
  /**
@@ -336,22 +336,10 @@ export interface ExposeCloudflareOpts {
336
336
  */
337
337
  hubOrigin?: string;
338
338
  /**
339
- * Overrides for hub lifecycle primarily for tests. Tests pass
340
- * `skipHubLifecycle: true` (above) plus a seeded `hub.port` file so the
341
- * cloudflare path can resolve a port without actually spawning a hub.
342
- */
343
- hubEnsureOpts?: Omit<EnsureHubOpts, "configDir" | "wellKnownDir" | "log">;
344
- /**
345
- * Directory holding hub.html (passed through to the hub server on first
346
- * spawn). Defaults to the same `well-known/` resolution the Tailscale
347
- * path uses.
348
- */
349
- wellKnownDir?: string;
350
- /**
351
- * Skip spawning the hub server. Tests flip this on and pre-seed
339
+ * Skip ensuring the hub unit. Tests flip this on and pre-seed
352
340
  * `<configDir>/hub/run/hub.port` so `readHubPort` can resolve the
353
- * cloudflared target without a live process. Production always leaves
354
- * this off so the bringup self-heals a missing hub.
341
+ * cloudflared target without a live hub. Production always leaves this off so
342
+ * the bringup ensures the hub unit is up.
355
343
  */
356
344
  skipHub?: boolean;
357
345
  now?: () => Date;
@@ -368,13 +356,20 @@ export interface ExposeCloudflareOpts {
368
356
  */
369
357
  vaultAuthStatus?: VaultAuthStatus;
370
358
  /**
371
- * Restart a hub-dependent service so it re-reads the new public hub origin.
372
- * Mirrors the Tailscale path's `restartService` seam (`expose.ts`). Defaults
373
- * to lifecycle `restart`; tests inject a fake to assert the call without
374
- * spawning a real daemon. Only invoked for vault (the only `iss`-validating
375
- * service) and only when it's already running.
359
+ * Supervisor-path seams (design §4.3) — the ONLY runtime as of Phase 5b.
360
+ * "ensure the hub" ensures the UNIT is up (not a detached spawn), and the
361
+ * post-route vault restart drives the running Supervisor over the loopback
362
+ * module-ops API (re-injecting the new public origin + firing the operator-
363
+ * token / vault `.env` self-heal). The cloudflared CONNECTOR unit is
364
+ * unchanged — it already installs/removes its own ManagedUnit
365
+ * (`installConnectorService` / `removeConnectorService`), independent of the
366
+ * hub's lifecycle.
367
+ *
368
+ * Production CLI dispatch passes `supervisor: {}` so
369
+ * `resolveExposeSupervisor` fills in the real implementations; tests inject
370
+ * the seams they want to assert.
376
371
  */
377
- restartService?: (short: string) => Promise<number>;
372
+ supervisor?: ExposeSupervisorOpts;
378
373
  }
379
374
 
380
375
  interface Resolved {
@@ -400,13 +395,11 @@ interface Resolved {
400
395
  cloudflaredHome: string;
401
396
  configDir: string;
402
397
  hubOrigin: string | undefined;
403
- hubEnsureOpts: Omit<EnsureHubOpts, "configDir" | "wellKnownDir" | "log">;
404
- wellKnownDir: string;
405
398
  skipHub: boolean;
406
399
  now: () => Date;
407
400
  vaultHome: string | undefined;
408
401
  vaultAuthStatus: VaultAuthStatus | undefined;
409
- restartService: (short: string) => Promise<number>;
402
+ sup: ResolvedExposeSupervisor;
410
403
  }
411
404
 
412
405
  /**
@@ -488,20 +481,11 @@ function resolve(opts: ExposeCloudflareOpts, tunnelNameDefault: string): Resolve
488
481
  cloudflaredHome: opts.cloudflaredHome ?? DEFAULT_CLOUDFLARED_HOME,
489
482
  configDir,
490
483
  hubOrigin: opts.hubOrigin,
491
- hubEnsureOpts: opts.hubEnsureOpts ?? {},
492
- wellKnownDir: opts.wellKnownDir ?? WELL_KNOWN_DIR,
493
484
  skipHub: opts.skipHub ?? false,
494
485
  now: opts.now ?? (() => new Date()),
495
486
  vaultHome: opts.vaultHome,
496
487
  vaultAuthStatus: opts.vaultAuthStatus,
497
- restartService:
498
- opts.restartService ??
499
- ((short: string) =>
500
- restart(short, {
501
- manifestPath: opts.manifestPath,
502
- configDir,
503
- log: opts.log ?? (() => {}),
504
- })),
488
+ sup: resolveExposeSupervisor(opts.supervisor),
505
489
  };
506
490
  }
507
491
 
@@ -656,17 +640,16 @@ export async function exposeCloudflareUp(
656
640
  }
657
641
  hubPort = existing;
658
642
  } else {
659
- const hub = await ensureHubRunning({
660
- reservedPorts: manifest.services.map((s) => s.port),
661
- ...r.hubEnsureOpts,
662
- configDir: r.configDir,
663
- wellKnownDir: r.wellKnownDir,
664
- issuer: hubOrigin,
665
- log: r.log,
666
- });
667
- hubPort = hub.port;
668
- if (hub.started) r.log(`✓ hub started (pid ${hub.pid}, port ${hub.port}).`);
669
- else r.log(`✓ hub already running (pid ${hub.pid}, port ${hub.port}).`);
643
+ // §4.3a: "ensure the hub" = ensure the hub UNIT is up. The unit pins the
644
+ // canonical 1939 (no walking fallback), so that's the target cloudflared's
645
+ // ingress proxies to. Phase 5b retired the detached `ensureHubRunning`
646
+ // bringup — a box with no hub unit gets `ensureHubUnit`'s actionable "run
647
+ // `parachute migrate`" message, never a detached spawn.
648
+ const probePort = readHubPort(r.configDir) ?? HUB_UNIT_DEFAULT_PORT;
649
+ const ensured = await ensureHubUnitForExpose(r.sup, probePort, r.log);
650
+ if (!ensured.ok) return 1;
651
+ hubPort = ensured.port;
652
+ r.log(`✓ hub unit up (port ${hubPort}).`);
670
653
  }
671
654
  if (hubPort === 0) hubPort = HUB_DEFAULT_PORT;
672
655
 
@@ -909,20 +892,29 @@ export async function exposeCloudflareUp(
909
892
  // is the public origin) failed the `iss` check → 401 → "You're not signed in
910
893
  // to the hub." We mirror the Tailscale path here exactly.
911
894
  //
912
- // `persistVaultHubOrigin` writes the durable `.env` (skips loopback itself,
913
- // so a `--hub-origin http://127.0.0.1` override never bakes a dead issuer in);
914
- // the restart makes the running vault re-read it immediately rather than
915
- // waiting for the next reboot.
916
- persistVaultHubOrigin(r.configDir, hubOrigin, r.log);
917
- if (processState("vault", r.configDir, r.alive).status === "running") {
918
- r.log("");
919
- r.log("Restarting vault to pick up new hub origin…");
920
- const rcode = await r.restartService("vault");
921
- if (rcode !== 0) {
922
- r.log(
923
- " vault restart failed. Run manually once the issue is resolved: parachute restart vault",
924
- );
925
- }
895
+ // The supervised restart helper writes the durable `.env` (skipping loopback,
896
+ // so a `--hub-origin http://127.0.0.1` override never bakes a dead issuer in)
897
+ // and makes the running vault re-read it immediately rather than waiting for
898
+ // the next reboot.
899
+ //
900
+ // §4.3c: drive the restart through the running Supervisor
901
+ // (`driveModuleOp("vault", "restart")`), which re-injects the hub's current
902
+ // origin; `restartHubDependentViaSupervisor` also persists the durable `.env`
903
+ // + self-heals the operator-token issuer. Phase 5b retired the detached
904
+ // `lifecycle.restart` arm.
905
+ r.log("");
906
+ r.log("Restarting vault to pick up new hub origin…");
907
+ const rcode = await restartHubDependentViaSupervisor({
908
+ short: "vault",
909
+ hubOrigin,
910
+ configDir: r.configDir,
911
+ sup: r.sup,
912
+ log: r.log,
913
+ });
914
+ if (rcode !== 0) {
915
+ r.log(
916
+ "⚠ vault restart failed. Run manually once the issue is resolved: parachute restart vault",
917
+ );
926
918
  }
927
919
 
928
920
  const baseUrl = `https://${hostname}`;
@@ -0,0 +1,247 @@
1
+ /**
2
+ * Expose-path supervisor seams (design
3
+ * `parachute.computer/design/2026-06-01-hub-as-supervisor-unification.md` §4.3).
4
+ *
5
+ * Under the hub-as-supervisor unification, `expose` / `expose off` are decoupled
6
+ * from the hub's lifecycle. As of Phase 5b the supervised path is the ONLY
7
+ * runtime:
8
+ * - "ensure the hub" means "ensure the hub UNIT is up" (`ensureHubUnit`); a box
9
+ * with no unit gets `ensureHubUnit`'s actionable "run `parachute migrate`"
10
+ * message rather than a detached spawn.
11
+ * - the post-expose hub-dependent service restart goes through the RUNNING
12
+ * hub's in-process Supervisor over the loopback module-ops API
13
+ * (`driveModuleOp(short, "restart")`), NOT a detached `lifecycle.restart`.
14
+ *
15
+ * This module is the shared seam BOTH `expose.ts` (Tailscale) and
16
+ * `expose-cloudflare.ts` (cloudflared) use so the two paths can't drift.
17
+ */
18
+
19
+ import { readHubPort } from "../hub-control.ts";
20
+ import { hubDbPath, openHubDb } from "../hub-db.ts";
21
+ import {
22
+ type EnsureHubUnitOpts,
23
+ type EnsureHubUnitResult,
24
+ HUB_UNIT_DEFAULT_PORT,
25
+ type HubUnitDeps,
26
+ defaultHubUnitDeps,
27
+ ensureHubUnit as ensureHubUnitImpl,
28
+ } from "../hub-unit.ts";
29
+ import {
30
+ type DriveModuleOpDeps,
31
+ type ModuleOp,
32
+ ModuleOpHttpError,
33
+ type ModuleOpResult,
34
+ NoOperatorTokenError,
35
+ OperatorTokenExpiredError,
36
+ driveModuleOp as driveModuleOpImpl,
37
+ } from "../module-ops-client.ts";
38
+ import {
39
+ type OperatorIssuerHealStatus,
40
+ selfHealOperatorTokenIssuer as selfHealOperatorTokenIssuerImpl,
41
+ } from "../operator-token.ts";
42
+ import { persistVaultHubOrigin, selfHealVaultHubOrigin } from "../vault-hub-origin-env.ts";
43
+
44
+ /**
45
+ * Injectable supervisor-path seams shared by the Tailscale + cloudflared expose
46
+ * paths. Mirrors `LifecycleOpts.supervisor`: everything is injectable so tests
47
+ * can assert the `ensureHubUnit` / `driveModuleOp` / operator-token-self-heal
48
+ * calls without a live hub or a real launchd/systemd. Production wires the real
49
+ * impls against an opened hub.db + the resolved hub origin; the CLI dispatch
50
+ * passes `supervisor: {}`.
51
+ */
52
+ export interface ExposeSupervisorOpts {
53
+ /** Deps for the ensure-hub-unit call + the module-op self-heal. */
54
+ hubUnitDeps?: HubUnitDeps;
55
+ /** Ensure the hub unit is up before / during expose (§3.2 / §4.3a). */
56
+ ensureHubUnit?: (opts: EnsureHubUnitOpts) => Promise<EnsureHubUnitResult>;
57
+ /** Drive a per-module op against the running hub (reads operator.token). */
58
+ driveModuleOp?: (short: string, op: ModuleOp, deps: DriveModuleOpDeps) => Promise<ModuleOpResult>;
59
+ /**
60
+ * Open the hub DB used to validate/auto-rotate the operator token in
61
+ * `driveModuleOp` + to self-heal its issuer. Production opens
62
+ * `<configDir>/hub.db`; tests inject an in-memory/seeded db. Returns a handle
63
+ * the caller closes.
64
+ */
65
+ openDb?: (configDir: string) => import("bun:sqlite").Database;
66
+ /**
67
+ * Self-heal the operator token's stale `iss` toward the new public origin
68
+ * BEFORE the supervised restart (§4.3c). After `expose up` the running hub
69
+ * re-resolves its issuer to the public origin, so a loopback-minted operator
70
+ * token must be re-minted under that origin or the CLI's own `driveModuleOp`
71
+ * would fail iss-validation. Mirrors lifecycle's `selfHealOperatorTokenOnStart`.
72
+ * Production delegates to `selfHealOperatorTokenIssuer`; tests inject a stub.
73
+ */
74
+ selfHealOperatorTokenIssuer?: (
75
+ db: import("bun:sqlite").Database,
76
+ opts: { issuer: string; configDir: string; log: (line: string) => void },
77
+ ) => Promise<OperatorIssuerHealStatus>;
78
+ /** Loopback hub base URL override (default derives from the hub port). */
79
+ baseUrl?: string;
80
+ }
81
+
82
+ /** Resolved expose supervisor-path seams (see {@link ExposeSupervisorOpts}). */
83
+ export interface ResolvedExposeSupervisor {
84
+ hubUnitDeps: HubUnitDeps;
85
+ ensureHubUnit: (opts: EnsureHubUnitOpts) => Promise<EnsureHubUnitResult>;
86
+ driveModuleOp: (short: string, op: ModuleOp, deps: DriveModuleOpDeps) => Promise<ModuleOpResult>;
87
+ openDb: (configDir: string) => import("bun:sqlite").Database;
88
+ selfHealOperatorTokenIssuer: (
89
+ db: import("bun:sqlite").Database,
90
+ opts: { issuer: string; configDir: string; log: (line: string) => void },
91
+ ) => Promise<OperatorIssuerHealStatus>;
92
+ baseUrl: string | undefined;
93
+ }
94
+
95
+ /**
96
+ * Resolve the expose supervisor seams. Production passes `supervisor: {}` (or
97
+ * omits it) and gets the real impls; tests inject the seams they want to assert.
98
+ * Phase 5b retired the dual-dispatch discriminant — the supervised path is the
99
+ * only runtime, so there is no longer an `isHubUnitInstalled` probe here.
100
+ */
101
+ export function resolveExposeSupervisor(
102
+ opts: ExposeSupervisorOpts | undefined,
103
+ ): ResolvedExposeSupervisor {
104
+ const hubUnitDeps = opts?.hubUnitDeps ?? defaultHubUnitDeps;
105
+ return {
106
+ hubUnitDeps,
107
+ ensureHubUnit: opts?.ensureHubUnit ?? ensureHubUnitImpl,
108
+ driveModuleOp: opts?.driveModuleOp ?? driveModuleOpImpl,
109
+ openDb: opts?.openDb ?? ((configDir) => openHubDb(hubDbPath(configDir))),
110
+ selfHealOperatorTokenIssuer:
111
+ opts?.selfHealOperatorTokenIssuer ?? selfHealOperatorTokenIssuerImpl,
112
+ baseUrl: opts?.baseUrl,
113
+ };
114
+ }
115
+
116
+ /**
117
+ * Resolve the issuer the operator token's `iss` is validated against on the
118
+ * loopback module-ops call. Mirrors lifecycle's `resolveOperatorTokenIssuer`:
119
+ * the operator token ALWAYS carries an `iss`, so this falls back to the
120
+ * canonical loopback origin (`http://127.0.0.1:<hubPort>`) when no public
121
+ * origin is known. The CLI hits the hub on loopback, and the hub validates the
122
+ * bearer against its per-request issuer — which for a loopback request is the
123
+ * loopback origin — so the operator token must remain validatable there.
124
+ */
125
+ function resolveExposeOperatorTokenIssuer(
126
+ hubOrigin: string | undefined,
127
+ configDir: string,
128
+ ): string {
129
+ if (hubOrigin) return hubOrigin;
130
+ const port = readHubPort(configDir) ?? HUB_UNIT_DEFAULT_PORT;
131
+ return `http://127.0.0.1:${port}`;
132
+ }
133
+
134
+ /**
135
+ * Ensure the hub UNIT is up for an expose, mapping `ensureHubUnit`'s structured
136
+ * outcome to a simple ok/not-ok the caller turns into an exit code. Returns the
137
+ * probed port so the caller can plan tailscale/cloudflared against it (§4.3a:
138
+ * tailscale needs only the hub reachable on loopback, which the unit
139
+ * guarantees). On a non-up outcome the messages are surfaced.
140
+ */
141
+ export async function ensureHubUnitForExpose(
142
+ sup: ResolvedExposeSupervisor,
143
+ port: number,
144
+ log: (line: string) => void,
145
+ ): Promise<{ ok: boolean; port: number }> {
146
+ const ensured = await sup.ensureHubUnit({ port, deps: sup.hubUnitDeps, log });
147
+ if (ensured.outcome === "already-up" || ensured.outcome === "started") {
148
+ return { ok: true, port: ensured.port };
149
+ }
150
+ for (const m of ensured.messages) log(m);
151
+ return { ok: false, port: ensured.port };
152
+ }
153
+
154
+ /**
155
+ * Restart a hub-dependent service (today: vault) via the running hub's
156
+ * Supervisor after an expose changed the public origin (§4.3c). The supervised
157
+ * restart re-injects the hub's current per-request-resolved origin into the
158
+ * module's env; this helper ALSO fires the durable origin self-heals that the
159
+ * detached `lifecycle.restart` path used to provide:
160
+ * - `selfHealOperatorTokenIssuer` — re-mint the operator token under the new
161
+ * public origin BEFORE the module-op, so the CLI's own `driveModuleOp`
162
+ * bearer validates (and a later supervised restart's iss is current).
163
+ * - `persistVaultHubOrigin` / `selfHealVaultHubOrigin` — write the public
164
+ * origin into vault's `.env` so a future out-of-band boot also validates.
165
+ *
166
+ * Returns the module-op exit code (0 on success). Errors are surfaced as
167
+ * actionable lines (never a raw 401 / thrown HTTP error) and mapped to a
168
+ * non-zero code so the caller can warn-and-continue exactly as the detached
169
+ * restart path does.
170
+ */
171
+ export async function restartHubDependentViaSupervisor(args: {
172
+ short: string;
173
+ hubOrigin: string;
174
+ configDir: string;
175
+ sup: ResolvedExposeSupervisor;
176
+ log: (line: string) => void;
177
+ }): Promise<number> {
178
+ const { short, hubOrigin, configDir, sup, log } = args;
179
+ const issuer = resolveExposeOperatorTokenIssuer(hubOrigin, configDir);
180
+ const db = sup.openDb(configDir);
181
+ try {
182
+ // Self-heal the operator token's iss toward the NEW public origin first, so
183
+ // the bearer the CLI presents on the loopback module-op validates and a
184
+ // subsequent supervised restart's injected iss is current. The loopback /
185
+ // provenance guards live inside `selfHealOperatorTokenIssuer`, so passing
186
+ // the resolved (possibly loopback) origin is safe — it no-ops on loopback.
187
+ try {
188
+ const status = await sup.selfHealOperatorTokenIssuer(db, {
189
+ issuer: hubOrigin,
190
+ configDir,
191
+ log,
192
+ });
193
+ if (status.kind === "rotated") {
194
+ log(` refreshed operator.token issuer → ${hubOrigin} (was stale after exposure)`);
195
+ }
196
+ } catch (err) {
197
+ // A self-heal failure must never block the restart — degrade to a note.
198
+ log(
199
+ ` note: operator.token issuer self-heal skipped (${
200
+ err instanceof Error ? err.message : String(err)
201
+ })`,
202
+ );
203
+ }
204
+ // Durable .env persistence + vault-side self-heal (parity with the detached
205
+ // `persistVaultHubOriginForStart`). Both are called for parity with that
206
+ // detached path: `persistVaultHubOrigin` is the PRIMARY write — it stamps
207
+ // the new public origin into vault's `.env` (skipping loopback / unchanged
208
+ // values itself). `selfHealVaultHubOrigin` is a deliberate no-op in the
209
+ // normal case here — persist just wrote the public origin, so selfHeal's
210
+ // `current !== undefined && !isLoopbackOrigin(current)` guard short-circuits.
211
+ // It only fires for OLD installs where `.env` was left stale-loopback (the
212
+ // persist write can be skipped on edge cases), keeping the pair behaviorally
213
+ // identical to the detached path.
214
+ if (short === "vault") {
215
+ persistVaultHubOrigin(configDir, hubOrigin, log);
216
+ selfHealVaultHubOrigin(configDir, log, `${configDir}/expose-state.json`);
217
+ }
218
+
219
+ const deps: DriveModuleOpDeps = {
220
+ db,
221
+ issuer,
222
+ configDir,
223
+ ...(sup.baseUrl !== undefined ? { baseUrl: sup.baseUrl } : {}),
224
+ };
225
+ try {
226
+ await sup.driveModuleOp(short, "restart", deps);
227
+ return 0;
228
+ } catch (err) {
229
+ if (err instanceof NoOperatorTokenError || err instanceof OperatorTokenExpiredError) {
230
+ log(`✗ ${short}: ${err.message}`);
231
+ return 1;
232
+ }
233
+ if (err instanceof ModuleOpHttpError) {
234
+ // A not-supervised module (404) after an expose just means it wasn't
235
+ // running — the detached path's `processState !== running` guard simply
236
+ // skips it. Treat 404 the same: nothing to restart, not a failure.
237
+ if (err.status === 404 && err.code === "not_supervised") return 0;
238
+ log(`✗ ${short}: ${err.message}`);
239
+ return 1;
240
+ }
241
+ log(`✗ ${short}: ${err instanceof Error ? err.message : String(err)}`);
242
+ return 1;
243
+ }
244
+ } finally {
245
+ db.close();
246
+ }
247
+ }
@@ -8,17 +8,10 @@ import {
8
8
  readExposeState,
9
9
  writeExposeState,
10
10
  } from "../expose-state.ts";
11
- import {
12
- type EnsureHubOpts,
13
- type StopHubOpts,
14
- defaultPortProbe,
15
- ensureHubRunning,
16
- readHubPort,
17
- stopHub,
18
- } from "../hub-control.ts";
11
+ import { defaultPortProbe, readHubPort } from "../hub-control.ts";
19
12
  import { deriveHubOrigin } from "../hub-origin.ts";
13
+ import { HUB_UNIT_DEFAULT_PORT } from "../hub-unit.ts";
20
14
  import { HUB_PATH, writeHubFile } from "../hub.ts";
21
- import { type AliveFn, processState } from "../process-state.ts";
22
15
  import { shortNameForManifest } from "../service-spec.ts";
23
16
  import { type ServiceEntry, readManifest } from "../services-manifest.ts";
24
17
  import { type ServeEntry, bringupCommand, teardownCommand } from "../tailscale/commands.ts";
@@ -27,7 +20,6 @@ import { type Runner, defaultRunner } from "../tailscale/run.ts";
27
20
  import { clearVaultHubOrigin } from "../vault-hub-origin-env.ts";
28
21
  import type { VaultAuthStatus } from "../vault/auth-status.ts";
29
22
  import {
30
- WELL_KNOWN_DIR,
31
23
  WELL_KNOWN_MOUNT,
32
24
  WELL_KNOWN_PATH,
33
25
  buildWellKnown,
@@ -35,7 +27,12 @@ import {
35
27
  writeWellKnownFile,
36
28
  } from "../well-known.ts";
37
29
  import { printPublic2FAWarning } from "./expose-2fa-warning.ts";
38
- import { restart } from "./lifecycle.ts";
30
+ import {
31
+ type ExposeSupervisorOpts,
32
+ ensureHubUnitForExpose,
33
+ resolveExposeSupervisor,
34
+ restartHubDependentViaSupervisor,
35
+ } from "./expose-supervisor.ts";
39
36
 
40
37
  /**
41
38
  * Two exposure layers share a single tailscale serve config on this node.
@@ -67,17 +64,12 @@ export interface ExposeOpts {
67
64
  statePath?: string;
68
65
  wellKnownPath?: string;
69
66
  hubPath?: string;
70
- /** Directory holding hub.html (passed to the hub server). */
71
- wellKnownDir?: string;
72
67
  configDir?: string;
73
68
  port?: number;
74
69
  log?: (line: string) => void;
75
70
  /** Override detected FQDN — primarily for tests. */
76
71
  fqdnOverride?: string;
77
- /** Overrides for the hub lifecycle primarily for tests. */
78
- hubEnsureOpts?: Omit<EnsureHubOpts, "configDir" | "wellKnownDir" | "log">;
79
- hubStopOpts?: Omit<StopHubOpts, "configDir" | "log">;
80
- /** Skip spawning the hub server. Tests flip this off to verify it's called. */
72
+ /** Skip ensuring the hub unit. Tests seed a `hub.port` and flip this on. */
81
73
  skipHub?: boolean;
82
74
  /**
83
75
  * Probe a port to decide whether a service is responding. Returns true when
@@ -93,14 +85,6 @@ export interface ExposeOpts {
93
85
  * through to vault (and future services) via PARACHUTE_HUB_ORIGIN.
94
86
  */
95
87
  hubOrigin?: string;
96
- /** Process-liveness check for auto-restart — test seam. */
97
- alive?: AliveFn;
98
- /**
99
- * Restart a service by short name after exposure changes. Defaults to the
100
- * lifecycle `restart`; tests inject a fake to assert the call without
101
- * spawning real child processes.
102
- */
103
- restartService?: (short: string) => Promise<number>;
104
88
  /**
105
89
  * Override `~/.parachute/vault` for the 2FA-enrollment probe on the public
106
90
  * (Funnel) layer. Tests point at a tmp dir; production omits and the probe
@@ -113,6 +97,20 @@ export interface ExposeOpts {
113
97
  * `<vaultHome>/config.yaml` from disk. (#186)
114
98
  */
115
99
  vaultAuthStatus?: VaultAuthStatus;
100
+ /**
101
+ * Supervisor-path seams (design §4.3) — the ONLY runtime as of Phase 5b.
102
+ * `expose` "ensures the hub" by ensuring the UNIT is up (not a detached spawn),
103
+ * the post-expose hub-dependent restart drives the running Supervisor over the
104
+ * loopback module-ops API, and `expose off` leaves the hub RUNNING (a managed
105
+ * hub with Restart=always/KeepAlive would just respawn a stopped one — D3).
106
+ * A box with no hub unit gets `ensureHubUnit`'s actionable "run `parachute
107
+ * migrate`" message rather than a detached spawn.
108
+ *
109
+ * The production CLI dispatch passes `supervisor: {}` so
110
+ * `resolveExposeSupervisor` fills in the real implementations; tests inject
111
+ * the seams they want to assert.
112
+ */
113
+ supervisor?: ExposeSupervisorOpts;
116
114
  }
117
115
 
118
116
  /**
@@ -235,11 +233,14 @@ export async function exposeUp(layer: ExposeLayer, opts: ExposeOpts = {}): Promi
235
233
  const statePath = opts.statePath ?? EXPOSE_STATE_PATH;
236
234
  const wellKnownFilePath = opts.wellKnownPath ?? WELL_KNOWN_PATH;
237
235
  const hubFilePath = opts.hubPath ?? HUB_PATH;
238
- const wellKnownDir = opts.wellKnownDir ?? WELL_KNOWN_DIR;
239
236
  const configDir = opts.configDir ?? CONFIG_DIR;
240
237
  const port = opts.port ?? 443;
241
238
  const log = opts.log ?? ((line) => console.log(line));
242
239
  const funnel = layer === "public";
240
+ // §4.3: ensure the hub UNIT is up (it guarantees loopback reachability) +
241
+ // restart hub-dependent services via the Supervisor. The detached arm was
242
+ // retired in Phase 5b.
243
+ const sup = resolveExposeSupervisor(opts.supervisor);
243
244
 
244
245
  if (!(await isTailscaleInstalled(runner))) {
245
246
  log("tailscale is not installed or not on PATH.");
@@ -320,17 +321,17 @@ export async function exposeUp(layer: ExposeLayer, opts: ExposeOpts = {}): Promi
320
321
  }
321
322
  hubPort = existing;
322
323
  } else {
323
- const hub = await ensureHubRunning({
324
- reservedPorts: manifest.services.map((s) => s.port),
325
- ...(opts.hubEnsureOpts ?? {}),
326
- configDir,
327
- wellKnownDir,
328
- issuer: hubOrigin,
329
- log,
330
- });
331
- hubPort = hub.port;
332
- if (hub.started) log(`✓ hub started (pid ${hub.pid}, port ${hub.port}).`);
333
- else log(`✓ hub already running (pid ${hub.pid}, port ${hub.port}).`);
324
+ // §4.3a: "ensure the hub" = ensure the hub UNIT is up. The unit guarantees
325
+ // the hub is reachable on loopback (all tailscale needs); it pins the
326
+ // canonical 1939 (no walking fallback), so that's the target. Phase 5b
327
+ // retired the detached `ensureHubRunning` bringup — a box with no hub unit
328
+ // gets `ensureHubUnit`'s actionable "run `parachute migrate`" message, never
329
+ // a detached spawn.
330
+ const probePort = readHubPort(configDir) ?? HUB_UNIT_DEFAULT_PORT;
331
+ const ensured = await ensureHubUnitForExpose(sup, probePort, log);
332
+ if (!ensured.ok) return 1;
333
+ hubPort = ensured.port;
334
+ log(`✓ hub unit up (port ${hubPort}).`);
334
335
  }
335
336
 
336
337
  const entries = planEntries(hubPort);
@@ -392,13 +393,20 @@ export async function exposeUp(layer: ExposeLayer, opts: ExposeOpts = {}): Promi
392
393
  // claude.ai MCP failed with a cryptic "Couldn't reach the MCP server". The
393
394
  // old output told the user to restart manually; it got buried in the wall
394
395
  // of expose output. Do the restart ourselves.
395
- const doRestart =
396
- opts.restartService ?? ((short: string) => restart(short, { manifestPath, configDir, log }));
396
+ // §4.3c: the hub-dependent restart goes through the running Supervisor
397
+ // (`driveModuleOp(short, "restart")`), which re-injects the hub's current
398
+ // origin; the origin self-heal (operator-token iss + vault `.env`) fires there.
399
+ // Phase 5b retired the detached `lifecycle.restart` arm.
397
400
  for (const short of HUB_DEPENDENT_SHORTS) {
398
- if (processState(short, configDir, opts.alive).status !== "running") continue;
399
401
  log("");
400
402
  log(`Restarting ${short} to pick up new hub origin…`);
401
- const rcode = await doRestart(short);
403
+ const rcode = await restartHubDependentViaSupervisor({
404
+ short,
405
+ hubOrigin,
406
+ configDir,
407
+ sup,
408
+ log,
409
+ });
402
410
  if (rcode !== 0) {
403
411
  log(
404
412
  `⚠ ${short} restart failed. Run manually once the issue is resolved: parachute restart ${short}`,
@@ -415,6 +423,10 @@ export async function exposeOff(layer: ExposeLayer, opts: ExposeOpts = {}): Prom
415
423
  const hubFilePath = opts.hubPath ?? HUB_PATH;
416
424
  const configDir = opts.configDir ?? CONFIG_DIR;
417
425
  const log = opts.log ?? ((line) => console.log(line));
426
+ // D3 (§4.3a): `expose off` tears down ONLY the exposure layer and leaves the
427
+ // hub running — the hub is a persistent platform unit now, so stopping it
428
+ // would just be respawned by the manager. The detached `stopHub` arm was
429
+ // retired in Phase 5b, so there is no longer any hub-lifecycle dispatch here.
418
430
 
419
431
  const state = readExposeState(statePath);
420
432
  if (!state || state.entries.length === 0) {
@@ -455,13 +467,12 @@ export async function exposeOff(layer: ExposeLayer, opts: ExposeOpts = {}): Prom
455
467
  unlinkSync(hubFilePath);
456
468
  }
457
469
 
458
- // Hub lives only as long as some layer is exposed. State was just cleared,
459
- // so no layer is active — stop the hub. (Layer switch doesn't go through
460
- // here; that path reuses the running hub.)
461
- if (!opts.skipHub) {
462
- const stopped = await stopHub({ ...(opts.hubStopOpts ?? {}), configDir, log });
463
- if (stopped) log("✓ hub stopped.");
464
- }
470
+ // D3 (§4.3a) — `expose off` no longer stops the hub. The hub is a persistent
471
+ // platform unit (Restart=always / KeepAlive) that runs whether or not a layer
472
+ // is exposed: the "hub exists only while exposed" invariant inverted under the
473
+ // supervised model, and the detached `stopHub` arm was retired in Phase 5b
474
+ // (stopping it would just be respawned by the manager). We tear down ONLY the
475
+ // exposure layer above and leave the hub running.
465
476
 
466
477
  log(`✓ ${layerLabel(layer)} exposure removed.`);
467
478
  return 0;