@openparachute/hub 0.6.2 → 0.6.3-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +87 -35
  2. package/package.json +1 -1
  3. package/src/__tests__/api-hub-upgrade.test.ts +690 -0
  4. package/src/__tests__/api-modules-ops.test.ts +359 -3
  5. package/src/__tests__/api-modules.test.ts +54 -0
  6. package/src/__tests__/expose-cloudflare.test.ts +163 -72
  7. package/src/__tests__/expose-off-auto.test.ts +26 -1
  8. package/src/__tests__/expose.test.ts +260 -240
  9. package/src/__tests__/hub-control.test.ts +1 -242
  10. package/src/__tests__/hub-server.test.ts +64 -0
  11. package/src/__tests__/hub-unit.test.ts +574 -0
  12. package/src/__tests__/init.test.ts +219 -2
  13. package/src/__tests__/lifecycle.test.ts +416 -1448
  14. package/src/__tests__/managed-unit.test.ts +575 -0
  15. package/src/__tests__/migrate-cutover.test.ts +840 -0
  16. package/src/__tests__/migrate-offer.test.ts +240 -0
  17. package/src/__tests__/migrate.test.ts +132 -0
  18. package/src/__tests__/module-ops-client.test.ts +556 -0
  19. package/src/__tests__/port-probe.test.ts +23 -0
  20. package/src/__tests__/setup-wizard.test.ts +130 -0
  21. package/src/__tests__/status-supervisor.test.ts +504 -0
  22. package/src/__tests__/status.test.ts +157 -708
  23. package/src/__tests__/supervisor.test.ts +471 -6
  24. package/src/__tests__/upgrade.test.ts +351 -5
  25. package/src/api-hub-upgrade.ts +384 -0
  26. package/src/api-hub.ts +2 -1
  27. package/src/api-modules-ops.ts +221 -0
  28. package/src/api-modules.ts +18 -2
  29. package/src/cli.ts +97 -12
  30. package/src/cloudflare/connector-service.ts +117 -322
  31. package/src/commands/expose-cloudflare.ts +63 -71
  32. package/src/commands/expose-supervisor.ts +247 -0
  33. package/src/commands/expose.ts +59 -48
  34. package/src/commands/init.ts +225 -12
  35. package/src/commands/lifecycle.ts +455 -816
  36. package/src/commands/migrate-cutover.ts +837 -0
  37. package/src/commands/migrate.ts +71 -2
  38. package/src/commands/serve-boot.ts +71 -25
  39. package/src/commands/status.ts +535 -235
  40. package/src/commands/upgrade.ts +100 -2
  41. package/src/help.ts +128 -68
  42. package/src/hub-control.ts +23 -162
  43. package/src/hub-server.ts +39 -0
  44. package/src/hub-unit.ts +735 -0
  45. package/src/hub-upgrade-helper.ts +306 -0
  46. package/src/hub-upgrade-mode.ts +209 -0
  47. package/src/hub-upgrade-status.ts +150 -0
  48. package/src/managed-unit.ts +692 -0
  49. package/src/migrate-offer.ts +186 -0
  50. package/src/module-ops-client.ts +457 -0
  51. package/src/port-probe.ts +50 -0
  52. package/src/process-state.ts +19 -3
  53. package/src/setup-wizard.ts +80 -1
  54. package/src/supervisor.ts +389 -38
  55. package/web/ui/dist/assets/index-D_6AFvZy.js +61 -0
  56. package/web/ui/dist/assets/{index-BiBlvEaj.css → index-mz8XcVPP.css} +1 -1
  57. package/web/ui/dist/index.html +2 -2
  58. package/web/ui/dist/assets/index-CIN3mnmf.js +0 -61
@@ -0,0 +1,306 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * The detached one-shot hub-upgrade helper (design 2026-06-01 §5.3 / D4).
4
+ *
5
+ * ── WHY A SEPARATE, DETACHED PROCESS ───────────────────────────────────────
6
+ *
7
+ * `POST /api/hub/upgrade` can't rewrite + restart the hub from inside the
8
+ * request handler: restarting the hub kills the very process serving the
9
+ * request, so the response would die with the old binary before it could
10
+ * report success. The resolution (§5.3): the endpoint spawns THIS helper with
11
+ * `detached: true` + `proc.unref()` — the ONE legitimate detached process in
12
+ * the unified model, *because it must outlive the hub it's upgrading*. The
13
+ * helper owns the restart; the request handler returns 202 immediately.
14
+ *
15
+ * Detached + unref'd means: no controlling terminal tie, its own process
16
+ * group, and the parent (hub) exiting does NOT deliver SIGHUP/SIGTERM to it.
17
+ * So when the helper later tears the hub down, it keeps running to completion.
18
+ *
19
+ * ── WHAT IT DOES ───────────────────────────────────────────────────────────
20
+ *
21
+ * 1. Mark the on-disk status file `running` (the SPA polls it — it's a FILE,
22
+ * not the in-memory ops registry, precisely because the hub goes down
23
+ * mid-upgrade; see hub-upgrade-status.ts).
24
+ * 2. Rewrite the hub binary — REUSES `upgrade("hub", …)` from commands/
25
+ * upgrade.ts (the channel-aware `bun add -g @openparachute/hub@<channel>`
26
+ * / linked git-pull + downgrade guard). No duplicated rewrite logic.
27
+ * 3. Trigger the platform-appropriate restart:
28
+ * - **unit-managed (VM/Mac)** → `restartHubUnit` (systemctl restart /
29
+ * launchctl kickstart -k). The manager tears the old hub down, starts
30
+ * the new binary, which re-boots every module from services.json.
31
+ * - **container (no unit manager)** → the runtime re-runs CMD on the
32
+ * hub's exit, so the helper sends the old hub a graceful SIGTERM (the
33
+ * `serve` loop's SIGTERM handler stops children + the server cleanly,
34
+ * then the process exits → the runtime brings it back on the rewritten
35
+ * binary). The hub PID is passed in via `--hub-pid`.
36
+ *
37
+ * `upgrade("hub", …)` has its own restart dispatch (the Phase-4 dual-dispatch:
38
+ * `supervisor: {}` opts into its `restartHubUnit` arm; with no unit it falls
39
+ * back to a detached lifecycle restart we DON'T want here). The helper uses
40
+ * NEITHER: on every platform it omits `supervisor` and passes a no-op
41
+ * `restartFn`, so `upgrade` performs a pure rewrite and never restarts
42
+ * anything. The helper then owns the restart itself — `restartHubUnit` on a
43
+ * unit-managed box, the SIGTERM path on a container — so there is exactly one
44
+ * restart per upgrade and the restart authority stays unambiguous per
45
+ * platform.
46
+ *
47
+ * ── TESTABILITY ────────────────────────────────────────────────────────────
48
+ *
49
+ * `runHubUpgradeHelper` is the pure, injectable core. Every side effect — the
50
+ * status writes, the `upgrade()` call, the unit restart, the container exit
51
+ * signal — is a seam, so the rewrite-then-restart sequence + the container
52
+ * graceful-exit path are unit-tested with NO real `bun add -g`, NO real
53
+ * systemctl, and NO real process signal. Only the thin argv-parsing `main()`
54
+ * at the bottom touches the real OS, and it's only reached when this file is
55
+ * the entrypoint (`import.meta.main`).
56
+ */
57
+
58
+ import { type UpgradeOpts, upgrade as realUpgrade } from "./commands/upgrade.ts";
59
+ import { CONFIG_DIR } from "./config.ts";
60
+ import {
61
+ type HubUnitDeps,
62
+ type HubUnitManagerOpResult,
63
+ defaultHubUnitDeps,
64
+ isHubUnitInstalled,
65
+ restartHubUnit as realRestartHubUnit,
66
+ } from "./hub-unit.ts";
67
+ import {
68
+ type HubUpgradeStatus,
69
+ appendHubUpgradeStatus,
70
+ readHubUpgradeStatus,
71
+ } from "./hub-upgrade-status.ts";
72
+
73
/** Inputs for a single hub-upgrade helper run (what `parseArgs` produces from argv). */
export interface HubUpgradeHelperArgs {
  /** Id of this operation; it has to equal the status file's `operation_id`. */
  operationId: string;
  /** Release channel to rewrite to — a closed enum the endpoint validates before spawning. */
  channel: "rc" | "latest";
  /** PARACHUTE_HOME: the directory that holds the status file and services.json. */
  configDir: string;
  /**
   * PID of the hub process that gets the graceful-exit signal on the container
   * path. Stays undefined on the unit-managed path, where the service manager
   * owns the restart.
   */
  hubPid?: number;
}
86
+
87
/**
 * Side-effect seams for `runHubUpgradeHelper`. Every member is optional; an
 * omitted member falls back to the real implementation.
 */
export interface HubUpgradeHelperDeps {
  /** Performs the binary rewrite. Production uses `upgrade` from `commands/upgrade.ts`. */
  upgrade?: (svc: string, opts: UpgradeOpts) => Promise<number>;
  /** Reports whether a hub unit is installed — selects unit-managed vs container restart. */
  isHubUnitInstalled?: (deps: HubUnitDeps) => boolean;
  /** Restarts the hub unit; only invoked on the unit-managed path. */
  restartHubUnit?: (deps: HubUnitDeps) => HubUnitManagerOpResult;
  /** Dependencies handed to both the unit probe and the unit restart. */
  hubUnitDeps?: HubUnitDeps;
  /**
   * Delivers the graceful-exit signal to the hub on the container path.
   * Production calls `process.kill(pid, signal)`; tests record the call.
   */
  signalHub?: (pid: number, signal: NodeJS.Signals) => void;
  /** Appends progress to the on-disk status file (test seam). */
  appendStatus?: (
    configDir: string,
    operationId: string,
    patch: Partial<Pick<HubUpgradeStatus, "phase" | "error">>,
    logLine?: string,
  ) => void;
}
110
+
111
/**
 * The pure helper core: rewrite the hub binary, then trigger the platform
 * restart, recording progress to the status file throughout.
 *
 * Sequence:
 *   1. mark the status file `running`;
 *   2. probe whether a hub unit is installed (decides the restart path);
 *   3. rewrite the binary via `upgrade("hub", …)` with a no-op `restartFn`, so
 *      `upgrade` never restarts anything itself;
 *   4. restart: `restartHubUnit` when unit-managed, otherwise SIGTERM to
 *      `args.hubPid` (container path).
 *
 * Failure handling: a throwing or non-zero `upgrade`, and a throwing or
 * non-"ok" unit restart, all mark the status file `failed`. A failed restart
 * happens AFTER the binary was rewritten, so its log line tells the operator
 * to restart manually.
 *
 * @param args operation id, channel, config dir and (container path) hub PID.
 * @param deps injectable side-effect seams; omitted members use the real impls.
 * @returns 0 when the restart was dispatched; the upgrade's own exit code when
 *   the rewrite returned non-zero; 1 when the rewrite threw or the unit
 *   restart failed.
 */
export async function runHubUpgradeHelper(
  args: HubUpgradeHelperArgs,
  deps: HubUpgradeHelperDeps = {},
): Promise<number> {
  const upgrade = deps.upgrade ?? realUpgrade;
  const unitInstalledFn = deps.isHubUnitInstalled ?? isHubUnitInstalled;
  const restartUnit = deps.restartHubUnit ?? realRestartHubUnit;
  const hubUnitDeps = deps.hubUnitDeps ?? defaultHubUnitDeps;
  const signalHub = deps.signalHub ?? ((pid, signal) => process.kill(pid, signal));
  const append = deps.appendStatus ?? appendHubUpgradeStatus;
  const { configDir, operationId } = args;

  append(
    configDir,
    operationId,
    { phase: "running" },
    `hub-upgrade helper started (op ${operationId})`,
  );

  // Probed BEFORE the rewrite so the restart path is decided up front.
  const unitManaged = unitInstalledFn(hubUnitDeps);

  // ── Rewrite the binary ───────────────────────────────────────────────────
  // Reuse commands/upgrade.ts for the channel-aware rewrite, but REWRITE ONLY:
  // the no-op `restartFn` suppresses upgrade's own restart and `supervisor` is
  // intentionally omitted, so the restart below is the only one that fires.
  const upgradeOpts: UpgradeOpts = {
    channel: args.channel,
    configDir,
    restartFn: async () => 0,
    log: (line) => append(configDir, operationId, {}, line),
  };

  let code: number;
  try {
    code = await upgrade("hub", upgradeOpts);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    append(configDir, operationId, { phase: "failed", error: msg }, `hub-upgrade failed: ${msg}`);
    return 1;
  }

  if (code !== 0) {
    append(
      configDir,
      operationId,
      { phase: "failed", error: `upgrade exited ${code}` },
      `hub-upgrade rewrite failed (exit ${code}) — binary NOT restarted`,
    );
    return code;
  }

  // ── Restart (helper-owned) ───────────────────────────────────────────────
  if (unitManaged) {
    // VM/Mac: restart the hub UNIT via the platform manager, never a PID
    // signal (R17). We only mark `restarting`: the new hub's version is the
    // SPA's success signal (it polls /health + /api/hub), not this file.
    let res: HubUnitManagerOpResult;
    try {
      res = restartUnit(hubUnitDeps);
    } catch (err) {
      // Without this guard a throwing restart would escape with the status
      // file stuck at `running` even though the binary is already rewritten.
      const msg = err instanceof Error ? err.message : String(err);
      append(
        configDir,
        operationId,
        { phase: "failed", error: `hub unit restart threw: ${msg}` },
        `hub binary rewritten but the unit restart threw (${msg}) — restart it manually`,
      );
      return 1;
    }
    for (const m of res.messages) append(configDir, operationId, {}, m);
    if (res.outcome !== "ok") {
      append(
        configDir,
        operationId,
        { phase: "failed", error: `hub unit restart ${res.outcome}` },
        `hub binary rewritten but the unit restart ${res.outcome} — restart it manually`,
      );
      return 1;
    }
    append(
      configDir,
      operationId,
      { phase: "restarting" },
      "hub unit restarted via the service manager — the SPA polls /health + version for the new binary",
    );
    return 0;
  }

  // Container path: signal the hub to exit gracefully so the container runtime
  // re-runs CMD (`serve`) on the rewritten binary. `restarting` is recorded
  // BEFORE the signal is sent.
  append(
    configDir,
    operationId,
    { phase: "restarting" },
    "container: signalling the hub to exit gracefully so the runtime restarts it on the new binary",
  );
  if (args.hubPid !== undefined && Number.isFinite(args.hubPid) && args.hubPid > 0) {
    try {
      signalHub(args.hubPid, "SIGTERM");
    } catch (err) {
      // The hub may already have exited (a racing restart). Not fatal — the
      // rewrite is done, so record the error and still report success.
      const msg = err instanceof Error ? err.message : String(err);
      append(configDir, operationId, {}, `hub graceful-exit signal noted as already-gone (${msg})`);
    }
  } else {
    append(
      configDir,
      operationId,
      {},
      "no hub pid provided — relying on the platform runtime's own restart to pick up the new binary",
    );
  }
  return 0;
}
233
+
234
+ // ── Entrypoint (only runs when invoked directly as `bun hub-upgrade-helper.ts`) ──
235
+
236
/**
 * Parse the helper's argv (`--op`, `--channel`, `--config-dir`, `--hub-pid`).
 *
 * A value-bearing flag whose value is missing — it is last in argv, or the
 * next token is itself a `--flag` — leaves that value unset and does NOT
 * consume the next token. An unset `--op` / `--channel` then surfaces as the
 * "required" / "must be" errors below; an unset `--config-dir` falls back to
 * `CONFIG_DIR`; an unset `--hub-pid` is simply omitted.
 *
 * `--hub-pid` is kept only when it parses to a positive integer, so `""`
 * (which `Number` turns into 0), `abc`, `1.5` and `-3` are all dropped.
 *
 * @param argv the arguments after the script path.
 * @returns the resolved helper args, or `{ error }` describing the first problem.
 */
function parseArgs(argv: string[]): HubUpgradeHelperArgs | { error: string } {
  let operationId: string | undefined;
  let channel: string | undefined;
  let configDir: string | undefined;
  let hubPid: number | undefined;
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    switch (arg) {
      case "--op":
      case "--channel":
      case "--config-dir":
      case "--hub-pid": {
        const value = argv[i + 1];
        // Missing value: leave it unset and let the next token be parsed as
        // the flag it is, instead of swallowing it as this flag's value.
        if (value === undefined || value.startsWith("--")) break;
        i++;
        if (arg === "--op") operationId = value;
        else if (arg === "--channel") channel = value;
        else if (arg === "--config-dir") configDir = value;
        else hubPid = Number(value);
        break;
      }
      default:
        return { error: `unexpected argument "${arg}"` };
    }
  }
  if (!operationId) return { error: "--op <id> is required" };
  if (channel !== "rc" && channel !== "latest") {
    return { error: `--channel must be "rc" or "latest" (got "${channel ?? ""}")` };
  }
  const resolved: HubUpgradeHelperArgs = {
    operationId,
    channel,
    configDir: configDir ?? CONFIG_DIR,
  };
  // Only a positive integer can be a real PID worth signalling.
  if (hubPid !== undefined && Number.isInteger(hubPid) && hubPid > 0) resolved.hubPid = hubPid;
  return resolved;
}
281
+
282
/**
 * CLI entry: parse argv, confirm a status file exists, then run the helper
 * core.
 *
 * @returns 2 when argv is invalid or no status file can be read; otherwise the
 *   exit code of `runHubUpgradeHelper`.
 */
async function main(): Promise<number> {
  const cliArgs = process.argv.slice(2);
  const parsed = parseArgs(cliArgs);
  if ("error" in parsed) {
    console.error(`hub-upgrade-helper: ${parsed.error}`);
    return 2;
  }

  // The endpoint is expected to seed the status file before spawning us. If
  // it is missing, fail loudly instead of running with nowhere to report.
  // NOTE(review): this only checks that a readable status file exists, not
  // that its operation_id equals ours — confirm whether a mismatch should
  // also bail here.
  const { configDir, operationId } = parsed;
  const seeded = readHubUpgradeStatus(configDir);
  if (!seeded) {
    console.error(`hub-upgrade-helper: no status file for op ${operationId} under ${configDir}`);
    return 2;
  }

  const exitCode = await runHubUpgradeHelper(parsed);
  return exitCode;
}
298
+
299
// Run `main()` only when this file is the process entrypoint, and turn its
// result (or any rejection) into the process exit code.
if (import.meta.main) {
  void (async () => {
    try {
      const code = await main();
      process.exit(code);
    } catch (err) {
      console.error("hub-upgrade-helper: fatal", err);
      process.exit(1);
    }
  })();
}
@@ -0,0 +1,209 @@
1
+ /**
2
+ * In-place-vs-redeploy detection for `POST /api/hub/upgrade` (design
3
+ * 2026-06-01 §5.3 — the OPEN implementation detail flagged for D4).
4
+ *
5
+ * The hub-upgrade endpoint must decide, BEFORE it spawns the detached helper,
6
+ * whether an on-disk binary rewrite (`bun add -g @openparachute/hub@<channel>`
7
+ * / a linked git-pull) will actually PERSIST across the next restart:
8
+ *
9
+ * - **in-place** — the hub binary lives on a writable, persistent location
10
+ * (a bun-linked checkout, or a `bun add -g` install under a $BUN_INSTALL
11
+ * that survives restart). A rewrite + restart genuinely upgrades the hub.
12
+ *
13
+ * - **redeploy-required** — the hub binary is baked into a container image
14
+ * (Render/Fly image-pinned). `bun add -g` would write to the image's
15
+ * ephemeral layer and be LOST on the next container restart, so the rewrite
16
+ * is a misleading no-op. The honest path is a platform redeploy from the
17
+ * operator's dashboard, NOT a false "upgraded."
18
+ *
19
+ * ── THE HEURISTIC (conservative; flagged for review) ───────────────────────
20
+ *
21
+ * Signals, in priority order:
22
+ *
23
+ * 1. **bun-linked** (`detectHubInstallSource` → `bun-linked`): the hub runs
24
+ * from a git checkout on disk. A `git pull` in that checkout is always
25
+ * persistent (the checkout is the operator's own filesystem, not an image
26
+ * layer). → **in-place**. This is Aaron's dev box + every VM/Mac that
27
+ * bun-linked the hub.
28
+ *
29
+ * 2. **container, BUN_INSTALL on the persistent disk**: a container (the
30
+ * Render Blueprint pins `PARACHUTE_HOME=/parachute`) whose `$BUN_INSTALL`
31
+ * points INSIDE the persistent mount (`/parachute/...` — the same place
32
+ * runtime module installs land via `/api/modules/:short/install`). A
33
+ * `bun add -g` there writes to the mounted volume, which survives a
34
+ * container restart. → **in-place**. This is the "hub installed to the
35
+ * persistent disk" arm §5.3 calls out.
36
+ *
37
+ * 3. **container, BUN_INSTALL NOT on the persistent disk** (or unset): the
38
+ * hub is image-pinned — `bun add -g` writes to the ephemeral image layer
39
+ * and is lost on restart. → **redeploy-required**. This is the default
40
+ * Render/Fly image shape today (the Dockerfile `bun add`s the hub into the
41
+ * image; $BUN_INSTALL defaults to `/root/.bun`, not the mount).
42
+ *
43
+ * 4. **npm, non-container**: a `bun add -g` install on a VM/Mac (not a
44
+ * container). The global bun prefix is on the operator's own writable
45
+ * filesystem → persistent. → **in-place**.
46
+ *
47
+ * 5. **unknown / anything else**: we couldn't classify the install source.
48
+ * → **redeploy-required** (the honest fallback — §5.3: "When uncertain,
49
+ * prefer redeploy-required over a silent no-op"). The SPA then tells the
50
+ * operator to redeploy rather than promising an upgrade that may evaporate.
51
+ *
52
+ * ── FALSE-POSITIVE / FALSE-NEGATIVE RISK (for the reviewer) ─────────────────
53
+ *
54
+ * - **False "in-place" (the dangerous direction)** would tell the operator
55
+ * "upgraded" while the rewrite silently evaporates on the next restart.
56
+ * The only path that risks this is signal #2: a container whose
57
+ * `$BUN_INSTALL` is under the persistent mount but where the operator
58
+ * mounted the disk read-only, or where the bun cache (not the install) is
59
+ * what's on the mount. We mitigate by requiring `$BUN_INSTALL` to be a
60
+ * descendant of the persistent-home prefix — the strictest signal available
61
+ * without probing writability (which we can't do reliably from the request
62
+ * handler before spawning the helper). A residual risk remains; see the
63
+ * note in `detectHubUpgradeMode` on tightening this with a write-probe in
64
+ * the helper if it proves wrong in the field.
65
+ *
66
+ * - **False "redeploy-required" (the safe direction)** merely tells the
67
+ * operator to redeploy when an in-place upgrade would have worked — annoying
68
+ * but never destructive. Signals #3/#5 deliberately err here.
69
+ *
70
+ * Pure + injectable: no I/O beyond the (already-injectable) install-source
71
+ * detection. The env + srcDir are passed in so tests drive every branch.
72
+ */
73
+
74
+ import { dirname } from "node:path";
75
+ import { fileURLToPath } from "node:url";
76
+ import { CONTAINER_HOME } from "./hub-control.ts";
77
+ import {
78
+ type DetectInstallSourceDeps,
79
+ type InstallSource,
80
+ detectHubInstallSource,
81
+ } from "./install-source.ts";
82
+
83
/**
 * Upgrade mode reported to the SPA: `in-place` (a rewrite + restart upgrades
 * the hub) or `redeploy-required` (the operator must redeploy instead).
 */
export type HubUpgradeMode =
  | "in-place"
  | "redeploy-required";
85
+
86
/** Optional inputs for `detectHubUpgradeMode`; every member is a test seam or shortcut. */
export interface DetectHubUpgradeModeArgs {
  /** Environment to consult instead of `process.env` (test seam). */
  env?: Record<string, string | undefined>;
  /**
   * Directory from which the hub's install source is classified. When omitted,
   * the directory of the running source file is used. Test seam.
   */
  hubSrcDir?: string;
  /** Dependencies forwarded to `detectHubInstallSource` (test seam). */
  installSourceDeps?: DetectInstallSourceDeps;
  /**
   * An install source the caller already classified (e.g. the `/api/hub`
   * handler), so detection is not run a second time. When present,
   * `hubSrcDir` and `installSourceDeps` are not used.
   */
  source?: InstallSource;
}
103
+
104
/** Outcome of `detectHubUpgradeMode`. */
export interface HubUpgradeModeResult {
  /** Whether an in-place upgrade is possible or a redeploy is needed. */
  mode: HubUpgradeMode;
  /** Install-source label, or `"container"`; shown in diagnostics and SPA copy. */
  source: InstallSource["kind"] | "container";
  /** Brief human-readable justification (202 body, SPA, tests). */
  reason: string;
}
111
+
112
/**
 * Container detection: true exactly when `PARACHUTE_HOME` in `env` equals the
 * shared `CONTAINER_HOME` constant from `hub-control.ts`. Per this module's
 * header, the Render Blueprint pins `PARACHUTE_HOME` to that value, which is
 * why it serves as the container-mode signal.
 */
function isContainer(env: Record<string, string | undefined>): boolean {
  const { PARACHUTE_HOME: home } = env;
  return home === CONTAINER_HOME;
}
121
+
122
/**
 * True when `$BUN_INSTALL` is `$PARACHUTE_HOME` itself or a descendant of it —
 * i.e. `bun add -g` writes land under the persistent-home prefix.
 *
 * The check is textual (no filesystem access), so it is deliberately strict:
 *   - descendant-of, not substring: `/parachute-other/.bun` does not match
 *     `/parachute`;
 *   - any `..` segment in `$BUN_INSTALL` is rejected outright, because a path
 *     like `/parachute/../root/.bun` passes a prefix test while resolving
 *     OUTSIDE the mount — a false "in-place", the dangerous direction;
 *   - trailing slashes on either value are ignored, so `/parachute/` and
 *     `/parachute` compare equal.
 *
 * Returns false when either variable is unset or empty.
 */
function bunInstallOnPersistentDisk(env: Record<string, string | undefined>): boolean {
  const bunInstall = env.BUN_INSTALL;
  const home = env.PARACHUTE_HOME;
  if (!bunInstall || !home) return false;
  // A parent-directory hop can escape the mount despite a matching prefix;
  // when in doubt, err toward "not persistent" (redeploy-required).
  if (bunInstall.split("/").includes("..")) return false;
  const stripTrailingSlashes = (p: string): string => p.replace(/\/+$/, "");
  const homeDir = stripTrailingSlashes(home);
  const installDir = stripTrailingSlashes(bunInstall);
  return installDir === homeDir || installDir.startsWith(`${homeDir}/`);
}
139
+
140
/**
 * Classify the hub as in-place-upgradable (rewrite + restart works) or
 * redeploy-only. Checks run in priority order and the first match wins:
 *
 *   1. bun-linked install source            → in-place
 *   2. container, $BUN_INSTALL on the mount → in-place
 *   3. container otherwise                  → redeploy-required
 *   4. npm install source, non-container    → in-place
 *   5. anything else                        → redeploy-required
 *
 * @param args optional env / install-source overrides (test seams).
 * @returns the mode plus a source label and a human-readable reason.
 */
export function detectHubUpgradeMode(args: DetectHubUpgradeModeArgs = {}): HubUpgradeModeResult {
  const env = args.env ?? process.env;
  // A caller-supplied source skips detection entirely.
  const installSource =
    args.source ??
    detectHubInstallSource(
      args.hubSrcDir ?? dirname(fileURLToPath(import.meta.url)),
      args.installSourceDeps,
    );
  const inContainer = isContainer(env);

  const verdict = (
    mode: HubUpgradeMode,
    source: HubUpgradeModeResult["source"],
    reason: string,
  ): HubUpgradeModeResult => ({ mode, source, reason });

  // Signal 1: a bun-linked checkout counts as in-place even inside a
  // container; only the reported source label differs there.
  if (installSource.kind === "bun-linked") {
    return verdict(
      "in-place",
      inContainer ? "container" : "bun-linked",
      "bun-linked checkout — git pull + restart persists on disk",
    );
  }

  if (inContainer) {
    // Signal 2: $BUN_INSTALL sits under the persistent home.
    if (bunInstallOnPersistentDisk(env)) {
      return verdict(
        "in-place",
        "container",
        "container with $BUN_INSTALL on the persistent disk — bun add -g persists across restart",
      );
    }
    // Signal 3: any other container is treated as image-pinned. This is a
    // cheap env-based classification that errs toward redeploy-required.
    return verdict(
      "redeploy-required",
      "container",
      "container image-pinned ($BUN_INSTALL not on the persistent disk) — bun add -g would be lost on the next container restart; redeploy from your platform dashboard instead",
    );
  }

  // Signal 4: npm install outside a container.
  if (installSource.kind === "npm") {
    return verdict(
      "in-place",
      "npm",
      "npm-installed on a persistent filesystem — bun add -g persists",
    );
  }

  // Signal 5: unclassified — prefer redeploy-required over a silent no-op.
  return verdict(
    "redeploy-required",
    "unknown",
    "could not classify the hub install source — redeploy from your platform dashboard to be safe",
  );
}
@@ -0,0 +1,150 @@
1
+ /**
2
+ * The on-disk status file for an in-flight `POST /api/hub/upgrade` operation
3
+ * (design 2026-06-01 §5.3 / D4).
4
+ *
5
+ * WHY A FILE, NOT THE IN-MEMORY OPERATIONS REGISTRY: a hub-upgrade tears the
6
+ * hub DOWN mid-operation (the whole point — the new binary has to take over).
7
+ * The module-ops `InMemoryOperationsRegistry` is process-local and evaporates
8
+ * when the hub restarts, so it CANNOT carry hub-upgrade progress across the
9
+ * restart the SPA is polling through. A JSON file under `PARACHUTE_HOME`
10
+ * survives the hub bounce: the detached helper writes progress to it while the
11
+ * old hub is dying, and the NEW hub reads it back to answer
12
+ * `GET /api/hub/upgrade/status`. (On a container the file lives on the
13
+ * persistent disk — same place the DB + module installs live.)
14
+ *
15
+ * The file is single-slot (one upgrade at a time — there is only one hub). A
16
+ * stale file from a prior upgrade is simply overwritten when a new one starts.
17
+ *
18
+ * Wire shape mirrors the module-ops `Operation` enough that the SPA's polling
19
+ * code reads familiarly: `status` + `log` + `error` + timestamps, plus the
20
+ * hub-specific `mode` / `target_version` / `channel` the SPA branches on.
21
+ */
22
+
23
+ import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from "node:fs";
24
+ import { dirname, join } from "node:path";
25
+ import type { HubUpgradeMode } from "./hub-upgrade-mode.ts";
26
+
27
/** Lifecycle phase of a hub-upgrade as stored in the status file the SPA polls. */
export type HubUpgradeStatusPhase =
  /** Request accepted and helper spawned, but the helper has not started. */
  | "pending"
  /** Helper is rewriting the binary or about to restart. */
  | "running"
  /**
   * Rewrite and restart have been dispatched. From here the SPA polls
   * `/health` and the reported version directly, because the helper may never
   * get to write a terminal phase while the hub is being torn down.
   */
  | "restarting"
  /**
   * Terminal success — RESERVED / SPA-INFERRED. The helper does not write this
   * phase; the SPA infers success from `/health` plus the reported version.
   * It stays in the union so success detection and the 409 in-flight guard's
   * terminal-phase check can name it.
   */
  | "succeeded"
  /** Terminal failure (rewrite failed, downgrade refused, etc.). */
  | "failed"
  /**
   * The endpoint classified the hub as image-pinned and spawned no helper:
   * there is nothing to run in place. The SPA shows "redeploy from your
   * platform dashboard" rather than a progress spinner.
   */
  | "redeploy-required";
55
+
56
/** JSON shape of the single-slot hub-upgrade status file. */
export interface HubUpgradeStatus {
  /** Opaque id minted by the endpoint and echoed in the 202 body for polling. */
  operation_id: string;
  /** Current lifecycle phase. */
  phase: HubUpgradeStatusPhase;
  /** Detection result: in-place vs redeploy-required (§5.3). */
  mode: HubUpgradeMode;
  /** Version the operator was running when the request was made. */
  current_version: string;
  /** Best-effort target version (`npm view`); null when it could not be resolved. */
  target_version: string | null;
  /** Channel the rewrite targets (closed enum). */
  channel: "rc" | "latest";
  /** Sparse progress log; the helper appends to it. */
  log: string[];
  /** Failure message, set when `phase === "failed"`. */
  error?: string;
  started_at: string;
  finished_at?: string;
}
75
+
76
/**
 * Location of the single-slot hub-upgrade status file:
 * `<configDir>/hub-upgrade-status.json`.
 */
export function hubUpgradeStatusPath(configDir: string): string {
  const fileName = "hub-upgrade-status.json";
  return join(configDir, fileName);
}
80
+
81
/**
 * Atomically write the status file (write-temp + rename) so a poll that lands
 * mid-write never sees half-serialized JSON. Creates the parent directory when
 * it does not exist yet (a never-initialized PARACHUTE_HOME).
 *
 * The temp file name includes this process's PID. More than one process can
 * write the slot (the endpoint seeding a new operation, a helper appending
 * progress). With a single shared temp name, two writers could interleave:
 * both write the temp file, one renames it away, and the other's rename then
 * fails with ENOENT. A per-process name rules that out; within one process the
 * write + rename pair is synchronous.
 *
 * @param configDir directory holding the status file.
 * @param status full status object to persist (pretty-printed JSON + newline).
 * @throws whatever `mkdirSync` / `writeFileSync` / `renameSync` throw.
 */
export function writeHubUpgradeStatus(configDir: string, status: HubUpgradeStatus): void {
  const path = hubUpgradeStatusPath(configDir);
  const dir = dirname(path);
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
  // Same directory as the target, so the rename never crosses a filesystem.
  const tmp = `${path}.${process.pid}.tmp`;
  // `mode` only takes effect when the temp file is newly created.
  writeFileSync(tmp, `${JSON.stringify(status, null, 2)}\n`, { mode: 0o600 });
  renameSync(tmp, path);
}
94
+
95
/**
 * Load the status file, leniently. Returns null — and never throws — when the
 * file is absent, unreadable, not valid JSON, or not an object carrying an
 * `operation_id` key. Only that key's presence is checked; the remaining
 * fields are trusted as-is.
 */
export function readHubUpgradeStatus(configDir: string): HubUpgradeStatus | null {
  const statusFile = hubUpgradeStatusPath(configDir);
  if (!existsSync(statusFile)) return null;

  let decoded: unknown;
  try {
    decoded = JSON.parse(readFileSync(statusFile, "utf8"));
  } catch {
    // Unreadable or malformed — callers treat it the same as "no status".
    return null;
  }

  const looksLikeStatus =
    typeof decoded === "object" && decoded !== null && "operation_id" in decoded;
  return looksLikeStatus ? (decoded as HubUpgradeStatus) : null;
}
113
+
114
/**
 * Append a log line and/or advance the phase, then persist the status file.
 * The helper uses this to record the progress the SPA polls.
 *
 * No-ops (never throws for these):
 *   - no readable status file — the endpoint should always seed one first;
 *   - OPERATION GUARD: the on-disk `operation_id` differs from `operationId`.
 *     The file is single-slot, so a newer `POST /api/hub/upgrade` may have
 *     replaced it; a helper from the superseded operation must not overwrite
 *     the newer operation's status with stale progress.
 *
 * The guard is a read-then-write with no lock, so it narrows the window for a
 * superseded helper to clobber a newer operation but does not close it.
 *
 * A stored `log` that is missing or not an array is treated as empty. The
 * reader only validates `operation_id`, so without this a damaged file would
 * make the append throw.
 *
 * @param configDir directory holding the status file.
 * @param operationId the operation the caller was spawned with.
 * @param patch optional `phase` / `error` to set; a falsy `phase` is ignored.
 * @param logLine optional line to append; an empty string is ignored.
 */
export function appendHubUpgradeStatus(
  configDir: string,
  operationId: string,
  patch: Partial<Pick<HubUpgradeStatus, "phase" | "error">>,
  logLine?: string,
): void {
  const current = readHubUpgradeStatus(configDir);
  if (!current) return;
  // A newer operation superseded this one — do NOT clobber its status.
  if (current.operation_id !== operationId) return;
  const next: HubUpgradeStatus = { ...current };
  if (patch.phase) next.phase = patch.phase;
  if (patch.error !== undefined) next.error = patch.error;
  if (logLine) {
    const priorLog = Array.isArray(current.log) ? current.log : [];
    next.log = [...priorLog, logLine];
  }
  // Both terminal phases stamp `finished_at`. `succeeded` is reserved /
  // SPA-inferred, so in practice only `failed` gets here.
  if (patch.phase === "succeeded" || patch.phase === "failed") {
    next.finished_at = new Date().toISOString();
  }
  writeHubUpgradeStatus(configDir, next);
}