@openparachute/hub 0.6.2 → 0.6.3-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -35
- package/package.json +1 -1
- package/src/__tests__/api-hub-upgrade.test.ts +690 -0
- package/src/__tests__/api-modules-ops.test.ts +359 -3
- package/src/__tests__/api-modules.test.ts +54 -0
- package/src/__tests__/expose-cloudflare.test.ts +163 -72
- package/src/__tests__/expose-off-auto.test.ts +26 -1
- package/src/__tests__/expose.test.ts +260 -240
- package/src/__tests__/hub-control.test.ts +1 -242
- package/src/__tests__/hub-server.test.ts +64 -0
- package/src/__tests__/hub-unit.test.ts +574 -0
- package/src/__tests__/init.test.ts +219 -2
- package/src/__tests__/lifecycle.test.ts +416 -1448
- package/src/__tests__/managed-unit.test.ts +575 -0
- package/src/__tests__/migrate-cutover.test.ts +840 -0
- package/src/__tests__/migrate-offer.test.ts +240 -0
- package/src/__tests__/migrate.test.ts +132 -0
- package/src/__tests__/module-ops-client.test.ts +556 -0
- package/src/__tests__/port-probe.test.ts +23 -0
- package/src/__tests__/setup-wizard.test.ts +130 -0
- package/src/__tests__/status-supervisor.test.ts +504 -0
- package/src/__tests__/status.test.ts +157 -708
- package/src/__tests__/supervisor.test.ts +471 -6
- package/src/__tests__/upgrade.test.ts +351 -5
- package/src/api-hub-upgrade.ts +384 -0
- package/src/api-hub.ts +2 -1
- package/src/api-modules-ops.ts +221 -0
- package/src/api-modules.ts +18 -2
- package/src/cli.ts +97 -12
- package/src/cloudflare/connector-service.ts +117 -322
- package/src/commands/expose-cloudflare.ts +63 -71
- package/src/commands/expose-supervisor.ts +247 -0
- package/src/commands/expose.ts +59 -48
- package/src/commands/init.ts +225 -12
- package/src/commands/lifecycle.ts +455 -816
- package/src/commands/migrate-cutover.ts +837 -0
- package/src/commands/migrate.ts +71 -2
- package/src/commands/serve-boot.ts +71 -25
- package/src/commands/status.ts +535 -235
- package/src/commands/upgrade.ts +100 -2
- package/src/help.ts +128 -68
- package/src/hub-control.ts +23 -162
- package/src/hub-server.ts +39 -0
- package/src/hub-unit.ts +735 -0
- package/src/hub-upgrade-helper.ts +306 -0
- package/src/hub-upgrade-mode.ts +209 -0
- package/src/hub-upgrade-status.ts +150 -0
- package/src/managed-unit.ts +692 -0
- package/src/migrate-offer.ts +186 -0
- package/src/module-ops-client.ts +457 -0
- package/src/port-probe.ts +50 -0
- package/src/process-state.ts +19 -3
- package/src/setup-wizard.ts +80 -1
- package/src/supervisor.ts +389 -38
- package/web/ui/dist/assets/index-D_6AFvZy.js +61 -0
- package/web/ui/dist/assets/{index-BiBlvEaj.css → index-mz8XcVPP.css} +1 -1
- package/web/ui/dist/index.html +2 -2
- package/web/ui/dist/assets/index-CIN3mnmf.js +0 -61
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* The detached one-shot hub-upgrade helper (design 2026-06-01 §5.3 / D4).
|
|
4
|
+
*
|
|
5
|
+
* ── WHY A SEPARATE, DETACHED PROCESS ───────────────────────────────────────
|
|
6
|
+
*
|
|
7
|
+
* `POST /api/hub/upgrade` can't rewrite + restart the hub from inside the
|
|
8
|
+
* request handler: restarting the hub kills the very process serving the
|
|
9
|
+
* request, so the response would die with the old binary before it could
|
|
10
|
+
* report success. The resolution (§5.3): the endpoint spawns THIS helper with
|
|
11
|
+
* `detached: true` + `proc.unref()` — the ONE legitimate detached process in
|
|
12
|
+
* the unified model, *because it must outlive the hub it's upgrading*. The
|
|
13
|
+
* helper owns the restart; the request handler returns 202 immediately.
|
|
14
|
+
*
|
|
15
|
+
* Detached + unref'd means: no controlling terminal tie, its own process
|
|
16
|
+
* group, and the parent (hub) exiting does NOT deliver SIGHUP/SIGTERM to it.
|
|
17
|
+
* So when the helper later tears the hub down, it keeps running to completion.
|
|
18
|
+
*
|
|
19
|
+
* ── WHAT IT DOES ───────────────────────────────────────────────────────────
|
|
20
|
+
*
|
|
21
|
+
* 1. Mark the on-disk status file `running` (the SPA polls it — it's a FILE,
|
|
22
|
+
* not the in-memory ops registry, precisely because the hub goes down
|
|
23
|
+
* mid-upgrade; see hub-upgrade-status.ts).
|
|
24
|
+
* 2. Rewrite the hub binary — REUSES `upgrade("hub", …)` from commands/
|
|
25
|
+
* upgrade.ts (the channel-aware `bun add -g @openparachute/hub@<channel>`
|
|
26
|
+
* / linked git-pull + downgrade guard). No duplicated rewrite logic.
|
|
27
|
+
* 3. Trigger the platform-appropriate restart:
|
|
28
|
+
* - **unit-managed (VM/Mac)** → `restartHubUnit` (systemctl restart /
|
|
29
|
+
* launchctl kickstart -k). The manager tears the old hub down, starts
|
|
30
|
+
* the new binary, which re-boots every module from services.json.
|
|
31
|
+
* - **container (no unit manager)** → the runtime re-runs CMD on the
|
|
32
|
+
* hub's exit, so the helper sends the old hub a graceful SIGTERM (the
|
|
33
|
+
* `serve` loop's SIGTERM handler stops children + the server cleanly,
|
|
34
|
+
* then the process exits → the runtime brings it back on the rewritten
|
|
35
|
+
* binary). The hub PID is passed in via `--hub-pid`.
|
|
36
|
+
*
|
|
37
|
+
* `upgrade("hub", …)` is able to restart the hub itself (its Phase-4
|
|
38
|
+
* dual-dispatch: `supervisor: {}` opts into the `restartHubUnit` arm; without
|
|
39
|
+
* it, the no-unit fallback is a detached lifecycle restart we DON'T want
|
|
40
|
+
* here). The helper uses NEITHER, on any platform: it omits `supervisor` and
|
|
41
|
+
* passes a no-op `restartFn`, so `upgrade` performs a pure rewrite and never
|
|
42
|
+
* restarts anything. The helper then owns the restart explicitly —
|
|
43
|
+
* `restartHubUnit` on a unit-managed box, the SIGTERM path on a container —
|
|
44
|
+
* so there is no double-restart and the restart authority stays unambiguous per
|
|
45
|
+
* platform.
|
|
46
|
+
*
|
|
47
|
+
* ── TESTABILITY ────────────────────────────────────────────────────────────
|
|
48
|
+
*
|
|
49
|
+
* `runHubUpgradeHelper` is the pure, injectable core. Every side effect — the
|
|
50
|
+
* status writes, the `upgrade()` call, the unit restart, the container exit
|
|
51
|
+
* signal — is a seam, so the rewrite-then-restart sequence + the container
|
|
52
|
+
* graceful-exit path are unit-tested with NO real `bun add -g`, NO real
|
|
53
|
+
* systemctl, and NO real process signal. Only the thin argv-parsing `main()`
|
|
54
|
+
* at the bottom touches the real OS, and it's only reached when this file is
|
|
55
|
+
* the entrypoint (`import.meta.main`).
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
import { type UpgradeOpts, upgrade as realUpgrade } from "./commands/upgrade.ts";
|
|
59
|
+
import { CONFIG_DIR } from "./config.ts";
|
|
60
|
+
import {
|
|
61
|
+
type HubUnitDeps,
|
|
62
|
+
type HubUnitManagerOpResult,
|
|
63
|
+
defaultHubUnitDeps,
|
|
64
|
+
isHubUnitInstalled,
|
|
65
|
+
restartHubUnit as realRestartHubUnit,
|
|
66
|
+
} from "./hub-unit.ts";
|
|
67
|
+
import {
|
|
68
|
+
type HubUpgradeStatus,
|
|
69
|
+
appendHubUpgradeStatus,
|
|
70
|
+
readHubUpgradeStatus,
|
|
71
|
+
} from "./hub-upgrade-status.ts";
|
|
72
|
+
|
|
73
|
+
/** Inputs for a single helper run (what `parseArgs` produces from argv). */
export interface HubUpgradeHelperArgs {
  /** Id of this upgrade operation — the same value as the status file's `operation_id`. */
  operationId: string;
  /** Release channel to rewrite to. Closed enum; the endpoint validates it before spawning. */
  channel: "rc" | "latest";
  /** PARACHUTE_HOME: the directory holding the status file and services.json. */
  configDir: string;
  /**
   * PID of the hub process to terminate gracefully on the container path.
   * Left undefined on the unit-managed path, where the service manager owns
   * the restart.
   */
  hubPid?: number;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Side-effect seams for `runHubUpgradeHelper`. Every member is optional: an
 * omitted member falls back to the real implementation, while tests pass
 * fakes to observe or stub the effect.
 */
export interface HubUpgradeHelperDeps {
  /** Rewrites the hub binary. The default is `upgrade` from `commands/upgrade.ts`. */
  upgrade?: (svc: string, opts: UpgradeOpts) => Promise<number>;
  /** Reports whether a hub unit is installed — selects the unit-managed vs container restart. */
  isHubUnitInstalled?: (deps: HubUnitDeps) => boolean;
  /** Restarts the hub unit (used on the unit-managed path only). */
  restartHubUnit?: (deps: HubUnitDeps) => HubUnitManagerOpResult;
  /** Dependencies handed to the unit probe and the unit restart. */
  hubUnitDeps?: HubUnitDeps;
  /**
   * Delivers the graceful-exit signal to the hub (container path). The
   * default calls `process.kill(pid, signal)`; tests record the call instead.
   */
  signalHub?: (pid: number, signal: NodeJS.Signals) => void;
  /** Appends a phase/error patch and an optional log line to the on-disk status file. */
  appendStatus?: (
    configDir: string,
    operationId: string,
    patch: Partial<Pick<HubUpgradeStatus, "phase" | "error">>,
    logLine?: string,
  ) => void;
}
|
|
110
|
+
|
|
111
|
+
/**
 * The pure, injectable helper core: rewrite the hub binary, then trigger the
 * platform restart, recording progress to the status file throughout.
 *
 * Sequence:
 *  1. Append `phase: "running"`.
 *  2. Call `upgrade("hub", …)` with a no-op `restartFn` and no `supervisor`,
 *     so the call only rewrites. A throw or a non-zero exit code is recorded
 *     as `failed` and nothing is restarted.
 *  3. Hub unit installed → `restartHubUnit`; a non-"ok" outcome is recorded as
 *     `failed`, otherwise `restarting` is recorded.
 *  4. No hub unit (container) → record `restarting`, then send SIGTERM to
 *     `args.hubPid` when it is a finite, positive number. A signal error that
 *     carries an error code other than `ESRCH` is recorded as `failed`.
 *
 * @param args Operation id, channel, config dir and (container path) hub PID.
 * @param deps Optional side-effect seams; each falls back to the real impl.
 * @returns 0 when the restart was dispatched (or no PID was given); the
 *   non-zero exit code of `upgrade` when the rewrite failed; 1 when `upgrade`
 *   threw, the unit restart did not report "ok", or the hub could not be
 *   signalled.
 */
export async function runHubUpgradeHelper(
  args: HubUpgradeHelperArgs,
  deps: HubUpgradeHelperDeps = {},
): Promise<number> {
  const upgrade = deps.upgrade ?? realUpgrade;
  const unitInstalledFn = deps.isHubUnitInstalled ?? isHubUnitInstalled;
  const restartUnit = deps.restartHubUnit ?? realRestartHubUnit;
  const hubUnitDeps = deps.hubUnitDeps ?? defaultHubUnitDeps;
  const signalHub = deps.signalHub ?? ((pid, signal) => process.kill(pid, signal));
  const append = deps.appendStatus ?? appendHubUpgradeStatus;
  const { configDir, operationId } = args;

  append(
    configDir,
    operationId,
    { phase: "running" },
    `hub-upgrade helper started (op ${operationId})`,
  );

  // Probe the platform BEFORE the rewrite so the restart decision is made
  // against the state the helper started in.
  const unitManaged = unitInstalledFn(hubUnitDeps);

  // ── Rewrite the binary ───────────────────────────────────────────────────
  // Reuse commands/upgrade.ts for the channel-aware rewrite, but REWRITE ONLY:
  // the no-op `restartFn` suppresses upgrade's own restart and `supervisor` is
  // intentionally omitted, so the helper alone decides how the hub restarts.
  const upgradeOpts: UpgradeOpts = {
    channel: args.channel,
    configDir,
    restartFn: async () => 0,
    log: (line) => append(configDir, operationId, {}, line),
  };

  let code: number;
  try {
    code = await upgrade("hub", upgradeOpts);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    append(configDir, operationId, { phase: "failed", error: msg }, `hub-upgrade failed: ${msg}`);
    return 1;
  }

  if (code !== 0) {
    append(
      configDir,
      operationId,
      { phase: "failed", error: `upgrade exited ${code}` },
      `hub-upgrade rewrite failed (exit ${code}) — binary NOT restarted`,
    );
    return code;
  }

  // ── Restart (helper-owned) ───────────────────────────────────────────────
  if (unitManaged) {
    // Unit-managed: restart through the service manager, never a PID signal.
    // Only `restarting` is recorded on success — the helper does not write
    // `succeeded`; the SPA infers success from the new hub itself.
    const res = restartUnit(hubUnitDeps);
    for (const m of res.messages) append(configDir, operationId, {}, m);
    if (res.outcome !== "ok") {
      append(
        configDir,
        operationId,
        { phase: "failed", error: `hub unit restart ${res.outcome}` },
        `hub binary rewritten but the unit restart ${res.outcome} — restart it manually`,
      );
      return 1;
    }
    append(
      configDir,
      operationId,
      { phase: "restarting" },
      "hub unit restarted via the service manager — the SPA polls /health + version for the new binary",
    );
    return 0;
  }

  // Container path: record `restarting` first (the hub may be gone right after
  // the signal), then ask the hub to exit gracefully so the runtime re-runs it
  // on the rewritten binary.
  append(
    configDir,
    operationId,
    { phase: "restarting" },
    "container: signalling the hub to exit gracefully so the runtime restarts it on the new binary",
  );
  if (args.hubPid !== undefined && Number.isFinite(args.hubPid) && args.hubPid > 0) {
    try {
      signalHub(args.hubPid, "SIGTERM");
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      const errCode = hubSignalErrorCode(err);
      if (errCode !== undefined && errCode !== "ESRCH") {
        // A coded error other than ESRCH (e.g. EPERM, or an invalid-pid
        // argument error) means the signal was NOT delivered: the old hub is
        // still running the old binary. Report that honestly instead of
        // leaving the operation parked in `restarting`.
        append(
          configDir,
          operationId,
          { phase: "failed", error: `hub graceful-exit signal failed (${errCode})` },
          `hub binary rewritten but the graceful-exit signal failed (${msg}) — restart it manually`,
        );
        return 1;
      }
      // ESRCH (or an error with no code): the hub has already exited, e.g. a
      // racing restart. Not fatal — the rewrite is done. Record + succeed.
      append(configDir, operationId, {}, `hub graceful-exit signal noted as already-gone (${msg})`);
    }
  } else {
    append(
      configDir,
      operationId,
      {},
      "no hub pid provided — relying on the platform runtime's own restart to pick up the new binary",
    );
  }
  return 0;
}

/** Extract the `code` property of a thrown value as a string, or undefined when it has none. */
function hubSignalErrorCode(err: unknown): string | undefined {
  if (typeof err !== "object" || err === null || !("code" in err)) return undefined;
  const raw = (err as { code?: unknown }).code;
  return raw === undefined || raw === null ? undefined : String(raw);
}
|
|
233
|
+
|
|
234
|
+
// ── Entrypoint (only runs when invoked directly as `bun hub-upgrade-helper.ts`) ──
|
|
235
|
+
|
|
236
|
+
/**
 * Parse the helper's argv (`--op`, `--channel`, `--config-dir`, `--hub-pid`)
 * into `HubUpgradeHelperArgs`.
 *
 * @param argv Arguments after the script path (flag/value pairs).
 * @returns The resolved args, or `{ error }` describing the first problem:
 *   an unknown argument, a missing `--op`, a `--channel` other than
 *   "rc"/"latest", or a `--hub-pid` that is not a positive decimal integer.
 *   `configDir` falls back to `CONFIG_DIR` when `--config-dir` is absent;
 *   `hubPid` is only set when `--hub-pid` was given a value.
 */
function parseArgs(argv: string[]): HubUpgradeHelperArgs | { error: string } {
  let operationId: string | undefined;
  let channel: string | undefined;
  let configDir: string | undefined;
  let hubPid: number | undefined;
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    const next = argv[i + 1];
    switch (arg) {
      // Each value-bearing flag guards `next !== undefined`, so a flag that is
      // the LAST element of argv (no value follows) leaves its value unset;
      // for the required flags that surfaces as a clear error below.
      case "--op":
        if (next !== undefined) operationId = next;
        i++;
        break;
      case "--channel":
        if (next !== undefined) channel = next;
        i++;
        break;
      case "--config-dir":
        if (next !== undefined) configDir = next;
        i++;
        break;
      case "--hub-pid":
        if (next !== undefined) {
          // Strict decimal parse. Bare `Number()` would accept "" (→ 0),
          // "12.5" and "0x10", and a non-numeric value would be silently
          // dropped — leaving the container path with no PID to signal.
          const pid = /^\d+$/.test(next) ? Number(next) : Number.NaN;
          if (!Number.isSafeInteger(pid) || pid <= 0) {
            return { error: `--hub-pid must be a positive integer (got "${next}")` };
          }
          hubPid = pid;
        }
        i++;
        break;
      default:
        return { error: `unexpected argument "${arg}"` };
    }
  }
  if (!operationId) return { error: "--op <id> is required" };
  if (channel !== "rc" && channel !== "latest") {
    return { error: `--channel must be "rc" or "latest" (got "${channel ?? ""}")` };
  }
  const resolved: HubUpgradeHelperArgs = {
    operationId,
    channel,
    configDir: configDir ?? CONFIG_DIR,
  };
  if (hubPid !== undefined) resolved.hubPid = hubPid;
  return resolved;
}
|
|
281
|
+
|
|
282
|
+
/**
 * Entrypoint body: parse argv, confirm a status file is present, then run the
 * helper core.
 *
 * @returns 2 when argv is invalid or `readHubUpgradeStatus` finds no status
 *   file under the config dir; otherwise the exit code of
 *   `runHubUpgradeHelper`.
 */
async function main(): Promise<number> {
  // Both pre-flight failures share the stderr prefix and the exit code.
  const bail = (detail: string): number => {
    console.error(`hub-upgrade-helper: ${detail}`);
    return 2;
  };

  const parsed = parseArgs(process.argv.slice(2));
  if ("error" in parsed) return bail(parsed.error);

  // The endpoint is expected to have seeded the status file before spawning
  // the helper; if it is missing, fail loudly instead of silently no-op'ing.
  const { configDir, operationId } = parsed;
  if (!readHubUpgradeStatus(configDir)) {
    return bail(`no status file for op ${operationId} under ${configDir}`);
  }

  return await runHubUpgradeHelper(parsed);
}
|
|
298
|
+
|
|
299
|
+
// Run `main()` only when this file is the process entrypoint; importing the
// module (e.g. from tests) must not start an upgrade.
if (import.meta.main) {
  // Anything that escapes `main()` is reported and mapped to exit code 1.
  const onFatal = (err: unknown): never => {
    console.error("hub-upgrade-helper: fatal", err);
    return process.exit(1);
  };

  main()
    .then((exitCode) => process.exit(exitCode))
    .catch(onFatal);
}
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-place-vs-redeploy detection for `POST /api/hub/upgrade` (design
|
|
3
|
+
* 2026-06-01 §5.3 — the OPEN implementation detail flagged for D4).
|
|
4
|
+
*
|
|
5
|
+
* The hub-upgrade endpoint must decide, BEFORE it spawns the detached helper,
|
|
6
|
+
* whether an on-disk binary rewrite (`bun add -g @openparachute/hub@<channel>`
|
|
7
|
+
* / a linked git-pull) will actually PERSIST across the next restart:
|
|
8
|
+
*
|
|
9
|
+
* - **in-place** — the hub binary lives on a writable, persistent location
|
|
10
|
+
* (a bun-linked checkout, or a `bun add -g` install under a $BUN_INSTALL
|
|
11
|
+
* that survives restart). A rewrite + restart genuinely upgrades the hub.
|
|
12
|
+
*
|
|
13
|
+
* - **redeploy-required** — the hub binary is baked into a container image
|
|
14
|
+
* (Render/Fly image-pinned). `bun add -g` would write to the image's
|
|
15
|
+
* ephemeral layer and be LOST on the next container restart, so the rewrite
|
|
16
|
+
* is a misleading no-op. The honest path is a platform redeploy from the
|
|
17
|
+
* operator's dashboard, NOT a false "upgraded."
|
|
18
|
+
*
|
|
19
|
+
* ── THE HEURISTIC (conservative; flagged for review) ───────────────────────
|
|
20
|
+
*
|
|
21
|
+
* Signals, in priority order:
|
|
22
|
+
*
|
|
23
|
+
* 1. **bun-linked** (`detectHubInstallSource` → `bun-linked`): the hub runs
|
|
24
|
+
* from a git checkout on disk. A `git pull` in that checkout is always
|
|
25
|
+
* persistent (the checkout is the operator's own filesystem, not an image
|
|
26
|
+
* layer). → **in-place**. This is Aaron's dev box + every VM/Mac that
|
|
27
|
+
* bun-linked the hub.
|
|
28
|
+
*
|
|
29
|
+
* 2. **container, BUN_INSTALL on the persistent disk**: a container (the
|
|
30
|
+
* Render Blueprint pins `PARACHUTE_HOME=/parachute`) whose `$BUN_INSTALL`
|
|
31
|
+
* points INSIDE the persistent mount (`/parachute/...` — the same place
|
|
32
|
+
* runtime module installs land via `/api/modules/:short/install`). A
|
|
33
|
+
* `bun add -g` there writes to the mounted volume, which survives a
|
|
34
|
+
* container restart. → **in-place**. This is the "hub installed to the
|
|
35
|
+
* persistent disk" arm §5.3 calls out.
|
|
36
|
+
*
|
|
37
|
+
* 3. **container, BUN_INSTALL NOT on the persistent disk** (or unset): the
|
|
38
|
+
* hub is image-pinned — `bun add -g` writes to the ephemeral image layer
|
|
39
|
+
* and is lost on restart. → **redeploy-required**. This is the default
|
|
40
|
+
* Render/Fly image shape today (the Dockerfile `bun add`s the hub into the
|
|
41
|
+
* image; $BUN_INSTALL defaults to `/root/.bun`, not the mount).
|
|
42
|
+
*
|
|
43
|
+
* 4. **npm, non-container**: a `bun add -g` install on a VM/Mac (not a
|
|
44
|
+
* container). The global bun prefix is on the operator's own writable
|
|
45
|
+
* filesystem → persistent. → **in-place**.
|
|
46
|
+
*
|
|
47
|
+
* 5. **unknown / anything else**: we couldn't classify the install source.
|
|
48
|
+
* → **redeploy-required** (the honest fallback — §5.3: "When uncertain,
|
|
49
|
+
* prefer redeploy-required over a silent no-op"). The SPA then tells the
|
|
50
|
+
* operator to redeploy rather than promising an upgrade that may evaporate.
|
|
51
|
+
*
|
|
52
|
+
* ── FALSE-POSITIVE / FALSE-NEGATIVE RISK (for the reviewer) ─────────────────
|
|
53
|
+
*
|
|
54
|
+
* - **False "in-place" (the dangerous direction)** would tell the operator
|
|
55
|
+
* "upgraded" while the rewrite silently evaporates on the next restart.
|
|
56
|
+
* The only path that risks this is signal #2: a container whose
|
|
57
|
+
* `$BUN_INSTALL` is under the persistent mount but where the operator
|
|
58
|
+
* mounted the disk read-only, or where the bun cache (not the install) is
|
|
59
|
+
* what's on the mount. We mitigate by requiring `$BUN_INSTALL` to be a
|
|
60
|
+
* descendant of the persistent-home prefix — the strictest signal available
|
|
61
|
+
* without probing writability (which we can't do reliably from the request
|
|
62
|
+
* handler before spawning the helper). A residual risk remains; see the
|
|
63
|
+
* note in `detectHubUpgradeMode` on tightening this with a write-probe in
|
|
64
|
+
* the helper if it proves wrong in the field.
|
|
65
|
+
*
|
|
66
|
+
* - **False "redeploy-required" (the safe direction)** merely tells the
|
|
67
|
+
* operator to redeploy when an in-place upgrade would have worked — annoying
|
|
68
|
+
* but never destructive. Signals #3/#5 deliberately err here.
|
|
69
|
+
*
|
|
70
|
+
* Pure + injectable: no I/O beyond the (already-injectable) install-source
|
|
71
|
+
* detection. The env + srcDir are passed in so tests drive every branch.
|
|
72
|
+
*/
|
|
73
|
+
|
|
74
|
+
import { dirname, posix } from "node:path";
|
|
75
|
+
import { fileURLToPath } from "node:url";
|
|
76
|
+
import { CONTAINER_HOME } from "./hub-control.ts";
|
|
77
|
+
import {
|
|
78
|
+
type DetectInstallSourceDeps,
|
|
79
|
+
type InstallSource,
|
|
80
|
+
detectHubInstallSource,
|
|
81
|
+
} from "./install-source.ts";
|
|
82
|
+
|
|
83
|
+
/**
 * Result of the upgrade-mode detection, which the SPA branches on:
 * `"in-place"` — rewrite + restart upgrades the hub; `"redeploy-required"` —
 * the hub is image-pinned and must be redeployed from the platform instead.
 */
export type HubUpgradeMode = "in-place" | "redeploy-required";
|
|
85
|
+
|
|
86
|
+
/** Optional inputs to `detectHubUpgradeMode`; every member is a test seam or a shortcut. */
export interface DetectHubUpgradeModeArgs {
  /** Environment to read instead of `process.env` (test seam). */
  env?: Record<string, string | undefined>;
  /**
   * Directory from which the hub's package.json is located and the install
   * source classified. Defaults to the directory of the running source file.
   * Test seam.
   */
  hubSrcDir?: string;
  /** Dependencies forwarded unchanged to `detectHubInstallSource` (test seam). */
  installSourceDeps?: DetectInstallSourceDeps;
  /**
   * An install source the caller has already classified (e.g. the `/api/hub`
   * handler), which saves a second filesystem walk. When provided,
   * `hubSrcDir` and `installSourceDeps` are not consulted.
   */
  source?: InstallSource;
}
|
|
103
|
+
|
|
104
|
+
/** What `detectHubUpgradeMode` reports back. */
export interface HubUpgradeModeResult {
  /** Whether an in-place rewrite + restart will work, or a redeploy is needed. */
  mode: HubUpgradeMode;
  /**
   * How the hub was classified: an install-source kind, or `"container"` when
   * the container signal applied. Exposed for diagnostics and the SPA copy.
   */
  source: InstallSource["kind"] | "container";
  /** Brief human-readable justification — shown in the 202 body, the SPA, and tests. */
  reason: string;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Container-mode check: true exactly when `PARACHUTE_HOME` in `env` equals the
 * shared `CONTAINER_HOME` constant (the value the Render Blueprint pins, and
 * the same constant `api-hub.ts` uses for its container override).
 */
function isContainer(env: Record<string, string | undefined>): boolean {
  const { PARACHUTE_HOME: configuredHome } = env;
  return configuredHome === CONTAINER_HOME;
}
|
|
121
|
+
|
|
122
|
+
/**
 * True when `$BUN_INSTALL` is the persistent home (`$PARACHUTE_HOME`) itself
 * or a descendant of it — i.e. `bun add -g` writes land on the mounted volume
 * that survives a container restart.
 *
 * Both paths are normalized lexically (POSIX rules) before comparing, so:
 *  - `..` / `.` segments and doubled slashes cannot fake a descendant
 *    (`/parachute/../root/.bun` is NOT under `/parachute`);
 *  - a trailing slash on either path does not change the answer.
 *
 * The comparison is a strict descendant-of test on whole path components, not
 * a substring match, so a sibling such as `/parachute-old` never matches.
 *
 * Returns false when either variable is unset or empty.
 *
 * NOTE(review): normalization is purely lexical — symlinks are not resolved
 * and writability is not probed.
 */
function bunInstallOnPersistentDisk(env: Record<string, string | undefined>): boolean {
  const bunInstall = env.BUN_INSTALL;
  const home = env.PARACHUTE_HOME;
  if (!bunInstall || !home) return false;

  // Collapse `.`/`..`/`//` and drop a trailing slash (but keep the root "/").
  const canonical = (p: string): string => {
    const normalized = posix.normalize(p);
    return normalized.length > 1 && normalized.endsWith("/") ? normalized.slice(0, -1) : normalized;
  };

  const installDir = canonical(bunInstall);
  const homeDir = canonical(home);
  if (installDir === homeDir) return true;

  // Append the separator so only whole-component descendants match.
  const prefix = homeDir.endsWith("/") ? homeDir : `${homeDir}/`;
  return installDir.startsWith(prefix);
}
|
|
139
|
+
|
|
140
|
+
/**
 * Classify the hub as in-place-upgradable (rewrite + restart works) or
 * image-pinned (redeploy only).
 *
 * Decision order:
 *  1. install source `bun-linked` → in-place (container or not);
 *  2. not a container: `npm` → in-place, anything else → redeploy-required
 *     with source "unknown";
 *  3. container: `$BUN_INSTALL` on the persistent disk → in-place, otherwise
 *     redeploy-required.
 *
 * @param args Optional env / source-dir / pre-classified-source overrides.
 * @returns The mode plus the classified source and a human-readable reason.
 */
export function detectHubUpgradeMode(args: DetectHubUpgradeModeArgs = {}): HubUpgradeModeResult {
  const env = args.env ?? process.env;
  const hubSrcDir = args.hubSrcDir ?? dirname(fileURLToPath(import.meta.url));
  const source = args.source ?? detectHubInstallSource(hubSrcDir, args.installSourceDeps);
  const inContainer = isContainer(env);

  // A bun-linked checkout is upgraded by `git pull` in the checkout itself, so
  // it wins over every container consideration; only the reported `source`
  // label differs.
  if (source.kind === "bun-linked") {
    return {
      mode: "in-place",
      source: inContainer ? "container" : "bun-linked",
      reason: "bun-linked checkout — git pull + restart persists on disk",
    };
  }

  if (!inContainer) {
    // VM/Mac with an npm (`bun add -g`) install: treated as persistent.
    if (source.kind === "npm") {
      return {
        mode: "in-place",
        source: "npm",
        reason: "npm-installed on a persistent filesystem — bun add -g persists",
      };
    }
    // Unclassifiable install: prefer the honest redeploy answer over a
    // rewrite that may silently not stick.
    return {
      mode: "redeploy-required",
      source: "unknown",
      reason:
        "could not classify the hub install source — redeploy from your platform dashboard to be safe",
    };
  }

  // Container: the answer hinges on where `bun add -g` writes. This is an
  // env-only classification; no write-probe of `$BUN_INSTALL` is attempted.
  if (bunInstallOnPersistentDisk(env)) {
    return {
      mode: "in-place",
      source: "container",
      reason:
        "container with $BUN_INSTALL on the persistent disk — bun add -g persists across restart",
    };
  }
  return {
    mode: "redeploy-required",
    source: "container",
    reason:
      "container image-pinned ($BUN_INSTALL not on the persistent disk) — bun add -g would be lost on the next container restart; redeploy from your platform dashboard instead",
  };
}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The on-disk status file for an in-flight `POST /api/hub/upgrade` operation
|
|
3
|
+
* (design 2026-06-01 §5.3 / D4).
|
|
4
|
+
*
|
|
5
|
+
* WHY A FILE, NOT THE IN-MEMORY OPERATIONS REGISTRY: a hub-upgrade tears the
|
|
6
|
+
* hub DOWN mid-operation (the whole point — the new binary has to take over).
|
|
7
|
+
* The module-ops `InMemoryOperationsRegistry` is process-local and evaporates
|
|
8
|
+
* when the hub restarts, so it CANNOT carry hub-upgrade progress across the
|
|
9
|
+
* restart the SPA is polling through. A JSON file under `PARACHUTE_HOME`
|
|
10
|
+
* survives the hub bounce: the detached helper writes progress to it while the
|
|
11
|
+
* old hub is dying, and the NEW hub reads it back to answer
|
|
12
|
+
* `GET /api/hub/upgrade/status`. (On a container the file lives on the
|
|
13
|
+
* persistent disk — same place the DB + module installs live.)
|
|
14
|
+
*
|
|
15
|
+
* The file is single-slot (one upgrade at a time — there is only one hub). A
|
|
16
|
+
* stale file from a prior upgrade is simply overwritten when a new one starts.
|
|
17
|
+
*
|
|
18
|
+
* Wire shape mirrors the module-ops `Operation` enough that the SPA's polling
|
|
19
|
+
* code reads familiarly: `phase` (the analogue of `status`) + `log` + `error` + timestamps, plus the
|
|
20
|
+
* hub-specific `mode` / `target_version` / `channel` the SPA branches on.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from "node:fs";
|
|
24
|
+
import { dirname, join } from "node:path";
|
|
25
|
+
import type { HubUpgradeMode } from "./hub-upgrade-mode.ts";
|
|
26
|
+
|
|
27
|
+
/** Lifecycle phase of a hub-upgrade operation, as polled by the SPA. */
export type HubUpgradeStatusPhase =
  /** Request accepted and helper spawned, but the helper has not started working. */
  | "pending"
  /** The helper is rewriting the binary, or is about to restart the hub. */
  | "running"
  /**
   * Rewrite done and restart dispatched. From here the SPA polls `/health`
   * and the reported version directly, because the helper may never get to
   * write a terminal phase (the hub it would report to is being torn down).
   */
  | "restarting"
  /**
   * Terminal success — RESERVED / SPA-INFERRED. The helper never writes this
   * phase, since it cannot reliably record it before the hub bounce; the SPA
   * (HubUpgradeCard) infers success from `/health` plus the reported version.
   * It stays in the union so the SPA's success detection and the 409
   * in-flight guard's terminal-phase check have a name to refer to.
   */
  | "succeeded"
  /** Terminal failure (the rewrite failed, a downgrade was refused, etc.). */
  | "failed"
  /**
   * The endpoint found the hub image-pinned and did NOT spawn a helper, as
   * there is no in-place upgrade to run. The SPA shows "redeploy from your
   * platform dashboard" rather than a progress spinner.
   */
  | "redeploy-required";
|
|
55
|
+
|
|
56
|
+
/**
 * Snapshot of one hub-upgrade operation: the shape serialized to the
 * single-slot status file and read back for the SPA's polling.
 */
export interface HubUpgradeStatus {
  /** Opaque identifier minted by the endpoint and echoed in the 202 body so the SPA can poll. */
  operation_id: string;
  /** Where the operation currently stands (see `HubUpgradeStatusPhase`). */
  phase: HubUpgradeStatusPhase;
  /** Result of the §5.3 detection: in-place upgrade vs redeploy-required. */
  mode: HubUpgradeMode;
  /** Version the operator was running when the request was made. */
  current_version: string;
  /** Target version resolved on a best-effort basis (`npm view`); null when it could not be determined. */
  target_version: string | null;
  /** Release channel the rewrite targets (closed enum). */
  channel: "rc" | "latest";
  /** Sparse progress log; the helper appends lines as it goes. */
  log: string[];
  /** Failure message, present when `phase === "failed"`. */
  error?: string;
  /** When the operation started — presumably ISO-8601 like `finished_at`; set by whoever seeds the file. */
  started_at: string;
  /** ISO-8601 timestamp stamped when a terminal phase (`succeeded` / `failed`) is recorded. */
  finished_at?: string;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Resolve the location of the single-slot hub-upgrade status file.
 *
 * @param configDir Directory that holds the hub's configuration/state files.
 * @returns `configDir` joined with the fixed status file name.
 */
export function hubUpgradeStatusPath(configDir: string): string {
  const statusFileName = "hub-upgrade-status.json";
  return join(configDir, statusFileName);
}
|
|
80
|
+
|
|
81
|
+
/**
 * Atomically write the status file (write-temp + rename) so a poll that lands
 * mid-write never sees a half-serialized JSON. Creates the parent dir if
 * absent (a never-initialized PARACHUTE_HOME).
 *
 * The temp file name includes the writer's pid: the slot can be written by
 * more than one process (the endpoint seeding it, a spawned helper appending
 * to it), and a shared `.tmp` name would let one writer truncate — or rename
 * into place — another writer's partially written file.
 *
 * @param configDir Directory that holds the status file.
 * @param status Full status snapshot to persist (pretty-printed JSON + newline).
 * @throws Whatever `mkdirSync` / `writeFileSync` / `renameSync` throw on I/O failure.
 */
export function writeHubUpgradeStatus(configDir: string, status: HubUpgradeStatus): void {
  const path = hubUpgradeStatusPath(configDir);
  // Recursive mkdir is a no-op when the directory already exists, so no
  // separate existence check (and its check-then-create window) is needed.
  mkdirSync(dirname(path), { recursive: true });
  // Writes within one process are synchronous, so the pid alone makes the
  // temp name collision-free across concurrent writers.
  const tmp = `${path}.${process.pid}.tmp`;
  // 0o600: the file is created readable/writable by the owner only.
  writeFileSync(tmp, `${JSON.stringify(status, null, 2)}\n`, { mode: 0o600 });
  // Rename within the same directory swaps the complete file into place.
  renameSync(tmp, path);
}
|
|
94
|
+
|
|
95
|
+
/**
 * Load the persisted hub-upgrade status, if there is a usable one.
 *
 * Deliberately forgiving and never throws: a missing file, an unreadable
 * file, invalid JSON, or a JSON value that is not an object carrying an
 * `operation_id` key all yield `null` (the SPA then falls back to polling
 * `/health` directly).
 *
 * @param configDir Directory that holds the status file.
 * @returns The parsed status, or `null` when nothing usable is on disk.
 */
export function readHubUpgradeStatus(configDir: string): HubUpgradeStatus | null {
  const statusFile = hubUpgradeStatusPath(configDir);
  if (!existsSync(statusFile)) {
    return null;
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(readFileSync(statusFile, "utf8"));
  } catch {
    // Read error or malformed JSON — treat as "no status".
    return null;
  }

  // Only the presence of `operation_id` is verified; other fields are trusted.
  const looksLikeStatus =
    typeof decoded === "object" && decoded !== null && "operation_id" in decoded;
  return looksLikeStatus ? (decoded as HubUpgradeStatus) : null;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Append a log line + (optionally) advance the phase, persisting atomically.
 * Used by the helper to record progress the SPA polls. A missing file (the
 * endpoint should always seed it first) is a no-op rather than a throw — the
 * helper's job is the upgrade, not bookkeeping.
 *
 * OPERATION GUARD: `operationId` is the op the caller (helper) was spawned
 * with. The status file is single-slot — a newer `POST /api/hub/upgrade` will
 * overwrite it with a fresh `operation_id`. A still-running helper from the
 * SUPERSEDED operation must not clobber the newer operation's status with its
 * stale progress. So we only write when the on-disk `operation_id` still
 * matches `operationId`; otherwise the append is a NO-OP (the newer operation
 * owns the slot now).
 *
 * NOTE(review): the read, the id check and the write are separate steps, so a
 * newer operation seeded between the check and the write could still be
 * overwritten — the guard narrows that window but does not lock the file.
 *
 * @param configDir Directory that holds the status file.
 * @param operationId Operation the caller belongs to; must match the file's `operation_id`.
 * @param patch Optional new `phase` and/or `error` to record.
 * @param logLine Optional progress line appended to `log` (empty string is ignored).
 */
export function appendHubUpgradeStatus(
  configDir: string,
  operationId: string,
  patch: Partial<Pick<HubUpgradeStatus, "phase" | "error">>,
  logLine?: string,
): void {
  const current = readHubUpgradeStatus(configDir);
  if (!current) return;
  // A newer operation superseded this one — do NOT clobber its status.
  if (current.operation_id !== operationId) return;
  const next: HubUpgradeStatus = { ...current };
  if (patch.phase) next.phase = patch.phase;
  if (patch.error !== undefined) next.error = patch.error;
  if (logLine) {
    // The reader only verifies that `operation_id` exists, so `log` can be
    // missing or malformed on disk. Spreading a non-array would throw and
    // break the "bookkeeping never throws on a bad file" contract, so start
    // from an empty log in that case.
    const priorLog = Array.isArray(current.log) ? current.log : [];
    next.log = [...priorLog, logLine];
  }
  // `succeeded` is reserved/SPA-inferred (see HubUpgradeStatusPhase) — the
  // helper can't reliably write it before the hub bounce, so in practice only
  // `failed` reaches this branch. Both terminal phases stamp `finished_at`.
  if (patch.phase === "succeeded" || patch.phase === "failed") {
    next.finished_at = new Date().toISOString();
  }
  writeHubUpgradeStatus(configDir, next);
}
|