@openparachute/hub 0.6.2 → 0.6.3-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/api-modules-ops.test.ts +359 -3
- package/src/__tests__/api-modules.test.ts +54 -0
- package/src/__tests__/hub-unit.test.ts +574 -0
- package/src/__tests__/init.test.ts +219 -2
- package/src/__tests__/lifecycle.test.ts +423 -0
- package/src/__tests__/managed-unit.test.ts +575 -0
- package/src/__tests__/module-ops-client.test.ts +556 -0
- package/src/__tests__/port-probe.test.ts +23 -0
- package/src/__tests__/setup-wizard.test.ts +130 -0
- package/src/__tests__/status-supervisor.test.ts +569 -0
- package/src/__tests__/supervisor.test.ts +471 -6
- package/src/api-modules-ops.ts +221 -0
- package/src/api-modules.ts +18 -2
- package/src/cli.ts +14 -4
- package/src/cloudflare/connector-service.ts +117 -322
- package/src/commands/init.ts +225 -12
- package/src/commands/lifecycle.ts +366 -38
- package/src/commands/serve-boot.ts +71 -25
- package/src/commands/status.ts +596 -49
- package/src/hub-server.ts +11 -0
- package/src/hub-unit.ts +735 -0
- package/src/managed-unit.ts +674 -0
- package/src/module-ops-client.ts +457 -0
- package/src/port-probe.ts +50 -0
- package/src/setup-wizard.ts +80 -1
- package/src/supervisor.ts +360 -14
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { existsSync, openSync, readFileSync } from "node:fs";
|
|
2
|
-
import { Socket } from "node:net";
|
|
3
2
|
import { join } from "node:path";
|
|
4
3
|
import {
|
|
5
4
|
MissingDependencyError,
|
|
@@ -20,8 +19,30 @@ import {
|
|
|
20
19
|
} from "../hub-control.ts";
|
|
21
20
|
import { hubDbPath, openHubDb } from "../hub-db.ts";
|
|
22
21
|
import { HUB_ORIGIN_ENV, deriveHubOrigin } from "../hub-origin.ts";
|
|
22
|
+
import {
|
|
23
|
+
type EnsureHubUnitOpts,
|
|
24
|
+
type EnsureHubUnitResult,
|
|
25
|
+
HUB_UNIT_DEFAULT_PORT,
|
|
26
|
+
type HubUnitDeps,
|
|
27
|
+
type HubUnitManagerOpResult,
|
|
28
|
+
defaultHubUnitDeps,
|
|
29
|
+
ensureHubUnit as ensureHubUnitImpl,
|
|
30
|
+
isHubUnitInstalled,
|
|
31
|
+
restartHubUnit as restartHubUnitImpl,
|
|
32
|
+
stopHubUnit as stopHubUnitImpl,
|
|
33
|
+
} from "../hub-unit.ts";
|
|
23
34
|
import { ModuleManifestError, readModuleManifest } from "../module-manifest.ts";
|
|
35
|
+
import {
|
|
36
|
+
type DriveModuleOpDeps,
|
|
37
|
+
type ModuleOp,
|
|
38
|
+
ModuleOpHttpError,
|
|
39
|
+
type ModuleOpResult,
|
|
40
|
+
NoOperatorTokenError,
|
|
41
|
+
OperatorTokenExpiredError,
|
|
42
|
+
driveModuleOp as driveModuleOpImpl,
|
|
43
|
+
} from "../module-ops-client.ts";
|
|
24
44
|
import { type OperatorIssuerHealStatus, selfHealOperatorTokenIssuer } from "../operator-token.ts";
|
|
45
|
+
import { type PortListeningFn, defaultPortListening } from "../port-probe.ts";
|
|
25
46
|
import {
|
|
26
47
|
type AliveFn,
|
|
27
48
|
clearPid,
|
|
@@ -98,42 +119,16 @@ export type KillFn = (pid: number, signal: NodeJS.Signals | number) => void;
|
|
|
98
119
|
export type SleepFn = (ms: number) => Promise<void>;
|
|
99
120
|
|
|
100
121
|
/**
|
|
101
|
-
*
|
|
102
|
-
*
|
|
103
|
-
*
|
|
104
|
-
*
|
|
105
|
-
*
|
|
106
|
-
*
|
|
107
|
-
*
|
|
108
|
-
*
|
|
109
|
-
* Tests inject a deterministic stub; production uses `defaultPortListening`.
|
|
122
|
+
* Port-readiness probe seam + its production impl now live in `port-probe.ts`
|
|
123
|
+
* (design 2026-06-01 §6.5) so the supervisor can share the exact same TCP
|
|
124
|
+
* connect-probe without dragging lifecycle's heavy import graph. Re-exported
|
|
125
|
+
* here so this module's public API (and its tests) are unchanged. Pairs with
|
|
126
|
+
* the spawn-then-die settle (hub#194) to catch the alive-but-never-bound shape
|
|
127
|
+
* (hub#487): a service that clears the liveness check but never binds its port
|
|
128
|
+
* because it's already held — `alive(pid)` says "running" while `status` shows
|
|
129
|
+
* it inactive because nothing answers on the port.
|
|
110
130
|
*/
|
|
111
|
-
export type PortListeningFn
|
|
112
|
-
|
|
113
|
-
/**
|
|
114
|
-
* Connect-probe: open a TCP socket to 127.0.0.1:<port> and see if it's
|
|
115
|
-
* accepted. A successful connect means *something* is listening; we close
|
|
116
|
-
* immediately. Connection refused / timeout means nothing is bound yet.
|
|
117
|
-
* `node:net` rather than `Bun.connect` because the latter has no clean
|
|
118
|
-
* "connection refused → false" without a custom socket handler, and the net
|
|
119
|
-
* Socket's `error`/`connect` events map directly onto the boolean we want.
|
|
120
|
-
*/
|
|
121
|
-
export const defaultPortListening: PortListeningFn = (port) =>
|
|
122
|
-
new Promise((resolve) => {
|
|
123
|
-
const socket = new Socket();
|
|
124
|
-
let settled = false;
|
|
125
|
-
const done = (listening: boolean) => {
|
|
126
|
-
if (settled) return;
|
|
127
|
-
settled = true;
|
|
128
|
-
socket.destroy();
|
|
129
|
-
resolve(listening);
|
|
130
|
-
};
|
|
131
|
-
socket.setTimeout(1000);
|
|
132
|
-
socket.once("connect", () => done(true));
|
|
133
|
-
socket.once("timeout", () => done(false));
|
|
134
|
-
socket.once("error", () => done(false));
|
|
135
|
-
socket.connect(port, "127.0.0.1");
|
|
136
|
-
});
|
|
131
|
+
export { type PortListeningFn, defaultPortListening };
|
|
137
132
|
|
|
138
133
|
/**
|
|
139
134
|
* Group-aware liveness: returns true if the process group (pgid == pid)
|
|
@@ -311,6 +306,58 @@ export interface LifecycleOpts {
|
|
|
311
306
|
log: (line: string) => void;
|
|
312
307
|
}) => Promise<OperatorIssuerHealStatus>;
|
|
313
308
|
};
|
|
309
|
+
/**
|
|
310
|
+
* Phase 3b supervisor-path seams (design §3.3). When a hub UNIT is installed
|
|
311
|
+
* (launchd/systemd/container — detected via {@link isHubUnitInstalled}),
|
|
312
|
+
* `start/stop/restart` drive the RUNNING hub's in-process Supervisor over the
|
|
313
|
+
* loopback module-ops API instead of spawning detached pidfile daemons. The
|
|
314
|
+
* detached arm (`spawner`/`hub.ensureRunning`/`hub.stop`) remains the no-unit
|
|
315
|
+
* fallback until Phase 5 retires it.
|
|
316
|
+
*
|
|
317
|
+
* Everything here is injectable so tests can (a) force the unit-installed
|
|
318
|
+
* branch without a real launchd/systemd, and (b) assert the module-ops /
|
|
319
|
+
* manager calls without a live hub. Production wires the real
|
|
320
|
+
* {@link driveModuleOp} / {@link ensureHubUnit} / {@link stopHubUnit} /
|
|
321
|
+
* {@link restartHubUnit} against an opened hub.db + the resolved hub origin.
|
|
322
|
+
*/
|
|
323
|
+
supervisor?: {
|
|
324
|
+
/**
|
|
325
|
+
* Is a hub unit installed (the dual-dispatch discriminant)? Production
|
|
326
|
+
* uses `isHubUnitInstalled(hubUnitDeps)`. Tests set this `true`/`false`
|
|
327
|
+
* directly to pick the branch deterministically. When set, it wins over
|
|
328
|
+
* the `hubUnitDeps`-derived detection.
|
|
329
|
+
*/
|
|
330
|
+
unitInstalled?: boolean;
|
|
331
|
+
/** Deps for the real `isHubUnitInstalled` probe + the hub-unit manager ops. */
|
|
332
|
+
hubUnitDeps?: HubUnitDeps;
|
|
333
|
+
/** Drive a per-module op against the running hub (reads operator.token). */
|
|
334
|
+
driveModuleOp?: (
|
|
335
|
+
short: string,
|
|
336
|
+
op: ModuleOp,
|
|
337
|
+
deps: DriveModuleOpDeps,
|
|
338
|
+
) => Promise<ModuleOpResult>;
|
|
339
|
+
/** Ensure the hub unit is up before a module op (§3.2). */
|
|
340
|
+
ensureHubUnit?: (opts: EnsureHubUnitOpts) => Promise<EnsureHubUnitResult>;
|
|
341
|
+
/** Stop the hub unit via the platform manager (NEVER a PID signal, §3.3). */
|
|
342
|
+
stopHubUnit?: (deps: HubUnitDeps) => HubUnitManagerOpResult;
|
|
343
|
+
/** Restart the hub unit via the platform manager (NEVER a PID signal, §3.3). */
|
|
344
|
+
restartHubUnit?: (deps: HubUnitDeps) => HubUnitManagerOpResult;
|
|
345
|
+
/**
|
|
346
|
+
* Probe whether the loopback hub answers `/health`. Used by `stop <svc>`:
|
|
347
|
+
* if the hub is down, the supervised module is already down (children die
|
|
348
|
+
* with the hub) → report "already stopped" WITHOUT starting the hub.
|
|
349
|
+
* Production reuses the hub-unit deps' `probeHealth`.
|
|
350
|
+
*/
|
|
351
|
+
probeHubHealth?: (port: number) => Promise<boolean>;
|
|
352
|
+
/**
|
|
353
|
+
* Open the hub DB used to validate/auto-rotate the operator token in
|
|
354
|
+
* `driveModuleOp`. Production opens `<configDir>/hub.db`; tests inject an
|
|
355
|
+
* in-memory/seeded db. Returns a handle the caller closes.
|
|
356
|
+
*/
|
|
357
|
+
openDb?: (configDir: string) => import("bun:sqlite").Database;
|
|
358
|
+
/** Loopback hub base URL override (default derives from the hub port). */
|
|
359
|
+
baseUrl?: string;
|
|
360
|
+
};
|
|
314
361
|
}
|
|
315
362
|
|
|
316
363
|
interface Resolved {
|
|
@@ -337,6 +384,21 @@ interface Resolved {
|
|
|
337
384
|
configDir: string;
|
|
338
385
|
log: (line: string) => void;
|
|
339
386
|
}) => Promise<OperatorIssuerHealStatus>;
|
|
387
|
+
sup: ResolvedSupervisor;
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
/**
 * Resolved Phase 3b supervisor-path seams (see `LifecycleOpts.supervisor`).
 *
 * Mirrors the optional `supervisor` block on `LifecycleOpts`, but with every
 * seam filled in (only `baseUrl` may still be `undefined`), so the verb
 * handlers can call them without re-checking for absence.
 */
interface ResolvedSupervisor {
  /** Whether a hub unit is installed — the dual-dispatch discriminant. */
  unitInstalled: boolean;
  /** Deps for the `isHubUnitInstalled` probe and the hub-unit manager ops. */
  hubUnitDeps: HubUnitDeps;
  /** Drives a per-module op (`start`/`stop`/`restart`) against the running hub. */
  driveModuleOp: (short: string, op: ModuleOp, deps: DriveModuleOpDeps) => Promise<ModuleOpResult>;
  /** Ensures the hub unit is up before a module op is driven. */
  ensureHubUnit: (opts: EnsureHubUnitOpts) => Promise<EnsureHubUnitResult>;
  /** Stops the hub unit via the platform manager (never a PID signal). */
  stopHubUnit: (deps: HubUnitDeps) => HubUnitManagerOpResult;
  /** Restarts the hub unit via the platform manager (never a PID signal). */
  restartHubUnit: (deps: HubUnitDeps) => HubUnitManagerOpResult;
  /** Probes whether the loopback hub answers `/health` on the given port. */
  probeHubHealth: (port: number) => Promise<boolean>;
  /** Opens the hub DB used to validate the operator token; the caller closes it. */
  openDb: (configDir: string) => import("bun:sqlite").Database;
  /** Loopback hub base URL override; `undefined` leaves the default in place. */
  baseUrl: string | undefined;
}
|
|
341
403
|
|
|
342
404
|
/**
|
|
@@ -404,9 +466,64 @@ function resolve(opts: LifecycleOpts): Resolved {
|
|
|
404
466
|
ensureHub: opts.hub?.ensureRunning ?? ensureHubRunning,
|
|
405
467
|
stopHubFn: opts.hub?.stop ?? stopHub,
|
|
406
468
|
selfHealOperatorTokenFn: opts.hub?.selfHealOperatorToken ?? defaultSelfHealOperatorToken,
|
|
469
|
+
sup: resolveSupervisor(opts.supervisor),
|
|
470
|
+
};
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
/**
 * Fill in the Phase 3b supervisor-path seams (the dual-dispatch arm).
 *
 * How `unitInstalled` — the switch between the two arms — is decided:
 *   - `supervisor` block PRESENT (even `{}`, which the production CLI dispatch
 *     passes): an explicit `unitInstalled` wins; otherwise the real
 *     `isHubUnitInstalled` probe runs over the hub-unit deps.
 *   - `supervisor` block ABSENT (the shape of every pre-existing lifecycle
 *     test): always `false`, i.e. the detached arm, with no probe at all. This
 *     keeps those tests deterministic even when the test host happens to have
 *     a real hub unit installed.
 *
 * Every other seam falls back to its production implementation when the caller
 * did not inject one.
 */
function resolveSupervisor(opts: LifecycleOpts["supervisor"]): ResolvedSupervisor {
  const hubUnitDeps = opts?.hubUnitDeps ?? defaultHubUnitDeps;

  // Only touch the real filesystem when the caller opted into the new path.
  let unitInstalled = false;
  if (opts !== undefined) {
    unitInstalled = opts.unitInstalled ?? isHubUnitInstalled(hubUnitDeps);
  }

  // Production default: open `<configDir>/hub.db`.
  const openDefaultDb = (configDir: string) => openHubDb(hubDbPath(configDir));

  return {
    unitInstalled,
    hubUnitDeps,
    driveModuleOp: opts?.driveModuleOp ?? driveModuleOpImpl,
    ensureHubUnit: opts?.ensureHubUnit ?? ensureHubUnitImpl,
    stopHubUnit: opts?.stopHubUnit ?? stopHubUnitImpl,
    restartHubUnit: opts?.restartHubUnit ?? restartHubUnitImpl,
    probeHubHealth: opts?.probeHubHealth ?? hubUnitDeps.probeHealth,
    openDb: opts?.openDb ?? openDefaultDb,
    baseUrl: opts?.baseUrl,
  };
}
|
|
409
508
|
|
|
509
|
+
/**
 * Pick the issuer (`iss`) the operator token is validated against on the
 * supervisor path.
 *
 * {@link resolveHubOrigin} yields `undefined` for pure loopback so the spawn
 * env can omit PARACHUTE_HUB_ORIGIN, but an operator token ALWAYS carries an
 * `iss`. So when no hub origin is known this falls back to the loopback origin
 * built from `readHubPort(configDir) ?? HUB_UNIT_DEFAULT_PORT`.
 *
 * Mirrors `commands/auth.ts`'s `resolveHubIssuer`, so the issuer validated here
 * matches the one `auth rotate-operator` minted under. The two fallbacks are
 * spelled differently (auth.ts uses `127.0.0.1:${HUB_DEFAULT_PORT}`) but both
 * resolve to 1939 under canonical-ports today.
 * TODO: consolidate with auth.ts:resolveHubIssuer to prevent drift.
 *
 * @param hubOrigin Resolved hub origin, or `undefined`/empty when pure loopback.
 * @param configDir Config dir the hub port is read from for the fallback.
 * @returns The issuer URL to validate the operator token against.
 */
function resolveOperatorTokenIssuer(hubOrigin: string | undefined, configDir: string): string {
  // An empty origin is treated the same as a missing one.
  if (hubOrigin !== undefined && hubOrigin !== "") {
    return hubOrigin;
  }
  const loopbackPort = readHubPort(configDir) ?? HUB_UNIT_DEFAULT_PORT;
  return `http://127.0.0.1:${loopbackPort}`;
}
|
|
526
|
+
|
|
410
527
|
/**
|
|
411
528
|
* Source of truth order for `PARACHUTE_HUB_ORIGIN`:
|
|
412
529
|
* 1. explicit override (flag / opt)
|
|
@@ -574,6 +691,12 @@ async function resolveTargets(
|
|
|
574
691
|
|
|
575
692
|
export async function start(svc: string | undefined, opts: LifecycleOpts = {}): Promise<number> {
|
|
576
693
|
const r = resolve(opts);
|
|
694
|
+
// Phase 3b dual-dispatch (design §3.3). On a box with a hub unit installed,
|
|
695
|
+
// drive the RUNNING supervisor; otherwise fall through to the unchanged
|
|
696
|
+
// detached arm below. Phase 5 deletes the else-arm — keep this a clean
|
|
697
|
+
// top-level branch so that deletion is a one-liner.
|
|
698
|
+
if (r.sup.unitInstalled) return startViaSupervisor(svc, r);
|
|
699
|
+
// --- no-unit detached fallback (unchanged; preserved until Phase 5) ---
|
|
577
700
|
if (svc === HUB_SVC) return startHubSvc(r);
|
|
578
701
|
const picked = await resolveTargets(svc, r.manifestPath);
|
|
579
702
|
if ("error" in picked) {
|
|
@@ -815,6 +938,10 @@ function persistVaultHubOriginForStart(r: Resolved): void {
|
|
|
815
938
|
|
|
816
939
|
export async function stop(svc: string | undefined, opts: LifecycleOpts = {}): Promise<number> {
|
|
817
940
|
const r = resolve(opts);
|
|
941
|
+
// Phase 3b dual-dispatch (design §3.3). Unit-installed → drive the supervisor
|
|
942
|
+
// / platform manager; else the unchanged detached arm below.
|
|
943
|
+
if (r.sup.unitInstalled) return stopViaSupervisor(svc, r);
|
|
944
|
+
// --- no-unit detached fallback (unchanged; preserved until Phase 5) ---
|
|
818
945
|
if (svc === HUB_SVC) return stopHubSvc(r);
|
|
819
946
|
const picked = await resolveTargets(svc, r.manifestPath);
|
|
820
947
|
if ("error" in picked) {
|
|
@@ -866,9 +993,210 @@ export async function stop(svc: string | undefined, opts: LifecycleOpts = {}): P
|
|
|
866
993
|
}
|
|
867
994
|
|
|
868
995
|
/**
 * `restart [svc]`: Phase 3b dual-dispatch entry point (design §3.3).
 *
 * With a hub unit installed, the restart is driven through the supervisor /
 * platform manager (including the 404-fallthrough for modules, §6.2).
 * Otherwise it is the unchanged detached stop-then-start, preserved until
 * Phase 5.
 *
 * @returns The CLI exit code (0 on success).
 */
export async function restart(svc: string | undefined, opts: LifecycleOpts = {}): Promise<number> {
  const resolved = resolve(opts);
  if (resolved.sup.unitInstalled) {
    return restartViaSupervisor(svc, resolved);
  }

  // --- no-unit detached fallback ---
  // No-unit is already established above, so hand the inner stop/start
  // `supervisor: undefined`: their own `resolveSupervisor` then short-circuits
  // to `unitInstalled: false` without re-probing `isHubUnitInstalled`. Both
  // would take the detached arm anyway; this only drops the redundant probes.
  const detached = { ...opts, supervisor: undefined };

  const stopCode = await stop(svc, detached);
  return stopCode !== 0 ? stopCode : await start(svc, detached);
}
|
|
1012
|
+
|
|
1013
|
+
// ---------------------------------------------------------------------------
|
|
1014
|
+
// Phase 3b supervisor-path verb dispatch (design §3.3).
|
|
1015
|
+
//
|
|
1016
|
+
// These are the NEW arm of the dual-dispatch: when a hub unit is installed,
|
|
1017
|
+
// `start/stop/restart` drive the RUNNING hub's in-process Supervisor over the
|
|
1018
|
+
// loopback module-ops API (per-module verbs) or the platform manager (hub
|
|
1019
|
+
// verbs / no-svc). The detached arm above is untouched and Phase 5 deletes it
|
|
1020
|
+
// + this comment block's `unitInstalled` guard, collapsing to this path only.
|
|
1021
|
+
// ---------------------------------------------------------------------------
|
|
1022
|
+
|
|
1023
|
+
/**
 * Drive a single module-op against the running hub, mapping the module-ops
 * client's errors to actionable CLI output (§3.1).
 *
 * Opens hub.db (used to validate / auto-rotate the operator token), resolves
 * the issuer the token was minted under, runs the op, and always closes the db
 * it opened.
 *
 * Nothing is re-thrown from the db-open / op-drive phase:
 *   - `NoOperatorTokenError` / `OperatorTokenExpiredError` are logged as-is
 *     (their messages are already actionable) → `{ failed: true }`.
 *   - `ModuleOpHttpError` is RETURNED un-logged as `httpError` so the caller
 *     can branch on it (restart's 404-fallthrough, the `not_installed` hint);
 *     callers that don't branch print it via `surfaceModuleOpHttpError`.
 *   - Anything else — including a failure to open hub.db — is logged by
 *     message → `{ failed: true }`.
 *
 * @param short Module short name the op targets.
 * @param op    The module op to drive.
 * @param r     Resolved lifecycle options (supervisor seams, log, config dir).
 * @returns `{ result, failed: false }` on success, otherwise `failed: true`
 *          with `httpError` set when the failure was a typed HTTP error.
 */
async function driveSupervisorOp(
  short: string,
  op: ModuleOp,
  r: Resolved,
): Promise<{ result?: ModuleOpResult; httpError?: ModuleOpHttpError; failed: boolean }> {
  const issuer = resolveOperatorTokenIssuer(r.hubOrigin, r.configDir);
  // Declared out here so `finally` can reach it, but OPENED inside the `try`:
  // an open failure (missing / locked / corrupt hub.db) must be surfaced like
  // any other error instead of escaping as an unhandled rejection.
  let db: import("bun:sqlite").Database | undefined;
  try {
    db = r.sup.openDb(r.configDir);
    const deps: DriveModuleOpDeps = {
      db,
      issuer,
      configDir: r.configDir,
      // Only pass `baseUrl` when overridden so the client's default applies.
      ...(r.sup.baseUrl !== undefined ? { baseUrl: r.sup.baseUrl } : {}),
    };
    const result = await r.sup.driveModuleOp(short, op, deps);
    return { result, failed: false };
  } catch (err) {
    if (err instanceof NoOperatorTokenError || err instanceof OperatorTokenExpiredError) {
      // Surface the already-actionable message (don't raw-throw a 401, §3.1).
      r.log(`✗ ${short}: ${err.message}`);
      return { failed: true };
    }
    if (err instanceof ModuleOpHttpError) {
      // Hand the typed HTTP error back so the caller decides how to report it.
      return { httpError: err, failed: true };
    }
    // Unknown error — surface its message rather than crashing the CLI.
    r.log(`✗ ${short}: ${err instanceof Error ? err.message : String(err)}`);
    return { failed: true };
  } finally {
    // `db` is still undefined when the open itself failed.
    db?.close();
  }
}
|
|
1067
|
+
|
|
1068
|
+
/**
 * Log a module-ops HTTP error for `short`.
 *
 * A 400 `not_installed` gets a dedicated install-then-start hint; every other
 * error is printed with its own message.
 */
function surfaceModuleOpHttpError(short: string, err: ModuleOpHttpError, r: Resolved): void {
  const isNotInstalled = err.status === 400 && err.code === "not_installed";
  const line = isNotInstalled
    ? `✗ ${short} is not installed — run \`parachute install ${short}\` first, then \`parachute start ${short}\`.`
    : `✗ ${short}: ${err.message}`;
  r.log(line);
}
|
|
1078
|
+
|
|
1079
|
+
/**
 * Bring the hub unit up (if it isn't already) and reduce `ensureHubUnit`'s
 * structured outcome to a yes/no for the CLI.
 *
 * @returns `true` for the `already-up` / `started` outcomes; `false` for any
 *          other outcome, after logging every message the result carries.
 */
async function ensureHubForOp(r: Resolved, port: number): Promise<boolean> {
  const ensured = await r.sup.ensureHubUnit({ port, deps: r.sup.hubUnitDeps, log: r.log });

  if (ensured.outcome !== "already-up" && ensured.outcome !== "started") {
    // Defensive / unreachable under dual-dispatch for the `no-unit` outcome:
    // this helper only runs on the supervisor path, which is gated on
    // `unitInstalled === true` — the same `isHubUnitInstalled` probe that makes
    // `ensureHubUnit` report `no-unit` only when it is false. Harmless to keep;
    // a candidate for removal in the Phase 5 bridge-collapse, so the deletion
    // sweep should not overlook this branch.
    for (const message of ensured.messages) r.log(message);
    return false;
  }

  return true;
}
|
|
1103
|
+
|
|
1104
|
+
/**
 * `start <svc>` / `start` (no svc) on the supervisor arm (§3.3).
 *
 * Every form first ensures the hub unit is up. For `start hub` and bare
 * `start` that is the whole job — the hub transitively boots every installed
 * module from services.json via bootSupervisedModules. For `start <svc>` the
 * hub must be up before the op can be driven (chicken-and-egg, §3.2); then a
 * pure supervisor start of the already-installed module follows.
 *
 * @returns The CLI exit code (0 on success, 1 on any surfaced failure).
 */
async function startViaSupervisor(svc: string | undefined, r: Resolved): Promise<number> {
  const port = readHubPort(r.configDir) ?? HUB_UNIT_DEFAULT_PORT;

  const hubIsUp = await ensureHubForOp(r, port);
  if (!hubIsUp) return 1;

  if (svc === undefined || svc === HUB_SVC) {
    r.log(svc === HUB_SVC ? "✓ hub is up." : "✓ hub is up (all installed modules booted).");
    return 0;
  }

  const started = await driveSupervisorOp(svc, "start", r);
  if (started.httpError) {
    surfaceModuleOpHttpError(svc, started.httpError, r);
    return 1;
  }
  if (started.failed || !started.result) return 1;

  r.log(`✓ ${svc} started.`);
  return 0;
}
|
|
1127
|
+
|
|
1128
|
+
/**
 * `stop <svc>` / `stop` (no svc) on the supervisor arm (§3.3).
 *
 * `stop hub` and bare `stop` stop the hub UNIT through the platform manager —
 * a PID signal would be undone by launchd KeepAlive / systemd Restart=always
 * (R17) — and the supervised children die with the hub.
 *
 * `stop <svc>` never starts the hub just to stop one module: if the hub does
 * not answer its health probe, the module is already down and that is reported
 * as success. Only a reachable hub is asked to stop the module.
 *
 * @returns The CLI exit code (0 on success, 1 on any surfaced failure).
 */
async function stopViaSupervisor(svc: string | undefined, r: Resolved): Promise<number> {
  const port = readHubPort(r.configDir) ?? HUB_UNIT_DEFAULT_PORT;

  if (svc === undefined || svc === HUB_SVC) {
    const managerRes = r.sup.stopHubUnit(r.sup.hubUnitDeps);
    for (const message of managerRes.messages) r.log(message);
    if (managerRes.outcome !== "ok") return 1;
    r.log("✓ hub stopped (all supervised modules stopped with it).");
    return 0;
  }

  const hubReachable = await r.sup.probeHubHealth(port);
  if (!hubReachable) {
    r.log(`${svc} already stopped (the hub isn't running, so its modules are down).`);
    return 0;
  }

  const stopped = await driveSupervisorOp(svc, "stop", r);
  if (stopped.httpError) {
    surfaceModuleOpHttpError(svc, stopped.httpError, r);
    return 1;
  }
  if (stopped.failed || !stopped.result) return 1;

  r.log(`✓ ${svc} stopped.`);
  return 0;
}
|
|
1160
|
+
|
|
1161
|
+
/**
 * `restart <svc>` / `restart` (no svc) on the supervisor arm (§3.3).
 *
 * `restart hub` and bare `restart` restart the hub UNIT through the platform
 * manager (never a PID signal, R17). There is no per-module fan-out, because
 * restarting the hub re-boots all modules anyway.
 *
 * `restart <svc>` ensures the hub is up and drives a supervisor restart. A 404
 * `not_supervised` reply (module crashed out of budget, skipped at boot, or
 * installed out-of-band) falls through to a plain `start` (§6.2), so restart
 * stays total over module state like the detached stop+start.
 *
 * @returns The CLI exit code (0 on success, 1 on any surfaced failure).
 */
async function restartViaSupervisor(svc: string | undefined, r: Resolved): Promise<number> {
  if (svc === undefined || svc === HUB_SVC) {
    const managerRes = r.sup.restartHubUnit(r.sup.hubUnitDeps);
    for (const message of managerRes.messages) r.log(message);
    if (managerRes.outcome !== "ok") return 1;
    r.log("✓ hub restarted (all modules re-booted).");
    return 0;
  }

  const port = readHubPort(r.configDir) ?? HUB_UNIT_DEFAULT_PORT;
  const hubIsUp = await ensureHubForOp(r, port);
  if (!hubIsUp) return 1;

  const restarted = await driveSupervisorOp(svc, "restart", r);
  const restartErr = restarted.httpError;

  // Happy path (or a failure that was already logged): no typed HTTP error.
  if (!restartErr) {
    if (restarted.failed || !restarted.result) return 1;
    r.log(`✓ ${svc} restarted.`);
    return 0;
  }

  // Any HTTP error other than 404 `not_supervised` is final.
  if (restartErr.status !== 404 || restartErr.code !== "not_supervised") {
    surfaceModuleOpHttpError(svc, restartErr, r);
    return 1;
  }

  // 404-fallthrough (§6.2): the module isn't supervised right now, so start it.
  const started = await driveSupervisorOp(svc, "start", r);
  if (started.httpError) {
    surfaceModuleOpHttpError(svc, started.httpError, r);
    return 1;
  }
  if (started.failed || !started.result) return 1;

  r.log(`✓ ${svc} started.`);
  return 0;
}
|
|
873
1201
|
|
|
874
1202
|
/**
|
|
@@ -47,6 +47,62 @@ export interface BootedModule {
|
|
|
47
47
|
readonly reason?: string;
|
|
48
48
|
}
|
|
49
49
|
|
|
50
|
+
/**
 * Shape of the request that `buildModuleSpawnRequest` produces and that is
 * handed to `supervisor.start`.
 */
export interface SpawnReqShape {
  /** Module short name. */
  short: string;
  /** Command (argv) used to start the module. */
  cmd: readonly string[];
  /** Working directory for the child; set from the entry's `installDir` when present. */
  cwd?: string;
  /** Environment variables layered for the child (PORT / `.env` / hub origin / extras). */
  env?: Record<string, string>;
}
|
|
56
|
+
|
|
57
|
+
/** Inputs to `buildModuleSpawnRequest` beyond the module entry and its command. */
export interface BuildSpawnRequestOpts {
  /** Config dir ($PARACHUTE_HOME). Used to read the module's per-service `.env`. */
  readonly configDir: string;
  /** Canonical hub origin → child env `PARACHUTE_HUB_ORIGIN`. Skipped when absent. */
  readonly hubOrigin?: string;
  /**
   * Extra env merged on top of the derived env (PORT / .env / HUB_ORIGIN).
   * Wins over all of them. Used by the API `start` handler's test seam +
   * first-boot vault-name pass-through (`spawnEnv`). Empty/absent on the
   * boot path.
   */
  readonly extraEnv?: Record<string, string>;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Build the `Supervisor.start` request for a single module, identically on
 * both the serve-boot path and the `POST /api/modules/:short/start` handler.
 *
 * Env layering (later wins):
 *   1. `PORT` from the services.json `entry.port` — overrides hub's own PORT
 *      so supervised children honor their canonical port assignment
 *      (hub#356/#357).
 *   2. per-service `.env` at `<configDir>/<short>/.env` — operator-configured
 *      values (e.g. scribe provider keys) override the bare PORT.
 *   3. `PARACHUTE_HUB_ORIGIN` = `opts.hubOrigin` — anchors the child's `iss`
 *      expectation to the value hub mints with (hub#365). Skipped when the
 *      origin is absent or empty.
 *   4. `opts.extraEnv` — test seam / first-boot pass-through; wins last.
 *
 * `cwd` is set to `entry.installDir` when present (third-party modules ship
 * relative startCmds that need it; first-party fallbacks use absolute / PATH
 * binaries so cwd is a no-op there).
 *
 * @param short Module short name; also the per-service `.env` subdirectory.
 * @param entry The module's services.json entry (`port`, optional `installDir`).
 * @param cmd   Command (argv) to start the module with.
 * @param opts  Config dir plus optional hub origin and extra env.
 * @returns The spawn request. `env` is always present because it always
 *          carries at least `PORT`.
 */
export function buildModuleSpawnRequest(
  short: string,
  entry: ServiceEntry,
  cmd: readonly string[],
  opts: BuildSpawnRequestOpts,
): SpawnReqShape {
  const fileEnv = readEnvFileValues(join(opts.configDir, short, ".env"));

  // Layers 1 + 2: PORT first so the per-service `.env` can override it.
  const env: Record<string, string> = { PORT: String(entry.port), ...fileEnv };
  // Layer 3: hub origin, only when one is known.
  if (opts.hubOrigin) env[HUB_ORIGIN_ENV] = opts.hubOrigin;
  // Layer 4: caller-supplied extras win over everything above.
  if (opts.extraEnv) Object.assign(env, opts.extraEnv);

  // `env` can never be empty (PORT is unconditional), so it is attached
  // without the former always-true `Object.keys(env).length > 0` guard.
  return {
    short,
    cmd,
    ...(entry.installDir ? { cwd: entry.installDir } : {}),
    env,
  };
}
|
|
105
|
+
|
|
50
106
|
/**
|
|
51
107
|
* Walk services.json, spawn every manageable module via the
|
|
52
108
|
* supervisor. Returns a per-module decision log so the caller can
|
|
@@ -92,32 +148,22 @@ export async function bootSupervisedModules(
|
|
|
92
148
|
continue;
|
|
93
149
|
}
|
|
94
150
|
|
|
95
|
-
// PORT override (hub#357 — third spawn site missed by hub#356)
|
|
96
|
-
//
|
|
97
|
-
//
|
|
98
|
-
//
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
const env: Record<string, string> = { PORT: String(entry.port), ...fileEnv };
|
|
104
|
-
if (opts.hubOrigin) env[HUB_ORIGIN_ENV] = opts.hubOrigin;
|
|
105
|
-
|
|
106
|
-
const req: {
|
|
107
|
-
short: string;
|
|
108
|
-
cmd: readonly string[];
|
|
109
|
-
cwd?: string;
|
|
110
|
-
env?: Record<string, string>;
|
|
111
|
-
} = {
|
|
112
|
-
short,
|
|
113
|
-
cmd,
|
|
114
|
-
};
|
|
115
|
-
// Third-party modules ship clean relative startCmds — cwd:
|
|
116
|
-
// installDir makes them resolve. First-party fallbacks use
|
|
117
|
-
// absolute / PATH binaries so cwd is a no-op there.
|
|
118
|
-
if (entry.installDir) req.cwd = entry.installDir;
|
|
119
|
-
if (Object.keys(env).length > 0) req.env = env;
|
|
151
|
+
// PORT override (hub#357 — third spawn site missed by hub#356), per-service
|
|
152
|
+
// .env merge, and PARACHUTE_HUB_ORIGIN propagation (hub#365) all live in the
|
|
153
|
+
// shared `buildModuleSpawnRequest` so the `POST /api/modules/:short/start`
|
|
154
|
+
// handler builds an identical request (design 2026-06-01 §3.3).
|
|
155
|
+
const req = buildModuleSpawnRequest(short, entry, cmd, {
|
|
156
|
+
configDir: opts.configDir,
|
|
157
|
+
...(opts.hubOrigin !== undefined ? { hubOrigin: opts.hubOrigin } : {}),
|
|
158
|
+
});
|
|
120
159
|
|
|
160
|
+
// Serial await, not Promise.all: `supervisor.start` now carries a bounded
|
|
161
|
+
// post-spawn port-readiness gate (DEFAULT_START_READY_MS), so boot latency
|
|
162
|
+
// is the SUM of each slow-binding module's gate wait before `Bun.serve`
|
|
163
|
+
// comes up. Intentional — sequential boot keeps the start-error/install-card
|
|
164
|
+
// surface ordered and avoids a thundering-herd of port probes. Don't switch
|
|
165
|
+
// to `Promise.all` without accounting for the gate (it'd overlap the waits
|
|
166
|
+
// but also fire N concurrent readiness probes mid-boot).
|
|
121
167
|
await supervisor.start(req);
|
|
122
168
|
log(`[supervisor] ${short}: started (cmd=${cmd.join(" ")}).`);
|
|
123
169
|
results.push({ short, entryName: entry.name, status: "started" });
|