@openparachute/hub 0.6.1 → 0.6.3-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/account-home-ui.test.ts +34 -0
- package/src/__tests__/api-modules-ops.test.ts +359 -3
- package/src/__tests__/api-modules.test.ts +54 -0
- package/src/__tests__/cloudflare-connector-service.test.ts +441 -0
- package/src/__tests__/expose-cloudflare.test.ts +272 -0
- package/src/__tests__/hub-unit.test.ts +574 -0
- package/src/__tests__/init.test.ts +219 -2
- package/src/__tests__/lifecycle.test.ts +423 -0
- package/src/__tests__/managed-unit.test.ts +575 -0
- package/src/__tests__/module-ops-client.test.ts +556 -0
- package/src/__tests__/port-probe.test.ts +23 -0
- package/src/__tests__/setup-wizard.test.ts +130 -0
- package/src/__tests__/status-supervisor.test.ts +569 -0
- package/src/__tests__/supervisor.test.ts +471 -6
- package/src/account-home-ui.ts +4 -1
- package/src/api-modules-ops.ts +221 -0
- package/src/api-modules.ts +18 -2
- package/src/cli.ts +14 -4
- package/src/cloudflare/connector-service.ts +273 -0
- package/src/cloudflare/state.ts +13 -1
- package/src/commands/expose-cloudflare.ts +143 -10
- package/src/commands/init.ts +225 -12
- package/src/commands/lifecycle.ts +366 -38
- package/src/commands/serve-boot.ts +71 -25
- package/src/commands/status.ts +596 -49
- package/src/hub-server.ts +11 -0
- package/src/hub-unit.ts +735 -0
- package/src/managed-unit.ts +674 -0
- package/src/module-ops-client.ts +457 -0
- package/src/port-probe.ts +50 -0
- package/src/setup-wizard.ts +80 -1
- package/src/supervisor.ts +360 -14
package/src/commands/status.ts
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
|
+
import type { Database } from "bun:sqlite";
|
|
1
2
|
import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
|
|
2
3
|
import { HUB_SVC, readHubPort } from "../hub-control.ts";
|
|
4
|
+
import { hubDbPath, openHubDb } from "../hub-db.ts";
|
|
5
|
+
import {
|
|
6
|
+
HUB_UNIT_DEFAULT_PORT,
|
|
7
|
+
type HubUnitDeps,
|
|
8
|
+
type HubUnitState,
|
|
9
|
+
type HubUnitStateResult,
|
|
10
|
+
defaultHubUnitDeps,
|
|
11
|
+
isHubUnitInstalled,
|
|
12
|
+
queryHubUnitState as queryHubUnitStateImpl,
|
|
13
|
+
} from "../hub-unit.ts";
|
|
3
14
|
import {
|
|
4
15
|
type DetectInstallSourceDeps,
|
|
5
16
|
detectHubInstallSource,
|
|
@@ -7,6 +18,13 @@ import {
|
|
|
7
18
|
formatInstallSourceLabel,
|
|
8
19
|
isStale,
|
|
9
20
|
} from "../install-source.ts";
|
|
21
|
+
import {
|
|
22
|
+
type DriveModuleOpDeps,
|
|
23
|
+
type ModuleStatesResult,
|
|
24
|
+
NoOperatorTokenError,
|
|
25
|
+
OperatorTokenExpiredError,
|
|
26
|
+
fetchModuleStates as fetchModuleStatesImpl,
|
|
27
|
+
} from "../module-ops-client.ts";
|
|
10
28
|
import { type AliveFn, defaultAlive, formatUptime, processState } from "../process-state.ts";
|
|
11
29
|
import { canonicalPortForManifest, getSpec, shortNameForManifest } from "../service-spec.ts";
|
|
12
30
|
import { type ServiceEntry, readManifest } from "../services-manifest.ts";
|
|
@@ -34,6 +52,54 @@ export interface StatusOpts {
|
|
|
34
52
|
* source classification doesn't depend on the test runner's location.
|
|
35
53
|
*/
|
|
36
54
|
hubSrcDir?: string;
|
|
55
|
+
/**
|
|
56
|
+
* Phase 3c supervisor-path seams (design §6.4). When a hub UNIT is installed
|
|
57
|
+
* (launchd/systemd/container — detected via {@link isHubUnitInstalled}),
|
|
58
|
+
* `status` reads the hub row from the PLATFORM MANAGER (`queryHubUnitState`)
|
|
59
|
+
* + `/health`, and the module rows from the RUNNING supervisor (`GET
|
|
60
|
+
* /api/modules` via the operator-token→Bearer path). On a legacy detached box
|
|
61
|
+
* (no hub unit) it keeps the EXACT pidfile/`processState` behavior, unchanged
|
|
62
|
+
* until Phase 5 retires it.
|
|
63
|
+
*
|
|
64
|
+
* Everything here is injectable so tests force either arm without a real
|
|
65
|
+
* launchd/systemd/socket/HTTP call. Production wires the real machinery; the
|
|
66
|
+
* read paths are bounded + degrade gracefully on every failure (no manager,
|
|
67
|
+
* hub down, no token, API error) so `status` never hangs or crashes.
|
|
68
|
+
*/
|
|
69
|
+
supervisor?: {
|
|
70
|
+
/**
|
|
71
|
+
* Is a hub unit installed (the dual-dispatch discriminant)? Production uses
|
|
72
|
+
* `isHubUnitInstalled(hubUnitDeps)`. Tests set this `true`/`false` directly
|
|
73
|
+
* to pick the branch deterministically. When set, it wins over the
|
|
74
|
+
* `hubUnitDeps`-derived detection.
|
|
75
|
+
*
|
|
76
|
+
* Defaulting: when the caller OMITS the `supervisor` block entirely (every
|
|
77
|
+
* existing status test), the arm defaults to detached — so those tests stay
|
|
78
|
+
* deterministic regardless of whether the test host has a real hub unit.
|
|
79
|
+
*/
|
|
80
|
+
unitInstalled?: boolean;
|
|
81
|
+
/** Deps for `isHubUnitInstalled` + `queryHubUnitState` + the `/health` probe. */
|
|
82
|
+
hubUnitDeps?: HubUnitDeps;
|
|
83
|
+
/** Query the platform manager for the hub unit's run-state (§6.4 hub row). */
|
|
84
|
+
queryHubUnitState?: (deps: HubUnitDeps) => HubUnitStateResult;
|
|
85
|
+
/**
|
|
86
|
+
* Probe whether the loopback hub answers `/health`. The liveness signal for
|
|
87
|
+
* the hub row (§6.4) AND the gate for reading module states: if the hub is
|
|
88
|
+
* down, skip the API read and show modules degraded. Production reuses the
|
|
89
|
+
* hub-unit deps' bounded `probeHealth`.
|
|
90
|
+
*/
|
|
91
|
+
probeHubHealth?: (port: number) => Promise<boolean>;
|
|
92
|
+
/** Read the running supervisor's module states (§6.4 module rows). */
|
|
93
|
+
fetchModuleStates?: (deps: DriveModuleOpDeps) => Promise<ModuleStatesResult>;
|
|
94
|
+
/**
|
|
95
|
+
* Open the hub DB used to validate/auto-rotate the operator token in
|
|
96
|
+
* `fetchModuleStates`. Production opens `<configDir>/hub.db`; tests inject a
|
|
97
|
+
* seeded db. Returns a handle the caller closes.
|
|
98
|
+
*/
|
|
99
|
+
openDb?: (configDir: string) => Database;
|
|
100
|
+
/** Loopback hub base URL override (default derives from the hub port). */
|
|
101
|
+
baseUrl?: string;
|
|
102
|
+
};
|
|
37
103
|
}
|
|
38
104
|
|
|
39
105
|
export interface ProbeResult {
|
|
@@ -154,6 +220,16 @@ interface StatusRow {
|
|
|
154
220
|
* just showing it inactive. Cleared on the next successful start.
|
|
155
221
|
*/
|
|
156
222
|
startErrorNote?: string;
|
|
223
|
+
/**
|
|
224
|
+
* Hub-row-only manager-context note (Phase 3c, §6.4). Surfaces the platform
|
|
225
|
+
* manager's view when it adds signal the STATE column can't carry:
|
|
226
|
+
* - "container runtime (managed)" on Render/Fly (no on-box manager).
|
|
227
|
+
* - "service manager reports active; /health not answering yet (starting or
|
|
228
|
+
* unhealthy)" when the unit is up but the hub isn't serving.
|
|
229
|
+
* - the manager's failed-unit detail / last-exit code.
|
|
230
|
+
* Printed on a continuation line like the other notes.
|
|
231
|
+
*/
|
|
232
|
+
managerNote?: string;
|
|
157
233
|
}
|
|
158
234
|
|
|
159
235
|
/**
|
|
@@ -170,6 +246,70 @@ function urlForEntry(entry: ServiceEntry, short: string | undefined): string | u
|
|
|
170
246
|
return `http://127.0.0.1:${entry.port}${first}`;
|
|
171
247
|
}
|
|
172
248
|
|
|
249
|
+
/**
 * Manifest-derived slice of a module status row.
 *
 * Returned by `manifestRowBase` and consumed by both row builders (the
 * detached pidfile arm and the Phase 3c supervisor arm), so the two arms agree
 * on URL / drift / source / stale and the persisted start-error note. The
 * run-state columns (STATE / PID / UPTIME) are deliberately NOT part of this
 * shape — each arm fills those in from its own source.
 */
interface ManifestRowBase {
  /** Short module name; `undefined` when the manifest name resolves to none. */
  short: string | undefined;
  /** Loopback URL for the row as computed by `urlForEntry`; may be absent. */
  url: string | undefined;
  /** Set when the entry's port differs from its canonical port assignment. */
  driftWarning?: string;
  /** Human-readable install-source label (`formatInstallSourceLabel`). */
  sourceLabel: string;
  /** Set when `isStale` reports the cached manifest version as stale. */
  staleNote?: string;
  /** One-line summary of the manifest entry's persisted `lastStartError`. */
  manifestStartErrorNote?: string;
}
|
|
269
|
+
|
|
270
|
+
/**
 * Derive the manifest-only fields of a module status row.
 *
 * Reads nothing but the manifest entry and the install-source deps, so both
 * status arms can call it and stay in lockstep.
 *
 * @param entry - The services-manifest entry the row describes.
 * @param installSourceDeps - Deps handed straight to `detectInstallSource`.
 * @returns The short name, URL, install-source label, and any drift / stale /
 *   persisted-start-error notes for the entry.
 */
function manifestRowBase(
  entry: ServiceEntry,
  installSourceDeps: DetectInstallSourceDeps,
): ManifestRowBase {
  // First-party rows map manifest name → short; a row that carries an
  // `installDir` falls back to its own manifest name as the short.
  const canonicalShort = shortNameForManifest(entry.name);
  const installDirShort = entry.installDir ? entry.name : undefined;
  const short = canonicalShort ?? installDirShort;
  const url = urlForEntry(entry, short);

  // Port drift (hub#195): only reported when a canonical port exists for this
  // manifest name AND the entry sits on a different one. Informational only.
  const canonicalPort = canonicalPortForManifest(entry.name);
  let driftWarning: string | undefined;
  if (canonicalPort !== undefined && canonicalPort !== entry.port) {
    driftWarning = `canonical port is ${canonicalPort}`;
  }

  // Install source (hub#243): pass `installDir` along only when the entry has one.
  const detectArgs: { entryName: string; installDir?: string } =
    entry.installDir === undefined
      ? { entryName: entry.name }
      : { entryName: entry.name, installDir: entry.installDir };
  const source = detectInstallSource(detectArgs, installSourceDeps);
  const sourceLabel = formatInstallSourceLabel(source);

  let staleNote: string | undefined;
  if (isStale(entry.version, source)) {
    staleNote = `STALE: services.json cached ${entry.version}; live package.json ${source.livePackageVersion}`;
  }

  // Persisted last-start failure: a missing binary gets the install hint,
  // anything else shows the first line of the recorded description.
  const lastError = entry.lastStartError;
  let manifestStartErrorNote: string | undefined;
  if (lastError !== undefined) {
    manifestStartErrorNote =
      lastError.binary !== undefined
        ? `failed to start: ${lastError.binary} not installed — run \`parachute status\` detail or see /admin/modules for install steps`
        : `failed to start: ${lastError.error_description.split("\n")[0]}`;
  }

  return { short, url, driftWarning, sourceLabel, staleNote, manifestStartErrorNote };
}
|
|
312
|
+
|
|
173
313
|
function hubRow(
|
|
174
314
|
configDir: string,
|
|
175
315
|
alive: AliveFn,
|
|
@@ -217,6 +357,36 @@ export async function status(opts: StatusOpts = {}): Promise<number> {
|
|
|
217
357
|
const hubSrcDir = opts.hubSrcDir ?? import.meta.dir;
|
|
218
358
|
|
|
219
359
|
const manifest = readManifest(manifestPath);
|
|
360
|
+
|
|
361
|
+
// Phase 3c dual-dispatch (design §6.4). On a box with a hub unit installed
|
|
362
|
+
// (launchd/systemd/container), read the hub row from the platform manager +
|
|
363
|
+
// `/health` and the module rows from the RUNNING supervisor; otherwise fall
|
|
364
|
+
// through to the unchanged detached arm below. Phase 5 deletes the else-arm —
|
|
365
|
+
// keep this a clean top-level branch so that deletion is a one-liner.
|
|
366
|
+
//
|
|
367
|
+
// Branched BEFORE the detached arm's empty-manifest early return: on a
|
|
368
|
+
// unit-managed box the hub row is meaningful even with zero modules installed
|
|
369
|
+
// (the hub IS running under a unit), so the supervisor arm renders the hub row
|
|
370
|
+
// + a "no modules" table rather than the detached "No services installed yet."
|
|
371
|
+
const sup = resolveStatusSupervisor(opts.supervisor);
|
|
372
|
+
if (sup.unitInstalled) {
|
|
373
|
+
const rows = await buildSupervisorRows({
|
|
374
|
+
manifest,
|
|
375
|
+
configDir,
|
|
376
|
+
installSourceDeps,
|
|
377
|
+
hubSrcDir,
|
|
378
|
+
sup,
|
|
379
|
+
});
|
|
380
|
+
renderRows(rows, print);
|
|
381
|
+
// The supervisor arm marks a row `healthy: false` + `!skipped` only when the
|
|
382
|
+
// supervisor (or the hub-row manager/health composition) says so (crashed /
|
|
383
|
+
// failing) — same exit contract as the detached arm: a stopped/inactive row
|
|
384
|
+
// is expected (skipped, exit 0), a `failing` one exits 1.
|
|
385
|
+
const anyUnhealthy = rows.some((r) => !r.skipped && !r.healthy);
|
|
386
|
+
return anyUnhealthy ? 1 : 0;
|
|
387
|
+
}
|
|
388
|
+
// --- no-unit detached fallback (unchanged; preserved until Phase 5) ---
|
|
389
|
+
|
|
220
390
|
if (manifest.services.length === 0) {
|
|
221
391
|
print("No services installed yet.");
|
|
222
392
|
print("Try: parachute install vault");
|
|
@@ -235,10 +405,16 @@ export async function status(opts: StatusOpts = {}): Promise<number> {
|
|
|
235
405
|
*/
|
|
236
406
|
const rows: StatusRow[] = await Promise.all(
|
|
237
407
|
manifest.services.map(async (entry) => {
|
|
238
|
-
//
|
|
239
|
-
//
|
|
240
|
-
|
|
241
|
-
|
|
408
|
+
// MANIFEST-derived fields shared with the supervisor arm (port/version/
|
|
409
|
+
// URL/drift/source/stale + the persisted lastStartError note).
|
|
410
|
+
const {
|
|
411
|
+
short,
|
|
412
|
+
url,
|
|
413
|
+
driftWarning,
|
|
414
|
+
sourceLabel,
|
|
415
|
+
staleNote,
|
|
416
|
+
manifestStartErrorNote: startErrorNote,
|
|
417
|
+
} = manifestRowBase(entry, installSourceDeps);
|
|
242
418
|
const proc = short ? processState(short, configDir, alive) : undefined;
|
|
243
419
|
|
|
244
420
|
const pidLabel =
|
|
@@ -246,43 +422,6 @@ export async function status(opts: StatusOpts = {}): Promise<number> {
|
|
|
246
422
|
const uptimeLabel =
|
|
247
423
|
proc?.status === "running" && proc.startedAt ? formatUptime(proc.startedAt, nowDate) : "-";
|
|
248
424
|
|
|
249
|
-
const url = urlForEntry(entry, short);
|
|
250
|
-
|
|
251
|
-
// Canonical-port drift detection (hub#195). Only fires for known
|
|
252
|
-
// first-party services where we have a canonical assignment. Third-party
|
|
253
|
-
// rows have no canonical to compare against. Warning is informational —
|
|
254
|
-
// operators may have moved a service off canonical deliberately.
|
|
255
|
-
// Note: multi-vault instance rows (`parachute-vault-<instance>`) don't
|
|
256
|
-
// match a canonical manifest name, so drift warnings don't fire for
|
|
257
|
-
// them. Intentional — see `canonicalPortForManifest` for the rationale.
|
|
258
|
-
const canonical = canonicalPortForManifest(entry.name);
|
|
259
|
-
const driftWarning =
|
|
260
|
-
canonical !== undefined && canonical !== entry.port
|
|
261
|
-
? `canonical port is ${canonical}`
|
|
262
|
-
: undefined;
|
|
263
|
-
|
|
264
|
-
// Install-source detection (hub#243). One filesystem walk + maybe one
|
|
265
|
-
// `git rev-parse` per row. Failures degrade silently to `unknown` —
|
|
266
|
-
// status output should never error out on a missing checkout dir.
|
|
267
|
-
const detectArgs: { entryName: string; installDir?: string } = { entryName: entry.name };
|
|
268
|
-
if (entry.installDir !== undefined) detectArgs.installDir = entry.installDir;
|
|
269
|
-
const source = detectInstallSource(detectArgs, installSourceDeps);
|
|
270
|
-
const sourceLabel = formatInstallSourceLabel(source);
|
|
271
|
-
const staleNote = isStale(entry.version, source)
|
|
272
|
-
? `STALE: services.json cached ${entry.version}; live package.json ${source.livePackageVersion}`
|
|
273
|
-
: undefined;
|
|
274
|
-
|
|
275
|
-
// Persisted last-start failure (lifecycle preflight wrote a missing-
|
|
276
|
-
// dependency wire). Surface a one-line summary; the full install recipe
|
|
277
|
-
// lives in services.json + the admin SPA card. Keeps `parachute status`
|
|
278
|
-
// scannable while still telling the operator "this is why it's down."
|
|
279
|
-
const startErrorNote =
|
|
280
|
-
entry.lastStartError !== undefined
|
|
281
|
-
? entry.lastStartError.binary !== undefined
|
|
282
|
-
? `failed to start: ${entry.lastStartError.binary} not installed — run \`parachute status\` detail or see /admin/modules for install steps`
|
|
283
|
-
: `failed to start: ${entry.lastStartError.error_description.split("\n")[0]}`
|
|
284
|
-
: undefined;
|
|
285
|
-
|
|
286
425
|
// Only skip probe when we know the process is dead (PID file was
|
|
287
426
|
// present but kill(pid, 0) failed). "unknown" status (no PID file)
|
|
288
427
|
// still probes — externally-managed services should report health.
|
|
@@ -354,6 +493,25 @@ export async function status(opts: StatusOpts = {}): Promise<number> {
|
|
|
354
493
|
const hub = hubRow(configDir, alive, nowDate, hubSrcDir, installSourceDeps);
|
|
355
494
|
if (hub) rows.push(hub);
|
|
356
495
|
|
|
496
|
+
renderRows(rows, print);
|
|
497
|
+
|
|
498
|
+
/**
|
|
499
|
+
* Overall exit: non-zero if any *probed* service is unhealthy. A stopped
|
|
500
|
+
* service is expected ("I haven't started it yet"), not a failure — users
|
|
501
|
+
* want `parachute status` to return 0 after a fresh install before they
|
|
502
|
+
* `parachute start`. Health regressions among running services still 1.
|
|
503
|
+
*/
|
|
504
|
+
const anyUnhealthy = rows.some((r) => !r.skipped && !r.healthy);
|
|
505
|
+
return anyUnhealthy ? 1 : 0;
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
/**
|
|
509
|
+
* Render the status table + continuation lines. Shared by the detached arm and
|
|
510
|
+
* the Phase 3c supervisor arm so the table shape (design-system.md §6 columns +
|
|
511
|
+
* the `→`/`!` continuation prefixes) is identical regardless of where each
|
|
512
|
+
* row's run-state was sourced. Pure over `rows` + the `print` sink.
|
|
513
|
+
*/
|
|
514
|
+
function renderRows(rows: StatusRow[], print: (line: string) => void): void {
|
|
357
515
|
// Header per design-system.md §6 "CLI status column shape":
|
|
358
516
|
// SERVICE PORT VERSION STATE PID UPTIME LATENCY SOURCE
|
|
359
517
|
// Pre-F shape was SERVICE PORT VERSION PROCESS PID UPTIME HEALTH LATENCY
|
|
@@ -397,17 +555,406 @@ export async function status(opts: StatusOpts = {}): Promise<number> {
|
|
|
397
555
|
if (row.stateLabel === "failing" && row.healthDetail !== "-" && row.healthDetail.length > 0) {
|
|
398
556
|
print(` ! probe: ${row.healthDetail}`);
|
|
399
557
|
}
|
|
558
|
+
if (row.managerNote) print(` ! ${row.managerNote}`);
|
|
400
559
|
if (row.driftWarning) print(` ! ${row.driftWarning}`);
|
|
401
560
|
if (row.staleNote) print(` ! ${row.staleNote}`);
|
|
402
561
|
if (row.startErrorNote) print(` ! ${row.startErrorNote}`);
|
|
403
562
|
}
|
|
563
|
+
}
|
|
404
564
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
565
|
+
// ---------------------------------------------------------------------------
|
|
566
|
+
// Phase 3c supervisor-path status (design §6.4).
|
|
567
|
+
//
|
|
568
|
+
// When a hub unit is installed, `status` reads the hub row from the PLATFORM
|
|
569
|
+
// MANAGER (`queryHubUnitState`) + a `/health` probe, and the module rows from
|
|
570
|
+
// the RUNNING supervisor (`GET /api/modules` via the operator-token→Bearer
|
|
571
|
+
// path). Every read is bounded + degrades gracefully — `status` is a
|
|
572
|
+
// diagnostic and must NEVER hang or crash regardless of hub/manager/token
|
|
573
|
+
// state. The detached arm above is untouched; Phase 5 deletes it.
|
|
574
|
+
// ---------------------------------------------------------------------------
|
|
575
|
+
|
|
576
|
+
/**
 * Fully-resolved form of `StatusOpts.supervisor`: every seam is present (only
 * `baseUrl` may still be `undefined`), so the supervisor-arm builders never
 * have to re-apply defaults. Built by `resolveStatusSupervisor`.
 */
interface ResolvedStatusSupervisor {
  /** Dual-dispatch discriminant: `true` selects the supervisor arm. */
  unitInstalled: boolean;
  /** Deps used for hub-unit detection and the manager state query. */
  hubUnitDeps: HubUnitDeps;
  /** Asks the platform manager for the hub unit's run-state. */
  queryHubUnitState: (deps: HubUnitDeps) => HubUnitStateResult;
  /** Resolves `true` when the loopback hub answers `/health` on `port`. */
  probeHubHealth: (port: number) => Promise<boolean>;
  /** Reads module states from the running supervisor. */
  fetchModuleStates: (deps: DriveModuleOpDeps) => Promise<ModuleStatesResult>;
  /** Opens the hub database for `configDir`; the caller closes the handle. */
  openDb: (configDir: string) => Database;
  /** Optional loopback base-URL override forwarded to `fetchModuleStates`. */
  baseUrl: string | undefined;
}
|
|
587
|
+
|
|
588
|
+
/**
 * Fill in defaults for the Phase 3c supervisor seams.
 *
 * Arm selection:
 *   - `opts` omitted entirely → detached arm (`unitInstalled: false`), with no
 *     call to `isHubUnitInstalled`, so callers that pass no block stay
 *     deterministic.
 *   - `opts` present → the explicit `unitInstalled` flag if given, otherwise
 *     the result of `isHubUnitInstalled` over the resolved hub-unit deps.
 *
 * Every other seam falls back to the production implementation when the caller
 * did not inject one.
 *
 * @param opts - The optional `supervisor` block from `StatusOpts`.
 * @returns The seams with all defaults applied.
 */
function resolveStatusSupervisor(opts: StatusOpts["supervisor"]): ResolvedStatusSupervisor {
  const hubUnitDeps = opts?.hubUnitDeps ?? defaultHubUnitDeps;

  let unitInstalled = false;
  if (opts !== undefined) {
    unitInstalled = opts.unitInstalled ?? isHubUnitInstalled(hubUnitDeps);
  }

  // Production default: open the hub database located under the config dir.
  const openDefaultDb = (configDir: string): Database => openHubDb(hubDbPath(configDir));

  return {
    unitInstalled,
    hubUnitDeps,
    queryHubUnitState: opts?.queryHubUnitState ?? queryHubUnitStateImpl,
    probeHubHealth: opts?.probeHubHealth ?? hubUnitDeps.probeHealth,
    fetchModuleStates: opts?.fetchModuleStates ?? fetchModuleStatesImpl,
    openDb: opts?.openDb ?? openDefaultDb,
    baseUrl: opts?.baseUrl,
  };
}
|
|
610
|
+
|
|
611
|
+
/**
 * Build the issuer URL the operator token is validated against: the hub's
 * loopback origin on the port recorded for `configDir`, or on
 * `HUB_UNIT_DEFAULT_PORT` when `readHubPort` yields nothing.
 *
 * NOTE(review): intended to use the same fallback as lifecycle.ts's
 * `resolveOperatorTokenIssuer` so both agree with the issuer the token was
 * minted under — confirm against that file if either side changes.
 *
 * @param configDir - Config directory the hub port is read from.
 * @returns `http://127.0.0.1:<port>`.
 */
function statusOperatorTokenIssuer(configDir: string): string {
  const hubPort = readHubPort(configDir) ?? HUB_UNIT_DEFAULT_PORT;
  return `http://127.0.0.1:${hubPort}`;
}
|
|
620
|
+
|
|
621
|
+
/**
 * Translate a supervisor module status into the STATE rollup plus the two
 * flags the exit-code check reads.
 *
 *   running               → active   (healthy, counted)
 *   crashed               → failing  (unhealthy, counted → exit 1)
 *   starting / restarting → pending  (healthy, skipped)
 *   anything else / null  → inactive (unhealthy, skipped)
 *
 * A row only fails the overall exit when it is both unhealthy and not skipped,
 * so `crashed` is the single status here that can flip `status` to exit 1.
 *
 * @param status - The supervisor's status string, or `null` when the module is
 *   not tracked by the supervisor.
 * @returns The state label with its `healthy` / `skipped` flags.
 */
function mapSupervisorStatus(status: string | null): {
  stateLabel: StateLabel;
  healthy: boolean;
  skipped: boolean;
} {
  if (status === "running") {
    return { stateLabel: "active", healthy: true, skipped: false };
  }

  if (status === "crashed") {
    return { stateLabel: "failing", healthy: false, skipped: false };
  }

  if (status === "starting" || status === "restarting") {
    // Mid-transition under supervision: shown as `pending` and skipped, so a
    // module that happens to be restarting does not turn the exit code red.
    return { stateLabel: "pending", healthy: true, skipped: true };
  }

  // stopped / null / unrecognised — operator-stopped or never started.
  // `healthy: false` + `skipped: true` is intentional: the module really is
  // not serving (so it must not render as healthy), but a stopped module is an
  // expected condition and must not count as a failure for the exit code.
  return { stateLabel: "inactive", healthy: false, skipped: true };
}
|
|
658
|
+
|
|
659
|
+
/**
 * Turn a supervisor `startError` payload into a one-line status note.
 *
 * The payload is untyped wire data, so each field is checked before use:
 *   - a non-empty string `binary` wins and yields the "not installed" hint;
 *   - otherwise a non-empty string `error_description` yields its first line;
 *   - anything else (non-object, null, missing/empty fields) yields `undefined`.
 *
 * @param startError - The supervisor's start-error value, shape unverified.
 * @returns The note text, or `undefined` when nothing usable is present.
 */
function supervisorStartErrorNote(startError: unknown): string | undefined {
  if (typeof startError !== "object" || startError === null) return undefined;

  const { binary, error_description: description } = startError as {
    binary?: unknown;
    error_description?: unknown;
  };

  const hasBinary = typeof binary === "string" && binary.length > 0;
  if (hasBinary) {
    return `failed to start: ${binary} not installed — see /admin/modules for install steps`;
  }

  const hasDescription = typeof description === "string" && description.length > 0;
  if (hasDescription) {
    // Only the first line: the note must fit on one continuation line.
    const [firstLine] = (description as string).split("\n");
    return `failed to start: ${firstLine}`;
  }

  return undefined;
}
|
|
676
|
+
|
|
677
|
+
/** Inputs for `buildSupervisorRows` (the unit-managed status arm). */
interface BuildSupervisorRowsArgs {
  /** The parsed services manifest; one module row is emitted per service. */
  manifest: ReturnType<typeof readManifest>;
  /** Config directory: source of the hub port and the hub database. */
  configDir: string;
  /** Deps forwarded to install-source detection for every row. */
  installSourceDeps: DetectInstallSourceDeps;
  /** Hub source directory, forwarded to the hub-row builder. */
  hubSrcDir: string;
  /** Resolved supervisor seams (probe, state fetch, db opener, ...). */
  sup: ResolvedStatusSupervisor;
}
|
|
684
|
+
|
|
685
|
+
/**
 * Build the full status rows on a UNIT-MANAGED box (design §6.4): one row per
 * manifest service, sourced from the running supervisor, followed by the hub
 * row composed from the platform manager + `/health`.
 *
 * Degradation ladder (each step keeps `status` printing instead of crashing):
 *   1. `/health` probe throws or fails → hub treated as down; every module row
 *      is `inactive` with a "hub is down" note and no module-state read is
 *      attempted.
 *   2. Hub is up but the hub database cannot be opened, or the module-state
 *      fetch fails → rows fall back to manifest-derived data and a single
 *      "couldn't read live module state" note explains why.
 *   3. Hub is up and the fetch succeeds → rows carry the supervisor's status,
 *      pid and structured start-error.
 *
 * @param args - Manifest, config dir, install-source deps, hub source dir and
 *   the resolved supervisor seams.
 * @returns The module rows in manifest order with the hub row appended last.
 */
async function buildSupervisorRows(args: BuildSupervisorRowsArgs): Promise<StatusRow[]> {
  const { manifest, configDir, installSourceDeps, hubSrcDir, sup } = args;
  const port = readHubPort(configDir) ?? HUB_UNIT_DEFAULT_PORT;

  // Probe the hub once: it's both the hub row's liveness signal AND the gate for
  // whether the supervisor (module states) is reachable. A throwing probe is
  // treated exactly like a failed one.
  let hubHealthy = false;
  try {
    hubHealthy = await sup.probeHubHealth(port);
  } catch {
    hubHealthy = false;
  }

  // Read the running supervisor's module states — ONLY when the hub answers
  // (a down hub would just connection-refuse). Degrade on every failure path:
  // unopenable db, no token, expired token, HTTP error, anything.
  let states: ModuleStatesResult | undefined;
  let moduleReadNote: string | undefined;
  if (hubHealthy) {
    // Opened INSIDE the try: a missing / locked / corrupt hub database must
    // degrade to the read-failure note like any other error, not crash status.
    let db: Database | undefined;
    try {
      db = sup.openDb(configDir);
      states = await sup.fetchModuleStates({
        db,
        issuer: statusOperatorTokenIssuer(configDir),
        configDir,
        ...(sup.baseUrl !== undefined ? { baseUrl: sup.baseUrl } : {}),
      });
    } catch (err) {
      if (err instanceof NoOperatorTokenError || err instanceof OperatorTokenExpiredError) {
        // No / expired operator token: we can't read module run-state, but the
        // hub is up. Show the manifest-derived rows with an actionable note.
        moduleReadNote =
          "couldn't read live module state — run `parachute auth rotate-operator` to mint an operator token";
      } else {
        // db open failure / HTTP error / parse / anything else — degrade with
        // the underlying message so the operator can see the cause.
        moduleReadNote = `couldn't read live module state (${
          err instanceof Error ? err.message : String(err)
        })`;
      }
    } finally {
      // Close only a handle we actually obtained. Closing is best-effort: a
      // failure here must not discard the rows already gathered.
      try {
        db?.close();
      } catch {
        // Intentionally ignored — nothing useful to report for a diagnostic.
      }
    }
  }

  // Index the supervisor snapshots by short name for per-row lookup.
  const stateByShort = new Map<string, ModuleStatesResult["modules"][number]>();
  for (const m of states?.modules ?? []) {
    if (m.short) stateByShort.set(m.short, m);
  }

  const rows: StatusRow[] = manifest.services.map((entry) => {
    const base = manifestRowBase(entry, installSourceDeps);
    const snap = base.short ? stateByShort.get(base.short) : undefined;

    if (!hubHealthy) {
      // Hub is down → show each module `inactive` (expected, not a failure)
      // with a note rather than a probe failure.
      return {
        service: entry.name,
        port: String(entry.port),
        version: entry.version,
        stateLabel: "inactive",
        pidLabel: "-",
        uptimeLabel: "-",
        healthDetail: "-",
        latencyLabel: "-",
        sourceLabel: base.sourceLabel,
        url: base.url,
        healthy: false,
        skipped: true,
        ...(base.driftWarning ? { driftWarning: base.driftWarning } : {}),
        ...(base.staleNote ? { staleNote: base.staleNote } : {}),
        managerNote: "hub is down — its modules are stopped",
      };
    }

    // No snapshot for this module (untracked, or the read degraded) maps to
    // `inactive` via the `null` status.
    const { stateLabel, healthy, skipped } = mapSupervisorStatus(snap?.supervisor_status ?? null);
    // Prefer the supervisor's structured start-error (live), else the persisted
    // services.json note — same friendly surface either way (#188).
    const startErrorNote =
      supervisorStartErrorNote(snap?.supervisor_start_error) ?? base.manifestStartErrorNote;
    const healthDetail =
      stateLabel === "failing" ? `supervisor: ${snap?.supervisor_status ?? "crashed"}` : "-";

    const row: StatusRow = {
      service: entry.name,
      port: String(entry.port),
      version: entry.version,
      stateLabel,
      pidLabel: snap?.pid !== undefined && snap?.pid !== null ? String(snap.pid) : "-",
      uptimeLabel: "-",
      healthDetail,
      latencyLabel: "-",
      sourceLabel: base.sourceLabel,
      url: base.url,
      healthy,
      skipped,
    };
    if (base.driftWarning) row.driftWarning = base.driftWarning;
    if (base.staleNote) row.staleNote = base.staleNote;
    if (startErrorNote) row.startErrorNote = startErrorNote;
    // Surface the degraded-read note ONCE — on the first module row so the
    // operator sees why run-state is missing, without repeating it on every row.
    if (moduleReadNote) {
      row.managerNote = moduleReadNote;
      moduleReadNote = undefined;
    }
    return row;
  });

  const hub = buildSupervisorHubRow({
    configDir,
    hubSrcDir,
    installSourceDeps,
    sup,
    port,
    hubHealthy,
  });
  // If the degraded-read note never landed on a module row (empty manifest),
  // surface it on the hub row so the operator still sees the actionable hint.
  if (moduleReadNote && !hub.managerNote) hub.managerNote = moduleReadNote;
  rows.push(hub);
  return rows;
}
|
|
816
|
+
|
|
817
|
+
/** Inputs consumed by `buildSupervisorHubRow` when composing the hub's status row. */
interface BuildSupervisorHubRowArgs {
  /** Config directory handed in by the caller (presumably the hub's config root — confirm). */
  configDir: string;
  /** Hub source directory; forwarded to `detectHubInstallSource`. */
  hubSrcDir: string;
  /** Dependencies forwarded to `detectHubInstallSource` alongside `hubSrcDir`. */
  installSourceDeps: DetectInstallSourceDeps;
  /** Resolved supervisor; supplies `queryHubUnitState` and the `hubUnitDeps` it is called with. */
  sup: ResolvedStatusSupervisor;
  /** Hub port; rendered in the row's port column and its `http://127.0.0.1:<port>` URL. */
  port: number;
  /** Whether the hub's `/health` check succeeded (computed by the caller). */
  hubHealthy: boolean;
}
|
|
825
|
+
|
|
826
|
+
/**
 * Compose the hub's own status row (design §6.4) from two signals: the unit
 * state reported by the platform service manager (`sup.queryHubUnitState`) and
 * the caller-supplied `/health` verdict (`hubHealthy`).
 *
 * Outcome per manager state:
 *  - `no-manager` (container / Render / Fly): `/health` alone decides —
 *    `active` when it answers, `failing` otherwise; always annotated
 *    "container runtime (managed)".
 *  - `failed`: always `failing`, with the last exit code in the note when the
 *    manager reported one — even if `/health` currently answers.
 *  - `active`: `active` when `/health` answers; otherwise `failing` with a
 *    "starting or unhealthy" note.
 *  - `activating`: `active` when `/health` already answers; otherwise `pending`
 *    with a "starting" note. Reported as healthy either way.
 *  - anything else (`inactive`, `unknown`, ...): `/health` is the tiebreaker —
 *    `active` when it answers, `inactive` when it does not.
 *
 * Never throws on a manager-query failure: the state degrades to `unknown` and
 * the `/health` verdict decides.
 *
 * @param args - See {@link BuildSupervisorHubRowArgs}.
 * @returns The fully populated hub `StatusRow`.
 */
function buildSupervisorHubRow(args: BuildSupervisorHubRowArgs): StatusRow {
  const { hubSrcDir, installSourceDeps, sup, port, hubHealthy } = args;
  const installSource = detectHubInstallSource(hubSrcDir, installSourceDeps);

  // Columns that are the same no matter what the manager or /health say.
  const shared: Omit<
    StatusRow,
    "stateLabel" | "pidLabel" | "uptimeLabel" | "healthy" | "skipped"
  > & {
    healthDetail: string;
  } = {
    service: "parachute-hub (internal)",
    port: String(port),
    version: installSource.livePackageVersion ?? "-",
    healthDetail: "-",
    latencyLabel: "-",
    sourceLabel: formatInstallSourceLabel(installSource),
    url: `http://127.0.0.1:${port}`,
  };

  // Assemble the final row: shared columns, the verdict, then any per-outcome
  // overrides. The hub row never reports a pid or uptime.
  const finish = (
    verdict: Pick<StatusRow, "stateLabel" | "healthy" | "skipped">,
    extra: Partial<Pick<StatusRow, "healthDetail" | "managerNote">> = {},
  ): StatusRow => ({
    ...shared,
    stateLabel: verdict.stateLabel,
    pidLabel: "-",
    uptimeLabel: "-",
    healthy: verdict.healthy,
    skipped: verdict.skipped,
    ...extra,
  });

  let unitState: HubUnitState;
  let exitCode: number | undefined;
  try {
    ({ state: unitState, lastExitCode: exitCode } = sup.queryHubUnitState(sup.hubUnitDeps));
  } catch {
    // A broken manager query must not take status down with it; /health decides.
    unitState = "unknown";
  }

  switch (unitState) {
    case "no-manager":
      // Nothing on the box to query — /health is the only liveness signal.
      return finish(
        {
          stateLabel: hubHealthy ? "active" : "failing",
          healthy: hubHealthy,
          skipped: hubHealthy,
        },
        {
          healthDetail: hubHealthy ? "-" : "down",
          managerNote: "container runtime (managed)",
        },
      );

    case "failed": {
      // The manager's verdict wins even when a respawn happens to answer /health.
      let failureNote: string;
      if (exitCode !== undefined) {
        failureNote = `service manager reports the hub unit failed (last exit code ${exitCode})`;
      } else {
        failureNote = "service manager reports the hub unit failed";
      }
      return finish(
        { stateLabel: "failing", healthy: false, skipped: false },
        {
          healthDetail: hubHealthy ? "service manager reports failed" : "down",
          managerNote: failureNote,
        },
      );
    }

    case "active":
      // Up per the manager; without a /health answer it is starting or wedged.
      return hubHealthy
        ? finish({ stateLabel: "active", healthy: true, skipped: true })
        : finish(
            { stateLabel: "failing", healthy: false, skipped: false },
            {
              healthDetail: "manager active, /health not answering",
              managerNote:
                "service manager reports active; /health not answering yet (starting or unhealthy)",
            },
          );

    case "activating":
      // Transient bring-up: in-flight, so never counted as unhealthy.
      return finish(
        { stateLabel: hubHealthy ? "active" : "pending", healthy: true, skipped: true },
        hubHealthy ? {} : { managerNote: "service manager reports the hub unit is starting" },
      );

    default:
      // inactive / unknown / no-unit (the latter is defensive): trust /health.
      if (hubHealthy) {
        return finish({ stateLabel: "active", healthy: true, skipped: true });
      }
      return finish(
        { stateLabel: "inactive", healthy: false, skipped: true },
        unitState === "unknown" ? { managerNote: "service manager state unknown" } : {},
      );
  }
}
|