@openparachute/hub 0.7.4-rc.1 → 0.7.4-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -80,6 +80,12 @@ interface SupervisorArmOpts {
|
|
|
80
80
|
hubHealthy: boolean;
|
|
81
81
|
moduleStates?: ModuleStatesResult;
|
|
82
82
|
fetchModuleStatesImpl?: () => Promise<ModuleStatesResult>;
|
|
83
|
+
/**
|
|
84
|
+
* Inject the unauthenticated module-liveness probe (#700). Defaults to "every
|
|
85
|
+
* module is down" so the degraded-read tests don't accidentally hit the
|
|
86
|
+
* network; specific tests override to mark a module live.
|
|
87
|
+
*/
|
|
88
|
+
probeModuleHealth?: (port: number, health: string) => Promise<boolean>;
|
|
83
89
|
}
|
|
84
90
|
|
|
85
91
|
/** Drive `status` through the supervisor arm with fully stubbed seams. */
|
|
@@ -96,6 +102,7 @@ function supervisorOpts(configDir: string, path: string, o: SupervisorArmOpts) {
|
|
|
96
102
|
fetchModuleStates:
|
|
97
103
|
o.fetchModuleStatesImpl ??
|
|
98
104
|
(async () => o.moduleStates ?? { supervisorAvailable: true, modules: [] }),
|
|
105
|
+
probeModuleHealth: o.probeModuleHealth ?? (async () => false),
|
|
99
106
|
openDb: fakeOpenDb as unknown as (configDir: string) => import("bun:sqlite").Database,
|
|
100
107
|
},
|
|
101
108
|
};
|
|
@@ -377,7 +384,7 @@ describe("status — Phase 3c supervisor arm: module rows", () => {
|
|
|
377
384
|
}
|
|
378
385
|
});
|
|
379
386
|
|
|
380
|
-
test("no operator token
|
|
387
|
+
test("no operator token (fresh box, no admin) → note targets set-password, NOT rotate-operator (#700)", async () => {
|
|
381
388
|
const { path, configDir, cleanup } = makeTempPath();
|
|
382
389
|
try {
|
|
383
390
|
upsertService(
|
|
@@ -392,15 +399,121 @@ describe("status — Phase 3c supervisor arm: module rows", () => {
|
|
|
392
399
|
fetchModuleStatesImpl: async () => {
|
|
393
400
|
throw new NoOperatorTokenError();
|
|
394
401
|
},
|
|
402
|
+
// No probe-live module here → row stays inactive (exit 0).
|
|
403
|
+
probeModuleHealth: async () => false,
|
|
395
404
|
}),
|
|
396
405
|
print: (l) => lines.push(l),
|
|
397
406
|
});
|
|
398
407
|
// We could not read run-state, but didn't crash. The module row falls back
|
|
399
|
-
// to `inactive` (no supervisor snapshot) — a stopped row is exit 0.
|
|
408
|
+
// to `inactive` (no supervisor snapshot, probe down) — a stopped row is exit 0.
|
|
400
409
|
expect(code).toBe(0);
|
|
401
410
|
const out = lines.join("\n");
|
|
402
411
|
expect(out).toMatch(/parachute-vault/);
|
|
403
|
-
|
|
412
|
+
// #700: a fresh box has no admin, so rotate-operator would itself error.
|
|
413
|
+
// The note must point at set-password and must NOT be the bare
|
|
414
|
+
// rotate-operator guidance.
|
|
415
|
+
expect(out).toMatch(/parachute auth set-password/);
|
|
416
|
+
expect(out).not.toMatch(/run `parachute auth rotate-operator` to mint an operator token/);
|
|
417
|
+
const vaultLine = lines.find((l) => l.includes("parachute-vault"));
|
|
418
|
+
expect(vaultLine).toMatch(/\binactive\b/);
|
|
419
|
+
} finally {
|
|
420
|
+
cleanup();
|
|
421
|
+
}
|
|
422
|
+
});
|
|
423
|
+
|
|
424
|
+
test("no operator token + module answers /health probe → LIVE (active), not inactive (#700)", async () => {
|
|
425
|
+
const { path, configDir, cleanup } = makeTempPath();
|
|
426
|
+
try {
|
|
427
|
+
upsertService(
|
|
428
|
+
{ name: "parachute-vault", port: 1940, paths: ["/"], health: "/health", version: "0.6.2" },
|
|
429
|
+
path,
|
|
430
|
+
);
|
|
431
|
+
const probed: Array<{ port: number; health: string }> = [];
|
|
432
|
+
const lines: string[] = [];
|
|
433
|
+
const code = await status({
|
|
434
|
+
...supervisorOpts(configDir, path, {
|
|
435
|
+
managerState: { state: "active" },
|
|
436
|
+
hubHealthy: true,
|
|
437
|
+
fetchModuleStatesImpl: async () => {
|
|
438
|
+
throw new NoOperatorTokenError();
|
|
439
|
+
},
|
|
440
|
+
// vault is genuinely up — its /health answers (2xx or 401 → live).
|
|
441
|
+
probeModuleHealth: async (port, health) => {
|
|
442
|
+
probed.push({ port, health });
|
|
443
|
+
return true;
|
|
444
|
+
},
|
|
445
|
+
}),
|
|
446
|
+
print: (l) => lines.push(l),
|
|
447
|
+
});
|
|
448
|
+
expect(code).toBe(0);
|
|
449
|
+
// The probe targeted the module's own port + health path from the manifest.
|
|
450
|
+
expect(probed).toEqual([{ port: 1940, health: "/health" }]);
|
|
451
|
+
const vaultLine = lines.find((l) => l.includes("parachute-vault"));
|
|
452
|
+
expect(vaultLine).toMatch(/\bactive\b/);
|
|
453
|
+
expect(vaultLine).not.toMatch(/\binactive\b/);
|
|
454
|
+
const out = lines.join("\n");
|
|
455
|
+
// The row is labelled as probe-derived so the operator knows it's thin.
|
|
456
|
+
expect(out).toMatch(/live via unauthenticated health probe/);
|
|
457
|
+
// The degraded-read hint still appears (why PID/uptime are absent).
|
|
458
|
+
expect(out).toMatch(/parachute auth set-password/);
|
|
459
|
+
} finally {
|
|
460
|
+
cleanup();
|
|
461
|
+
}
|
|
462
|
+
});
|
|
463
|
+
|
|
464
|
+
test("degraded read + module probe FAILS → row stays inactive (#700)", async () => {
|
|
465
|
+
const { path, configDir, cleanup } = makeTempPath();
|
|
466
|
+
try {
|
|
467
|
+
upsertService(
|
|
468
|
+
{ name: "parachute-vault", port: 1940, paths: ["/"], health: "/health", version: "0.6.2" },
|
|
469
|
+
path,
|
|
470
|
+
);
|
|
471
|
+
const lines: string[] = [];
|
|
472
|
+
const code = await status({
|
|
473
|
+
...supervisorOpts(configDir, path, {
|
|
474
|
+
managerState: { state: "active" },
|
|
475
|
+
hubHealthy: true,
|
|
476
|
+
fetchModuleStatesImpl: async () => {
|
|
477
|
+
throw new NoOperatorTokenError();
|
|
478
|
+
},
|
|
479
|
+
probeModuleHealth: async () => false,
|
|
480
|
+
}),
|
|
481
|
+
print: (l) => lines.push(l),
|
|
482
|
+
});
|
|
483
|
+
expect(code).toBe(0);
|
|
484
|
+
const vaultLine = lines.find((l) => l.includes("parachute-vault"));
|
|
485
|
+
expect(vaultLine).toMatch(/\binactive\b/);
|
|
486
|
+
const out = lines.join("\n");
|
|
487
|
+
expect(out).not.toMatch(/live via unauthenticated health probe/);
|
|
488
|
+
} finally {
|
|
489
|
+
cleanup();
|
|
490
|
+
}
|
|
491
|
+
});
|
|
492
|
+
|
|
493
|
+
test("a throwing module probe never crashes status — row degrades to inactive (#700)", async () => {
|
|
494
|
+
const { path, configDir, cleanup } = makeTempPath();
|
|
495
|
+
try {
|
|
496
|
+
upsertService(
|
|
497
|
+
{ name: "parachute-vault", port: 1940, paths: ["/"], health: "/health", version: "0.6.2" },
|
|
498
|
+
path,
|
|
499
|
+
);
|
|
500
|
+
const lines: string[] = [];
|
|
501
|
+
const code = await status({
|
|
502
|
+
...supervisorOpts(configDir, path, {
|
|
503
|
+
managerState: { state: "active" },
|
|
504
|
+
hubHealthy: true,
|
|
505
|
+
fetchModuleStatesImpl: async () => {
|
|
506
|
+
throw new NoOperatorTokenError();
|
|
507
|
+
},
|
|
508
|
+
probeModuleHealth: async () => {
|
|
509
|
+
throw new Error("probe exploded");
|
|
510
|
+
},
|
|
511
|
+
}),
|
|
512
|
+
print: (l) => lines.push(l),
|
|
513
|
+
});
|
|
514
|
+
expect(code).toBe(0);
|
|
515
|
+
const vaultLine = lines.find((l) => l.includes("parachute-vault"));
|
|
516
|
+
expect(vaultLine).toMatch(/\binactive\b/);
|
|
404
517
|
} finally {
|
|
405
518
|
cleanup();
|
|
406
519
|
}
|
|
@@ -433,6 +546,42 @@ describe("status — Phase 3c supervisor arm: module rows", () => {
|
|
|
433
546
|
}
|
|
434
547
|
});
|
|
435
548
|
|
|
549
|
+
test("expired operator token + module answers /health probe → LIVE (active) (#700)", async () => {
|
|
550
|
+
// Symmetry with the no-token case: the unauthenticated probe fallback fires
|
|
551
|
+
// on ANY degraded read where the hub is up + run-state is missing, so an
|
|
552
|
+
// expired-token box still shows a genuinely-serving module as `active`.
|
|
553
|
+
const { path, configDir, cleanup } = makeTempPath();
|
|
554
|
+
try {
|
|
555
|
+
upsertService(
|
|
556
|
+
{ name: "parachute-vault", port: 1940, paths: ["/"], health: "/health", version: "0.6.2" },
|
|
557
|
+
path,
|
|
558
|
+
);
|
|
559
|
+
const lines: string[] = [];
|
|
560
|
+
const code = await status({
|
|
561
|
+
...supervisorOpts(configDir, path, {
|
|
562
|
+
managerState: { state: "active" },
|
|
563
|
+
hubHealthy: true,
|
|
564
|
+
fetchModuleStatesImpl: async () => {
|
|
565
|
+
throw new OperatorTokenExpiredError(
|
|
566
|
+
"token expired — run `parachute auth rotate-operator`",
|
|
567
|
+
);
|
|
568
|
+
},
|
|
569
|
+
probeModuleHealth: async () => true,
|
|
570
|
+
}),
|
|
571
|
+
print: (l) => lines.push(l),
|
|
572
|
+
});
|
|
573
|
+
expect(code).toBe(0);
|
|
574
|
+
const vaultLine = lines.find((l) => l.includes("parachute-vault"));
|
|
575
|
+
expect(vaultLine).toMatch(/\bactive\b/);
|
|
576
|
+
const out = lines.join("\n");
|
|
577
|
+
expect(out).toMatch(/live via unauthenticated health probe/);
|
|
578
|
+
// The expired-token degraded-read hint still points at rotate-operator.
|
|
579
|
+
expect(out).toMatch(/rotate-operator/);
|
|
580
|
+
} finally {
|
|
581
|
+
cleanup();
|
|
582
|
+
}
|
|
583
|
+
});
|
|
584
|
+
|
|
436
585
|
test("API error reading module states → degrade with the message, no crash", async () => {
|
|
437
586
|
const { path, configDir, cleanup } = makeTempPath();
|
|
438
587
|
try {
|
package/src/commands/status.ts
CHANGED
|
@@ -19,8 +19,8 @@ import {
|
|
|
19
19
|
} from "../install-source.ts";
|
|
20
20
|
import {
|
|
21
21
|
type DriveModuleOpDeps,
|
|
22
|
-
type ModuleStatesResult,
|
|
23
22
|
type ModuleStateSnapshot,
|
|
23
|
+
type ModuleStatesResult,
|
|
24
24
|
NoOperatorTokenError,
|
|
25
25
|
OperatorTokenExpiredError,
|
|
26
26
|
fetchModuleStates as fetchModuleStatesImpl,
|
|
@@ -71,6 +71,17 @@ export interface StatusOpts {
|
|
|
71
71
|
probeHubHealth?: (port: number) => Promise<boolean>;
|
|
72
72
|
/** Read the running supervisor's module states (§6.4 module rows). */
|
|
73
73
|
fetchModuleStates?: (deps: DriveModuleOpDeps) => Promise<ModuleStatesResult>;
|
|
74
|
+
/**
|
|
75
|
+
* Unauthenticated module-liveness probe (#700). Used ONLY on the degraded
|
|
76
|
+
* path where the supervisor run-state read couldn't run (no/expired/invalid
|
|
77
|
+
* operator token, or any API error) but the hub itself is up: probes a
|
|
78
|
+
* module's own `/health` directly on its loopback port. Treats 2xx AND 401
|
|
79
|
+
* as live (mirrors the "auth-gated health = healthy" rule, #423: a module
|
|
80
|
+
* that answers 401 is authenticated-but-alive, not down). Bounded; never
|
|
81
|
+
* throws. Production reuses the same bounded fetch shape as the hub probe;
|
|
82
|
+
* tests inject so they don't hit the network.
|
|
83
|
+
*/
|
|
84
|
+
probeModuleHealth?: (port: number, health: string) => Promise<boolean>;
|
|
74
85
|
/**
|
|
75
86
|
* Open the hub DB used to validate/auto-rotate the operator token in
|
|
76
87
|
* `fetchModuleStates`. Production opens `<configDir>/hub.db`; tests inject a
|
|
@@ -162,6 +173,15 @@ interface StatusRow {
|
|
|
162
173
|
* Printed on a continuation line like the other notes.
|
|
163
174
|
*/
|
|
164
175
|
managerNote?: string;
|
|
176
|
+
/**
|
|
177
|
+
* Set on a module row whose STATE was derived from an unauthenticated
|
|
178
|
+
* `/health` probe rather than the supervisor's run-state (#700) — the
|
|
179
|
+
* degraded-read fallback (no/expired operator token, or an API error) where
|
|
180
|
+
* the module is genuinely serving. Tells the operator the row is live-but-
|
|
181
|
+
* thin: no PID/uptime/structured run-state until they sign in. Printed on a
|
|
182
|
+
* continuation line like the other notes.
|
|
183
|
+
*/
|
|
184
|
+
probeNote?: string;
|
|
165
185
|
}
|
|
166
186
|
|
|
167
187
|
/**
|
|
@@ -319,6 +339,7 @@ function renderRows(rows: StatusRow[], print: (line: string) => void): void {
|
|
|
319
339
|
print(` ! probe: ${row.healthDetail}`);
|
|
320
340
|
}
|
|
321
341
|
if (row.managerNote) print(` ! ${row.managerNote}`);
|
|
342
|
+
if (row.probeNote) print(` → ${row.probeNote}`);
|
|
322
343
|
if (row.driftWarning) print(` ! ${row.driftWarning}`);
|
|
323
344
|
if (row.staleNote) print(` ! ${row.staleNote}`);
|
|
324
345
|
if (row.startErrorNote) print(` ! ${row.startErrorNote}`);
|
|
@@ -336,12 +357,33 @@ function renderRows(rows: StatusRow[], print: (line: string) => void): void {
|
|
|
336
357
|
// in Phase 5b.
|
|
337
358
|
// ---------------------------------------------------------------------------
|
|
338
359
|
|
|
360
|
+
/**
|
|
361
|
+
* Default unauthenticated module-liveness probe (#700). A bounded `fetch` to the
|
|
362
|
+
* module's own `http://127.0.0.1:<port><health>`. Treats 2xx AND 401 as live —
|
|
363
|
+
* an auth-gated `/health` that answers 401 is authenticated-but-alive, not down
|
|
364
|
+
* (the "auth-gated health = healthy" rule, #423). Any other status / network
|
|
365
|
+
* error / timeout → false. 1.5s timeout, mirroring hub-unit's `defaultProbeHealth`.
|
|
366
|
+
*/
|
|
367
|
+
async function defaultProbeModuleHealth(port: number, health: string): Promise<boolean> {
|
|
368
|
+
try {
|
|
369
|
+
const res = await fetch(`http://127.0.0.1:${port}${health}`, {
|
|
370
|
+
signal: AbortSignal.timeout(1500),
|
|
371
|
+
// Loopback-only target, but never chase a redirect off-box (defensive).
|
|
372
|
+
redirect: "manual",
|
|
373
|
+
});
|
|
374
|
+
return res.ok || res.status === 401;
|
|
375
|
+
} catch {
|
|
376
|
+
return false;
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
|
|
339
380
|
/** Resolved supervisor-path seams (see `StatusOpts.supervisor`). */
|
|
340
381
|
interface ResolvedStatusSupervisor {
|
|
341
382
|
hubUnitDeps: HubUnitDeps;
|
|
342
383
|
queryHubUnitState: (deps: HubUnitDeps) => HubUnitStateResult;
|
|
343
384
|
probeHubHealth: (port: number) => Promise<boolean>;
|
|
344
385
|
fetchModuleStates: (deps: DriveModuleOpDeps) => Promise<ModuleStatesResult>;
|
|
386
|
+
probeModuleHealth: (port: number, health: string) => Promise<boolean>;
|
|
345
387
|
openDb: (configDir: string) => Database;
|
|
346
388
|
baseUrl: string | undefined;
|
|
347
389
|
}
|
|
@@ -357,6 +399,7 @@ function resolveStatusSupervisor(opts: StatusOpts["supervisor"]): ResolvedStatus
|
|
|
357
399
|
queryHubUnitState: opts?.queryHubUnitState ?? queryHubUnitStateImpl,
|
|
358
400
|
probeHubHealth: opts?.probeHubHealth ?? hubUnitDeps.probeHealth,
|
|
359
401
|
fetchModuleStates: opts?.fetchModuleStates ?? fetchModuleStatesImpl,
|
|
402
|
+
probeModuleHealth: opts?.probeModuleHealth ?? defaultProbeModuleHealth,
|
|
360
403
|
openDb: opts?.openDb ?? ((configDir) => openHubDb(hubDbPath(configDir))),
|
|
361
404
|
baseUrl: opts?.baseUrl,
|
|
362
405
|
};
|
|
@@ -471,10 +514,17 @@ async function buildSupervisorRows(args: BuildSupervisorRowsArgs): Promise<Statu
|
|
|
471
514
|
...(sup.baseUrl !== undefined ? { baseUrl: sup.baseUrl } : {}),
|
|
472
515
|
});
|
|
473
516
|
} catch (err) {
|
|
474
|
-
if (err instanceof NoOperatorTokenError
|
|
475
|
-
// No
|
|
476
|
-
//
|
|
477
|
-
//
|
|
517
|
+
if (err instanceof NoOperatorTokenError) {
|
|
518
|
+
// No operator token AND none can be minted yet — on a fresh box the
|
|
519
|
+
// first admin doesn't exist, so `rotate-operator` would itself hard-error
|
|
520
|
+
// ("no hub users yet"). Point at `set-password` (create the first admin),
|
|
521
|
+
// the actual unblocking step. We still can't read run-state, but the hub
|
|
522
|
+
// is up — degrade gracefully (§6.4), do NOT 401-crash status (#700).
|
|
523
|
+
moduleReadNote =
|
|
524
|
+
"couldn't read live module state — run `parachute auth set-password` to create the first admin (then `parachute auth rotate-operator`)";
|
|
525
|
+
} else if (err instanceof OperatorTokenExpiredError) {
|
|
526
|
+
// Token exists but is stale: an admin already exists, so re-minting works.
|
|
527
|
+
// Keep the rotate-operator guidance.
|
|
478
528
|
moduleReadNote =
|
|
479
529
|
"couldn't read live module state — run `parachute auth rotate-operator` to mint an operator token";
|
|
480
530
|
} else {
|
|
@@ -500,6 +550,26 @@ async function buildSupervisorRows(args: BuildSupervisorRowsArgs): Promise<Statu
|
|
|
500
550
|
if (m.short && !stateByShort.has(m.short)) stateByShort.set(m.short, m);
|
|
501
551
|
}
|
|
502
552
|
|
|
553
|
+
// Unauthenticated-liveness fallback (#700). On the degraded path — the hub is
|
|
554
|
+
// up but we couldn't read supervisor run-state (no/expired operator token, or
|
|
555
|
+
// an API error) — probe each module's own `/health` directly so a module that
|
|
556
|
+
// is genuinely serving reads LIVE instead of being mapped null→`inactive`
|
|
557
|
+
// (which falsely told fresh-box operators a working install was broken). Keyed
|
|
558
|
+
// by the unique `entry.name`; probed concurrently, bounded, never throws.
|
|
559
|
+
const probeAlive = new Map<string, boolean>();
|
|
560
|
+
if (hubHealthy && !states) {
|
|
561
|
+
await Promise.all(
|
|
562
|
+
manifest.services.map(async (entry) => {
|
|
563
|
+
try {
|
|
564
|
+
const alive = await sup.probeModuleHealth(entry.port, entry.health);
|
|
565
|
+
if (alive) probeAlive.set(entry.name, true);
|
|
566
|
+
} catch {
|
|
567
|
+
// Probe must never crash status — absent from the map = treated as down.
|
|
568
|
+
}
|
|
569
|
+
}),
|
|
570
|
+
);
|
|
571
|
+
}
|
|
572
|
+
|
|
503
573
|
const rows: StatusRow[] = manifest.services.map((entry) => {
|
|
504
574
|
const base = manifestRowBase(entry, installSourceDeps);
|
|
505
575
|
const snap = base.short ? stateByShort.get(base.short) : undefined;
|
|
@@ -526,6 +596,39 @@ async function buildSupervisorRows(args: BuildSupervisorRowsArgs): Promise<Statu
|
|
|
526
596
|
};
|
|
527
597
|
}
|
|
528
598
|
|
|
599
|
+
// Degraded read, but the module answered an unauthenticated `/health` probe
|
|
600
|
+
// (#700): show it LIVE instead of null→`inactive`. We can't surface PID/
|
|
601
|
+
// uptime/structured run-state (those need the operator token), so keep the
|
|
602
|
+
// degraded `moduleReadNote` AND add a probe-derived continuation note so the
|
|
603
|
+
// operator understands the row is from a liveness probe, not full supervisor
|
|
604
|
+
// state. `skipped: true` keeps a working install at exit 0.
|
|
605
|
+
if (!snap && probeAlive.get(entry.name)) {
|
|
606
|
+
const row: StatusRow = {
|
|
607
|
+
service: entry.name,
|
|
608
|
+
port: String(entry.port),
|
|
609
|
+
version: entry.version,
|
|
610
|
+
stateLabel: "active",
|
|
611
|
+
pidLabel: "-",
|
|
612
|
+
uptimeLabel: "-",
|
|
613
|
+
healthDetail: "-",
|
|
614
|
+
latencyLabel: "-",
|
|
615
|
+
sourceLabel: base.sourceLabel,
|
|
616
|
+
url: base.url,
|
|
617
|
+
healthy: true,
|
|
618
|
+
skipped: true,
|
|
619
|
+
};
|
|
620
|
+
row.probeNote = "live via unauthenticated health probe — sign in for full supervisor state";
|
|
621
|
+
if (base.driftWarning) row.driftWarning = base.driftWarning;
|
|
622
|
+
if (base.staleNote) row.staleNote = base.staleNote;
|
|
623
|
+
if (base.manifestStartErrorNote) row.startErrorNote = base.manifestStartErrorNote;
|
|
624
|
+
// Surface the degraded-read note ONCE (first module row), same as below.
|
|
625
|
+
if (moduleReadNote) {
|
|
626
|
+
row.managerNote = moduleReadNote;
|
|
627
|
+
moduleReadNote = undefined;
|
|
628
|
+
}
|
|
629
|
+
return row;
|
|
630
|
+
}
|
|
631
|
+
|
|
529
632
|
const { stateLabel, healthy, skipped } = mapSupervisorStatus(snap?.supervisor_status ?? null);
|
|
530
633
|
// Prefer the supervisor's structured start-error (live), else the persisted
|
|
531
634
|
// services.json note — same friendly surface either way (#188).
|