@openparachute/hub 0.7.4-rc.1 → 0.7.4-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openparachute/hub",
3
- "version": "0.7.4-rc.1",
3
+ "version": "0.7.4-rc.2",
4
4
  "description": "parachute — the local hub for the Parachute ecosystem (discovery, ports, lifecycle, soon OAuth).",
5
5
  "license": "AGPL-3.0",
6
6
  "publishConfig": {
@@ -80,6 +80,12 @@ interface SupervisorArmOpts {
80
80
  hubHealthy: boolean;
81
81
  moduleStates?: ModuleStatesResult;
82
82
  fetchModuleStatesImpl?: () => Promise<ModuleStatesResult>;
83
+ /**
84
+ * Inject the unauthenticated module-liveness probe (#700). Defaults to "every
85
+ * module is down" so the degraded-read tests don't accidentally hit the
86
+ * network; specific tests override to mark a module live.
87
+ */
88
+ probeModuleHealth?: (port: number, health: string) => Promise<boolean>;
83
89
  }
84
90
 
85
91
  /** Drive `status` through the supervisor arm with fully stubbed seams. */
@@ -96,6 +102,7 @@ function supervisorOpts(configDir: string, path: string, o: SupervisorArmOpts) {
96
102
  fetchModuleStates:
97
103
  o.fetchModuleStatesImpl ??
98
104
  (async () => o.moduleStates ?? { supervisorAvailable: true, modules: [] }),
105
+ probeModuleHealth: o.probeModuleHealth ?? (async () => false),
99
106
  openDb: fakeOpenDb as unknown as (configDir: string) => import("bun:sqlite").Database,
100
107
  },
101
108
  };
@@ -377,7 +384,7 @@ describe("status — Phase 3c supervisor arm: module rows", () => {
377
384
  }
378
385
  });
379
386
 
380
- test("no operator token graceful degrade (manifest rows + actionable hint), no 401 crash", async () => {
387
+ test("no operator token (fresh box, no admin) note targets set-password, NOT rotate-operator (#700)", async () => {
381
388
  const { path, configDir, cleanup } = makeTempPath();
382
389
  try {
383
390
  upsertService(
@@ -392,15 +399,121 @@ describe("status — Phase 3c supervisor arm: module rows", () => {
392
399
  fetchModuleStatesImpl: async () => {
393
400
  throw new NoOperatorTokenError();
394
401
  },
402
+ // No probe-live module here → row stays inactive (exit 0).
403
+ probeModuleHealth: async () => false,
395
404
  }),
396
405
  print: (l) => lines.push(l),
397
406
  });
398
407
  // We could not read run-state, but didn't crash. The module row falls back
399
- // to `inactive` (no supervisor snapshot) — a stopped row is exit 0.
408
+ // to `inactive` (no supervisor snapshot, probe down) — a stopped row is exit 0.
400
409
  expect(code).toBe(0);
401
410
  const out = lines.join("\n");
402
411
  expect(out).toMatch(/parachute-vault/);
403
- expect(out).toMatch(/run `parachute auth rotate-operator`/);
412
+ // #700: a fresh box has no admin, so rotate-operator would itself error.
413
+ // The note must point at set-password and must NOT be the bare
414
+ // rotate-operator guidance.
415
+ expect(out).toMatch(/parachute auth set-password/);
416
+ expect(out).not.toMatch(/run `parachute auth rotate-operator` to mint an operator token/);
417
+ const vaultLine = lines.find((l) => l.includes("parachute-vault"));
418
+ expect(vaultLine).toMatch(/\binactive\b/);
419
+ } finally {
420
+ cleanup();
421
+ }
422
+ });
423
+
424
+ test("no operator token + module answers /health probe → LIVE (active), not inactive (#700)", async () => {
425
+ const { path, configDir, cleanup } = makeTempPath();
426
+ try {
427
+ upsertService(
428
+ { name: "parachute-vault", port: 1940, paths: ["/"], health: "/health", version: "0.6.2" },
429
+ path,
430
+ );
431
+ const probed: Array<{ port: number; health: string }> = [];
432
+ const lines: string[] = [];
433
+ const code = await status({
434
+ ...supervisorOpts(configDir, path, {
435
+ managerState: { state: "active" },
436
+ hubHealthy: true,
437
+ fetchModuleStatesImpl: async () => {
438
+ throw new NoOperatorTokenError();
439
+ },
440
+ // vault is genuinely up — its /health answers (2xx or 401 → live).
441
+ probeModuleHealth: async (port, health) => {
442
+ probed.push({ port, health });
443
+ return true;
444
+ },
445
+ }),
446
+ print: (l) => lines.push(l),
447
+ });
448
+ expect(code).toBe(0);
449
+ // The probe targeted the module's own port + health path from the manifest.
450
+ expect(probed).toEqual([{ port: 1940, health: "/health" }]);
451
+ const vaultLine = lines.find((l) => l.includes("parachute-vault"));
452
+ expect(vaultLine).toMatch(/\bactive\b/);
453
+ expect(vaultLine).not.toMatch(/\binactive\b/);
454
+ const out = lines.join("\n");
455
+ // The row is labelled as probe-derived so the operator knows it's thin.
456
+ expect(out).toMatch(/live via unauthenticated health probe/);
457
+ // The degraded-read hint still appears (why PID/uptime are absent).
458
+ expect(out).toMatch(/parachute auth set-password/);
459
+ } finally {
460
+ cleanup();
461
+ }
462
+ });
463
+
464
+ test("degraded read + module probe FAILS → row stays inactive (#700)", async () => {
465
+ const { path, configDir, cleanup } = makeTempPath();
466
+ try {
467
+ upsertService(
468
+ { name: "parachute-vault", port: 1940, paths: ["/"], health: "/health", version: "0.6.2" },
469
+ path,
470
+ );
471
+ const lines: string[] = [];
472
+ const code = await status({
473
+ ...supervisorOpts(configDir, path, {
474
+ managerState: { state: "active" },
475
+ hubHealthy: true,
476
+ fetchModuleStatesImpl: async () => {
477
+ throw new NoOperatorTokenError();
478
+ },
479
+ probeModuleHealth: async () => false,
480
+ }),
481
+ print: (l) => lines.push(l),
482
+ });
483
+ expect(code).toBe(0);
484
+ const vaultLine = lines.find((l) => l.includes("parachute-vault"));
485
+ expect(vaultLine).toMatch(/\binactive\b/);
486
+ const out = lines.join("\n");
487
+ expect(out).not.toMatch(/live via unauthenticated health probe/);
488
+ } finally {
489
+ cleanup();
490
+ }
491
+ });
492
+
493
+ test("a throwing module probe never crashes status — row degrades to inactive (#700)", async () => {
494
+ const { path, configDir, cleanup } = makeTempPath();
495
+ try {
496
+ upsertService(
497
+ { name: "parachute-vault", port: 1940, paths: ["/"], health: "/health", version: "0.6.2" },
498
+ path,
499
+ );
500
+ const lines: string[] = [];
501
+ const code = await status({
502
+ ...supervisorOpts(configDir, path, {
503
+ managerState: { state: "active" },
504
+ hubHealthy: true,
505
+ fetchModuleStatesImpl: async () => {
506
+ throw new NoOperatorTokenError();
507
+ },
508
+ probeModuleHealth: async () => {
509
+ throw new Error("probe exploded");
510
+ },
511
+ }),
512
+ print: (l) => lines.push(l),
513
+ });
514
+ expect(code).toBe(0);
515
+ const vaultLine = lines.find((l) => l.includes("parachute-vault"));
516
+ expect(vaultLine).toMatch(/\binactive\b/);
404
517
  } finally {
405
518
  cleanup();
406
519
  }
@@ -433,6 +546,42 @@ describe("status — Phase 3c supervisor arm: module rows", () => {
433
546
  }
434
547
  });
435
548
 
549
+ test("expired operator token + module answers /health probe → LIVE (active) (#700)", async () => {
550
+ // Symmetry with the no-token case: the unauthenticated probe fallback fires
551
+ // on ANY degraded read where the hub is up + run-state is missing, so an
552
+ // expired-token box still shows a genuinely-serving module as `active`.
553
+ const { path, configDir, cleanup } = makeTempPath();
554
+ try {
555
+ upsertService(
556
+ { name: "parachute-vault", port: 1940, paths: ["/"], health: "/health", version: "0.6.2" },
557
+ path,
558
+ );
559
+ const lines: string[] = [];
560
+ const code = await status({
561
+ ...supervisorOpts(configDir, path, {
562
+ managerState: { state: "active" },
563
+ hubHealthy: true,
564
+ fetchModuleStatesImpl: async () => {
565
+ throw new OperatorTokenExpiredError(
566
+ "token expired — run `parachute auth rotate-operator`",
567
+ );
568
+ },
569
+ probeModuleHealth: async () => true,
570
+ }),
571
+ print: (l) => lines.push(l),
572
+ });
573
+ expect(code).toBe(0);
574
+ const vaultLine = lines.find((l) => l.includes("parachute-vault"));
575
+ expect(vaultLine).toMatch(/\bactive\b/);
576
+ const out = lines.join("\n");
577
+ expect(out).toMatch(/live via unauthenticated health probe/);
578
+ // The expired-token degraded-read hint still points at rotate-operator.
579
+ expect(out).toMatch(/rotate-operator/);
580
+ } finally {
581
+ cleanup();
582
+ }
583
+ });
584
+
436
585
  test("API error reading module states → degrade with the message, no crash", async () => {
437
586
  const { path, configDir, cleanup } = makeTempPath();
438
587
  try {
@@ -19,8 +19,8 @@ import {
19
19
  } from "../install-source.ts";
20
20
  import {
21
21
  type DriveModuleOpDeps,
22
- type ModuleStatesResult,
23
22
  type ModuleStateSnapshot,
23
+ type ModuleStatesResult,
24
24
  NoOperatorTokenError,
25
25
  OperatorTokenExpiredError,
26
26
  fetchModuleStates as fetchModuleStatesImpl,
@@ -71,6 +71,17 @@ export interface StatusOpts {
71
71
  probeHubHealth?: (port: number) => Promise<boolean>;
72
72
  /** Read the running supervisor's module states (§6.4 module rows). */
73
73
  fetchModuleStates?: (deps: DriveModuleOpDeps) => Promise<ModuleStatesResult>;
74
+ /**
75
+ * Unauthenticated module-liveness probe (#700). Used ONLY on the degraded
76
+ * path where the supervisor run-state read couldn't run (no/expired/invalid
77
+ * operator token, or any API error) but the hub itself is up: probes a
78
+ * module's own `/health` directly on its loopback port. Treats 2xx AND 401
79
+ * as live (mirrors the "auth-gated health = healthy" rule, #423: a module
80
+ * that answers 401 is auth-gated-but-alive, not down). Bounded; never
81
+ * throws. Production reuses the same bounded fetch shape as the hub probe;
82
+ * tests inject so they don't hit the network.
83
+ */
84
+ probeModuleHealth?: (port: number, health: string) => Promise<boolean>;
74
85
  /**
75
86
  * Open the hub DB used to validate/auto-rotate the operator token in
76
87
  * `fetchModuleStates`. Production opens `<configDir>/hub.db`; tests inject a
@@ -162,6 +173,15 @@ interface StatusRow {
162
173
  * Printed on a continuation line like the other notes.
163
174
  */
164
175
  managerNote?: string;
176
+ /**
177
+ * Set on a module row whose STATE was derived from an unauthenticated
178
+ * `/health` probe rather than the supervisor's run-state (#700) — the
179
+ * degraded-read fallback (no/expired operator token, or an API error) where
180
+ * the module is genuinely serving. Tells the operator the row is live-but-
181
+ * thin: no PID/uptime/structured run-state until they sign in. Printed on a
182
+ * continuation line like the other notes.
183
+ */
184
+ probeNote?: string;
165
185
  }
166
186
 
167
187
  /**
@@ -319,6 +339,7 @@ function renderRows(rows: StatusRow[], print: (line: string) => void): void {
319
339
  print(` ! probe: ${row.healthDetail}`);
320
340
  }
321
341
  if (row.managerNote) print(` ! ${row.managerNote}`);
342
+ if (row.probeNote) print(` → ${row.probeNote}`);
322
343
  if (row.driftWarning) print(` ! ${row.driftWarning}`);
323
344
  if (row.staleNote) print(` ! ${row.staleNote}`);
324
345
  if (row.startErrorNote) print(` ! ${row.startErrorNote}`);
@@ -336,12 +357,33 @@ function renderRows(rows: StatusRow[], print: (line: string) => void): void {
336
357
  // in Phase 5b.
337
358
  // ---------------------------------------------------------------------------
338
359
 
360
+ /**
361
+ * Default unauthenticated module-liveness probe (#700). A bounded `fetch` to the
362
+ * module's own `http://127.0.0.1:<port><health>`. Treats 2xx AND 401 as live —
363
+ * an auth-gated `/health` that answers 401 is protected-but-alive, not down
364
+ * (the "auth-gated health = healthy" rule, #423). Any other status / network
365
+ * error / timeout → false. 1.5s timeout, mirroring hub-unit's `defaultProbeHealth`.
366
+ */
367
+ async function defaultProbeModuleHealth(port: number, health: string): Promise<boolean> {
368
+ try {
369
+ const res = await fetch(`http://127.0.0.1:${port}${health}`, {
370
+ signal: AbortSignal.timeout(1500),
371
+ // Loopback-only target, but never chase a redirect off-box (defensive).
372
+ redirect: "manual",
373
+ });
374
+ return res.ok || res.status === 401;
375
+ } catch {
376
+ return false;
377
+ }
378
+ }
379
+
339
380
  /** Resolved supervisor-path seams (see `StatusOpts.supervisor`). */
340
381
  interface ResolvedStatusSupervisor {
341
382
  hubUnitDeps: HubUnitDeps;
342
383
  queryHubUnitState: (deps: HubUnitDeps) => HubUnitStateResult;
343
384
  probeHubHealth: (port: number) => Promise<boolean>;
344
385
  fetchModuleStates: (deps: DriveModuleOpDeps) => Promise<ModuleStatesResult>;
386
+ probeModuleHealth: (port: number, health: string) => Promise<boolean>;
345
387
  openDb: (configDir: string) => Database;
346
388
  baseUrl: string | undefined;
347
389
  }
@@ -357,6 +399,7 @@ function resolveStatusSupervisor(opts: StatusOpts["supervisor"]): ResolvedStatus
357
399
  queryHubUnitState: opts?.queryHubUnitState ?? queryHubUnitStateImpl,
358
400
  probeHubHealth: opts?.probeHubHealth ?? hubUnitDeps.probeHealth,
359
401
  fetchModuleStates: opts?.fetchModuleStates ?? fetchModuleStatesImpl,
402
+ probeModuleHealth: opts?.probeModuleHealth ?? defaultProbeModuleHealth,
360
403
  openDb: opts?.openDb ?? ((configDir) => openHubDb(hubDbPath(configDir))),
361
404
  baseUrl: opts?.baseUrl,
362
405
  };
@@ -471,10 +514,17 @@ async function buildSupervisorRows(args: BuildSupervisorRowsArgs): Promise<Statu
471
514
  ...(sup.baseUrl !== undefined ? { baseUrl: sup.baseUrl } : {}),
472
515
  });
473
516
  } catch (err) {
474
- if (err instanceof NoOperatorTokenError || err instanceof OperatorTokenExpiredError) {
475
- // No / expired operator token: we can't read module run-state, but the
476
- // hub is up. Show the manifest-derived rows with an actionable note —
477
- // do NOT 401-crash status (§6.4 graceful degradation).
517
+ if (err instanceof NoOperatorTokenError) {
518
+ // No operator token AND none can be minted yet: on a fresh box the
519
+ // first admin doesn't exist, so `rotate-operator` would itself hard-error
520
+ // ("no hub users yet"). Point at `set-password` (create the first admin),
521
+ // the actual unblocking step. We still can't read run-state, but the hub
522
+ // is up — degrade gracefully (§6.4), do NOT 401-crash status (#700).
523
+ moduleReadNote =
524
+ "couldn't read live module state — run `parachute auth set-password` to create the first admin (then `parachute auth rotate-operator`)";
525
+ } else if (err instanceof OperatorTokenExpiredError) {
526
+ // Token exists but is stale: an admin already exists, so re-minting works.
527
+ // Keep the rotate-operator guidance.
478
528
  moduleReadNote =
479
529
  "couldn't read live module state — run `parachute auth rotate-operator` to mint an operator token";
480
530
  } else {
@@ -500,6 +550,26 @@ async function buildSupervisorRows(args: BuildSupervisorRowsArgs): Promise<Statu
500
550
  if (m.short && !stateByShort.has(m.short)) stateByShort.set(m.short, m);
501
551
  }
502
552
 
553
+ // Unauthenticated-liveness fallback (#700). On the degraded path — the hub is
554
+ // up but we couldn't read supervisor run-state (no/expired operator token, or
555
+ // an API error) — probe each module's own `/health` directly so a module that
556
+ // is genuinely serving reads LIVE instead of being mapped null→`inactive`
557
+ // (which falsely told fresh-box operators a working install was broken). Keyed
558
+ // by the unique `entry.name`; probed concurrently, bounded, never throws.
559
+ const probeAlive = new Map<string, boolean>();
560
+ if (hubHealthy && !states) {
561
+ await Promise.all(
562
+ manifest.services.map(async (entry) => {
563
+ try {
564
+ const alive = await sup.probeModuleHealth(entry.port, entry.health);
565
+ if (alive) probeAlive.set(entry.name, true);
566
+ } catch {
567
+ // Probe must never crash status — absent from the map = treated as down.
568
+ }
569
+ }),
570
+ );
571
+ }
572
+
503
573
  const rows: StatusRow[] = manifest.services.map((entry) => {
504
574
  const base = manifestRowBase(entry, installSourceDeps);
505
575
  const snap = base.short ? stateByShort.get(base.short) : undefined;
@@ -526,6 +596,39 @@ async function buildSupervisorRows(args: BuildSupervisorRowsArgs): Promise<Statu
526
596
  };
527
597
  }
528
598
 
599
+ // Degraded read, but the module answered an unauthenticated `/health` probe
600
+ // (#700): show it LIVE instead of null→`inactive`. We can't surface PID/
601
+ // uptime/structured run-state (those need the operator token), so keep the
602
+ // degraded `moduleReadNote` AND add a probe-derived continuation note so the
603
+ // operator understands the row is from a liveness probe, not full supervisor
604
+ // state. `skipped: true` keeps a working install at exit 0.
605
+ if (!snap && probeAlive.get(entry.name)) {
606
+ const row: StatusRow = {
607
+ service: entry.name,
608
+ port: String(entry.port),
609
+ version: entry.version,
610
+ stateLabel: "active",
611
+ pidLabel: "-",
612
+ uptimeLabel: "-",
613
+ healthDetail: "-",
614
+ latencyLabel: "-",
615
+ sourceLabel: base.sourceLabel,
616
+ url: base.url,
617
+ healthy: true,
618
+ skipped: true,
619
+ };
620
+ row.probeNote = "live via unauthenticated health probe — sign in for full supervisor state";
621
+ if (base.driftWarning) row.driftWarning = base.driftWarning;
622
+ if (base.staleNote) row.staleNote = base.staleNote;
623
+ if (base.manifestStartErrorNote) row.startErrorNote = base.manifestStartErrorNote;
624
+ // Surface the degraded-read note ONCE (first module row), same as below.
625
+ if (moduleReadNote) {
626
+ row.managerNote = moduleReadNote;
627
+ moduleReadNote = undefined;
628
+ }
629
+ return row;
630
+ }
631
+
529
632
  const { stateLabel, healthy, skipped } = mapSupervisorStatus(snap?.supervisor_status ?? null);
530
633
  // Prefer the supervisor's structured start-error (live), else the persisted
531
634
  // services.json note — same friendly surface either way (#188).