@openparachute/hub 0.7.6-rc.1 → 0.7.6-rc.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/doctor.test.ts +131 -0
- package/src/__tests__/hub-instance.test.ts +297 -0
- package/src/__tests__/hub-server.test.ts +169 -0
- package/src/__tests__/status-supervisor.test.ts +112 -0
- package/src/commands/doctor.ts +167 -4
- package/src/commands/serve.ts +52 -0
- package/src/commands/status.ts +42 -1
- package/src/hub-instance.ts +365 -0
- package/src/hub-server.ts +89 -1
|
@@ -3,6 +3,7 @@ import { mkdtempSync, rmSync } from "node:fs";
|
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { status } from "../commands/status.ts";
|
|
6
|
+
import type { SelfProbeState } from "../hub-instance.ts";
|
|
6
7
|
import type { HubUnitDeps, HubUnitStateResult } from "../hub-unit.ts";
|
|
7
8
|
import {
|
|
8
9
|
type ModuleStatesResult,
|
|
@@ -86,6 +87,12 @@ interface SupervisorArmOpts {
|
|
|
86
87
|
* network; specific tests override to mark a module live.
|
|
87
88
|
*/
|
|
88
89
|
probeModuleHealth?: (port: number, health: string) => Promise<boolean>;
|
|
90
|
+
/**
|
|
91
|
+
* Loopback-hijack self-probe verdict read off `hub-instance.json` (hub#737).
|
|
92
|
+
* Defaults to "no verdict on disk" (undefined) so existing tests are
|
|
93
|
+
* unaffected; the hijack tests inject a `hijacked` / `ok` verdict.
|
|
94
|
+
*/
|
|
95
|
+
readInstanceState?: (configDir: string) => SelfProbeState | undefined;
|
|
89
96
|
}
|
|
90
97
|
|
|
91
98
|
/** Drive `status` through the supervisor arm with fully stubbed seams. */
|
|
@@ -104,6 +111,7 @@ function supervisorOpts(configDir: string, path: string, o: SupervisorArmOpts) {
|
|
|
104
111
|
(async () => o.moduleStates ?? { supervisorAvailable: true, modules: [] }),
|
|
105
112
|
probeModuleHealth: o.probeModuleHealth ?? (async () => false),
|
|
106
113
|
openDb: fakeOpenDb as unknown as (configDir: string) => import("bun:sqlite").Database,
|
|
114
|
+
readInstanceState: o.readInstanceState ?? (() => undefined),
|
|
107
115
|
},
|
|
108
116
|
};
|
|
109
117
|
}
|
|
@@ -651,3 +659,107 @@ describe("status — Phase 3c supervisor arm: module rows", () => {
|
|
|
651
659
|
// manager + supervisor. The supervisor-path readout is exercised throughout the
|
|
652
660
|
// suites above; a box with no hub unit degrades gracefully (manager `no-unit` /
|
|
653
661
|
// `/health` down → inactive rows), which the hub-row + module-row suites cover.
|
|
662
|
+
|
|
663
|
+
describe("status — loopback-hijack override (hub#737)", () => {
  /**
   * Run `status` through the supervisor arm inside a throwaway config dir and
   * hand back the exit code plus every printed line. The temp dir is removed
   * whether or not `status` throws.
   */
  async function runStatus(arm: SupervisorArmOpts) {
    const tmp = makeTempPath();
    try {
      const lines: string[] = [];
      const code = await status({
        ...supervisorOpts(tmp.configDir, tmp.path, arm),
        print: (l) => lines.push(l),
      });
      return { code, lines };
    } finally {
      tmp.cleanup();
    }
  }

  /** The printed row for the hub itself, if any. */
  const findHubLine = (lines: string[]) =>
    lines.find((l) => l.includes("parachute-hub (internal)"));

  test("selfProbe hijacked flips the hub row to failing despite a healthy /health", async () => {
    // The liveness probe reports healthy (the rogue would answer /health 200);
    // only the injected on-disk self-probe verdict can correct the row.
    const { code, lines } = await runStatus({
      managerState: { state: "active" },
      hubHealthy: true,
      moduleStates: { supervisorAvailable: true, modules: [] },
      readInstanceState: () => ({
        status: "hijacked",
        checkedAt: "2026-07-02T00:00:00.000Z",
        observedInstance: "rogue-hub",
      }),
    });

    expect(code).toBe(1);
    expect(findHubLine(lines)).toMatch(/\bfailing\b/);
    const out = lines.join("\n");
    expect(out).toContain("LOOPBACK HIJACK on :1939");
    expect(out).toMatch(/lsof -nP -iTCP:1939 -sTCP:LISTEN/);
  });

  test("hub down + STALE hijacked verdict on disk → NO phantom hijack, normal down-hub row", async () => {
    // Simulates a leftover `hijacked` verdict from a past run while nothing is
    // answering loopback now (hubHealthy=false): the ordinary down-hub row must
    // be rendered, with no LOOPBACK HIJACK warning layered on top.
    const { code, lines } = await runStatus({
      managerState: { state: "inactive" },
      hubHealthy: false,
      readInstanceState: () => ({
        status: "hijacked",
        checkedAt: "2026-07-02T00:00:00.000Z",
        observedInstance: "rogue-from-a-past-run",
      }),
    });

    expect(lines.join("\n")).not.toContain("LOOPBACK HIJACK");
    expect(findHubLine(lines)).toMatch(/\binactive\b/);
    // An inactive hub counts as expected-stopped, hence exit 0; what matters
    // here is that no phantom hijack was injected over the normal row.
    expect(code).toBe(0);
  });

  test("selfProbe ok leaves a healthy hub row untouched (active)", async () => {
    const { code, lines } = await runStatus({
      managerState: { state: "active" },
      hubHealthy: true,
      moduleStates: { supervisorAvailable: true, modules: [] },
      readInstanceState: () => ({
        status: "ok",
        checkedAt: "2026-07-02T00:00:00.000Z",
      }),
    });

    expect(code).toBe(0);
    expect(findHubLine(lines)).toMatch(/\bactive\b/);
    expect(lines.join("\n")).not.toContain("LOOPBACK HIJACK");
  });

  test("no self-probe verdict on disk → no override (default read returns undefined)", async () => {
    // No readInstanceState is passed, so `supervisorOpts` falls back to its
    // stub reader that returns undefined — the hub row must stay as-is.
    const { code, lines } = await runStatus({
      managerState: { state: "active" },
      hubHealthy: true,
      moduleStates: { supervisorAvailable: true, modules: [] },
    });

    expect(code).toBe(0);
    expect(lines.join("\n")).not.toContain("LOOPBACK HIJACK");
  });
});
|
package/src/commands/doctor.ts
CHANGED
|
@@ -53,6 +53,13 @@ import { decodeJwt } from "jose";
|
|
|
53
53
|
import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
|
|
54
54
|
import { type ExposeState, readExposeState } from "../expose-state.ts";
|
|
55
55
|
import { HUB_SVC, readHubPort } from "../hub-control.ts";
|
|
56
|
+
import {
|
|
57
|
+
HIJACK_INCIDENT_REF,
|
|
58
|
+
type HubInstanceRecord,
|
|
59
|
+
type LoopbackProbe,
|
|
60
|
+
probeLoopbackInstance,
|
|
61
|
+
readHubInstanceFile,
|
|
62
|
+
} from "../hub-instance.ts";
|
|
56
63
|
import {
|
|
57
64
|
HUB_UNIT_DEFAULT_PORT,
|
|
58
65
|
type HubUnitDeps,
|
|
@@ -157,6 +164,25 @@ export interface DoctorDeps {
|
|
|
157
164
|
* readline; tests inject a canned answer.
|
|
158
165
|
*/
|
|
159
166
|
readLine?: (prompt: string) => Promise<string>;
|
|
167
|
+
/**
|
|
168
|
+
* Loopback-hijack check (hub#737): read THIS hub's on-disk identity
|
|
169
|
+
* (`hub-instance.json`, written by the running `serve`). Default
|
|
170
|
+
* {@link readHubInstanceFile}; tests inject a fixture record (or null).
|
|
171
|
+
*/
|
|
172
|
+
readInstanceRecord?: (configDir: string) => HubInstanceRecord | null;
|
|
173
|
+
/**
|
|
174
|
+
* Loopback-hijack check: probe `127.0.0.1:<port>/health` and read its
|
|
175
|
+
* `instance`. Default {@link probeLoopbackInstance}; tests inject the
|
|
176
|
+
* matched / mismatched / unreachable outcomes.
|
|
177
|
+
*/
|
|
178
|
+
probeLoopbackInstance?: (port: number) => Promise<LoopbackProbe>;
|
|
179
|
+
/**
|
|
180
|
+
* Loopback-hijack check: count LISTEN sockets on the hub port (a second
|
|
181
|
+
* listener is the OrbStack-shadow fingerprint). Default shells `lsof`;
|
|
182
|
+
* returns `undefined` when it can't determine a count (lsof absent / errored)
|
|
183
|
+
* so the check degrades to the instance comparison alone. Tests inject a count.
|
|
184
|
+
*/
|
|
185
|
+
countHubListeners?: (port: number) => number | undefined;
|
|
160
186
|
}
|
|
161
187
|
|
|
162
188
|
export interface DoctorOpts {
|
|
@@ -217,6 +243,33 @@ async function defaultProbePublicHealth(origin: string): Promise<boolean> {
|
|
|
217
243
|
}
|
|
218
244
|
}
|
|
219
245
|
|
|
246
|
+
/**
 * Count the distinct processes holding a LISTEN socket on TCP `port`, using
 * `lsof`. A hijack shows TWO (this hub's wildcard bind + the shadowing
 * process's specific loopback bind).
 *
 * `lsof` is run with `-nP` (numeric hosts/ports, no name lookups) and `-FpP`
 * (machine-readable field output); each `p<pid>` line names one process.
 *
 * Best-effort outcomes:
 * - the spawn itself throws (e.g. no `lsof` binary) → `undefined`, so the
 *   caller degrades to the instance-comparison signal alone rather than
 *   false-flagging;
 * - non-zero exit with empty stdout → `0` (lsof's "no matches" exit);
 * - anything else → the number of distinct pids found in the output (which is
 *   `0` if no `p` line is present).
 *
 * NOTE(review): no timeout is passed to `Bun.spawnSync`, so this call is only
 * as bounded as `lsof` itself — confirm that is acceptable.
 *
 * @param port TCP port to inspect.
 * @returns distinct listener-process count, or `undefined` when indeterminate.
 */
function defaultCountHubListeners(port: number): number | undefined {
  try {
    const proc = Bun.spawnSync(["lsof", "-nP", `-iTCP:${port}`, "-sTCP:LISTEN", "-FpP"], {
      stdout: "pipe",
      stderr: "ignore",
    });
    // lsof exits non-zero when there are zero matches — that's a real "0", not
    // an error. Only a spawn failure (caught below) is treated as indeterminate.
    if (proc.exitCode !== 0 && (proc.stdout?.length ?? 0) === 0) return 0;

    const pids = new Set(
      new TextDecoder()
        .decode(proc.stdout ?? new Uint8Array())
        .split("\n")
        .filter((line) => line.startsWith("p"))
        .map((line) => line.slice(1)),
    );
    return pids.size;
  } catch {
    return undefined;
  }
}
|
|
272
|
+
|
|
220
273
|
/** Both ends of the pipe must be a TTY for an interactive confirm to make sense. */
|
|
221
274
|
function defaultIsInteractive(): boolean {
|
|
222
275
|
return Boolean(process.stdin.isTTY && process.stdout.isTTY);
|
|
@@ -243,6 +296,9 @@ interface ResolvedDeps {
|
|
|
243
296
|
now: () => Date;
|
|
244
297
|
isInteractive: () => boolean;
|
|
245
298
|
readLine: (prompt: string) => Promise<string>;
|
|
299
|
+
readInstanceRecord: (configDir: string) => HubInstanceRecord | null;
|
|
300
|
+
probeLoopbackInstance: (port: number) => Promise<LoopbackProbe>;
|
|
301
|
+
countHubListeners: (port: number) => number | undefined;
|
|
246
302
|
}
|
|
247
303
|
|
|
248
304
|
function resolveDeps(d: DoctorDeps | undefined): ResolvedDeps {
|
|
@@ -257,6 +313,9 @@ function resolveDeps(d: DoctorDeps | undefined): ResolvedDeps {
|
|
|
257
313
|
now: d?.now ?? (() => new Date()),
|
|
258
314
|
isInteractive: d?.isInteractive ?? defaultIsInteractive,
|
|
259
315
|
readLine: d?.readLine ?? defaultReadLine,
|
|
316
|
+
readInstanceRecord: d?.readInstanceRecord ?? readHubInstanceFile,
|
|
317
|
+
probeLoopbackInstance: d?.probeLoopbackInstance ?? probeLoopbackInstance,
|
|
318
|
+
countHubListeners: d?.countHubListeners ?? defaultCountHubListeners,
|
|
260
319
|
};
|
|
261
320
|
}
|
|
262
321
|
|
|
@@ -337,6 +396,110 @@ async function checkHubReachable(configDir: string, deps: ResolvedDeps): Promise
|
|
|
337
396
|
};
|
|
338
397
|
}
|
|
339
398
|
|
|
399
|
+
/**
 * Loopback-hijack detection (hub#737) — the 2026-07-02 P0's root trigger. This
 * hub binds `*:<port>` (wildcard); a foreign process that grabs a SPECIFIC
 * `127.0.0.1:<port>` bind (classically an OrbStack VM auto-forwarding the port)
 * WINS all loopback traffic, so every module's JWKS/API call silently reaches
 * the wrong hub. Detection compares THIS hub's on-disk identity nonce
 * (`hub-instance.json`, written by `serve`) to what a loopback `/health`
 * actually returns.
 *
 * Verdicts, in evaluation order:
 *   1. no instance record (or the reader threw) → PASS with an info detail;
 *      the Hub check owns "down", so this is never a false FAIL (per #717).
 *   2. loopback probe unreachable (or the probe threw) → PASS (info), again
 *      deferring to the Hub check.
 *   3. probe instance ≠ record instance (including a missing probe instance)
 *      → FAIL: active hijack, with the lsof/orb remediation and the incident
 *      reference. The listener count is appended when lsof reports more than one.
 *   4. instance matches but lsof reports more than one listener → WARN: a
 *      latent shadow that could win loopback after a restart.
 *   5. otherwise → PASS.
 *
 * Detect-only (no `--fix`). Each injected dep call (`readInstanceRecord`,
 * `probeLoopbackInstance`, `countHubListeners`) is individually guarded, so a
 * throwing dep degrades to the benign branch instead of propagating.
 * NOTE(review): `readHubPort` is called unguarded — confirm it cannot throw.
 *
 * @param configDir hub config directory used for the port + instance record.
 * @param deps resolved doctor seams (real implementations or test stubs).
 * @returns a single `loopback-hijack` check result.
 */
async function checkLoopbackHijack(configDir: string, deps: ResolvedDeps): Promise<CheckResult> {
  const port = readHubPort(configDir) ?? HUB_UNIT_DEFAULT_PORT;
  const name = "loopback-hijack";
  const title = `No loopback hijack on :${port}`;

  let record: HubInstanceRecord | null = null;
  try {
    record = deps.readInstanceRecord(configDir);
  } catch {
    record = null;
  }
  if (!record) {
    return {
      name,
      title,
      status: "pass",
      detail:
        "no hub-instance.json — the running hub predates loopback-nonce detection or isn't running under `parachute serve` (see the Hub check)",
    };
  }

  let probe: LoopbackProbe;
  try {
    probe = await deps.probeLoopbackInstance(port);
  } catch {
    probe = { reachable: false };
  }
  if (!probe.reachable) {
    return {
      name,
      title,
      status: "pass",
      detail: `loopback /health on 127.0.0.1:${port} didn't answer — nothing to compare (the Hub check covers a down hub)`,
    };
  }

  // Both remaining outcomes (hijack FAIL, shadow WARN / PASS) consult the
  // listener count, so look it up once here. It is deliberately NOT queried on
  // the early-return paths above. `undefined` means "couldn't determine".
  let listeners: number | undefined;
  try {
    listeners = deps.countHubListeners(port);
  } catch {
    listeners = undefined;
  }
  const hasExtraListener = listeners !== undefined && listeners > 1;

  // Reachable but a DIFFERENT identity answers → active hijack.
  if (probe.instance !== record.instance) {
    const who = probe.instance
      ? `a different hub (instance ${probe.instance})`
      : "a foreign process (its /health carries no hub instance nonce)";
    const listenerNote = hasExtraListener ? ` lsof shows ${listeners} listeners on the port.` : "";
    return {
      name,
      title,
      status: "fail",
      detail: `loopback 127.0.0.1:${port} is answered by ${who}, NOT this hub (instance ${record.instance}) — module JWKS/API calls are reaching the wrong hub.${listenerNote} Incident: ${HIJACK_INCIDENT_REF}`,
      fix: `lsof -nP -iTCP:${port} -sTCP:LISTEN # find the shadow; then \`orb list\` and stop any VM auto-forwarding ${port}`,
    };
  }

  // Loopback reaches us. A second listener is a latent shadow that could win the
  // next reboot — WARN so the operator clears it before it flips to a FAIL.
  if (hasExtraListener) {
    return {
      name,
      title,
      status: "warn",
      detail: `loopback reaches this hub, but lsof shows ${listeners} listeners on :${port} — a second bind is a latent shadow that could win loopback after a restart`,
      fix: `lsof -nP -iTCP:${port} -sTCP:LISTEN # identify + stop the extra listener (e.g. \`orb list\`)`,
    };
  }

  return {
    name,
    title,
    status: "pass",
    detail: `loopback 127.0.0.1:${port}/health returns this hub's instance nonce${
      listeners === 1 ? " (single listener)" : ""
    }`,
  };
}
|
|
502
|
+
|
|
340
503
|
/**
|
|
341
504
|
* Each CONFIGURED module alive via its own loopback `/health` (2xx OR 401).
|
|
342
505
|
* Only modules present in services.json are checked — an absent module is
|
|
@@ -1017,7 +1180,8 @@ async function runChecks(
|
|
|
1017
1180
|
const hub = await checkHubReachable(configDir, deps);
|
|
1018
1181
|
const hubHealthy = hub.status === "pass";
|
|
1019
1182
|
|
|
1020
|
-
const [modules, bins, exposure] = await Promise.all([
|
|
1183
|
+
const [hijack, modules, bins, exposure] = await Promise.all([
|
|
1184
|
+
checkLoopbackHijack(configDir, deps),
|
|
1021
1185
|
checkModulesAlive(manifest, hubHealthy, deps),
|
|
1022
1186
|
checkModuleBins(manifest, deps),
|
|
1023
1187
|
checkExposure(configDir, deps),
|
|
@@ -1032,7 +1196,7 @@ async function runChecks(
|
|
|
1032
1196
|
const add = (group: Group, checks: CheckResult[]) => {
|
|
1033
1197
|
for (const c of checks) grouped.push({ ...c, group });
|
|
1034
1198
|
};
|
|
1035
|
-
add("Hub", [hub]);
|
|
1199
|
+
add("Hub", [hub, hijack]);
|
|
1036
1200
|
add("Modules", [...modules, ...bins]);
|
|
1037
1201
|
add("Configuration", [manifestCheck, portDrift, operator]);
|
|
1038
1202
|
add("Migration", migration);
|
|
@@ -1208,8 +1372,7 @@ async function fixPortDrift(
|
|
|
1208
1372
|
const canonicalByName = new Map(drifted.map((d) => [d.name, d.canonical]));
|
|
1209
1373
|
const next = {
|
|
1210
1374
|
services: parsed.services.map((row) => {
|
|
1211
|
-
const canonical =
|
|
1212
|
-
typeof row.name === "string" ? canonicalByName.get(row.name) : undefined;
|
|
1375
|
+
const canonical = typeof row.name === "string" ? canonicalByName.get(row.name) : undefined;
|
|
1213
1376
|
return canonical === undefined ? row : { ...row, port: canonical };
|
|
1214
1377
|
}),
|
|
1215
1378
|
};
|
package/src/commands/serve.ts
CHANGED
|
@@ -36,6 +36,14 @@ import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
|
|
|
36
36
|
import { readExposeState } from "../expose-state.ts";
|
|
37
37
|
import { createDbHolder, defaultStatInode, startDbPathLivenessTimer } from "../hub-db-liveness.ts";
|
|
38
38
|
import { hubDbPath, openHubDb } from "../hub-db.ts";
|
|
39
|
+
import {
|
|
40
|
+
type HubInstanceRecord,
|
|
41
|
+
type HubSelfProbe,
|
|
42
|
+
armHubSelfProbe,
|
|
43
|
+
clearHubInstanceFile,
|
|
44
|
+
generateInstanceNonce,
|
|
45
|
+
writeHubInstanceFile,
|
|
46
|
+
} from "../hub-instance.ts";
|
|
39
47
|
import { hubFetch } from "../hub-server.ts";
|
|
40
48
|
import { getHubOrigin } from "../hub-settings.ts";
|
|
41
49
|
import { writeHubFile } from "../hub.ts";
|
|
@@ -513,6 +521,13 @@ export async function serve(opts: ServeOpts = {}): Promise<{
|
|
|
513
521
|
|
|
514
522
|
const supervisor = opts.supervisor ?? new Supervisor();
|
|
515
523
|
|
|
524
|
+
// Per-boot instance nonce (hub#737). Minted BEFORE the listener so it can be
|
|
525
|
+
// threaded into `/health`; written to disk AFTER a successful bind (below) so
|
|
526
|
+
// a bind failure never leaves a stale identity file. It's the linchpin of
|
|
527
|
+
// loopback-hijack detection: `/health` echoes it, and external tools compare
|
|
528
|
+
// the disk copy to what a loopback `/health` actually returns.
|
|
529
|
+
const instanceNonce = generateInstanceNonce();
|
|
530
|
+
|
|
516
531
|
// Claim the hub port FIRST — before booting a single supervised module. If
|
|
517
532
|
// another hub/supervisor already owns it, `Bun.serve` throws here and we
|
|
518
533
|
// exit immediately. The prior order (boot modules, *then* bind) let a
|
|
@@ -534,6 +549,7 @@ export async function serve(opts: ServeOpts = {}): Promise<{
|
|
|
534
549
|
probeDbPath: () => dbHolder.probePath(),
|
|
535
550
|
issuer,
|
|
536
551
|
loopbackPort: port,
|
|
552
|
+
instanceNonce,
|
|
537
553
|
supervisor,
|
|
538
554
|
}),
|
|
539
555
|
}),
|
|
@@ -544,6 +560,38 @@ export async function serve(opts: ServeOpts = {}): Promise<{
|
|
|
544
560
|
throw err;
|
|
545
561
|
}
|
|
546
562
|
|
|
563
|
+
// We own the listener now — record this process's identity on disk (0644) so
|
|
564
|
+
// `parachute status` / `parachute doctor` can detect a loopback hijack by
|
|
565
|
+
// comparing this nonce to what a loopback `/health` returns. Best-effort:
|
|
566
|
+
// a write failure only degrades external detection, never blocks the hub.
|
|
567
|
+
const instanceRecord: HubInstanceRecord = {
|
|
568
|
+
instance: instanceNonce,
|
|
569
|
+
pid: process.pid,
|
|
570
|
+
port,
|
|
571
|
+
startedAt: new Date().toISOString(),
|
|
572
|
+
};
|
|
573
|
+
writeHubInstanceFile(instanceRecord, { configDir: CONFIG_DIR, log });
|
|
574
|
+
|
|
575
|
+
// Arm the loopback self-probe (hub#737): an immediate check right after the
|
|
576
|
+
// bind catches a hijack that's ALREADY present at boot (the OrbStack VM that
|
|
577
|
+
// relaunched at reboot and grabbed 127.0.0.1:<port> before us), then a
|
|
578
|
+
// low-frequency re-probe catches one that appears later. It logs loudly on a
|
|
579
|
+
// mismatch and records the verdict into hub-instance.json for external tools.
|
|
580
|
+
// Skipped alongside module boot in tests (`skipModuleBoot`) so the test path
|
|
581
|
+
// never spawns a real 5-minute timer / loopback fetch.
|
|
582
|
+
let selfProbe: HubSelfProbe | undefined;
|
|
583
|
+
if (!opts.skipModuleBoot) {
|
|
584
|
+
selfProbe = armHubSelfProbe(
|
|
585
|
+
{ port, nonce: instanceNonce, record: instanceRecord, configDir: CONFIG_DIR },
|
|
586
|
+
{ log },
|
|
587
|
+
);
|
|
588
|
+
// Fire the startup check without blocking serve's return — a hijack present
|
|
589
|
+
// at boot surfaces within the probe's bounded timeout, not on the next tick.
|
|
590
|
+
// `probeOnce` is non-throwing in production, but guard the floating promise
|
|
591
|
+
// against ever surfacing as an unhandled rejection.
|
|
592
|
+
void selfProbe.probeOnce().catch(() => {});
|
|
593
|
+
}
|
|
594
|
+
|
|
547
595
|
log(
|
|
548
596
|
formatListeningBanner({
|
|
549
597
|
hostname,
|
|
@@ -618,8 +666,12 @@ export async function serve(opts: ServeOpts = {}): Promise<{
|
|
|
618
666
|
for (const state of supervisor.list()) {
|
|
619
667
|
await supervisor.stop(state.short);
|
|
620
668
|
}
|
|
669
|
+
selfProbe?.stop();
|
|
621
670
|
livenessTimer.stop();
|
|
622
671
|
await server.stop();
|
|
672
|
+
// Clear our on-disk identity so a cleanly-stopped hub leaves no stale
|
|
673
|
+
// self-probe verdict for `status` / `doctor` to read (hub#737 review).
|
|
674
|
+
clearHubInstanceFile(CONFIG_DIR);
|
|
623
675
|
dbHolder.get().close();
|
|
624
676
|
},
|
|
625
677
|
};
|
package/src/commands/status.ts
CHANGED
|
@@ -2,6 +2,7 @@ import type { Database } from "bun:sqlite";
|
|
|
2
2
|
import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
|
|
3
3
|
import { readHubPort } from "../hub-control.ts";
|
|
4
4
|
import { hubDbPath, openHubDb } from "../hub-db.ts";
|
|
5
|
+
import { type SelfProbeState, readHubInstanceFile } from "../hub-instance.ts";
|
|
5
6
|
import {
|
|
6
7
|
HUB_UNIT_DEFAULT_PORT,
|
|
7
8
|
type HubUnitDeps,
|
|
@@ -90,6 +91,16 @@ export interface StatusOpts {
|
|
|
90
91
|
openDb?: (configDir: string) => Database;
|
|
91
92
|
/** Loopback hub base URL override (default derives from the hub port). */
|
|
92
93
|
baseUrl?: string;
|
|
94
|
+
/**
|
|
95
|
+
* Read the running serve process's last loopback self-probe verdict from
|
|
96
|
+
* `hub-instance.json` (hub#737). Read from DISK, not over loopback — during
|
|
97
|
+
* a hijack the loopback /health (and the module-ops API) reach the WRONG
|
|
98
|
+
* hub, so the on-disk verdict the real serve wrote is the only trustworthy
|
|
99
|
+
* source. A `hijacked` verdict overrides the hub row (which would otherwise
|
|
100
|
+
* read `active` off the rogue's 200). Default {@link readHubInstanceFile}'s
|
|
101
|
+
* `selfProbe`; tests inject a state (or undefined).
|
|
102
|
+
*/
|
|
103
|
+
readInstanceState?: (configDir: string) => SelfProbeState | undefined;
|
|
93
104
|
};
|
|
94
105
|
}
|
|
95
106
|
|
|
@@ -386,6 +397,7 @@ interface ResolvedStatusSupervisor {
|
|
|
386
397
|
probeModuleHealth: (port: number, health: string) => Promise<boolean>;
|
|
387
398
|
openDb: (configDir: string) => Database;
|
|
388
399
|
baseUrl: string | undefined;
|
|
400
|
+
readInstanceState: (configDir: string) => SelfProbeState | undefined;
|
|
389
401
|
}
|
|
390
402
|
|
|
391
403
|
/**
|
|
@@ -402,6 +414,8 @@ function resolveStatusSupervisor(opts: StatusOpts["supervisor"]): ResolvedStatus
|
|
|
402
414
|
probeModuleHealth: opts?.probeModuleHealth ?? defaultProbeModuleHealth,
|
|
403
415
|
openDb: opts?.openDb ?? ((configDir) => openHubDb(hubDbPath(configDir))),
|
|
404
416
|
baseUrl: opts?.baseUrl,
|
|
417
|
+
readInstanceState:
|
|
418
|
+
opts?.readInstanceState ?? ((configDir) => readHubInstanceFile(configDir)?.selfProbe),
|
|
405
419
|
};
|
|
406
420
|
}
|
|
407
421
|
|
|
@@ -671,8 +685,35 @@ async function buildSupervisorRows(args: BuildSupervisorRowsArgs): Promise<Statu
|
|
|
671
685
|
port,
|
|
672
686
|
hubHealthy,
|
|
673
687
|
});
|
|
688
|
+
// Loopback-hijack override (hub#737). During a hijack the loopback `/health`
|
|
689
|
+
// the hub row's liveness probe hit belongs to the ROGUE hub (a 200 → the row
|
|
690
|
+
// reads `active`), so trust the running serve's own on-disk self-probe verdict
|
|
691
|
+
// instead — read from disk, never over the hijacked loopback. A `hijacked`
|
|
692
|
+
// verdict flips the row to `failing` with the loud, actionable note.
|
|
693
|
+
//
|
|
694
|
+
// GATED ON `hubHealthy` (review fix): the instance file is written per-boot
|
|
695
|
+
// and only cleared on a *graceful* stop, so a hard-killed hub can leave a
|
|
696
|
+
// stale `hijacked` verdict on disk. `hubHealthy` is true exactly when
|
|
697
|
+
// SOMETHING is answering the loopback port right now — which is precisely the
|
|
698
|
+
// live-hijack condition (the rogue keeps answering 200), so gating here never
|
|
699
|
+
// suppresses a real hijack, but it does keep a stopped hub (nothing answering)
|
|
700
|
+
// from rendering a phantom hijack over its normal down-hub row.
|
|
701
|
+
let selfProbe: SelfProbeState | undefined;
|
|
702
|
+
try {
|
|
703
|
+
selfProbe = sup.readInstanceState(configDir);
|
|
704
|
+
} catch {
|
|
705
|
+
selfProbe = undefined;
|
|
706
|
+
}
|
|
707
|
+
if (hubHealthy && selfProbe?.status === "hijacked") {
|
|
708
|
+
hub.stateLabel = "failing";
|
|
709
|
+
hub.healthy = false;
|
|
710
|
+
hub.skipped = false;
|
|
711
|
+
hub.healthDetail = "loopback hijacked — /health answered by a foreign process";
|
|
712
|
+
hub.managerNote = `LOOPBACK HIJACK on :${port} — module JWKS/API calls are NOT reaching this hub. Run \`parachute doctor\` and \`lsof -nP -iTCP:${port} -sTCP:LISTEN\`.`;
|
|
713
|
+
}
|
|
674
714
|
// If the degraded-read note never landed on a module row (empty manifest),
|
|
675
|
-
// surface it on the hub row so the operator still sees the actionable hint
|
|
715
|
+
// surface it on the hub row so the operator still sees the actionable hint —
|
|
716
|
+
// unless the hijack note already claimed it (the hijack is the bigger signal).
|
|
676
717
|
if (moduleReadNote && !hub.managerNote) hub.managerNote = moduleReadNote;
|
|
677
718
|
rows.push(hub);
|
|
678
719
|
return rows;
|