@openparachute/hub 0.7.5 → 0.7.6-rc.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/package.json +1 -1
  2. package/src/__tests__/admin-module-token.test.ts +40 -3
  3. package/src/__tests__/api-modules-ops.test.ts +8 -3
  4. package/src/__tests__/api-modules.test.ts +26 -18
  5. package/src/__tests__/connections-store.test.ts +84 -0
  6. package/src/__tests__/doctor.test.ts +131 -0
  7. package/src/__tests__/git-notify.test.ts +29 -1
  8. package/src/__tests__/grants-store.test.ts +33 -1
  9. package/src/__tests__/hub-instance.test.ts +297 -0
  10. package/src/__tests__/hub-server.test.ts +169 -0
  11. package/src/__tests__/install.test.ts +28 -0
  12. package/src/__tests__/serve-boot.test.ts +60 -0
  13. package/src/__tests__/service-spec-discovery.test.ts +32 -9
  14. package/src/__tests__/setup.test.ts +64 -16
  15. package/src/__tests__/stale-module-units.test.ts +1 -1
  16. package/src/__tests__/status-supervisor.test.ts +112 -0
  17. package/src/admin-connections.ts +5 -1
  18. package/src/admin-module-token.ts +2 -2
  19. package/src/api-modules-ops.ts +3 -3
  20. package/src/api-modules.ts +13 -13
  21. package/src/commands/doctor.ts +167 -4
  22. package/src/commands/install.ts +29 -3
  23. package/src/commands/migrate.ts +5 -0
  24. package/src/commands/serve.ts +52 -0
  25. package/src/commands/setup.ts +10 -9
  26. package/src/commands/status.ts +42 -1
  27. package/src/connections-store.ts +15 -2
  28. package/src/git-notify.ts +34 -5
  29. package/src/grants-store.ts +15 -2
  30. package/src/help.ts +3 -3
  31. package/src/hub-instance.ts +365 -0
  32. package/src/hub-server.ts +89 -1
  33. package/src/install-source.ts +1 -1
  34. package/src/service-spec.ts +36 -44
  35. package/src/services-manifest.ts +1 -1
  36. package/src/stale-module-units.ts +2 -2
  37. package/src/well-known.ts +3 -3
@@ -0,0 +1,365 @@
1
+ /**
2
+ * Hub instance identity + loopback-hijack detection (hub#737).
3
+ *
4
+ * ## The incident this defends against (2026-07-02 P0)
5
+ *
6
+ * The hub binds `*:1939` (INADDR_ANY / `0.0.0.0`). An OrbStack Linux machine
7
+ * auto-forwarded ITS port 1939 onto the host as a SPECIFIC bind on
8
+ * `127.0.0.1:1939` — and a specific loopback bind WINS over a wildcard bind for
9
+ * all loopback traffic. Every module's JWKS/API call to `127.0.0.1:1939`
10
+ * silently reached the WRONG hub (a fresh container DB → empty JWKS, no admin),
11
+ * so every hub-JWT validation failed `no applicable key found in the JWKS` and
12
+ * the ecosystem 401-looped for hours. `lsof -nP -i :1939` showed two LISTENs;
13
+ * the tell was `/health` reporting the container's version, not the checkout's.
14
+ *
15
+ * ## The primitive: a per-process instance nonce
16
+ *
17
+ * Each `parachute serve` process generates a random nonce at boot, (a) exposes
18
+ * it as `instance` in `/health`, and (b) writes it to
19
+ * `~/.parachute/hub-instance.json` (0644). That file is the linchpin: an
20
+ * EXTERNAL process (`parachute status`, `parachute doctor`) can learn THIS
21
+ * hub's true identity from disk WITHOUT traversing the (possibly hijacked)
22
+ * loopback — then compare it to what a loopback `GET /health` actually returns.
23
+ * A mismatch means another process owns `127.0.0.1:<port>`.
24
+ *
25
+ * The in-process self-probe (armed by `serve`) compares its own in-memory nonce
26
+ * to the loopback `/health` it fetches, logs loudly on mismatch, and records the
27
+ * verdict back into the same file's `selfProbe` field so external tools surface
28
+ * the serve process's own authoritative reading without re-probing.
29
+ *
30
+ * Every side effect (fs, network probe) is behind an injectable seam so the
31
+ * whole module runs deterministically in tests with no real network / disk.
32
+ */
33
+
34
+ import { randomUUID } from "node:crypto";
35
+ import { existsSync, mkdirSync, readFileSync, renameSync, rmSync, writeFileSync } from "node:fs";
36
+ import { dirname, join } from "node:path";
37
+ import { CONFIG_DIR } from "./config.ts";
38
+
39
+ /** The public incident reference operators grep for. */
40
+ export const HIJACK_INCIDENT_REF = "hub#737 / team-vault Log/2026-07-02-port-exhaustion-incident";
41
+
42
+ /** Self-probe verdicts. `ok` = loopback reaches us; `hijacked` = someone else owns loopback; `unreachable` = loopback `/health` did not answer at all. */
43
+ export type SelfProbeStatus = "ok" | "hijacked" | "unreachable";
44
+
45
+ /**
46
+ * The serve process's own most-recent loopback self-probe reading, persisted
47
+ * into the instance file so external readers (`status`) see the authoritative
48
+ * verdict without re-probing the (possibly hijacked) loopback themselves.
49
+ */
50
+ export interface SelfProbeState {
51
+ status: SelfProbeStatus;
52
+ /** ISO timestamp of the reading. */
53
+ checkedAt: string;
54
+ /** The `instance` the loopback `/health` actually returned (present on a `hijacked` reading). */
55
+ observedInstance?: string;
56
+ /** One-line human detail (loud message on a hijack; the probe error class on unreachable). */
57
+ detail?: string;
58
+ }
59
+
60
+ /** The `~/.parachute/hub-instance.json` record. */
61
+ export interface HubInstanceRecord {
62
+ /** Per-process random nonce (`crypto.randomUUID`) minted at serve boot. */
63
+ instance: string;
64
+ /** The serve process PID (informational — helps an operator map the file to a process). */
65
+ pid: number;
66
+ /** The port this serve bound. */
67
+ port: number;
68
+ /** ISO timestamp of serve boot. */
69
+ startedAt: string;
70
+ /** Last self-probe reading, patched in by the running serve process. */
71
+ selfProbe?: SelfProbeState;
72
+ }
73
+
74
/**
 * Produce the random identity for one serve process.
 *
 * @returns A freshly generated UUID string from `crypto.randomUUID`.
 */
export function generateInstanceNonce(): string {
  const nonce = randomUUID();
  return nonce;
}
78
+
79
/**
 * Resolve where the instance record lives.
 *
 * @param configDir Directory holding the hub's config files; falls back to
 *   the imported `CONFIG_DIR` when omitted.
 * @returns `configDir` joined with the fixed file name `hub-instance.json`.
 */
export function hubInstancePath(configDir: string = CONFIG_DIR): string {
  const fileName = "hub-instance.json";
  return join(configDir, fileName);
}
83
+
84
/**
 * Persist the instance record via write-to-temp + rename so a reader never
 * observes a half-written file. The temp file is created with mode 0644.
 *
 * This is deliberately best-effort: every failure is caught, reported through
 * `opts.log` (when supplied) and turned into a `false` return, because the
 * file is a diagnostic aid and must not be able to stop the hub from starting.
 *
 * @param record The record to serialize (pretty-printed JSON + trailing newline).
 * @param opts.configDir Directory to write into (defaults per `hubInstancePath`).
 * @param opts.log Optional sink for the one-line failure message.
 * @returns `true` when the rename completed, `false` on any error.
 */
export function writeHubInstanceFile(
  record: HubInstanceRecord,
  opts: { configDir?: string; log?: (line: string) => void } = {},
): boolean {
  const target = hubInstancePath(opts.configDir);
  // pid + timestamp keep concurrent writers from sharing a temp name.
  const scratch = `${target}.tmp-${process.pid}-${Date.now()}`;
  try {
    const parent = dirname(target);
    if (!existsSync(parent)) {
      mkdirSync(parent, { recursive: true });
    }
    const serialized = `${JSON.stringify(record, null, 2)}\n`;
    writeFileSync(scratch, serialized, { mode: 0o644 });
    renameSync(scratch, target);
    return true;
  } catch (err) {
    // The write or the rename failed: remove any partial temp file.
    try {
      if (existsSync(scratch)) {
        rmSync(scratch, { force: true });
      }
    } catch {
      // Cleanup is best-effort as well.
    }
    if (opts.log) {
      const reason = err instanceof Error ? err.message : String(err);
      opts.log(
        `parachute serve: could not write ${target} (${reason}); loopback-hijack detection for external tools is degraded, hub start continues.`,
      );
    }
    return false;
  }
}
114
+
115
/**
 * Delete the instance file, ignoring every failure.
 *
 * Intended for graceful shutdown, so a cleanly stopped hub leaves no stale
 * identity or self-probe verdict behind for `status` / `doctor` to pick up.
 * A hard kill cannot run this.
 *
 * @param configDir Directory containing the file; falls back to `CONFIG_DIR`.
 */
export function clearHubInstanceFile(configDir: string = CONFIG_DIR): void {
  try {
    const target = hubInstancePath(configDir);
    // `force` makes an already-missing file a no-op rather than an error.
    rmSync(target, { force: true });
  } catch {
    // A file that cannot be removed is not worth surfacing.
  }
}
129
+
130
/**
 * Load the instance file and coerce it into a `HubInstanceRecord`.
 *
 * Returns `null` when the file is missing, unreadable, not JSON, not an
 * object, or lacks a non-empty string `instance` / numeric `port`. Optional
 * fields are defaulted (`pid` → -1, `startedAt` → ""). A `selfProbe` entry is
 * kept only when its `status` is one of the three known verdicts.
 *
 * @param configDir Directory containing the file; falls back to `CONFIG_DIR`.
 */
export function readHubInstanceFile(configDir: string = CONFIG_DIR): HubInstanceRecord | null {
  const target = hubInstancePath(configDir);
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(target, "utf8"));
  } catch {
    // Absent / unreadable / malformed all collapse to "no record".
    return null;
  }
  if (typeof parsed !== "object" || parsed === null) return null;

  const { instance, port, pid, startedAt, selfProbe } = parsed as Record<string, unknown>;
  if (typeof instance !== "string" || instance === "") return null;
  if (typeof port !== "number") return null;

  const record: HubInstanceRecord = {
    instance,
    pid: typeof pid === "number" ? pid : -1,
    port,
    startedAt: typeof startedAt === "string" ? startedAt : "",
  };

  if (typeof selfProbe !== "object" || selfProbe === null) return record;

  const { status, checkedAt, observedInstance, detail } = selfProbe as Record<string, unknown>;
  if (status === "ok" || status === "hijacked" || status === "unreachable") {
    record.selfProbe = {
      status,
      checkedAt: typeof checkedAt === "string" ? checkedAt : "",
      // Optional fields are copied only when they are strings.
      ...(typeof observedInstance === "string" ? { observedInstance } : {}),
      ...(typeof detail === "string" ? { detail } : {}),
    };
  }
  return record;
}
168
+
169
+ /** The result of probing a loopback `/health`. */
170
+ export interface LoopbackProbe {
171
+ /** The socket answered at all (any HTTP status). */
172
+ reachable: boolean;
173
+ /** HTTP status, when reachable. */
174
+ status?: number;
175
+ /** The `instance` field of the JSON body, when present + parseable. */
176
+ instance?: string;
177
+ /** True when the body self-identifies as a parachute hub (`service: "parachute-hub"`). */
178
+ isHub?: boolean;
179
+ }
180
+
181
/**
 * Fetch `http://127.0.0.1:<port>/health` and report who answered.
 *
 * The request is bounded by `AbortSignal.timeout` (1500 ms unless
 * `opts.timeoutMs` is given). Nothing is thrown for a failed request: any
 * error while fetching yields `{ reachable: false }`. A response whose body
 * cannot be read as JSON is still `reachable` — only `instance` / `isHub` are
 * left unset.
 *
 * @param port Loopback port to probe.
 * @param opts.timeoutMs Overall time budget in milliseconds.
 * @param opts.fetchFn Replacement for the global `fetch` (test seam).
 * @returns Reachability, HTTP status, and the identity fields found in the body.
 */
export async function probeLoopbackInstance(
  port: number,
  opts: { timeoutMs?: number; fetchFn?: typeof fetch } = {},
): Promise<LoopbackProbe> {
  const doFetch = opts.fetchFn ?? fetch;
  try {
    const url = `http://127.0.0.1:${port}/health`;
    const signal = AbortSignal.timeout(opts.timeoutMs ?? 1500);
    const response = await doFetch(url, { signal });
    const probe: LoopbackProbe = { reachable: true, status: response.status };
    try {
      const { instance, service } = (await response.json()) as Record<string, unknown>;
      if (typeof instance === "string") {
        probe.instance = instance;
      }
      if (service === "parachute-hub") {
        probe.isHub = true;
      }
    } catch {
      // Junk on the port is still an answer — a foreign process replying with
      // a non-JSON body is exactly what a hijack looks like.
    }
    return probe;
  } catch {
    return { reachable: false };
  }
}
208
+
209
/**
 * Turn a loopback probe into a verdict relative to our own nonce.
 *
 * - Nothing answered → `unreachable`.
 * - Something answered and echoed exactly our nonce → `ok`.
 * - Something answered with a different nonce, or with none → `hijacked`
 *   (another hub, or a foreign server, owns the loopback address).
 *
 * @param ourNonce The nonce this process generated at boot.
 * @param probe Result of probing loopback `/health`.
 */
export function classifyLoopback(ourNonce: string, probe: LoopbackProbe): SelfProbeStatus {
  if (probe.reachable) {
    return probe.instance === ourNonce ? "ok" : "hijacked";
  }
  return "unreachable";
}
221
+
222
/**
 * Build the multi-line hijack alert.
 *
 * The text names the failure class, the commands to diagnose it, and the
 * incident reference, so someone scanning logs can act without looking
 * anything up. It is meant to be emitted again on every probe that still
 * mismatches.
 *
 * @param port The port this hub bound.
 * @param observedInstance The nonce loopback `/health` returned, if any; when
 *   absent (or empty) the responder is described as a foreign process.
 * @returns The alert lines joined with `\n`.
 */
export function hijackAlertMessage(port: number, observedInstance?: string): string {
  let answeredBy: string;
  if (observedInstance) {
    answeredBy = `a DIFFERENT hub (instance=${observedInstance})`;
  } else {
    answeredBy = "a foreign process (no hub instance nonce in its /health)";
  }

  const lines: string[] = [];
  lines.push(
    `parachute serve: LOOPBACK HIJACK on 127.0.0.1:${port} — this hub bound the port but loopback /health is answered by ${answeredBy}.`,
  );
  lines.push(
    " Loopback traffic (module JWKS/API calls, CLI probes) is NOT reaching this hub — every hub-JWT validation downstream will fail.",
  );
  lines.push(
    ` A specific 127.0.0.1:${port} bind (commonly an OrbStack/container port-forward) wins over this hub's wildcard bind.`,
  );
  lines.push(` Diagnose: lsof -nP -iTCP:${port} -sTCP:LISTEN (expect ONE listener — this hub)`);
  lines.push(
    ` orb list (stop/delete any VM auto-forwarding ${port}, e.g. a leftover smoke-test machine)`,
  );
  lines.push(` Incident: ${HIJACK_INCIDENT_REF}`);
  return lines.join("\n");
}
241
+
242
/**
 * Build the one-line note for the "bound, but loopback did not answer" case.
 *
 * @param port The port this hub bound.
 * @returns The note text, naming `127.0.0.1:<port>`.
 */
export function unreachableNote(port: number): string {
  const endpoint = `127.0.0.1:${port}`;
  return `parachute serve: loopback /health on ${endpoint} did not answer, yet this hub is bound — transient, or another process is interfering with loopback. Watching (will re-probe).`;
}
246
+
247
+ // ---------------------------------------------------------------------------
248
+ // Self-probe timer (armed by `serve` after the listener is up)
249
+ // ---------------------------------------------------------------------------
250
+
251
+ export interface HubSelfProbe {
252
+ /** Stop the interval. */
253
+ stop(): void;
254
+ /** Run exactly one probe now (used for the immediate startup check + tests). */
255
+ probeOnce(): Promise<SelfProbeStatus>;
256
+ /** The most recent in-memory verdict (tests). */
257
+ getState(): SelfProbeState | undefined;
258
+ }
259
+
260
+ export interface HubSelfProbeDeps<H = unknown> {
261
+ /** Poll cadence in ms. Default 300_000 (5 min) — a safety net, not a hot path. */
262
+ intervalMs?: number;
263
+ /** Loopback probe (default {@link probeLoopbackInstance}). */
264
+ probe?: (port: number) => Promise<LoopbackProbe>;
265
+ /** Persist the verdict (default: patch the instance file's `selfProbe`). */
266
+ writeState?: (state: SelfProbeState) => void;
267
+ /** Loud log sink (default `console.error`). */
268
+ log?: (line: string) => void;
269
+ /** Clock seam (default `() => new Date()`). */
270
+ now?: () => Date;
271
+ /** Injectable scheduler (default `setInterval`). Tests drive ticks manually. */
272
+ setIntervalFn?: (cb: () => void, ms: number) => H;
273
+ /** Injectable clear (default `clearInterval`). */
274
+ clearIntervalFn?: (handle: H) => void;
275
+ }
276
+
277
/**
 * Arm the loopback self-probe. On each tick (and on an explicit `probeOnce`)
 * it fetches loopback `/health`, compares the returned instance to OUR nonce,
 * logs according to severity, and persists the verdict:
 *
 * - `hijacked`    → loud structured alert on EVERY probe, verdict persisted.
 * - `unreachable` → softer note, logged only when the state changes.
 * - `ok`          → recovery line logged once when clearing a prior non-ok verdict.
 *
 * By default the verdict is written to the instance file's `selfProbe` field
 * so external tools can read it without probing loopback themselves.
 *
 * Concurrency: at most one probe runs at a time. A `probeOnce` call that
 * arrives while a probe is in flight receives THAT probe's result (the same
 * promise) instead of a guessed status — in particular it no longer reports
 * `"ok"` before any verdict exists.
 *
 * Shutdown: once `stop()` has been called, a probe that is still in flight
 * (or started afterwards) no longer persists its verdict. Otherwise a late
 * write could re-create the instance file after graceful shutdown removed it.
 *
 * The interval handle is `unref`'d when it supports that, so the timer alone
 * never keeps the event loop alive.
 *
 * @param args.port Port this hub bound; the probe target.
 * @param args.nonce This process's instance nonce.
 * @param args.record Base record re-written together with each verdict.
 * @param args.configDir Optional config dir forwarded to the default writer.
 * @param deps Injectable seams (probe, persistence, log, clock, scheduler).
 * @returns Handle exposing `stop`, `probeOnce` and `getState`.
 */
export function armHubSelfProbe<H = ReturnType<typeof setInterval>>(
  args: { port: number; nonce: string; record: HubInstanceRecord; configDir?: string },
  deps: HubSelfProbeDeps<H> = {},
): HubSelfProbe {
  const { port, nonce, record } = args;
  const intervalMs = deps.intervalMs ?? 300_000;
  const probe = deps.probe ?? probeLoopbackInstance;
  const log = deps.log ?? ((line: string) => console.error(line));
  const now = deps.now ?? (() => new Date());
  const writeState =
    deps.writeState ??
    ((state: SelfProbeState) =>
      writeHubInstanceFile(
        { ...record, selfProbe: state },
        { ...(args.configDir !== undefined ? { configDir: args.configDir } : {}), log },
      ));
  const setIntervalFn =
    deps.setIntervalFn ?? ((cb: () => void, ms: number) => setInterval(cb, ms) as unknown as H);
  const clearIntervalFn =
    deps.clearIntervalFn ??
    ((h: H) => clearInterval(h as unknown as ReturnType<typeof setInterval>));

  let last: SelfProbeState | undefined;
  // The probe currently running, if any; shared with overlapping callers.
  let pending: Promise<SelfProbeStatus> | undefined;
  // Set by stop(); suppresses persistence from late-finishing probes.
  let stopped = false;

  /** Run one probe end to end: classify, log, remember, persist. */
  async function runProbe(): Promise<SelfProbeStatus> {
    const result = await probe(port);
    const status = classifyLoopback(nonce, result);
    const state: SelfProbeState = { status, checkedAt: now().toISOString() };
    if (status === "hijacked") {
      if (result.instance !== undefined) state.observedInstance = result.instance;
      state.detail = hijackAlertMessage(port, result.instance);
      // LOUD every probe — a hijack is a standing emergency, not a one-shot notice.
      log(state.detail);
    } else if (status === "unreachable") {
      state.detail = unreachableNote(port);
      // Only on a state change, so a flaky loopback does not spin the log.
      if (last?.status !== "unreachable") log(state.detail);
    } else {
      // ok — announce recovery once when clearing a prior non-ok verdict.
      if (last && last.status !== "ok") {
        log(
          `parachute serve: loopback /health on 127.0.0.1:${port} is back to this hub (instance=${nonce}). Hijack cleared.`,
        );
      }
    }
    last = state;
    if (!stopped) {
      try {
        writeState(state);
      } catch {
        // Persisting the verdict is best-effort; the log line already fired.
      }
    }
    return status;
  }

  function probeOnce(): Promise<SelfProbeStatus> {
    // Never stack probes: hand overlapping callers the in-flight result.
    if (pending) return pending;
    pending = runProbe().finally(() => {
      pending = undefined;
    });
    return pending;
  }

  const handle = setIntervalFn(() => {
    void probeOnce();
  }, intervalMs);
  (handle as { unref?: () => void }).unref?.();

  return {
    stop() {
      stopped = true;
      clearIntervalFn(handle);
    },
    probeOnce,
    getState() {
      return last;
    },
  };
}
package/src/hub-server.ts CHANGED
@@ -782,6 +782,52 @@ export function wsCapBucketKey(req: Request, peerAddr: string | null): string {
782
782
  return peer ?? WS_CAP_SHARED_BUCKET;
783
783
  }
784
784
 
785
+ /**
786
+ * Hop-by-hop headers (RFC 9110 §7.6.1) — connection-scoped, meaningful only
787
+ * on a single transport hop. An intermediary MUST NOT forward them, and the
788
+ * hub is exactly such an intermediary between the client and each loopback
789
+ * module.
790
+ *
791
+ * Forwarding a client's `Connection: close` verbatim was the P0 amplifier in
792
+ * the 2026-07-02 port exhaustion (hub#738): Bun's fetch honors the forwarded
793
+ * `Connection: close` and opens a FRESH ephemeral socket per proxied request
794
+ * instead of reusing its per-origin keep-alive pool. A hot client loop then
795
+ * converts request volume 1:1 into 30s TIME_WAIT entries (macOS: ~16k
796
+ * ephemeral ports), exhausting the range and taking out all host outbound.
797
+ * With these stripped, Bun reuses a handful of pooled sockets to each 127.0.0.1
798
+ * upstream regardless of what the client sends.
799
+ */
800
+ const HOP_BY_HOP_HEADERS = [
801
+ "connection",
802
+ "keep-alive",
803
+ "proxy-authenticate",
804
+ "proxy-authorization",
805
+ "te",
806
+ "trailer",
807
+ "transfer-encoding",
808
+ "upgrade",
809
+ ] as const;
810
+
811
+ /**
812
+ * Delete hop-by-hop headers from an outgoing proxy header bag (mutates in
813
+ * place). The `Connection` field-value can NAME further headers to drop
814
+ * (RFC 9110 §7.6.1 — e.g. `Connection: close, X-Custom`), so those tokens are
815
+ * collected and deleted before the standard set. WebSocket upgrades never
816
+ * reach the fetch-based proxy (the Bun-native bridge handles them before
817
+ * dispatch — see `proxyRequest`'s docstring), so dropping `Upgrade`/`Connection`
818
+ * here only ever touches non-declaring mounts, which see a plain request.
819
+ */
820
+ export function stripHopByHopHeaders(headers: Headers): void {
821
+ const connectionValue = headers.get("connection");
822
+ if (connectionValue) {
823
+ for (const token of connectionValue.split(",")) {
824
+ const named = token.trim().toLowerCase();
825
+ if (named) headers.delete(named);
826
+ }
827
+ }
828
+ for (const name of HOP_BY_HOP_HEADERS) headers.delete(name);
829
+ }
830
+
785
831
  /**
786
832
  * Forward a request to a loopback service on `127.0.0.1:<port>`. By default
787
833
  * the incoming pathname + query are preserved verbatim; pass `targetPath` to
@@ -847,6 +893,12 @@ async function proxyRequest(
847
893
  // Host comes from the requester (tailnet FQDN); the loopback target wants
848
894
  // its own. Bun's fetch fills it in when omitted.
849
895
  headers.delete("host");
896
+ // Strip hop-by-hop headers before forwarding (RFC 9110 §7.6.1). Critically
897
+ // this drops a client-supplied `Connection: close`, which Bun's fetch would
898
+ // otherwise honor by disabling keep-alive and burning a fresh ephemeral
899
+ // socket per request — the P0 amplifier in hub#738. Bun refills the
900
+ // connection framing for the upstream hop itself.
901
+ stripHopByHopHeaders(headers);
850
902
  // Force upstreams to reply with uncompressed bodies. The chrome-strip
851
903
  // injector (workstream G) buffers + TextDecoders the HTML response to
852
904
  // inject the persistent chrome; without this, a gzip- or br-compressed
@@ -887,6 +939,13 @@ async function proxyRequest(
887
939
  method: req.method,
888
940
  headers,
889
941
  redirect: "manual",
942
+ // Forward the incoming request's abort signal to the upstream hop. When a
943
+ // client hangs up mid-response, this aborts the loopback fetch so the
944
+ // upstream stops streaming to a gone client and its socket is released
945
+ // back to the pool (or closed) instead of running the full body out and
946
+ // holding the connection — a secondary socket-retention leak alongside the
947
+ // TIME_WAIT churn (hub#738). `req.signal` is present on Bun.serve requests.
948
+ signal: req.signal,
890
949
  };
891
950
  if (req.method !== "GET" && req.method !== "HEAD") {
892
951
  init.body = req.body;
@@ -895,6 +954,14 @@ async function proxyRequest(
895
954
  try {
896
955
  return await fetch(upstream, init);
897
956
  } catch (err) {
957
+ // Client hung up mid-flight: the upstream fetch was aborted via req.signal
958
+ // (forwarded above), not an upstream failure. The client is gone, so the
959
+ // response is discarded — don't run the boot-readiness classifier or
960
+ // render a "module unreachable" page that would misclassify a normal
961
+ // disconnect. 499 = client closed request (nginx convention).
962
+ if (req.signal?.aborted) {
963
+ return new Response(null, { status: 499 });
964
+ }
898
965
  const msg = err instanceof Error ? err.message : String(err);
899
966
  // Classify the failure (transient boot-window vs persistent crash) and
900
967
  // render either an HTML page or a JSON error per the request's Accept.
@@ -1261,6 +1328,17 @@ export interface HubFetchDeps {
1261
1328
  * rejected" because Origin ≠ tailnet issuer).
1262
1329
  */
1263
1330
  loopbackPort?: number;
1331
+ /**
1332
+ * This serve process's per-boot instance nonce (hub#737). When present it's
1333
+ * echoed as `instance` in `/health` so an external reader can tell whether a
1334
+ * loopback `/health` actually reached THIS hub or a foreign process that has
1335
+ * shadowed the port (the OrbStack loopback-hijack class). `serve` mints it,
1336
+ * writes it to `~/.parachute/hub-instance.json`, and threads it here; absent
1337
+ * on the DB-less / test / `bun src/hub-server.ts` paths, where `/health`
1338
+ * simply omits the field (additive — no consumer parses `/health` for
1339
+ * `instance` strictly).
1340
+ */
1341
+ instanceNonce?: string;
1264
1342
  /**
1265
1343
  * Test seam for reading `expose-state.json`'s `hubOrigin`. Production reads
1266
1344
  * the operator's `~/.parachute/expose-state.json` via `readExposeState`;
@@ -2314,7 +2392,17 @@ export function hubFetch(
2314
2392
  }
2315
2393
  }
2316
2394
  return new Response(
2317
- JSON.stringify({ status: "ok", service: "parachute-hub", version: pkg.version, db }),
2395
+ JSON.stringify({
2396
+ status: "ok",
2397
+ service: "parachute-hub",
2398
+ version: pkg.version,
2399
+ db,
2400
+ // Per-boot instance nonce (hub#737): lets an external reader detect a
2401
+ // loopback hijack (foreign process shadowing 127.0.0.1:<port>) by
2402
+ // comparing this to the nonce serve wrote to hub-instance.json.
2403
+ // Omitted when unset (DB-less / test / dev-entrypoint paths).
2404
+ ...(deps?.instanceNonce ? { instance: deps.instanceNonce } : {}),
2405
+ }),
2318
2406
  {
2319
2407
  headers: {
2320
2408
  "content-type": "application/json",
@@ -123,7 +123,7 @@ function packageNameFor(entryName: string): string | undefined {
123
123
  if (short === undefined) return undefined;
124
124
  const fb = FIRST_PARTY_FALLBACKS[short];
125
125
  if (fb) return fb.package;
126
- // KNOWN_MODULES (vault / scribe / runner — post hub#310 FALLBACK
126
+ // KNOWN_MODULES (vault / scribe / agent / surface — post hub#310 FALLBACK
127
127
  // retirement) carries the package name without an embedded manifest.
128
128
  return KNOWN_MODULES[short]?.package;
129
129
  }
@@ -322,7 +322,7 @@ const NOTES_FALLBACK: FirstPartyFallback = {
322
322
  * Indexed by short name (the `parachute install <X>` token).
323
323
  *
324
324
  * Only notes remains — see the block comment above for the rationale
325
- * (vault/scribe/runner/agent now self-register and ship their own
325
+ * (vault/scribe/agent now self-register and ship their own
326
326
  * module.json). Other code paths consult both this table AND `KNOWN_MODULES`
327
327
  * (which carries the post-self-register-retirement entries) via the helpers
328
328
  * in this file (`shortNameForManifest`, `knownServices`, …).
@@ -437,27 +437,17 @@ export const KNOWN_MODULES: Record<string, KnownModule> = {
437
437
  ],
438
438
  },
439
439
  },
440
- runner: {
441
- short: "runner",
442
- package: "@openparachute/runner",
443
- manifestName: "parachute-runner",
444
- canonicalPort: 1945,
445
- displayName: "Runner",
446
- tagline:
447
- "Vault-as-job-substrate engine spawns claude -p against vault job notes on schedule.",
448
- canonicalPaths: ["/runner", "/.parachute"],
449
- canonicalHealth: "/runner/healthz",
450
- canonicalStripPrefix: false,
451
- extras: {
452
- // Backward-compat startCmd — same rationale as scribe / vault above.
453
- startCmd: () => ["parachute-runner", "serve"],
454
- // Runner's HTTP routes (everything past `/healthz`) gate on a
455
- // hub-issued JWT carrying `runner:admin` scope (see runner's
456
- // `src/auth.ts`). Surfaces in `parachute status` as auth-required by
457
- // default, same posture as vault.
458
- hasAuth: true,
459
- },
460
- },
440
+ // NOTE (2026-07-01): `runner` was REMOVED from this registry (decision:
441
+ // Aaron 2026-07-01 — the module set of record is vault / hub / agent /
442
+ // scribe / surface). Runner is no longer offered, installable, or
443
+ // lifecycle-addressable by short name from the hub's bootstrap registries.
444
+ // Existing installs stay GRACEFUL: a legacy `parachute-runner` services.json
445
+ // row is handled exactly like any unknown/third-party row — `parachute
446
+ // status` renders it (short falls back to the row name), `parachute serve`
447
+ // boots it via `<installDir>/.parachute/module.json` when installDir is
448
+ // stamped and logs-and-skips otherwise. Deliberately NOT added to
449
+ // RETIRED_MODULES: that registry GC-drops rows on load, which would break
450
+ // routing for operators still running the runner daemon.
461
451
  agent: {
462
452
  short: "agent",
463
453
  package: "@openparachute/agent",
@@ -474,7 +464,7 @@ export const KNOWN_MODULES: Record<string, KnownModule> = {
474
464
  canonicalStripPrefix: true,
475
465
  extras: {
476
466
  // Backward-compat startCmd for rows without installDir — same rationale
477
- // as scribe / vault / runner. The bare binary IS the daemon (agent's
467
+ // as scribe / vault. The bare binary IS the daemon (agent's
478
468
  // package.json bin maps `parachute-agent` → src/daemon.ts).
479
469
  startCmd: () => ["parachute-agent"],
480
470
  // Agent gates its endpoints behind hub-issued JWTs (agent:* scopes).
@@ -498,14 +488,14 @@ export const KNOWN_MODULES: Record<string, KnownModule> = {
498
488
  canonicalHealth: "/surface/healthz",
499
489
  canonicalStripPrefix: false,
500
490
  extras: {
501
- // Backward-compat startCmd — same rationale as scribe / vault / runner
491
+ // Backward-compat startCmd — same rationale as scribe / vault
502
492
  // above. Post-self-register, lifecycle reads module.json's startCmd via
503
493
  // `composeKnownModuleSpec` and that path wins.
504
494
  startCmd: () => ["parachute-surface", "serve"],
505
495
  // Surface's admin + per-UI surfaces gate behind hub-issued JWTs (design
506
496
  // doc §6 same-hub auto-trust + scope `surface:admin`). Surfaces in
507
- // `parachute status` as auth-required by default, same posture as vault
508
- // + runner.
497
+ // `parachute status` as auth-required by default, same posture as
498
+ // vault.
509
499
  hasAuth: true,
510
500
  },
511
501
  },
@@ -646,10 +636,11 @@ export function knownServices(): string[] {
646
636
  * - `experimental` — agent (legit preview; still OFFERED on a fresh install)
647
637
  * + any unlisted third-party short.
648
638
  * - `deprecated` — notes (notes-daemon deprecated 2026-05-22; notes-ui moved
649
- * into parachute-surface) + runner (per Aaron 2026-06-25: not for new
650
- * installs). Still RESOLVABLE (discoverableShorts unchanged) and
651
- * SHOWN-IF-INSTALLED so an existing operator can manage/uninstall, but NOT
652
- * OFFERED on a fresh setup.
639
+ * into parachute-surface). Still RESOLVABLE (discoverableShorts unchanged)
640
+ * and SHOWN-IF-INSTALLED so an existing operator can manage/uninstall, but
641
+ * NOT OFFERED on a fresh setup. `runner` used to sit here too (deprecated
642
+ * 2026-06-25) until its full registry removal on 2026-07-01 — see the note
643
+ * in KNOWN_MODULES.
653
644
  *
654
645
  * **Show all installed; never hide** — `focus` groups + labels; the one
655
646
  * behavioral lever is the fresh-install OFFER, which drops `deprecated` shorts.
@@ -660,7 +651,6 @@ const FOCUS_DEFAULTS: Record<string, ModuleFocus> = {
660
651
  hub: "core",
661
652
  surface: "core",
662
653
  agent: "experimental",
663
- runner: "deprecated",
664
654
  notes: "deprecated",
665
655
  };
666
656
 
@@ -671,7 +661,7 @@ const FOCUS_DEFAULTS: Record<string, ModuleFocus> = {
671
661
  * returns undefined — the Modules screen always has a tier to group by.
672
662
  *
673
663
  * Tier semantics: `core`/`experimental` are both OFFERED on a fresh install;
674
- * `deprecated` (notes / runner) is NOT offered on a fresh setup but stays
664
+ * `deprecated` (notes) is NOT offered on a fresh setup but stays
675
665
  * resolvable + shown-if-installed (the `isKnownModuleShort` /
676
666
  * `discoverableShorts` resolution surface is unchanged). The fresh-install
677
667
  * filters in `setup.ts` + `api-modules.ts` consult this tier to drop
@@ -689,14 +679,16 @@ export function focusForShort(short: string, declared?: ModuleFocus): ModuleFocu
689
679
  * `CURATED_MODULES` whitelist (2026-06-09 modular-UI architecture, P2): every
690
680
  * module the hub can resolve a package/manifest for is discoverable + installable,
691
681
  * regardless of `focus` tier. Deduped, with FIRST_PARTY_FALLBACKS shorts first
692
- * (notes) then KNOWN_MODULES (vault / scribe / runner / agent / surface).
693
- *
694
- * `notes` (and `runner`) are intentionally included — still resolvable
695
- * (vendored fallback / KNOWN_MODULES) for legacy installs; they surface as
696
- * `deprecated` (2026-06-25) and aren't OFFERED on a fresh install. The
697
- * fresh-install OFFER (setup wizard + admin SPA) filters by tier
698
- * (`focus !== "deprecated"`); `discoverableShorts` itself stays the full
699
- * resolution surface so existing installs keep working.
682
+ * (notes) then KNOWN_MODULES (vault / scribe / agent / surface).
683
+ *
684
+ * `notes` is intentionally included — still resolvable (vendored fallback)
685
+ * for legacy installs; it surfaces as `deprecated` (2026-06-25) and isn't
686
+ * OFFERED on a fresh install. The fresh-install OFFER (setup wizard + admin
687
+ * SPA) filters by tier (`focus !== "deprecated"`); `discoverableShorts`
688
+ * itself stays the full resolution surface so existing installs keep working.
689
+ * `runner` is NOT here anymore (registry removal 2026-07-01 — see the
690
+ * KNOWN_MODULES note); a legacy runner install is handled as an
691
+ * unknown/third-party row.
700
692
  */
701
693
  export function discoverableShorts(): string[] {
702
694
  const seen = new Set<string>();
@@ -750,7 +742,7 @@ export function canonicalPortForManifest(manifestName: string): number | undefin
750
742
  * spec with embedded manifest + extras — the vendored manifest is the
751
743
  * source of truth pre-install and the install path preserves it through.
752
744
  *
753
- * KNOWN_MODULES shorts (vault / scribe / runner / agent / surface — post
745
+ * KNOWN_MODULES shorts (vault / scribe / agent / surface — post
754
746
  * FALLBACK retirement) return a **minimal** spec carrying `package`, `manifestName`,
755
747
  * and the imperative `extras` fields
756
748
  * (`init`, `hasAuth`, `urlForEntry`, `postInstallFooter`). They do NOT carry
@@ -860,7 +852,7 @@ const LEGACY_MANIFEST_ALIASES: Record<string, string> = {
860
852
 
861
853
  /** Short name for a given manifest name, e.g. `parachute-vault` → `vault`.
862
854
  * Consults both FIRST_PARTY_FALLBACKS (notes) and KNOWN_MODULES
863
- * (vault / scribe / runner / agent / surface — post-FALLBACK-retirement).
855
+ * (vault / scribe / agent / surface — post-FALLBACK-retirement).
864
856
  * Returns undefined for unknown manifests. */
865
857
  export function shortNameForManifest(manifestName: string): string | undefined {
866
858
  for (const [short, fb] of Object.entries(FIRST_PARTY_FALLBACKS)) {
@@ -887,7 +879,7 @@ export function shortNameForManifest(manifestName: string): string | undefined {
887
879
  * here — `shortNameForManifest` only knows the canonical `parachute-vault`, so
888
880
  * `findServiceByShort(services, "vault")` returns undefined even when a vault is
889
881
  * installed. Vault rows are resolved by mount path via `findVaultUpstream`; this
890
- * helper is for single-instance modules (agent / scribe / runner / surface).
882
+ * helper is for single-instance modules (agent / scribe / surface).
891
883
  */
892
884
  export function findServiceByShort<T extends { name: string }>(
893
885
  services: readonly T[],
@@ -901,7 +893,7 @@ export function findServiceByShort<T extends { name: string }>(
901
893
  * manifest data the caller has on hand (typically read from
902
894
  * `<installDir>/.parachute/module.json`).
903
895
  *
904
- * Used at install-time and lifecycle-time for vault / scribe / runner
896
+ * Used at install-time and lifecycle-time for vault / scribe / surface
905
897
  * where hub no longer vendors the manifest (services.json + module.json
906
898
  * are authoritative) but still needs the imperative `extras` bits
907
899
  * (`init`, `postInstallFooter`, `urlForEntry`, `hasAuth`) the CLI install