@openparachute/hub 0.7.5 → 0.7.6-rc.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/admin-module-token.test.ts +40 -3
- package/src/__tests__/api-modules-ops.test.ts +8 -3
- package/src/__tests__/api-modules.test.ts +26 -18
- package/src/__tests__/connections-store.test.ts +84 -0
- package/src/__tests__/doctor.test.ts +131 -0
- package/src/__tests__/git-notify.test.ts +29 -1
- package/src/__tests__/grants-store.test.ts +33 -1
- package/src/__tests__/hub-instance.test.ts +297 -0
- package/src/__tests__/hub-server.test.ts +169 -0
- package/src/__tests__/install.test.ts +28 -0
- package/src/__tests__/serve-boot.test.ts +60 -0
- package/src/__tests__/service-spec-discovery.test.ts +32 -9
- package/src/__tests__/setup.test.ts +64 -16
- package/src/__tests__/stale-module-units.test.ts +1 -1
- package/src/__tests__/status-supervisor.test.ts +112 -0
- package/src/admin-connections.ts +5 -1
- package/src/admin-module-token.ts +2 -2
- package/src/api-modules-ops.ts +3 -3
- package/src/api-modules.ts +13 -13
- package/src/commands/doctor.ts +167 -4
- package/src/commands/install.ts +29 -3
- package/src/commands/migrate.ts +5 -0
- package/src/commands/serve.ts +52 -0
- package/src/commands/setup.ts +10 -9
- package/src/commands/status.ts +42 -1
- package/src/connections-store.ts +15 -2
- package/src/git-notify.ts +34 -5
- package/src/grants-store.ts +15 -2
- package/src/help.ts +3 -3
- package/src/hub-instance.ts +365 -0
- package/src/hub-server.ts +89 -1
- package/src/install-source.ts +1 -1
- package/src/service-spec.ts +36 -44
- package/src/services-manifest.ts +1 -1
- package/src/stale-module-units.ts +2 -2
- package/src/well-known.ts +3 -3
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hub instance identity + loopback-hijack detection (hub#737).
|
|
3
|
+
*
|
|
4
|
+
* ## The incident this defends against (2026-07-02 P0)
|
|
5
|
+
*
|
|
6
|
+
* The hub binds `*:1939` (INADDR_ANY / `0.0.0.0`). An OrbStack Linux machine
|
|
7
|
+
* auto-forwarded ITS port 1939 onto the host as a SPECIFIC bind on
|
|
8
|
+
* `127.0.0.1:1939` — and a specific loopback bind WINS over a wildcard bind for
|
|
9
|
+
* all loopback traffic. Every module's JWKS/API call to `127.0.0.1:1939`
|
|
10
|
+
* silently reached the WRONG hub (a fresh container DB → empty JWKS, no admin),
|
|
11
|
+
* so every hub-JWT validation failed `no applicable key found in the JWKS` and
|
|
12
|
+
* the ecosystem 401-looped for hours. `lsof -nP -i :1939` showed two LISTENs;
|
|
13
|
+
* the tell was `/health` reporting the container's version, not the checkout's.
|
|
14
|
+
*
|
|
15
|
+
* ## The primitive: a per-process instance nonce
|
|
16
|
+
*
|
|
17
|
+
* Each `parachute serve` process generates a random nonce at boot, (a) exposes
|
|
18
|
+
* it as `instance` in `/health`, and (b) writes it to
|
|
19
|
+
* `~/.parachute/hub-instance.json` (0644). That file is the linchpin: an
|
|
20
|
+
* EXTERNAL process (`parachute status`, `parachute doctor`) can learn THIS
|
|
21
|
+
* hub's true identity from disk WITHOUT traversing the (possibly hijacked)
|
|
22
|
+
* loopback — then compare it to what a loopback `GET /health` actually returns.
|
|
23
|
+
* A mismatch means another process owns `127.0.0.1:<port>`.
|
|
24
|
+
*
|
|
25
|
+
* The in-process self-probe (armed by `serve`) compares its own in-memory nonce
|
|
26
|
+
* to the loopback `/health` it fetches, logs loudly on mismatch, and records the
|
|
27
|
+
* verdict back into the same file's `selfProbe` field so external tools surface
|
|
28
|
+
* the serve process's own authoritative reading without re-probing.
|
|
29
|
+
*
|
|
30
|
+
* Every side effect (fs, network probe) is behind an injectable seam so the
|
|
31
|
+
* whole module runs deterministically in tests with no real network / disk.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
import { randomUUID } from "node:crypto";
|
|
35
|
+
import { existsSync, mkdirSync, readFileSync, renameSync, rmSync, writeFileSync } from "node:fs";
|
|
36
|
+
import { dirname, join } from "node:path";
|
|
37
|
+
import { CONFIG_DIR } from "./config.ts";
|
|
38
|
+
|
|
39
|
+
/**
 * Public incident reference for the loopback-hijack class. Kept as a single
 * exported constant so the exact text operators grep for lives in one place.
 */
export const HIJACK_INCIDENT_REF = "hub#737 / team-vault Log/2026-07-02-port-exhaustion-incident";
|
|
41
|
+
|
|
42
|
+
/**
 * Verdict of one loopback self-probe:
 * - `ok`          — loopback `/health` returned this hub's own nonce.
 * - `hijacked`    — loopback answered, but not with this hub's nonce.
 * - `unreachable` — loopback did not answer at all.
 */
export type SelfProbeStatus =
  | "ok"
  | "hijacked"
  | "unreachable";
|
|
44
|
+
|
|
45
|
+
/**
 * One self-probe reading taken by the serve process. It is stored in the
 * instance file so external readers (`status`) can show the serve process's
 * own verdict instead of probing a possibly-hijacked loopback themselves.
 */
export interface SelfProbeState {
  /** The verdict of this reading. */
  status: SelfProbeStatus;
  /** When the reading was taken, as an ISO timestamp. */
  checkedAt: string;
  /** `instance` value the loopback `/health` actually returned (set on a `hijacked` reading). */
  observedInstance?: string;
  /** Human-readable detail: the hijack alert text, or the unreachable note. */
  detail?: string;
}
|
|
59
|
+
|
|
60
|
+
/** Shape of the record stored in `~/.parachute/hub-instance.json`. */
export interface HubInstanceRecord {
  /** Random per-process nonce (`crypto.randomUUID`) minted when serve boots. */
  instance: string;
  /** PID of the serve process — informational, to map the file to a process. */
  pid: number;
  /** Port the serve process bound. */
  port: number;
  /** Serve boot time, as an ISO timestamp. */
  startedAt: string;
  /** Most recent self-probe reading, patched in by the running serve process. */
  selfProbe?: SelfProbeState;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Produce a new random UUID to serve as a process's instance nonce.
 *
 * @returns A fresh `crypto.randomUUID()` string.
 */
export function generateInstanceNonce(): string {
  const nonce = randomUUID();
  return nonce;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Resolve the location of the instance file inside a config directory.
 *
 * @param configDir Directory holding hub state; defaults to `CONFIG_DIR`.
 * @returns `<configDir>/hub-instance.json`.
 */
export function hubInstancePath(configDir: string = CONFIG_DIR): string {
  const fileName = "hub-instance.json";
  return join(configDir, fileName);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Persist the instance record by writing a temp file (mode 0644) and renaming
 * it over the target. The write is best-effort: the file is a diagnostic aid,
 * so a failure is reported through `opts.log` and never thrown.
 *
 * @param record The record to serialize (pretty-printed JSON + trailing newline).
 * @param opts.configDir Config directory override, forwarded to `hubInstancePath`.
 * @param opts.log Optional sink for the one-line failure message.
 * @returns `true` when the file was written and renamed, `false` on any error.
 */
export function writeHubInstanceFile(
  record: HubInstanceRecord,
  opts: { configDir?: string; log?: (line: string) => void } = {},
): boolean {
  const target = hubInstancePath(opts.configDir);
  const scratch = `${target}.tmp-${process.pid}-${Date.now()}`;
  try {
    const parent = dirname(target);
    if (!existsSync(parent)) mkdirSync(parent, { recursive: true });
    const payload = `${JSON.stringify(record, null, 2)}\n`;
    writeFileSync(scratch, payload, { mode: 0o644 });
    renameSync(scratch, target);
  } catch (err) {
    // Whether the write or the rename failed, don't leave the temp file behind.
    try {
      if (existsSync(scratch)) rmSync(scratch, { force: true });
    } catch {
      // Cleanup is best-effort too — nothing further to do.
    }
    if (opts.log) {
      opts.log(
        `parachute serve: could not write ${target} (${err instanceof Error ? err.message : String(err)}); loopback-hijack detection for external tools is degraded, hub start continues.`,
      );
    }
    return false;
  }
  return true;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Delete the instance file, ignoring every failure. Intended for graceful
 * shutdown, so a cleanly stopped hub leaves no stale identity or self-probe
 * verdict on disk for `status` / `doctor` to read. A hard kill (SIGKILL) cannot
 * run this.
 *
 * @param configDir Config directory; defaults to `CONFIG_DIR`.
 */
export function clearHubInstanceFile(configDir: string = CONFIG_DIR): void {
  try {
    const target = hubInstancePath(configDir);
    rmSync(target, { force: true });
  } catch {
    // A missing or unremovable file is not worth surfacing.
  }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Load the instance file and validate its shape.
 *
 * `instance` (non-empty string) and `port` (number) are required. `pid` and
 * `startedAt` fall back to `-1` and `""` when missing or mistyped. `selfProbe`
 * is kept only when its `status` is one of the known verdicts.
 *
 * @param configDir Config directory; defaults to `CONFIG_DIR`.
 * @returns The validated record, or `null` when the file is absent, unreadable
 *   or malformed. A missing file is a normal state, never an error.
 */
export function readHubInstanceFile(configDir: string = CONFIG_DIR): HubInstanceRecord | null {
  const file = hubInstancePath(configDir);
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(file, "utf8"));
  } catch {
    return null;
  }
  if (typeof parsed !== "object" || parsed === null) return null;

  const fields = parsed as Record<string, unknown>;
  const { instance, port } = fields;
  if (typeof instance !== "string" || instance === "") return null;
  if (typeof port !== "number") return null;

  const record: HubInstanceRecord = {
    instance,
    pid: typeof fields.pid === "number" ? fields.pid : -1,
    port,
    startedAt: typeof fields.startedAt === "string" ? fields.startedAt : "",
  };

  // The self-probe section is optional; skip it unless it is a well-formed object.
  const probeRaw = fields.selfProbe;
  if (typeof probeRaw !== "object" || probeRaw === null) return record;

  const probeFields = probeRaw as Record<string, unknown>;
  const verdict = probeFields.status;
  if (verdict !== "ok" && verdict !== "hijacked" && verdict !== "unreachable") return record;

  const reading: SelfProbeState = {
    status: verdict,
    checkedAt: typeof probeFields.checkedAt === "string" ? probeFields.checkedAt : "",
  };
  if (typeof probeFields.observedInstance === "string") {
    reading.observedInstance = probeFields.observedInstance;
  }
  if (typeof probeFields.detail === "string") {
    reading.detail = probeFields.detail;
  }
  record.selfProbe = reading;
  return record;
}
|
|
168
|
+
|
|
169
|
+
/** What a single probe of a loopback `/health` endpoint observed. */
export interface LoopbackProbe {
  /** True when the socket answered at all, whatever the HTTP status. */
  reachable: boolean;
  /** The HTTP status code, set when reachable. */
  status?: number;
  /** `instance` from the JSON body, set when the body parsed and carried a string value. */
  instance?: string;
  /** True when the body identifies itself with `service: "parachute-hub"`. */
  isHub?: boolean;
}
|
|
180
|
+
|
|
181
|
+
/**
 * Fetch `http://127.0.0.1:<port>/health` and pull the instance identity out of
 * the response. The request is bounded by a timeout and the function never
 * throws.
 *
 * @param port Loopback port to probe.
 * @param opts.timeoutMs Abort timeout in milliseconds (default 1500).
 * @param opts.fetchFn Fetch implementation to use (default: global `fetch`).
 * @returns `{ reachable: false }` on any request failure. Otherwise the HTTP
 *   status, plus `instance` / `isHub` when the JSON body provides them.
 */
export async function probeLoopbackInstance(
  port: number,
  opts: { timeoutMs?: number; fetchFn?: typeof fetch } = {},
): Promise<LoopbackProbe> {
  const doFetch = opts.fetchFn ?? fetch;
  try {
    const url = `http://127.0.0.1:${port}/health`;
    const response = await doFetch(url, {
      signal: AbortSignal.timeout(opts.timeoutMs ?? 1500),
    });
    const observed: LoopbackProbe = { reachable: true, status: response.status };
    try {
      const payload = (await response.json()) as Record<string, unknown>;
      const { instance, service } = payload;
      if (typeof instance === "string") observed.instance = instance;
      if (service === "parachute-hub") observed.isHub = true;
    } catch {
      // An unparseable body still means something answered the port. A foreign
      // process replying with junk is exactly the hijack shape, so stay "reachable".
    }
    return observed;
  } catch {
    return { reachable: false };
  }
}
|
|
208
|
+
|
|
209
|
+
/**
 * Turn a loopback probe into a verdict by comparing it with our true nonce.
 *
 * - Probe not reachable → `unreachable` (we bound the port, yet loopback
 *   refused or timed out — suspicious but soft).
 * - Reachable and `instance` equals `ourNonce` → `ok`.
 * - Reachable with any other (or no) `instance` → `hijacked`: a different
 *   process owns loopback, whether another hub or a foreign server.
 *
 * @param ourNonce The nonce this serve process minted.
 * @param probe The observation to classify.
 */
export function classifyLoopback(ourNonce: string, probe: LoopbackProbe): SelfProbeStatus {
  if (probe.reachable) {
    return probe.instance === ourNonce ? "ok" : "hijacked";
  }
  return "unreachable";
}
|
|
221
|
+
|
|
222
|
+
/**
 * Build the loud, multi-line hijack alert. It names the failure class, the
 * diagnosis commands and the incident reference, so an operator scanning logs
 * can act at once. The text is deliberately repeated on every mismatched probe.
 *
 * @param port The loopback port that is being answered by someone else.
 * @param observedInstance The `instance` the foreign `/health` returned, if any.
 * @returns The alert lines joined with `\n`.
 */
export function hijackAlertMessage(port: number, observedInstance?: string): string {
  let answeredBy: string;
  if (observedInstance) {
    answeredBy = `a DIFFERENT hub (instance=${observedInstance})`;
  } else {
    answeredBy = "a foreign process (no hub instance nonce in its /health)";
  }

  const headline = `parachute serve: LOOPBACK HIJACK on 127.0.0.1:${port} — this hub bound the port but loopback /health is answered by ${answeredBy}.`;
  const impact =
    " Loopback traffic (module JWKS/API calls, CLI probes) is NOT reaching this hub — every hub-JWT validation downstream will fail.";
  const cause = ` A specific 127.0.0.1:${port} bind (commonly an OrbStack/container port-forward) wins over this hub's wildcard bind.`;
  const lsofHint = ` Diagnose: lsof -nP -iTCP:${port} -sTCP:LISTEN (expect ONE listener — this hub)`;
  const orbHint = ` orb list (stop/delete any VM auto-forwarding ${port}, e.g. a leftover smoke-test machine)`;
  const incident = ` Incident: ${HIJACK_INCIDENT_REF}`;

  return [headline, impact, cause, lsofHint, orbHint, incident].join("\n");
}
|
|
241
|
+
|
|
242
|
+
/**
 * Build the softer one-line note for a loopback that did not answer although
 * this hub is bound to the port.
 *
 * @param port The loopback port that failed to answer.
 */
export function unreachableNote(port: number): string {
  const endpoint = `127.0.0.1:${port}`;
  return `parachute serve: loopback /health on ${endpoint} did not answer, yet this hub is bound — transient, or another process is interfering with loopback. Watching (will re-probe).`;
}
|
|
246
|
+
|
|
247
|
+
// ---------------------------------------------------------------------------
|
|
248
|
+
// Self-probe timer (armed by `serve` after the listener is up)
|
|
249
|
+
// ---------------------------------------------------------------------------
|
|
250
|
+
|
|
251
|
+
/** Handle returned when the loopback self-probe is armed. */
export interface HubSelfProbe {
  /** Cancel the recurring probe. */
  stop(): void;
  /** Perform a single probe right away (startup check and tests). */
  probeOnce(): Promise<SelfProbeStatus>;
  /** Latest verdict held in memory, or `undefined` before the first probe (tests). */
  getState(): SelfProbeState | undefined;
}
|
|
259
|
+
|
|
260
|
+
/**
 * Injectable seams for the self-probe. Every field is optional; `H` is the
 * timer-handle type produced by `setIntervalFn` and consumed by
 * `clearIntervalFn`.
 */
export interface HubSelfProbeDeps<H = unknown> {
  /** Milliseconds between probes. Default 300_000 (5 min) — a safety net, not a hot path. */
  intervalMs?: number;
  /** Loopback probe implementation (default {@link probeLoopbackInstance}). */
  probe?: (port: number) => Promise<LoopbackProbe>;
  /** Verdict persistence (default: patch `selfProbe` in the instance file). */
  writeState?: (state: SelfProbeState) => void;
  /** Sink for the loud log lines (default `console.error`). */
  log?: (line: string) => void;
  /** Clock (default `() => new Date()`). */
  now?: () => Date;
  /** Scheduler (default `setInterval`); tests use it to drive ticks by hand. */
  setIntervalFn?: (cb: () => void, ms: number) => H;
  /** Counterpart that cancels the scheduler's handle (default `clearInterval`). */
  clearIntervalFn?: (handle: H) => void;
}
|
|
276
|
+
|
|
277
|
+
/**
 * Arm the loopback self-probe. Each tick (and each direct `probeOnce` call)
 * fetches loopback `/health`, compares the returned instance with OUR nonce,
 * logs according to severity, and persists the verdict:
 *
 *   - `hijacked`    → loud structured alert on EVERY probe (standing emergency).
 *   - `unreachable` → softer note, logged only when the state changes, so a
 *                     flaky loopback does not spin the log.
 *   - `ok`          → one recovery line when clearing a prior non-ok verdict.
 *
 * The verdict is handed to `writeState` (default: the instance file's
 * `selfProbe` field) so external tools read the serve process's own reading.
 * Overlapping probes are guarded: while one is in flight, a second call
 * returns the last known status (or `"ok"` when there is none yet) without
 * probing. The timer handle is `unref`'d when it supports that.
 *
 * The probe is a diagnostic and must never take the hub down. A probe function
 * that throws or rejects is therefore recorded as `unreachable`, and any other
 * failure during a timer tick is contained instead of becoming an unhandled
 * rejection.
 *
 * @param args.port Loopback port this hub bound.
 * @param args.nonce This process's instance nonce.
 * @param args.record Base record that the default `writeState` re-writes with `selfProbe` patched in.
 * @param args.configDir Config directory override for the default `writeState`.
 * @param deps Injectable seams (probe, persistence, log, clock, scheduler).
 * @returns A handle exposing `stop`, `probeOnce` and `getState`.
 */
export function armHubSelfProbe<H = ReturnType<typeof setInterval>>(
  args: { port: number; nonce: string; record: HubInstanceRecord; configDir?: string },
  deps: HubSelfProbeDeps<H> = {},
): HubSelfProbe {
  const { port, nonce, record } = args;
  const intervalMs = deps.intervalMs ?? 300_000;
  const probe = deps.probe ?? probeLoopbackInstance;
  const log = deps.log ?? ((line: string) => console.error(line));
  const now = deps.now ?? (() => new Date());
  const writeState =
    deps.writeState ??
    ((state: SelfProbeState) =>
      writeHubInstanceFile(
        { ...record, selfProbe: state },
        { ...(args.configDir !== undefined ? { configDir: args.configDir } : {}), log },
      ));
  const setIntervalFn =
    deps.setIntervalFn ?? ((cb: () => void, ms: number) => setInterval(cb, ms) as unknown as H);
  const clearIntervalFn =
    deps.clearIntervalFn ??
    ((h: H) => clearInterval(h as unknown as ReturnType<typeof setInterval>));

  let last: SelfProbeState | undefined;
  let inFlight = false;

  async function probeOnce(): Promise<SelfProbeStatus> {
    // A slow probe must never stack: report the last known verdict instead.
    if (inFlight) return last?.status ?? "ok";
    inFlight = true;
    try {
      let result: LoopbackProbe;
      try {
        result = await probe(port);
      } catch {
        // The default probe never throws, but an injected one may. A probe that
        // cannot complete is indistinguishable from "loopback did not answer".
        result = { reachable: false };
      }
      const status = classifyLoopback(nonce, result);
      const state: SelfProbeState = { status, checkedAt: now().toISOString() };
      if (status === "hijacked") {
        if (result.instance !== undefined) state.observedInstance = result.instance;
        state.detail = hijackAlertMessage(port, result.instance);
        // LOUD every tick — a hijack is a standing emergency, not a one-shot notice.
        log(state.detail);
      } else if (status === "unreachable") {
        state.detail = unreachableNote(port);
        if (last?.status !== "unreachable") log(state.detail);
      } else {
        // ok — announce recovery once when clearing a prior non-ok verdict.
        if (last && last.status !== "ok") {
          log(
            `parachute serve: loopback /health on 127.0.0.1:${port} is back to this hub (instance=${nonce}). Hijack cleared.`,
          );
        }
      }
      last = state;
      try {
        writeState(state);
      } catch {
        // Persisting the verdict is best-effort; the loud log already fired.
      }
      return status;
    } finally {
      inFlight = false;
    }
  }

  const handle = setIntervalFn(() => {
    // Nobody awaits a timer tick, so contain every failure here (for example a
    // log sink that throws); otherwise it surfaces as an unhandled rejection.
    probeOnce().catch(() => {
      // Nothing more to do: the log sink itself may be what failed.
    });
  }, intervalMs);
  // Don't let the timer keep the event loop alive on its own.
  (handle as { unref?: () => void }).unref?.();

  return {
    stop() {
      clearIntervalFn(handle);
    },
    probeOnce,
    getState() {
      return last;
    },
  };
}
|
package/src/hub-server.ts
CHANGED
|
@@ -782,6 +782,52 @@ export function wsCapBucketKey(req: Request, peerAddr: string | null): string {
|
|
|
782
782
|
return peer ?? WS_CAP_SHARED_BUCKET;
|
|
783
783
|
}
|
|
784
784
|
|
|
785
|
+
/**
 * Hop-by-hop headers (RFC 9110 §7.6.1) — connection-scoped, meaningful only
 * on a single transport hop. An intermediary MUST NOT forward them, and the
 * hub is exactly such an intermediary between the client and each loopback
 * module.
 *
 * Forwarding a client's `Connection: close` verbatim was the P0 amplifier in
 * the 2026-07-02 port exhaustion (hub#738): Bun's fetch honors the forwarded
 * `Connection: close` and opens a FRESH ephemeral socket per proxied request
 * instead of reusing its per-origin keep-alive pool. A hot client loop then
 * converts request volume 1:1 into 30s TIME_WAIT entries (macOS: ~16k
 * ephemeral ports), exhausting the range and taking out all host outbound.
 * With these stripped, Bun reuses a handful of pooled sockets to each 127.0.0.1
 * upstream regardless of what the client sends.
 */
const HOP_BY_HOP_HEADERS = [
  "connection",
  "keep-alive",
  "proxy-authenticate",
  "proxy-authorization",
  "te",
  "trailer",
  "transfer-encoding",
  "upgrade",
] as const;

/**
 * RFC 9110 §5.6.2 `token` grammar, lowercase form. Connection options are
 * client-controlled text; only well-formed tokens can be header names.
 */
const CONNECTION_OPTION_TOKEN = /^[!#$%&'*+\-.^_`|~0-9a-z]+$/;

/**
 * Delete hop-by-hop headers from an outgoing proxy header bag (mutates in
 * place). The `Connection` field-value can NAME further headers to drop
 * (RFC 9110 §7.6.1 — e.g. `Connection: close, X-Custom`), so those tokens are
 * collected and deleted before the standard set.
 *
 * The `Connection` value comes from the client and is untrusted.
 * `Headers.delete` throws a `TypeError` for a name that is not a valid HTTP
 * token, so malformed options (e.g. `bad token`, `"quoted"`) are skipped
 * rather than allowed to throw in the proxy path. They cannot name a real
 * header anyway.
 *
 * WebSocket upgrades never reach the fetch-based proxy (the Bun-native bridge
 * handles them before dispatch — see `proxyRequest`'s docstring), so dropping
 * `Upgrade`/`Connection` here only ever touches non-declaring mounts, which
 * see a plain request.
 *
 * @param headers Header bag to clean; modified in place.
 */
export function stripHopByHopHeaders(headers: Headers): void {
  const connectionValue = headers.get("connection");
  if (connectionValue) {
    for (const token of connectionValue.split(",")) {
      const named = token.trim().toLowerCase();
      // Empty or malformed options are ignored (see docstring).
      if (named && CONNECTION_OPTION_TOKEN.test(named)) headers.delete(named);
    }
  }
  for (const name of HOP_BY_HOP_HEADERS) headers.delete(name);
}
|
|
830
|
+
|
|
785
831
|
/**
|
|
786
832
|
* Forward a request to a loopback service on `127.0.0.1:<port>`. By default
|
|
787
833
|
* the incoming pathname + query are preserved verbatim; pass `targetPath` to
|
|
@@ -847,6 +893,12 @@ async function proxyRequest(
|
|
|
847
893
|
// Host comes from the requester (tailnet FQDN); the loopback target wants
|
|
848
894
|
// its own. Bun's fetch fills it in when omitted.
|
|
849
895
|
headers.delete("host");
|
|
896
|
+
// Strip hop-by-hop headers before forwarding (RFC 9110 §7.6.1). Critically
|
|
897
|
+
// this drops a client-supplied `Connection: close`, which Bun's fetch would
|
|
898
|
+
// otherwise honor by disabling keep-alive and burning a fresh ephemeral
|
|
899
|
+
// socket per request — the P0 amplifier in hub#738. Bun refills the
|
|
900
|
+
// connection framing for the upstream hop itself.
|
|
901
|
+
stripHopByHopHeaders(headers);
|
|
850
902
|
// Force upstreams to reply with uncompressed bodies. The chrome-strip
|
|
851
903
|
// injector (workstream G) buffers + TextDecoders the HTML response to
|
|
852
904
|
// inject the persistent chrome; without this, a gzip- or br-compressed
|
|
@@ -887,6 +939,13 @@ async function proxyRequest(
|
|
|
887
939
|
method: req.method,
|
|
888
940
|
headers,
|
|
889
941
|
redirect: "manual",
|
|
942
|
+
// Forward the incoming request's abort signal to the upstream hop. When a
|
|
943
|
+
// client hangs up mid-response, this aborts the loopback fetch so the
|
|
944
|
+
// upstream stops streaming to a gone client and its socket is released
|
|
945
|
+
// back to the pool (or closed) instead of running the full body out and
|
|
946
|
+
// holding the connection — a secondary socket-retention leak alongside the
|
|
947
|
+
// TIME_WAIT churn (hub#738). `req.signal` is present on Bun.serve requests.
|
|
948
|
+
signal: req.signal,
|
|
890
949
|
};
|
|
891
950
|
if (req.method !== "GET" && req.method !== "HEAD") {
|
|
892
951
|
init.body = req.body;
|
|
@@ -895,6 +954,14 @@ async function proxyRequest(
|
|
|
895
954
|
try {
|
|
896
955
|
return await fetch(upstream, init);
|
|
897
956
|
} catch (err) {
|
|
957
|
+
// Client hung up mid-flight: the upstream fetch was aborted via req.signal
|
|
958
|
+
// (forwarded above), not an upstream failure. The client is gone, so the
|
|
959
|
+
// response is discarded — don't run the boot-readiness classifier or
|
|
960
|
+
// render a "module unreachable" page that would misclassify a normal
|
|
961
|
+
// disconnect. 499 = client closed request (nginx convention).
|
|
962
|
+
if (req.signal?.aborted) {
|
|
963
|
+
return new Response(null, { status: 499 });
|
|
964
|
+
}
|
|
898
965
|
const msg = err instanceof Error ? err.message : String(err);
|
|
899
966
|
// Classify the failure (transient boot-window vs persistent crash) and
|
|
900
967
|
// render either an HTML page or a JSON error per the request's Accept.
|
|
@@ -1261,6 +1328,17 @@ export interface HubFetchDeps {
|
|
|
1261
1328
|
* rejected" because Origin ≠ tailnet issuer).
|
|
1262
1329
|
*/
|
|
1263
1330
|
loopbackPort?: number;
|
|
1331
|
+
/**
|
|
1332
|
+
* This serve process's per-boot instance nonce (hub#737). When present it's
|
|
1333
|
+
* echoed as `instance` in `/health` so an external reader can tell whether a
|
|
1334
|
+
* loopback `/health` actually reached THIS hub or a foreign process that has
|
|
1335
|
+
* shadowed the port (the OrbStack loopback-hijack class). `serve` mints it,
|
|
1336
|
+
* writes it to `~/.parachute/hub-instance.json`, and threads it here; absent
|
|
1337
|
+
* on the DB-less / test / `bun src/hub-server.ts` paths, where `/health`
|
|
1338
|
+
* simply omits the field (additive — no consumer parses `/health` for
|
|
1339
|
+
* `instance` strictly).
|
|
1340
|
+
*/
|
|
1341
|
+
instanceNonce?: string;
|
|
1264
1342
|
/**
|
|
1265
1343
|
* Test seam for reading `expose-state.json`'s `hubOrigin`. Production reads
|
|
1266
1344
|
* the operator's `~/.parachute/expose-state.json` via `readExposeState`;
|
|
@@ -2314,7 +2392,17 @@ export function hubFetch(
|
|
|
2314
2392
|
}
|
|
2315
2393
|
}
|
|
2316
2394
|
return new Response(
|
|
2317
|
-
JSON.stringify({
|
|
2395
|
+
JSON.stringify({
|
|
2396
|
+
status: "ok",
|
|
2397
|
+
service: "parachute-hub",
|
|
2398
|
+
version: pkg.version,
|
|
2399
|
+
db,
|
|
2400
|
+
// Per-boot instance nonce (hub#737): lets an external reader detect a
|
|
2401
|
+
// loopback hijack (foreign process shadowing 127.0.0.1:<port>) by
|
|
2402
|
+
// comparing this to the nonce serve wrote to hub-instance.json.
|
|
2403
|
+
// Omitted when unset (DB-less / test / dev-entrypoint paths).
|
|
2404
|
+
...(deps?.instanceNonce ? { instance: deps.instanceNonce } : {}),
|
|
2405
|
+
}),
|
|
2318
2406
|
{
|
|
2319
2407
|
headers: {
|
|
2320
2408
|
"content-type": "application/json",
|
package/src/install-source.ts
CHANGED
|
@@ -123,7 +123,7 @@ function packageNameFor(entryName: string): string | undefined {
|
|
|
123
123
|
if (short === undefined) return undefined;
|
|
124
124
|
const fb = FIRST_PARTY_FALLBACKS[short];
|
|
125
125
|
if (fb) return fb.package;
|
|
126
|
-
// KNOWN_MODULES (vault / scribe /
|
|
126
|
+
// KNOWN_MODULES (vault / scribe / agent / surface — post hub#310 FALLBACK
|
|
127
127
|
// retirement) carries the package name without an embedded manifest.
|
|
128
128
|
return KNOWN_MODULES[short]?.package;
|
|
129
129
|
}
|
package/src/service-spec.ts
CHANGED
|
@@ -322,7 +322,7 @@ const NOTES_FALLBACK: FirstPartyFallback = {
|
|
|
322
322
|
* Indexed by short name (the `parachute install <X>` token).
|
|
323
323
|
*
|
|
324
324
|
* Only notes remains — see the block comment above for the rationale
|
|
325
|
-
* (vault/scribe/
|
|
325
|
+
* (vault/scribe/agent now self-register and ship their own
|
|
326
326
|
* module.json). Other code paths consult both this table AND `KNOWN_MODULES`
|
|
327
327
|
* (which carries the post-self-register-retirement entries) via the helpers
|
|
328
328
|
* in this file (`shortNameForManifest`, `knownServices`, …).
|
|
@@ -437,27 +437,17 @@ export const KNOWN_MODULES: Record<string, KnownModule> = {
|
|
|
437
437
|
],
|
|
438
438
|
},
|
|
439
439
|
},
|
|
440
|
-
runner:
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
extras: {
|
|
452
|
-
// Backward-compat startCmd — same rationale as scribe / vault above.
|
|
453
|
-
startCmd: () => ["parachute-runner", "serve"],
|
|
454
|
-
// Runner's HTTP routes (everything past `/healthz`) gate on a
|
|
455
|
-
// hub-issued JWT carrying `runner:admin` scope (see runner's
|
|
456
|
-
// `src/auth.ts`). Surfaces in `parachute status` as auth-required by
|
|
457
|
-
// default, same posture as vault.
|
|
458
|
-
hasAuth: true,
|
|
459
|
-
},
|
|
460
|
-
},
|
|
440
|
+
// NOTE (2026-07-01): `runner` was REMOVED from this registry (decision:
|
|
441
|
+
// Aaron 2026-07-01 — the module set of record is vault / hub / agent /
|
|
442
|
+
// scribe / surface). Runner is no longer offered, installable, or
|
|
443
|
+
// lifecycle-addressable by short name from the hub's bootstrap registries.
|
|
444
|
+
// Existing installs stay GRACEFUL: a legacy `parachute-runner` services.json
|
|
445
|
+
// row is handled exactly like any unknown/third-party row — `parachute
|
|
446
|
+
// status` renders it (short falls back to the row name), `parachute serve`
|
|
447
|
+
// boots it via `<installDir>/.parachute/module.json` when installDir is
|
|
448
|
+
// stamped and logs-and-skips otherwise. Deliberately NOT added to
|
|
449
|
+
// RETIRED_MODULES: that registry GC-drops rows on load, which would break
|
|
450
|
+
// routing for operators still running the runner daemon.
|
|
461
451
|
agent: {
|
|
462
452
|
short: "agent",
|
|
463
453
|
package: "@openparachute/agent",
|
|
@@ -474,7 +464,7 @@ export const KNOWN_MODULES: Record<string, KnownModule> = {
|
|
|
474
464
|
canonicalStripPrefix: true,
|
|
475
465
|
extras: {
|
|
476
466
|
// Backward-compat startCmd for rows without installDir — same rationale
|
|
477
|
-
// as scribe / vault
|
|
467
|
+
// as scribe / vault. The bare binary IS the daemon (agent's
|
|
478
468
|
// package.json bin maps `parachute-agent` → src/daemon.ts).
|
|
479
469
|
startCmd: () => ["parachute-agent"],
|
|
480
470
|
// Agent gates its endpoints behind hub-issued JWTs (agent:* scopes).
|
|
@@ -498,14 +488,14 @@ export const KNOWN_MODULES: Record<string, KnownModule> = {
|
|
|
498
488
|
canonicalHealth: "/surface/healthz",
|
|
499
489
|
canonicalStripPrefix: false,
|
|
500
490
|
extras: {
|
|
501
|
-
// Backward-compat startCmd — same rationale as scribe / vault / runner
|
|
491
|
+
// Backward-compat startCmd — same rationale as scribe / vault
|
|
502
492
|
// above. Post-self-register, lifecycle reads module.json's startCmd via
|
|
503
493
|
// `composeKnownModuleSpec` and that path wins.
|
|
504
494
|
startCmd: () => ["parachute-surface", "serve"],
|
|
505
495
|
// Surface's admin + per-UI surfaces gate behind hub-issued JWTs (design
|
|
506
496
|
// doc §6 same-hub auto-trust + scope `surface:admin`). Surfaces in
|
|
507
|
-
// `parachute status` as auth-required by default, same posture as
|
|
508
|
-
// vault / runner.
|
|
497
|
+
// `parachute status` as auth-required by default, same posture as
|
|
498
|
+
// vault.
|
|
509
499
|
hasAuth: true,
|
|
510
500
|
},
|
|
511
501
|
},
|
|
@@ -646,10 +636,11 @@ export function knownServices(): string[] {
|
|
|
646
636
|
* - `experimental` — agent (legit preview; still OFFERED on a fresh install)
|
|
647
637
|
* + any unlisted third-party short.
|
|
648
638
|
* - `deprecated` — notes (notes-daemon deprecated 2026-05-22; notes-ui moved
|
|
649
|
-
* into parachute-surface)
|
|
650
|
-
*
|
|
651
|
-
*
|
|
652
|
-
*
|
|
639
|
+
* into parachute-surface). Still RESOLVABLE (discoverableShorts unchanged)
|
|
640
|
+
* and SHOWN-IF-INSTALLED so an existing operator can manage/uninstall, but
|
|
641
|
+
* NOT OFFERED on a fresh setup. `runner` used to sit here too (deprecated
|
|
642
|
+
* 2026-06-25) until its full registry removal on 2026-07-01 — see the note
|
|
643
|
+
* in KNOWN_MODULES.
|
|
653
644
|
*
|
|
654
645
|
* **Show all installed; never hide** — `focus` groups + labels; the one
|
|
655
646
|
* behavioral lever is the fresh-install OFFER, which drops `deprecated` shorts.
|
|
@@ -660,7 +651,6 @@ const FOCUS_DEFAULTS: Record<string, ModuleFocus> = {
|
|
|
660
651
|
hub: "core",
|
|
661
652
|
surface: "core",
|
|
662
653
|
agent: "experimental",
|
|
663
|
-
runner: "deprecated",
|
|
664
654
|
notes: "deprecated",
|
|
665
655
|
};
|
|
666
656
|
|
|
@@ -671,7 +661,7 @@ const FOCUS_DEFAULTS: Record<string, ModuleFocus> = {
|
|
|
671
661
|
* returns undefined — the Modules screen always has a tier to group by.
|
|
672
662
|
*
|
|
673
663
|
* Tier semantics: `core`/`experimental` are both OFFERED on a fresh install;
|
|
674
|
-
* `deprecated` (notes / runner) is NOT offered on a fresh setup but stays
|
|
664
|
+
* `deprecated` (notes) is NOT offered on a fresh setup but stays
|
|
675
665
|
* resolvable + shown-if-installed (the `isKnownModuleShort` /
|
|
676
666
|
* `discoverableShorts` resolution surface is unchanged). The fresh-install
|
|
677
667
|
* filters in `setup.ts` + `api-modules.ts` consult this tier to drop
|
|
@@ -689,14 +679,16 @@ export function focusForShort(short: string, declared?: ModuleFocus): ModuleFocu
|
|
|
689
679
|
* `CURATED_MODULES` whitelist (2026-06-09 modular-UI architecture, P2): every
|
|
690
680
|
* module the hub can resolve a package/manifest for is discoverable + installable,
|
|
691
681
|
* regardless of `focus` tier. Deduped, with FIRST_PARTY_FALLBACKS shorts first
|
|
692
|
-
* (notes) then KNOWN_MODULES (vault / scribe / runner / agent / surface).
|
|
693
|
-
*
|
|
694
|
-
* `notes`
|
|
695
|
-
*
|
|
696
|
-
*
|
|
697
|
-
*
|
|
698
|
-
*
|
|
699
|
-
*
|
|
682
|
+
* (notes) then KNOWN_MODULES (vault / scribe / agent / surface).
|
|
683
|
+
*
|
|
684
|
+
* `notes` is intentionally included — still resolvable (vendored fallback)
|
|
685
|
+
* for legacy installs; it surfaces as `deprecated` (2026-06-25) and isn't
|
|
686
|
+
* OFFERED on a fresh install. The fresh-install OFFER (setup wizard + admin
|
|
687
|
+
* SPA) filters by tier (`focus !== "deprecated"`); `discoverableShorts`
|
|
688
|
+
* itself stays the full resolution surface so existing installs keep working.
|
|
689
|
+
* `runner` is NOT here anymore (registry removal 2026-07-01 — see the
|
|
690
|
+
* KNOWN_MODULES note); a legacy runner install is handled as an
|
|
691
|
+
* unknown/third-party row.
|
|
700
692
|
*/
|
|
701
693
|
export function discoverableShorts(): string[] {
|
|
702
694
|
const seen = new Set<string>();
|
|
@@ -750,7 +742,7 @@ export function canonicalPortForManifest(manifestName: string): number | undefin
|
|
|
750
742
|
* spec with embedded manifest + extras — the vendored manifest is the
|
|
751
743
|
* source of truth pre-install and the install path preserves it through.
|
|
752
744
|
*
|
|
753
|
-
* KNOWN_MODULES shorts (vault / scribe / runner / agent / surface — post
|
|
745
|
+
* KNOWN_MODULES shorts (vault / scribe / agent / surface — post
|
|
754
746
|
* FALLBACK retirement) return a **minimal** spec carrying `package`, `manifestName`,
|
|
755
747
|
* and the imperative `extras` fields
|
|
756
748
|
* (`init`, `hasAuth`, `urlForEntry`, `postInstallFooter`). They do NOT carry
|
|
@@ -860,7 +852,7 @@ const LEGACY_MANIFEST_ALIASES: Record<string, string> = {
|
|
|
860
852
|
|
|
861
853
|
/** Short name for a given manifest name, e.g. `parachute-vault` → `vault`.
|
|
862
854
|
* Consults both FIRST_PARTY_FALLBACKS (notes) and KNOWN_MODULES
|
|
863
|
-
* (vault / scribe / runner / agent / surface — post-FALLBACK-retirement).
|
|
855
|
+
* (vault / scribe / agent / surface — post-FALLBACK-retirement).
|
|
864
856
|
* Returns undefined for unknown manifests. */
|
|
865
857
|
export function shortNameForManifest(manifestName: string): string | undefined {
|
|
866
858
|
for (const [short, fb] of Object.entries(FIRST_PARTY_FALLBACKS)) {
|
|
@@ -887,7 +879,7 @@ export function shortNameForManifest(manifestName: string): string | undefined {
|
|
|
887
879
|
* here — `shortNameForManifest` only knows the canonical `parachute-vault`, so
|
|
888
880
|
* `findServiceByShort(services, "vault")` returns undefined even when a vault is
|
|
889
881
|
* installed. Vault rows are resolved by mount path via `findVaultUpstream`; this
|
|
890
|
-
* helper is for single-instance modules (agent / scribe / runner / surface).
|
|
882
|
+
* helper is for single-instance modules (agent / scribe / surface).
|
|
891
883
|
*/
|
|
892
884
|
export function findServiceByShort<T extends { name: string }>(
|
|
893
885
|
services: readonly T[],
|
|
@@ -901,7 +893,7 @@ export function findServiceByShort<T extends { name: string }>(
|
|
|
901
893
|
* manifest data the caller has on hand (typically read from
|
|
902
894
|
* `<installDir>/.parachute/module.json`).
|
|
903
895
|
*
|
|
904
|
-
* Used at install-time and lifecycle-time for vault / scribe / runner / surface —
|
|
896
|
+
* Used at install-time and lifecycle-time for vault / scribe / surface —
|
|
905
897
|
* where hub no longer vendors the manifest (services.json + module.json
|
|
906
898
|
* are authoritative) but still needs the imperative `extras` bits
|
|
907
899
|
* (`init`, `postInstallFooter`, `urlForEntry`, `hasAuth`) the CLI install
|