@openparachute/hub 0.6.1 → 0.6.3-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/account-home-ui.test.ts +34 -0
- package/src/__tests__/api-modules-ops.test.ts +359 -3
- package/src/__tests__/api-modules.test.ts +54 -0
- package/src/__tests__/cloudflare-connector-service.test.ts +441 -0
- package/src/__tests__/expose-cloudflare.test.ts +272 -0
- package/src/__tests__/hub-unit.test.ts +574 -0
- package/src/__tests__/init.test.ts +219 -2
- package/src/__tests__/lifecycle.test.ts +423 -0
- package/src/__tests__/managed-unit.test.ts +575 -0
- package/src/__tests__/module-ops-client.test.ts +556 -0
- package/src/__tests__/port-probe.test.ts +23 -0
- package/src/__tests__/setup-wizard.test.ts +130 -0
- package/src/__tests__/status-supervisor.test.ts +569 -0
- package/src/__tests__/supervisor.test.ts +471 -6
- package/src/account-home-ui.ts +4 -1
- package/src/api-modules-ops.ts +221 -0
- package/src/api-modules.ts +18 -2
- package/src/cli.ts +14 -4
- package/src/cloudflare/connector-service.ts +273 -0
- package/src/cloudflare/state.ts +13 -1
- package/src/commands/expose-cloudflare.ts +143 -10
- package/src/commands/init.ts +225 -12
- package/src/commands/lifecycle.ts +366 -38
- package/src/commands/serve-boot.ts +71 -25
- package/src/commands/status.ts +596 -49
- package/src/hub-server.ts +11 -0
- package/src/hub-unit.ts +735 -0
- package/src/managed-unit.ts +674 -0
- package/src/module-ops-client.ts +457 -0
- package/src/port-probe.ts +50 -0
- package/src/setup-wizard.ts +80 -1
- package/src/supervisor.ts +360 -14
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI client for the module-ops HTTP API (`POST /api/modules/:short/<op>`).
|
|
3
|
+
*
|
|
4
|
+
* This is the credential + transport seam that Phase 3 of the
|
|
5
|
+
* hub-as-supervisor unification (design 2026-06-01 §3.1) will repoint
|
|
6
|
+
* `parachute start/stop/restart <svc>` onto: instead of touching pidfiles
|
|
7
|
+
* directly (`commands/lifecycle.ts`), those verbs become authenticated calls
|
|
8
|
+
* to the running hub's in-process Supervisor over loopback.
|
|
9
|
+
*
|
|
10
|
+
* **Phase 1 is additive.** This file ADDS the client + its tests; it does NOT
|
|
11
|
+
* repoint any existing CLI command. `parachute start/stop/restart/install/
|
|
12
|
+
* upgrade` stay on the detached `lifecycle.ts` path until the Phase 3 cutover.
|
|
13
|
+
*
|
|
14
|
+
* ## The credential (§3.1)
|
|
15
|
+
*
|
|
16
|
+
* The on-box caller's proof of operator authority is `~/.parachute/operator.token`
|
|
17
|
+
* — a hub-issued JWT carrying `parachute:host:admin` under the default `admin`
|
|
18
|
+
* scope-set, which is exactly the scope `api-modules-ops.ts` gates on. We READ
|
|
19
|
+
* it via `useOperatorTokenWithAutoRotate` (which validates against the hub DB +
|
|
20
|
+
* issuer and opportunistically re-mints a within-7d-of-expiry token in place);
|
|
21
|
+
* we never mint a parallel token, so there is no second SQLite writer racing
|
|
22
|
+
* the running hub. The token is presented as `Authorization: Bearer` to the
|
|
23
|
+
* loopback hub.
|
|
24
|
+
*
|
|
25
|
+
* No operator token on disk → an actionable error ("no operator token — run
|
|
26
|
+
* `parachute auth rotate-operator`"), never a raw 401.
|
|
27
|
+
*
|
|
28
|
+
* ## Sync vs async ops
|
|
29
|
+
*
|
|
30
|
+
* `start` / `stop` / `restart` / `uninstall` are synchronous: the handler does
|
|
31
|
+
* the work inline and returns the new state in the body. `install` / `upgrade`
|
|
32
|
+
* return `202 { operation_id }` and the client polls
|
|
33
|
+
* `GET /api/modules/operations/:id` to a terminal state. This client handles
|
|
34
|
+
* both: a 202-with-operation_id response is polled to completion; any other
|
|
35
|
+
* 2xx body is returned as-is.
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
import type { Database } from "bun:sqlite";
|
|
39
|
+
import { OperatorTokenExpiredError, useOperatorTokenWithAutoRotate } from "./operator-token.ts";
|
|
40
|
+
|
|
41
|
+
/** Loopback hub base URL when none is injected. The hub pins 1939 (canonical-ports). */
|
|
42
|
+
export const DEFAULT_HUB_BASE_URL = "http://127.0.0.1:1939";
|
|
43
|
+
|
|
44
|
+
/** Default poll interval + ceiling for async operations. */
|
|
45
|
+
const DEFAULT_POLL_INTERVAL_MS = 1_000;
|
|
46
|
+
const DEFAULT_POLL_TIMEOUT_MS = 120_000;
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Default ceiling for the single `GET /api/modules` read in {@link fetchModuleStates}.
|
|
50
|
+
* `status` is a read-only health snapshot — a hub whose request handler is wedged
|
|
51
|
+
* (accepts the TCP connection but never answers) must not hang it. Bounded so the
|
|
52
|
+
* caller degrades to a "couldn't read live module state" note instead of stalling.
|
|
53
|
+
*/
|
|
54
|
+
const DEFAULT_STATES_FETCH_TIMEOUT_MS = 3_000;
|
|
55
|
+
|
|
56
|
+
/** Module-op verbs the client can drive against `POST /api/modules/:short/<op>`. */
|
|
57
|
+
export type ModuleOp = "start" | "stop" | "restart" | "install" | "upgrade" | "uninstall";
|
|
58
|
+
|
|
59
|
+
/**
 * Raised when there is no `operator.token` on disk to present to the hub.
 *
 * The message is written as an instruction to the operator, so the CLI can
 * print `.message` unchanged. Having a dedicated class lets a caller tell
 * "this box still needs bootstrapping" apart from "the hub refused the call"
 * with a plain `instanceof` check.
 */
export class NoOperatorTokenError extends Error {
  constructor() {
    super(
      "no operator token — run `parachute auth rotate-operator` to mint one (looked for ~/.parachute/operator.token)",
    );
    // Set after super() so the instance carries its own class name.
    this.name = "NoOperatorTokenError";
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Raised when the hub replies to a module-ops request with a non-2xx status.
 *
 * Keeps the HTTP status and the hub's own error code alongside the message so
 * the CLI can show what the hub actually said (for example `not_installed` or
 * `insufficient_scope`) rather than a generic failure.
 */
export class ModuleOpHttpError extends Error {
  override name = "ModuleOpHttpError";
  /** HTTP status code of the failing response. */
  readonly status: number;
  /** The `error` code taken from the hub's response body. */
  readonly code: string;

  /**
   * @param status HTTP status of the response.
   * @param code Short machine-readable error code.
   * @param description Human-readable explanation; combined with `code` to form the message.
   */
  constructor(status: number, code: string, description: string) {
    super(code + ": " + description);
    this.status = status;
    this.code = code;
  }
}
|
|
88
|
+
|
|
89
|
+
/**
 * Raised when an asynchronous module operation ends in `failed`, or when
 * polling for its completion runs out of time.
 */
export class ModuleOpFailedError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    // Forward every argument untouched, exactly as an implicit constructor would.
    super(...args);
    this.name = "ModuleOpFailedError";
  }
}
|
|
93
|
+
|
|
94
|
+
/** What a completed module-op hands back to the caller: the hub's response. */
export interface ModuleOpResult {
  /** Status code of the POST that started the op (200 for sync, 202 for async). */
  readonly status: number;
  /** The decoded JSON body; for an async op this is the terminal operation record. */
  readonly body: unknown;
  /** Set only for async (202) ops: the id that was polled. Absent for sync ops. */
  readonly operationId?: string;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Everything the module-ops client functions need from their caller: the
 * credential inputs, the hub location, and the injectable seams tests use to
 * avoid real sockets, clocks and timers.
 */
export interface DriveModuleOpDeps {
  /** Open handle on the hub database; the operator token is validated / auto-rotated against it. */
  readonly db: Database;
  /** Hub origin that the operator token's `iss` claim must match. */
  readonly issuer: string;
  /** Base URL of the loopback hub. Falls back to {@link DEFAULT_HUB_BASE_URL}. */
  readonly baseUrl?: string;
  /** Directory holding `operator.token`. Falls back to `configDir()` when omitted. */
  readonly configDir?: string;
  /** JSON payload to send with the POST, if any (for example `{ channel }` on install/upgrade). */
  readonly body?: unknown;
  /**
   * Replacement for the global `fetch`. Tests supply a fake that checks the
   * Authorization header and answers with canned responses, so no real socket
   * is opened.
   */
  readonly fetch?: typeof fetch;
  /** Injectable clock, used for the operator-token rotation check. */
  readonly now?: () => Date;
  /** Injectable sleep used between polls of an async op; tests stub it to return at once. */
  readonly sleep?: (ms: number) => Promise<void>;
  /** Milliseconds between polls of an async op. Default 1000. */
  readonly pollIntervalMs?: number;
  /** Upper bound, in milliseconds, on polling an async op. Default 120000. */
  readonly pollTimeoutMs?: number;
  /**
   * Ceiling, in milliseconds, on the single `GET /api/modules` request made by
   * {@link fetchModuleStates}. Default {@link DEFAULT_STATES_FETCH_TIMEOUT_MS}
   * (3000). It keeps `status` from stalling on a wedged hub handler; tests
   * pass a small value to reach the timeout path without waiting in real time.
   */
  readonly statesFetchTimeoutMs?: number;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Load the operator token — letting `useOperatorTokenWithAutoRotate` refresh
 * it when it is close to expiry — and return the string to send as the bearer.
 *
 * @param deps Supplies the DB handle and issuer, plus the optional `configDir`
 *   and `now` overrides, which are forwarded only when actually provided.
 * @returns The operator token to present onward.
 * @throws {NoOperatorTokenError} When no token could be read.
 * @throws {OperatorTokenExpiredError} Propagated untouched from the token
 *   helper; its message already tells the operator to rotate.
 */
export async function resolveOperatorBearer(deps: DriveModuleOpDeps): Promise<string> {
  const { issuer, configDir, now } = deps;

  // Leave optional keys absent (not `undefined`) when the caller gave no override.
  const rotateOpts: { issuer: string; configDir?: string; now?: () => Date } = { issuer };
  if (configDir !== undefined) rotateOpts.configDir = configDir;
  if (now !== undefined) rotateOpts.now = now;

  const result = await useOperatorTokenWithAutoRotate(deps.db, rotateOpts);
  if (result) return result.token;
  throw new NoOperatorTokenError();
}
|
|
153
|
+
|
|
154
|
+
/**
 * Run one module-op from start to finish: obtain the operator token, POST it
 * as a Bearer credential to the loopback hub, and — when the hub answers
 * `202` with an `operation_id` — poll `GET /api/modules/operations/:id` until
 * the operation reaches a terminal state.
 *
 * The module short name is percent-encoded before it is placed in the request
 * path, so a value containing `/`, `?`, `#` or `..` cannot redirect the
 * authenticated POST to a different endpoint. Ordinary short names (letters,
 * digits, `-`, `_`, `.`) produce exactly the same URL as before.
 *
 * @param short Short name of the module to operate on.
 * @param op The verb to run against `POST /api/modules/:short/<op>`.
 * @param deps Credential inputs, hub location, optional POST body and test seams.
 * @returns The initiating POST's status plus the final body; for async ops the
 *   body is the terminal operation record and `operationId` is set.
 * @throws {NoOperatorTokenError} No operator.token on disk.
 * @throws {OperatorTokenExpiredError} Token fully expired (actionable message).
 * @throws {ModuleOpHttpError} Hub answered non-2xx (carries status + code).
 * @throws {ModuleOpFailedError} Async op reached `failed`, polling timed out,
 *   or the hub returned `202` without an `operation_id`.
 */
export async function driveModuleOp(
  short: string,
  op: ModuleOp,
  deps: DriveModuleOpDeps,
): Promise<ModuleOpResult> {
  const doFetch = deps.fetch ?? fetch;
  // Strip trailing slashes so joining with "/api/..." never yields "//".
  const baseUrl = (deps.baseUrl ?? DEFAULT_HUB_BASE_URL).replace(/\/+$/, "");

  const bearer = await resolveOperatorBearer(deps);

  const headers: Record<string, string> = { authorization: `Bearer ${bearer}` };
  const init: RequestInit = { method: "POST", headers };
  if (deps.body !== undefined) {
    headers["content-type"] = "application/json";
    init.body = JSON.stringify(deps.body);
  }

  // Encode the caller-supplied segment; `op` is a closed union of safe literals.
  const res = await doFetch(`${baseUrl}/api/modules/${encodeURIComponent(short)}/${op}`, init);
  const body = await parseJsonSafe(res);

  if (res.status < 200 || res.status >= 300) {
    const { error, error_description } = asErrorBody(body);
    throw new ModuleOpHttpError(res.status, error, error_description);
  }

  // Async op (install / upgrade): 202 + { operation_id } → poll to terminal.
  if (res.status === 202) {
    const operationId = extractOperationId(body);
    if (!operationId) {
      // 202 means "accepted, poll for completion" — but with no operation_id
      // there's nothing to poll. Silently returning the 202 would strand the
      // caller on an incomplete op; surface it as a hard failure instead.
      throw new ModuleOpFailedError("hub returned 202 but no operation_id in body");
    }
    const terminal = await pollOperation(operationId, bearer, baseUrl, doFetch, deps);
    return { status: res.status, body: terminal, operationId };
  }

  // Sync op (start / stop / restart / uninstall) — body is the final state.
  return { status: res.status, body };
}
|
|
206
|
+
|
|
207
|
+
/**
 * Repeatedly read `GET /api/modules/operations/:id` until the operation
 * reports `succeeded` or `failed`, or the polling budget is used up.
 *
 * Each round fetches first and checks the deadline afterwards, so at least one
 * request is always made, and the loop waits `pollIntervalMs` between rounds.
 *
 * @param operationId Id returned by the hub in the 202 response.
 * @param bearer Operator token to send as the Bearer credential.
 * @param baseUrl Hub base URL, already stripped of trailing slashes.
 * @param doFetch The fetch implementation to use.
 * @param deps Source of the `sleep` / `now` seams and the poll interval / timeout.
 * @returns The operation record whose status is `succeeded`.
 * @throws {ModuleOpFailedError} On a `failed` status or when the deadline passes.
 * @throws {ModuleOpHttpError} When a poll response is non-2xx.
 */
async function pollOperation(
  operationId: string,
  bearer: string,
  baseUrl: string,
  doFetch: typeof fetch,
  deps: DriveModuleOpDeps,
): Promise<unknown> {
  const pause =
    deps.sleep ?? ((ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms)));
  const everyMs = deps.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
  const timeoutMs = deps.pollTimeoutMs ?? DEFAULT_POLL_TIMEOUT_MS;
  const clock = deps.now ?? (() => new Date());
  const giveUpAt = clock().getTime() + timeoutMs;
  const endpoint = `${baseUrl}/api/modules/operations/${operationId}`;

  for (;;) {
    const res = await doFetch(endpoint, {
      method: "GET",
      headers: { authorization: `Bearer ${bearer}` },
    });
    const record = await parseJsonSafe(res);

    const isSuccessStatus = res.status >= 200 && res.status < 300;
    if (!isSuccessStatus) {
      const { error, error_description } = asErrorBody(record);
      throw new ModuleOpHttpError(res.status, error, error_description);
    }

    const opStatus = extractOpStatus(record);
    switch (opStatus) {
      case "succeeded":
        return record;
      case "failed":
        throw new ModuleOpFailedError(extractOpError(record) ?? "operation failed");
      default:
        // Still in progress (or status missing) — fall through to the deadline check.
        break;
    }

    if (clock().getTime() >= giveUpAt) {
      throw new ModuleOpFailedError(
        `operation ${operationId} did not complete within ${timeoutMs}ms (last status: ${opStatus ?? "unknown"})`,
      );
    }
    await pause(everyMs);
  }
}
|
|
251
|
+
|
|
252
|
+
/** Result of {@link fetchModuleLogs}: one module's buffered output. */
export interface ModuleLogsResult {
  /** Short name of the module these logs came from. */
  readonly short: string;
  /** The buffered lines, oldest first; each keeps its trailing newline. */
  readonly lines: string[];
  /** Those same lines as a single string — the tail blob `parachute logs` will print. */
  readonly text: string;
}
|
|
261
|
+
|
|
262
|
+
/**
 * Fetch a supervised module's recent output from the hub
 * (`GET /api/modules/:short/logs`, design §6.5). This is additive: it is not
 * wired into the `parachute logs` CLI command (that cutover is Phase 3); it is
 * the transport + credential seam Phase 3 will call.
 *
 * Authentication uses the same read-never-mint operator.token → Bearer path as
 * {@link driveModuleOp}. The module short name is percent-encoded before it is
 * placed in the request path, so a value containing `/`, `?`, `#` or `..`
 * cannot redirect the authenticated request to a different endpoint; ordinary
 * short names yield the same URL as before.
 *
 * Response handling is defensive: non-string entries in `lines` are dropped, a
 * missing or non-array `lines` becomes `[]`, and when the hub sends no string
 * `text` the lines are concatenated to produce it.
 *
 * @param short Short name of the module whose logs to read.
 * @param deps Credential inputs, hub location and the fetch seam.
 * @returns The module's buffered lines and their joined text.
 * @throws {NoOperatorTokenError} No operator.token on disk.
 * @throws {OperatorTokenExpiredError} Token fully expired (actionable message).
 * @throws {ModuleOpHttpError} Hub answered non-2xx (e.g. `not_supervised`).
 */
export async function fetchModuleLogs(
  short: string,
  deps: DriveModuleOpDeps,
): Promise<ModuleLogsResult> {
  const doFetch = deps.fetch ?? fetch;
  // Strip trailing slashes so joining with "/api/..." never yields "//".
  const baseUrl = (deps.baseUrl ?? DEFAULT_HUB_BASE_URL).replace(/\/+$/, "");
  const bearer = await resolveOperatorBearer(deps);

  const res = await doFetch(`${baseUrl}/api/modules/${encodeURIComponent(short)}/logs`, {
    method: "GET",
    headers: { authorization: `Bearer ${bearer}` },
  });
  const body = await parseJsonSafe(res);
  if (res.status < 200 || res.status >= 300) {
    const { error, error_description } = asErrorBody(body);
    throw new ModuleOpHttpError(res.status, error, error_description);
  }

  // The body is untrusted JSON: narrow each field before using it.
  const b = (body ?? {}) as { lines?: unknown; text?: unknown };
  const lines = Array.isArray(b.lines)
    ? b.lines.filter((l): l is string => typeof l === "string")
    : [];
  // Lines already carry their newline, so an empty separator rebuilds the blob.
  const text = typeof b.text === "string" ? b.text : lines.join("");
  return { short, lines, text };
}
|
|
301
|
+
|
|
302
|
+
/**
 * Run-state of a single module as reported by `GET /api/modules` — just the
 * fields `parachute status` needs to draw a module row from the running
 * supervisor (design §6.4). Field names stay snake_case to match the wire
 * format (`ModuleWireShape` in `api-modules.ts`); only supervisor-derived
 * fields are kept.
 */
export interface ModuleStateSnapshot {
  /** Module short name. */
  readonly short: string;
  /** Whether the hub reports the module as installed. */
  readonly installed: boolean;
  /** Installed version string, or null when the hub did not report one. */
  readonly installed_version: string | null;
  /**
   * The supervisor's run-status (`running` / `stopped` / `crashed` /
   * `starting` / `restarting`). Null means the supervisor is not tracking the
   * module — for example it never booted, was skipped at boot, or this hub has
   * no supervisor.
   */
  readonly supervisor_status: string | null;
  /** Process id, or null when none was reported. */
  readonly pid: number | null;
  /**
   * The structured start-error recorded by the supervisor (missing-dependency
   * / started-but-unbound), forwarded as-is so `status` can show the same
   * friendly missing-dependency note as the detached path (#188).
   */
  readonly supervisor_start_error: unknown | null;
}
|
|
326
|
+
|
|
327
|
+
/** Result of {@link fetchModuleStates}. */
export interface ModuleStatesResult {
  /** True when the running hub reports a wired-in supervisor (`supervisor_available`). */
  readonly supervisorAvailable: boolean;
  /** One snapshot per module, in the order the hub listed them; each carries its own `short`. */
  readonly modules: ModuleStateSnapshot[];
}
|
|
334
|
+
|
|
335
|
+
/**
 * Read the RUNNING hub's per-module supervisor states via `GET /api/modules`
 * (design §6.4 module rows), authenticating with the same read-never-mint
 * operator-token → Bearer path {@link driveModuleOp} uses.
 *
 * Used by `parachute status` on a unit-managed box to source module rows from
 * the live supervisor instead of pidfiles. The read is BOUNDED: the request
 * carries an `AbortSignal.timeout` (default
 * {@link DEFAULT_STATES_FETCH_TIMEOUT_MS}), and both waiting for the response
 * AND reading its body happen under that ceiling. A hub that sends headers but
 * then stalls mid-body therefore surfaces as a timeout error rather than being
 * mistaken for an empty module list.
 *
 * The response is treated as untrusted JSON: non-object entries in `modules`
 * are dropped and each field is narrowed to its expected type, falling back to
 * `""` / `false` / `null`.
 *
 * @param deps Credential inputs, hub location, fetch seam and the optional
 *   `statesFetchTimeoutMs` ceiling.
 * @returns Whether a supervisor is available, plus one snapshot per module.
 * @throws {NoOperatorTokenError} No operator.token on disk.
 * @throws {OperatorTokenExpiredError} Token fully expired (actionable message).
 * @throws {ModuleOpHttpError} Hub answered non-2xx; OR the bounded request
 *   timed out, was aborted, or failed at the transport level — reported with
 *   status `0` and the synthetic code `request_timeout` so callers can handle
 *   it through the same catch they use for a real HTTP error.
 */
export async function fetchModuleStates(deps: DriveModuleOpDeps): Promise<ModuleStatesResult> {
  const doFetch = deps.fetch ?? fetch;
  // Strip trailing slashes so joining with "/api/..." never yields "//".
  const baseUrl = (deps.baseUrl ?? DEFAULT_HUB_BASE_URL).replace(/\/+$/, "");
  const timeoutMs = deps.statesFetchTimeoutMs ?? DEFAULT_STATES_FETCH_TIMEOUT_MS;
  const bearer = await resolveOperatorBearer(deps);

  let res: Response;
  let body: unknown;
  try {
    res = await doFetch(`${baseUrl}/api/modules`, {
      method: "GET",
      // `/api/modules` parses the scheme-cased `Bearer ` prefix; match it exactly.
      headers: { authorization: `Bearer ${bearer}` },
      // Bound the read so a wedged hub handler degrades `status` rather than
      // hanging it. When the ceiling elapses the request rejects with a
      // `TimeoutError` DOMException (a manual abort gives `AbortError`).
      signal: AbortSignal.timeout(timeoutMs),
    });
    try {
      body = await res.json();
    } catch (err) {
      // A timeout that fires while the body is still streaming must not be
      // confused with "the body was not JSON": rethrow it to the outer catch.
      // Anything else means an unparseable/empty body, which is tolerated.
      if (isAbortOrTimeoutError(err)) throw err;
      body = undefined;
    }
  } catch (err) {
    // Timeout/abort or transport failure → re-shape as a ModuleOpHttpError so the
    // `status` caller degrades through the SAME non-2xx catch it uses for an HTTP
    // error (a "couldn't read live module state" note + exit 0), never hangs.
    const description = isAbortOrTimeoutError(err)
      ? `module-states read timed out after ${timeoutMs}ms`
      : `module-states read failed (${err instanceof Error ? err.message : String(err)})`;
    throw new ModuleOpHttpError(0, "request_timeout", description);
  }

  if (res.status < 200 || res.status >= 300) {
    const { error, error_description } = asErrorBody(body);
    throw new ModuleOpHttpError(res.status, error, error_description);
  }

  const b = (body ?? {}) as { modules?: unknown; supervisor_available?: unknown };
  // Only a literal `true` counts; a missing or non-boolean flag reads as "no supervisor".
  const supervisorAvailable = b.supervisor_available === true;
  const modules: ModuleStateSnapshot[] = Array.isArray(b.modules)
    ? b.modules
        .filter((m): m is Record<string, unknown> => !!m && typeof m === "object")
        .map((m) => ({
          short: typeof m.short === "string" ? m.short : "",
          installed: m.installed === true,
          installed_version: typeof m.installed_version === "string" ? m.installed_version : null,
          supervisor_status: typeof m.supervisor_status === "string" ? m.supervisor_status : null,
          pid: typeof m.pid === "number" ? m.pid : null,
          // Passed through verbatim; absent and null both normalise to null.
          supervisor_start_error: m.supervisor_start_error ?? null,
        }))
    : [];
  return { supervisorAvailable, modules };
}

/** True when `err` is the rejection produced by an aborted or timed-out request. */
function isAbortOrTimeoutError(err: unknown): boolean {
  return err instanceof Error && (err.name === "AbortError" || err.name === "TimeoutError");
}
|
|
411
|
+
|
|
412
|
+
/**
 * Decode a response body as JSON without ever throwing.
 *
 * @param res The response whose body should be read.
 * @returns The parsed value, or `undefined` when reading or parsing fails for
 *   any reason (empty body, non-JSON body, or an error thrown by `res.json`).
 */
async function parseJsonSafe(res: Response): Promise<unknown> {
  let parsed: unknown;
  try {
    parsed = await res.json();
  } catch {
    // Deliberate best-effort: callers treat "no usable body" as undefined.
    parsed = undefined;
  }
  return parsed;
}
|
|
419
|
+
|
|
420
|
+
/**
 * Pull `{ error, error_description }` out of an arbitrary response body.
 *
 * @param body Parsed response body of unknown shape.
 * @returns Both fields as strings. A field that is missing or not a string —
 *   or a body that is not an object at all — falls back to `"error"` and
 *   `"request failed"` respectively.
 */
function asErrorBody(body: unknown): { error: string; error_description: string } {
  // Treat anything that is not a non-null object as having no fields.
  const fields: Record<string, unknown> =
    body && typeof body === "object" ? (body as Record<string, unknown>) : {};
  const code = fields.error;
  const detail = fields.error_description;
  return {
    error: typeof code === "string" ? code : "error",
    error_description: typeof detail === "string" ? detail : "request failed",
  };
}
|
|
430
|
+
|
|
431
|
+
/**
 * Read `operation_id` from a response body.
 *
 * @param body Parsed response body of unknown shape.
 * @returns The id when it is a non-empty string; otherwise `undefined`.
 */
function extractOperationId(body: unknown): string | undefined {
  if (!body || typeof body !== "object") return undefined;
  const candidate = (body as Record<string, unknown>).operation_id;
  return typeof candidate === "string" && candidate.length > 0 ? candidate : undefined;
}
|
|
438
|
+
|
|
439
|
+
/**
 * Read the `status` field from an operation record.
 *
 * @param body Parsed response body of unknown shape.
 * @returns The status when it is a string (an empty string is allowed);
 *   otherwise `undefined`.
 */
function extractOpStatus(body: unknown): string | undefined {
  if (!body || typeof body !== "object") return undefined;
  const candidate = (body as Record<string, unknown>).status;
  return typeof candidate === "string" ? candidate : undefined;
}
|
|
446
|
+
|
|
447
|
+
/**
 * Read the `error` field from an operation record.
 *
 * @param body Parsed response body of unknown shape.
 * @returns The error text when it is a non-empty string; otherwise `undefined`.
 */
function extractOpError(body: unknown): string | undefined {
  if (!body || typeof body !== "object") return undefined;
  const candidate = (body as Record<string, unknown>).error;
  return typeof candidate === "string" && candidate.length > 0 ? candidate : undefined;
}
|
|
454
|
+
|
|
455
|
+
// Re-export so CLI callers can catch the expired-token case without a second
|
|
456
|
+
// import from operator-token.ts.
|
|
457
|
+
export { OperatorTokenExpiredError };
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Loopback TCP-port readiness probe — the tiny "is something listening on
|
|
3
|
+
* 127.0.0.1:<port>?" primitive shared by the detached `commands/lifecycle.ts`
|
|
4
|
+
* start path and the in-process `supervisor.ts` (design 2026-06-01 §6.5).
|
|
5
|
+
*
|
|
6
|
+
* Factored out of `lifecycle.ts` so the supervisor can reach the probe without
|
|
7
|
+
* importing all of lifecycle's heavy graph (hub-db, operator-token,
|
|
8
|
+
* services-manifest, …) into a module that hub-server / proxy-state / the
|
|
9
|
+
* module-ops API all depend on. `lifecycle.ts` re-exports `defaultPortListening`
|
|
10
|
+
* + `PortListeningFn` so its public API is unchanged; both files share THIS
|
|
11
|
+
* one implementation, so they can't drift.
|
|
12
|
+
*
|
|
13
|
+
* `node:net` rather than `Bun.connect` because the latter has no clean
|
|
14
|
+
* "connection refused → false" without a custom socket handler, and the net
|
|
15
|
+
* Socket's `error`/`connect` events map directly onto the boolean we want.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { Socket } from "node:net";
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* "Is something listening on this TCP port on loopback?" seam. Pairs with the
|
|
22
|
+
* spawn-then-die settle to catch the alive-but-never-bound failure shape
|
|
23
|
+
* (hub#487): a service that lives long enough to clear a liveness check but
|
|
24
|
+
* never binds its port (port already held by an orphan / a bun-linked
|
|
25
|
+
* resolution failure that lingers). Tests inject a deterministic stub;
|
|
26
|
+
* production uses {@link defaultPortListening}.
|
|
27
|
+
*/
|
|
28
|
+
export type PortListeningFn = (port: number) => Promise<boolean>;
|
|
29
|
+
|
|
30
|
+
/**
 * Production port probe: try to open a TCP connection to `127.0.0.1:<port>`.
 *
 * Resolves `true` as soon as the connection is accepted (something is
 * listening) and `false` if the socket errors or stays idle past its 1000 ms
 * timeout. The socket is destroyed before resolving in every case, and only
 * the first outcome counts. The returned promise never rejects.
 */
export const defaultPortListening: PortListeningFn = (port) =>
  new Promise((resolve) => {
    const probe = new Socket();
    let answered = false;

    // Single exit point: tear the socket down and report, at most once.
    const settle = (listening: boolean): void => {
      if (answered) return;
      answered = true;
      probe.destroy();
      resolve(listening);
    };
    const onAccepted = (): void => settle(true);
    const onUnreachable = (): void => settle(false);

    probe.setTimeout(1000);
    probe.once("connect", onAccepted);
    probe.once("timeout", onUnreachable);
    probe.once("error", onUnreachable);
    probe.connect(port, "127.0.0.1");
  });
|
package/src/setup-wizard.ts
CHANGED
|
@@ -67,7 +67,13 @@ import {
|
|
|
67
67
|
} from "./hub-settings.ts";
|
|
68
68
|
import { signAccessToken } from "./jwt-sign.ts";
|
|
69
69
|
import { escapeHtml } from "./oauth-ui.ts";
|
|
70
|
-
import {
|
|
70
|
+
import {
|
|
71
|
+
type IssueOperatorTokenResult,
|
|
72
|
+
type MintOperatorTokenOpts,
|
|
73
|
+
issueOperatorToken,
|
|
74
|
+
mintOperatorToken,
|
|
75
|
+
readOperatorTokenFile,
|
|
76
|
+
} from "./operator-token.ts";
|
|
71
77
|
import { isHttpsRequest } from "./request-protocol.ts";
|
|
72
78
|
import { findService, readManifestLenient } from "./services-manifest.ts";
|
|
73
79
|
import {
|
|
@@ -377,6 +383,22 @@ export interface SetupWizardDeps {
|
|
|
377
383
|
* `readExposeStateFn` seam.
|
|
378
384
|
*/
|
|
379
385
|
readExposeStateFn?: () => ExposeState | undefined;
|
|
386
|
+
/**
|
|
387
|
+
* Test seam for the fresh-box operator-token closure (design §3.1 /
|
|
388
|
+
* Phase 3b Deliverable A). After the wizard creates the first admin, it
|
|
389
|
+
* persists `~/.parachute/operator.token` so the box has a CLI operator
|
|
390
|
+
* credential the moment it gains an admin — without it, the Phase 3b
|
|
391
|
+
* per-module verbs (`parachute start/stop/restart <svc>` driving the
|
|
392
|
+
* supervisor) would 401 on a freshly-bootstrapped box. Production omits
|
|
393
|
+
* this and uses the real {@link issueOperatorToken}; tests inject a stub
|
|
394
|
+
* to assert the call (or to make it throw and prove a token-write failure
|
|
395
|
+
* never fails account creation).
|
|
396
|
+
*/
|
|
397
|
+
issueOperatorToken?: (
|
|
398
|
+
db: Database,
|
|
399
|
+
userId: string,
|
|
400
|
+
opts: MintOperatorTokenOpts & { dir?: string },
|
|
401
|
+
) => Promise<IssueOperatorTokenResult>;
|
|
380
402
|
}
|
|
381
403
|
|
|
382
404
|
/**
|
|
@@ -1888,6 +1910,16 @@ export async function handleSetupAccountPost(
|
|
|
1888
1910
|
// any racer who saw it over the operator's shoulder during the
|
|
1889
1911
|
// window between log-print and form-submit.
|
|
1890
1912
|
if (requireToken) consumeBootstrapToken();
|
|
1913
|
+
// Fresh-box operator-token closure (design §3.1 / Phase 3b Deliverable A).
|
|
1914
|
+
// The box now has its first admin — persist `operator.token` so it has a
|
|
1915
|
+
// CLI operator credential immediately. Without it, the Phase 3b per-module
|
|
1916
|
+
// verbs (start/stop/restart <svc> driving the supervisor over the
|
|
1917
|
+
// module-ops API) would 401 on a box bootstrapped purely through the
|
|
1918
|
+
// wizard. Runs AFTER the admin row + bootstrap-token are committed so a
|
|
1919
|
+
// half-written admin never gains a token; guarded so an existing token is
|
|
1920
|
+
// never clobbered; wrapped so a token-write failure NEVER fails the
|
|
1921
|
+
// account creation the operator just completed.
|
|
1922
|
+
await ensureOperatorTokenForFirstAdmin(deps, user.id);
|
|
1891
1923
|
const session = createSession(deps.db, { userId: user.id });
|
|
1892
1924
|
const cookie = buildSessionCookie(session.id, Math.floor(SESSION_TTL_MS / 1000), {
|
|
1893
1925
|
secure: isHttpsRequest(req),
|
|
@@ -1927,6 +1959,53 @@ export async function handleSetupAccountPost(
|
|
|
1927
1959
|
}
|
|
1928
1960
|
}
|
|
1929
1961
|
|
|
1962
|
+
/**
 * Make sure `~/.parachute/operator.token` exists once the first admin has been
 * created (design §3.1 / Phase 3b Deliverable A).
 *
 * Without this, a box bootstrapped purely through `init` → wizard would have
 * no operator token on disk, and the per-module verbs that go through the
 * host-admin-gated module-ops API (`parachute start/stop/restart <svc>`) would
 * be rejected with a 401.
 *
 * Guarantees:
 *   - The token is minted under the default `admin` scope-set, which carries
 *     `parachute:host:admin` — the scope `api-modules-ops.ts` gates on.
 *     `issueOperatorToken` writes the file 0600 (`writeOperatorTokenFile`).
 *   - An existing token is left alone: minting happens only when
 *     `readOperatorTokenFile` returns `null`, so a token from
 *     `auth set-password`, `rotate-operator` or an earlier init is never
 *     overwritten.
 *   - This function never throws. Any failure while reading or writing the
 *     token is logged with `console.warn` and swallowed, so it cannot fail the
 *     account creation; `parachute auth rotate-operator` is the documented way
 *     to recover a missing token.
 *
 * The `iss` claim is `deps.issuer`, the same pre-resolved origin the wizard's
 * other mints use (`handleSetupExposePost`). That is semantically equivalent
 * to `commands/auth.ts:resolveHubIssuer`, which instead reads expose-state at
 * call time. A token minted at loopback is fine here: `start hub` repairs a
 * stale `iss` later if the box is exposed after init (hub#481).
 *
 * @param deps Wizard dependencies; `issueOperatorToken` may be stubbed by tests.
 * @param userId Id of the admin the token is issued for.
 */
async function ensureOperatorTokenForFirstAdmin(
  deps: SetupWizardDeps,
  userId: string,
): Promise<void> {
  const mintAndPersist = deps.issueOperatorToken ?? issueOperatorToken;
  try {
    const noTokenYet = (await readOperatorTokenFile(deps.configDir)) === null;
    if (noTokenYet) {
      await mintAndPersist(deps.db, userId, { issuer: deps.issuer, dir: deps.configDir });
    }
  } catch (err) {
    // Non-fatal by design: the admin row is already committed, so account
    // creation must carry on. Leave a breadcrumb for the operator, who can run
    // `parachute auth rotate-operator` from a shell on the box to recover.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`[setup-wizard] operator-token closure skipped for new admin: ${reason}`);
  }
}
|
|
2008
|
+
|
|
1930
2009
|
/**
|
|
1931
2010
|
* Static error page surfaced when an `/admin/setup/account` POST arrives
|
|
1932
2011
|
* after the bootstrap token has already been consumed by a successful
|