@openparachute/hub 0.6.2 → 0.6.3-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/api-modules-ops.test.ts +359 -3
- package/src/__tests__/api-modules.test.ts +54 -0
- package/src/__tests__/hub-unit.test.ts +574 -0
- package/src/__tests__/init.test.ts +219 -2
- package/src/__tests__/lifecycle.test.ts +423 -0
- package/src/__tests__/managed-unit.test.ts +575 -0
- package/src/__tests__/module-ops-client.test.ts +556 -0
- package/src/__tests__/port-probe.test.ts +23 -0
- package/src/__tests__/setup-wizard.test.ts +130 -0
- package/src/__tests__/status-supervisor.test.ts +569 -0
- package/src/__tests__/supervisor.test.ts +471 -6
- package/src/api-modules-ops.ts +221 -0
- package/src/api-modules.ts +18 -2
- package/src/cli.ts +14 -4
- package/src/cloudflare/connector-service.ts +117 -322
- package/src/commands/init.ts +225 -12
- package/src/commands/lifecycle.ts +366 -38
- package/src/commands/serve-boot.ts +71 -25
- package/src/commands/status.ts +596 -49
- package/src/hub-server.ts +11 -0
- package/src/hub-unit.ts +735 -0
- package/src/managed-unit.ts +674 -0
- package/src/module-ops-client.ts +457 -0
- package/src/port-probe.ts +50 -0
- package/src/setup-wizard.ts +80 -1
- package/src/supervisor.ts +360 -14
package/src/supervisor.ts
CHANGED
|
@@ -29,8 +29,37 @@
|
|
|
29
29
|
* services.json on boot).
|
|
30
30
|
*/
|
|
31
31
|
|
|
32
|
+
import {
|
|
33
|
+
MissingDependencyError,
|
|
34
|
+
type MissingDependencyWire,
|
|
35
|
+
ensureExecutable,
|
|
36
|
+
rethrowIfMissing,
|
|
37
|
+
} from "@openparachute/depcheck";
|
|
38
|
+
import { type PortListeningFn, defaultPortListening } from "./port-probe.ts";
|
|
39
|
+
|
|
32
40
|
export type ModuleStatus = "starting" | "running" | "stopped" | "crashed" | "restarting";
|
|
33
41
|
|
|
42
|
+
/**
 * Structured description of a failed (or not-yet-healthy) module start, stored
 * on `ModuleState.startError` (§6.5).
 *
 * The field names follow depcheck's `MissingDependencyWire` and the
 * services.json `ServiceEntryStartError` rows written by
 * `commands/lifecycle.ts`, so `status` and the SPA can present one
 * missing-dependency surface whether the module was started through the
 * detached path or through the supervisor. `error_type` is deliberately a
 * plain string so other failure categories can be added later.
 */
export interface ModuleStartError {
  /** Machine-readable failure category (e.g. `"missing_dependency"`). */
  readonly error_type: string;
  /** Human-readable explanation of what went wrong. */
  readonly error_description: string;
  /** Present for `error_type: "missing_dependency"`. */
  readonly binary?: string;
  readonly why?: string | null;
  readonly docs_url?: string | null;
  /** Per-platform install commands, when known. */
  readonly install?: {
    darwin?: string;
    linux?: string;
    generic?: string;
  };
  readonly sysadmin_hint?: string;
  /** ISO timestamp of when the failure was recorded. */
  readonly at: string;
}
|
|
62
|
+
|
|
34
63
|
export interface ModuleState {
|
|
35
64
|
/** Short name (vault / notes / scribe / …). */
|
|
36
65
|
readonly short: string;
|
|
@@ -46,6 +75,15 @@ export interface ModuleState {
|
|
|
46
75
|
readonly lastCrashAt?: string;
|
|
47
76
|
/** Exit code of the most recent crash. */
|
|
48
77
|
readonly lastExitCode?: number | null;
|
|
78
|
+
/**
|
|
79
|
+
* Structured start-failure detail (§6.5). Set when a preflight
|
|
80
|
+
* `MissingDependencyError` aborts the spawn, OR when a spawned child stays
|
|
81
|
+
* alive but never binds its port within the readiness window
|
|
82
|
+
* (started-but-unbound, hub#487). Cleared on a clean, port-confirmed start.
|
|
83
|
+
* The `status` enum is intentionally NOT extended (proxy-state Mode-1 + the
|
|
84
|
+
* SPA read `running`); this field carries the friendly diagnostic instead.
|
|
85
|
+
*/
|
|
86
|
+
readonly startError?: ModuleStartError;
|
|
49
87
|
}
|
|
50
88
|
|
|
51
89
|
export interface SpawnRequest {
|
|
@@ -97,10 +135,31 @@ export interface SupervisorOpts {
|
|
|
97
135
|
* stream without spelunking stdout.
|
|
98
136
|
*/
|
|
99
137
|
readonly output?: (line: string) => void;
|
|
138
|
+
/**
|
|
139
|
+
* Cap, in bytes, of the per-module log ring buffer (§6.5). The supervisor
|
|
140
|
+
* keeps the most-recent ~`logBufferBytes` of each child's output so a
|
|
141
|
+
* `GET /api/modules/:short/logs` tap can replay the boot/crash lines that
|
|
142
|
+
* happened *before* the reader connected — the detached path got this for
|
|
143
|
+
* free via the per-service logfile; the supervisor streams-and-discards, so
|
|
144
|
+
* without a buffer the crash cause (the most important line) is lost. The
|
|
145
|
+
* oldest whole lines are dropped once the cap is exceeded. Default 64 KiB.
|
|
146
|
+
*/
|
|
147
|
+
readonly logBufferBytes?: number;
|
|
100
148
|
/**
|
|
101
149
|
* Test seam over `Bun.spawn`. Returns a Subprocess-shaped handle.
|
|
102
150
|
*/
|
|
103
151
|
readonly spawnFn?: SpawnFn;
|
|
152
|
+
/**
|
|
153
|
+
* Group-aware kill seam (hub#88). Production sends the signal to the child's
|
|
154
|
+
* whole process group (`process.kill(-pid, signal)`) so wrapped startCmds
|
|
155
|
+
* like `pnpm exec tsx server.ts` reap the tsx grandchild — not just the
|
|
156
|
+
* wrapper that would otherwise leave the grandchild bound to the port →
|
|
157
|
+
* EADDRINUSE on restart. Pairs with `defaultSpawnFn`'s `detached: true`
|
|
158
|
+
* (each child is its own process-group leader, `pid === pgid`). Defaults to
|
|
159
|
+
* {@link defaultKillGroup}; tests inject a stub so they stay deterministic
|
|
160
|
+
* (no real signals) and can assert the negative pid (group send) was used.
|
|
161
|
+
*/
|
|
162
|
+
readonly killFn?: KillFn;
|
|
104
163
|
/**
|
|
105
164
|
* Test seam over wall-clock. Production passes `Date.now`.
|
|
106
165
|
*/
|
|
@@ -110,6 +169,40 @@ export interface SupervisorOpts {
|
|
|
110
169
|
* with `setTimeout`. Tests stub to advance time deterministically.
|
|
111
170
|
*/
|
|
112
171
|
readonly sleep?: (ms: number) => Promise<void>;
|
|
172
|
+
/**
|
|
173
|
+
* Port-readiness probe (§6.5). After a child spawns, the supervisor polls
|
|
174
|
+
* this until the module's port (from `req.env.PORT`) binds, to catch the
|
|
175
|
+
* alive-but-never-bound shape (hub#487). Defaults to `defaultPortListening`
|
|
176
|
+
* (a loopback TCP connect). Tests inject a deterministic stub.
|
|
177
|
+
*
|
|
178
|
+
* Defaulting policy (mirrors `commands/lifecycle.ts`): the readiness gate is
|
|
179
|
+
* SKIPPED unless this is the production path (no `spawnFn` override) OR a
|
|
180
|
+
* test explicitly opts in by injecting `portListening` / `startReadyMs`.
|
|
181
|
+
* Without that guard, every existing stub-spawner test (fake procs that
|
|
182
|
+
* never bind a real port) would block the full readiness window.
|
|
183
|
+
*/
|
|
184
|
+
readonly portListening?: PortListeningFn;
|
|
185
|
+
/**
|
|
186
|
+
* How long the post-spawn port-readiness gate polls before recording a
|
|
187
|
+
* `started-but-unbound` start-error, in ms. Default 4000 on the production
|
|
188
|
+
* path; 0 (skipped) on the stub-spawner test path unless `portListening` /
|
|
189
|
+
* `startReadyMs` is set explicitly.
|
|
190
|
+
*/
|
|
191
|
+
readonly startReadyMs?: number;
|
|
192
|
+
/** Poll interval while waiting for the port to bind, in ms. Default 200. */
|
|
193
|
+
readonly startReadyPollMs?: number;
|
|
194
|
+
/**
|
|
195
|
+
* PATH-resolution seam for the pre-spawn `ensureExecutable` preflight
|
|
196
|
+
* (`@openparachute/depcheck`). Production uses the real `Bun.which`; a
|
|
197
|
+
* missing startCmd binary then aborts the spawn with a structured
|
|
198
|
+
* `MissingDependencyError` recorded onto `ModuleState.startError`.
|
|
199
|
+
*
|
|
200
|
+
* Defaulting policy mirrors the readiness gate: a stub `spawnFn` (test path)
|
|
201
|
+
* gets a permissive resolver so the preflight doesn't trip on binaries
|
|
202
|
+
* absent from the test host's PATH; production gets the real `Bun.which`.
|
|
203
|
+
* Tests exercising the missing-binary branch inject `which: () => null`.
|
|
204
|
+
*/
|
|
205
|
+
readonly which?: (cmd: string) => string | null;
|
|
113
206
|
}
|
|
114
207
|
|
|
115
208
|
/**
|
|
@@ -119,6 +212,12 @@ export interface SupervisorOpts {
|
|
|
119
212
|
*/
|
|
120
213
|
export type SpawnFn = (req: SpawnRequest) => SupervisedProc;
|
|
121
214
|
|
|
215
|
+
/**
 * Signature of the group-aware kill seam: deliver `signal` to the process
 * group rooted at `pid`. {@link defaultKillGroup} is the production
 * implementation; tests substitute a stub.
 */
export type KillFn = (pid: number, signal: number | NodeJS.Signals) => void;
|
|
220
|
+
|
|
122
221
|
/**
|
|
123
222
|
* The minimal Subprocess shape the supervisor depends on. Bun's real
|
|
124
223
|
* `Subprocess` matches this; the test fake mirrors it.
|
|
@@ -135,6 +234,46 @@ const DEFAULT_MAX_RESTARTS = 3;
|
|
|
135
234
|
const DEFAULT_RESTART_WINDOW_MS = 60_000;
|
|
136
235
|
const DEFAULT_RESTART_DELAY_MS = 500;
|
|
137
236
|
const DEFAULT_KILL_TIMEOUT_MS = 5_000;
|
|
237
|
+
const DEFAULT_LOG_BUFFER_BYTES = 64 * 1024;
|
|
238
|
+
const DEFAULT_START_READY_MS = 4_000;
|
|
239
|
+
const DEFAULT_START_READY_POLL_MS = 200;
|
|
240
|
+
|
|
241
|
+
/**
 * Bounded, line-oriented ring buffer (§6.5).
 *
 * Keeps the most recently pushed strings whose combined UTF-8 size fits in
 * `maxBytes`. When a push takes the total over the cap, whole entries are
 * dropped from the front (oldest first) until it fits again. Bounding by bytes
 * rather than by entry count keeps memory use predictable regardless of line
 * length.
 *
 * Entries are never split: a single entry larger than the cap is retained on
 * its own (dropping it would lose exactly the long stack trace a reader most
 * wants) and is evicted by the next push.
 *
 * Callers are expected to push one complete line per call, including its
 * trailing newline; `text()` concatenates entries without adding separators.
 */
export class LogRingBuffer {
  private readonly lines: string[] = [];
  /** UTF-8 byte length of each entry in `lines`, index-aligned. */
  private readonly sizes: number[] = [];
  private bytes = 0;
  private readonly maxBytes: number;

  /**
   * @param maxBytes Cap on the combined UTF-8 size of retained entries. A
   *   `NaN` cap (e.g. from a failed numeric parse) is treated as 0 — otherwise
   *   every `bytes > NaN` comparison would be false and the buffer would grow
   *   without bound.
   */
  constructor(maxBytes: number) {
    this.maxBytes = Number.isNaN(maxBytes) ? 0 : maxBytes;
  }

  /** Append one entry, evicting the oldest entries if the cap is exceeded. */
  push(line: string): void {
    const size = Buffer.byteLength(line);
    this.lines.push(line);
    this.sizes.push(size);
    this.bytes += size;

    // Count how many leading entries must go, always keeping at least the
    // newest one, then remove them with a single splice. Sizes are cached so
    // eviction never re-scans the dropped strings.
    let dropCount = 0;
    while (this.bytes > this.maxBytes && this.lines.length - dropCount > 1) {
      this.bytes -= this.sizes[dropCount] ?? 0;
      dropCount += 1;
    }
    if (dropCount > 0) {
      this.lines.splice(0, dropCount);
      this.sizes.splice(0, dropCount);
    }
  }

  /** Copy of the buffered entries, oldest-first. Mutating it does not affect the buffer. */
  snapshot(): string[] {
    return [...this.lines];
  }

  /** Buffered entries concatenated into a single string (the wire/tail shape). */
  text(): string {
    return this.lines.join("");
  }
}
|
|
138
277
|
|
|
139
278
|
/**
|
|
140
279
|
* Per-module supervisor. Owns the spawn → watch → restart loop.
|
|
@@ -151,15 +290,30 @@ export class Supervisor {
|
|
|
151
290
|
private readonly modules = new Map<string, ModuleEntry>();
|
|
152
291
|
|
|
153
292
|
constructor(opts: SupervisorOpts = {}) {
  // §6.5 defaulting policy for the readiness gate and the preflight, following
  // `commands/lifecycle.ts`. With no `spawnFn` override (the production path)
  // the supervisor uses the real readiness window and `Bun.which`. With a stub
  // spawner the gate is off (0 ms) and `which` is permissive, unless the
  // caller opts in by supplying `portListening`, `startReadyMs` or `which` —
  // so fake-proc tests that never bind a port are not held up.
  const usesRealSpawn = opts.spawnFn === undefined;
  const gateRequested = opts.startReadyMs !== undefined || opts.portListening !== undefined;
  const fallbackReadyMs = usesRealSpawn || gateRequested ? DEFAULT_START_READY_MS : 0;

  this.opts = {
    maxRestarts: opts.maxRestarts ?? DEFAULT_MAX_RESTARTS,
    restartWindowMs: opts.restartWindowMs ?? DEFAULT_RESTART_WINDOW_MS,
    restartDelayMs: opts.restartDelayMs ?? DEFAULT_RESTART_DELAY_MS,
    killTimeoutMs: opts.killTimeoutMs ?? DEFAULT_KILL_TIMEOUT_MS,
    output: opts.output ?? ((line) => process.stdout.write(line)),
    logBufferBytes: opts.logBufferBytes ?? DEFAULT_LOG_BUFFER_BYTES,
    spawnFn: opts.spawnFn ?? defaultSpawnFn,
    killFn: opts.killFn ?? defaultKillGroup,
    now: opts.now ?? Date.now,
    sleep: opts.sleep ?? ((ms) => new Promise((r) => setTimeout(r, ms))),
    portListening: opts.portListening ?? defaultPortListening,
    startReadyMs: opts.startReadyMs ?? fallbackReadyMs,
    startReadyPollMs: opts.startReadyPollMs ?? DEFAULT_START_READY_POLL_MS,
    // Kept inline so `Bun.which` is only touched when it is actually needed.
    which: opts.which ?? (usesRealSpawn ? Bun.which : () => "/stub/bin/preflight-skipped"),
  };
}
|
|
165
319
|
|
|
@@ -175,6 +329,9 @@ export class Supervisor {
|
|
|
175
329
|
return existing.state;
|
|
176
330
|
}
|
|
177
331
|
// Crashed → operator intent is "try again." Wipe the budget.
|
|
332
|
+
// A fresh ring buffer per entry — `start` is a clean spawn (the crash-
|
|
333
|
+
// respawn path in `handleExit` reuses the existing entry + buffer, so a
|
|
334
|
+
// crashed module's boot/crash lines survive into the restart for replay).
|
|
178
335
|
const entry: ModuleEntry = {
|
|
179
336
|
req,
|
|
180
337
|
state: {
|
|
@@ -183,12 +340,117 @@ export class Supervisor {
|
|
|
183
340
|
restartsInWindow: 0,
|
|
184
341
|
},
|
|
185
342
|
crashStamps: [],
|
|
343
|
+
logs: new LogRingBuffer(this.opts.logBufferBytes),
|
|
186
344
|
};
|
|
187
345
|
this.modules.set(req.short, entry);
|
|
188
|
-
|
|
346
|
+
|
|
347
|
+
// Pre-spawn preflight (§6.5): resolve the startCmd binary on PATH before
|
|
348
|
+
// spawning a doomed child. A missing binary records a structured
|
|
349
|
+
// `MissingDependencyError` onto state (the same friendly missing-dependency
|
|
350
|
+
// surface `commands/lifecycle.ts` records) and aborts — no spawn. Mirrors
|
|
351
|
+
// `lifecycle.start`'s `ensureExecutable` preflight.
|
|
352
|
+
const startBinary = req.cmd[0];
|
|
353
|
+
if (startBinary) {
|
|
354
|
+
try {
|
|
355
|
+
ensureExecutable(startBinary, { which: this.opts.which });
|
|
356
|
+
} catch (err) {
|
|
357
|
+
if (err instanceof MissingDependencyError) {
|
|
358
|
+
entry.state = {
|
|
359
|
+
...entry.state,
|
|
360
|
+
status: "crashed",
|
|
361
|
+
pid: undefined,
|
|
362
|
+
startError: startErrorFromWire(err.toWire(), this.opts.now),
|
|
363
|
+
};
|
|
364
|
+
return entry.state;
|
|
365
|
+
}
|
|
366
|
+
throw err;
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
// Belt-and-suspenders for a spawn that slips past the preflight (binary
|
|
371
|
+
// removed between check + spawn, or a path that didn't preflight): a
|
|
372
|
+
// not-found spawn throw becomes the same structured MissingDependencyError
|
|
373
|
+
// recorded onto state, not a throw out of `start`. Mirrors
|
|
374
|
+
// `lifecycle.start`'s `rethrowIfMissing` catch.
|
|
375
|
+
try {
|
|
376
|
+
this.spawnAndWatch(entry);
|
|
377
|
+
} catch (err) {
|
|
378
|
+
if (startBinary) {
|
|
379
|
+
try {
|
|
380
|
+
rethrowIfMissing(err, startBinary);
|
|
381
|
+
} catch (missing) {
|
|
382
|
+
if (missing instanceof MissingDependencyError) {
|
|
383
|
+
entry.state = {
|
|
384
|
+
...entry.state,
|
|
385
|
+
status: "crashed",
|
|
386
|
+
pid: undefined,
|
|
387
|
+
startError: startErrorFromWire(missing.toWire(), this.opts.now),
|
|
388
|
+
};
|
|
389
|
+
return entry.state;
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
throw err;
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
// Post-spawn port-readiness gate (§6.5, hub#487). A returned pid only
|
|
397
|
+
// proves the kernel forked the process; it says nothing about whether the
|
|
398
|
+
// module bound its port. Poll the port (from req.env.PORT) up to
|
|
399
|
+
// `startReadyMs`. On success: clear any prior startError. On timeout while
|
|
400
|
+
// the child is still alive: record a `started-but-unbound` structured
|
|
401
|
+
// start-error WITHOUT touching the `running` status enum (proxy-state
|
|
402
|
+
// Mode-1 + the SPA read `running`) — the friendly diagnostic rides the
|
|
403
|
+
// startError field. A child that died during the window is left to the
|
|
404
|
+
// crash watcher (`handleExit`), which owns the restart budget.
|
|
405
|
+
await this.awaitPortReadiness(entry);
|
|
189
406
|
return entry.state;
|
|
190
407
|
}
|
|
191
408
|
|
|
409
|
+
/**
 * Post-spawn readiness gate (§6.5): probe the module's port until it is
 * listening or the `startReadyMs` window runs out.
 *
 * Does nothing when the window is 0 or less, or when `req.env.PORT` is absent
 * or not a positive finite number. Stops silently as soon as a stop has been
 * requested or the status is no longer `"running"`, so a crash verdict is
 * never overwritten here. On a successful probe any existing
 * `state.startError` is removed; on timeout a `started_but_unbound` error is
 * recorded. `state.status` is never changed by this method.
 */
private async awaitPortReadiness(entry: ModuleEntry): Promise<void> {
  const windowMs = this.opts.startReadyMs;
  if (windowMs <= 0) return;

  const rawPort = entry.req.env?.PORT;
  const port = rawPort ? Number(rawPort) : Number.NaN;
  if (!(Number.isFinite(port) && port > 0)) return; // Nothing to probe.

  // True while this entry is still a live, not-being-stopped module.
  const stillRunning = (): boolean => !entry.stopRequested && entry.state.status === "running";

  const giveUpAt = this.opts.now() + windowMs;
  while (this.opts.now() < giveUpAt) {
    // The child exited or a stop was requested during the window; leave the
    // state untouched.
    if (!stillRunning()) return;

    const bound = await this.opts.portListening(port);
    if (bound) {
      // Listening → healthy. Remove a leftover start error, if any.
      if (entry.state.startError) {
        const { startError: _stale, ...withoutError } = entry.state;
        entry.state = withoutError;
      }
      return;
    }

    await this.opts.sleep(this.opts.startReadyPollMs);
  }

  // The window ran out. If the child is still up, attach the diagnostic while
  // leaving the status as `running` (the process does exist).
  if (!stillRunning()) return;
  entry.state = {
    ...entry.state,
    startError: {
      error_type: "started_but_unbound",
      error_description: `${entry.req.short} started (pid ${entry.state.pid}) but is not listening on port ${port} after ${windowMs}ms — it may still be coming up, or the port is held by another process.`,
      at: new Date(this.opts.now()).toISOString(),
    },
  };
}
|
|
453
|
+
|
|
192
454
|
/**
|
|
193
455
|
* Stop a supervised module. Sends SIGTERM, awaits the child's exit
|
|
194
456
|
* (so the log-pump drains the final flush before our stdout closes),
|
|
@@ -216,7 +478,13 @@ export class Supervisor {
|
|
|
216
478
|
const proc = entry.proc;
|
|
217
479
|
if (proc) {
|
|
218
480
|
try {
|
|
219
|
-
|
|
481
|
+
// Group-aware kill (hub#88): signal the child's whole process group
|
|
482
|
+
// via `killFn` (default `defaultKillGroup` → `process.kill(-pid)`) so
|
|
483
|
+
// a wrapped startCmd's grandchild is reaped too, not just the wrapper.
|
|
484
|
+
// Mirrors `commands/lifecycle.ts`'s `defaultKill` repointing of
|
|
485
|
+
// `defaultSpawner`'s detached children. Without it, the grandchild
|
|
486
|
+
// stays bound to the port → restart hits EADDRINUSE.
|
|
487
|
+
this.opts.killFn(proc.pid, "SIGTERM");
|
|
220
488
|
} catch {
|
|
221
489
|
// Process may already be dead — fall through.
|
|
222
490
|
}
|
|
@@ -234,7 +502,9 @@ export class Supervisor {
|
|
|
234
502
|
`[supervisor] ${entry.req.short} did not exit ${this.opts.killTimeoutMs}ms after SIGTERM — escalating to SIGKILL.\n`,
|
|
235
503
|
);
|
|
236
504
|
try {
|
|
237
|
-
|
|
505
|
+
// Group-aware SIGKILL escalation — same `killFn` seam as the
|
|
506
|
+
// SIGTERM above so the whole group is reaped, not just the leader.
|
|
507
|
+
this.opts.killFn(proc.pid, "SIGKILL");
|
|
238
508
|
} catch {
|
|
239
509
|
// Process may already be dead between the timeout firing
|
|
240
510
|
// and us reaching kill() — fall through to the await.
|
|
@@ -287,13 +557,17 @@ export class Supervisor {
|
|
|
287
557
|
/**
 * Spawn the module's process via `spawnFn`, mark the entry as running, wire
 * its output through `pipeOutput`, and hand its exit to `handleExit`.
 */
private spawnAndWatch(entry: ModuleEntry): void {
  const child = this.opts.spawnFn(entry.req);
  entry.proc = child;

  // A start error left over from an earlier attempt no longer applies once a
  // new process exists, so it is dropped from the state here.
  const { startError: _stale, ...carried } = entry.state;
  const pid = child.pid;
  const startedAt = new Date(this.opts.now()).toISOString();
  entry.state = { ...carried, status: "running", pid, startedAt };

  this.pipeOutput(entry, child);
  void child.exited.then((exitCode) => this.handleExit(entry, exitCode));
}
|
|
299
573
|
|
|
@@ -348,16 +622,34 @@ export class Supervisor {
|
|
|
348
622
|
}
|
|
349
623
|
|
|
350
624
|
/**
 * Buffered recent output of a supervised module (§6.5): a copy of its ring
 * buffer, oldest-first, one prefixed line per element.
 *
 * Returns `undefined` when no entry exists for `short`, so a caller such as a
 * `GET /api/modules/:short/logs` handler can tell "not supervised" apart from
 * "supervised but quiet" (an empty array).
 */
logs(short: string): string[] | undefined {
  const entry = this.modules.get(short);
  return entry === undefined ? undefined : entry.logs.snapshot();
}
|
|
635
|
+
|
|
636
|
+
/**
 * Forward a child's stdout and stderr, via `pumpLines`, to both the
 * supervisor's `output` callback and the module's ring buffer (§6.5).
 *
 * Each stream is pumped with the prefix `[<short>] `. Both destinations
 * receive the same string from the same sink, so what `logs()` later replays
 * matches what the live stream showed. A stream that is absent on the process
 * handle is skipped.
 */
private pipeOutput(entry: ModuleEntry, proc: SupervisedProc): void {
  const tag = `[${entry.req.short}] `;

  // One sink for both streams: live output first, then the replay buffer.
  const emit = (line: string): void => {
    this.opts.output(line);
    entry.logs.push(line);
  };

  const { stdout, stderr } = proc;
  if (stdout) void pumpLines(stdout, tag, emit);
  if (stderr) void pumpLines(stderr, tag, emit);
}
|
|
362
654
|
}
|
|
363
655
|
|
|
@@ -367,6 +659,8 @@ interface ModuleEntry {
|
|
|
367
659
|
proc?: SupervisedProc;
|
|
368
660
|
crashStamps: number[];
|
|
369
661
|
stopRequested?: boolean;
|
|
662
|
+
/** Bounded ring buffer of recent prefixed output lines (§6.5). */
|
|
663
|
+
logs: LogRingBuffer;
|
|
370
664
|
}
|
|
371
665
|
|
|
372
666
|
async function pumpLines(
|
|
@@ -402,7 +696,20 @@ async function pumpLines(
|
|
|
402
696
|
|
|
403
697
|
const defaultSpawnFn: SpawnFn = (req) => {
|
|
404
698
|
const spawnOpts: Parameters<typeof Bun.spawn>[1] = {
|
|
699
|
+
// Keep stdout/stderr explicitly piped — the supervisor pumps child output
|
|
700
|
+
// into hub's log (`pipeOutput`/`pumpLines`) + the per-module ring buffer.
|
|
701
|
+
// `detached: true` does NOT detach explicitly-piped stdio, so these stay
|
|
702
|
+
// wired even though the child gets its own process group below.
|
|
405
703
|
stdio: ["ignore", "pipe", "pipe"],
|
|
704
|
+
// Spawn in a fresh process group (pid == pgid) so `killFn` (→
|
|
705
|
+
// `process.kill(-pid, sig)`) reaches every descendant, not just the
|
|
706
|
+
// wrapper. Without this, wrapped startCmds like `pnpm exec tsx server.ts`
|
|
707
|
+
// leave the tsx grandchild bound to the port after stop → restart hits
|
|
708
|
+
// EADDRINUSE (hub#88). Mirrors `commands/lifecycle.ts`'s `defaultSpawner`,
|
|
709
|
+
// which set `detached: true` for exactly this reason. We do NOT `unref()`:
|
|
710
|
+
// the supervisor must stay attached for the lifecycle (watch `exited`,
|
|
711
|
+
// pump output, reap on stop).
|
|
712
|
+
detached: true,
|
|
406
713
|
// Inherit env so supervised module sees PATH, HOME, PARACHUTE_HOME, etc.
|
|
407
714
|
// Bun.spawn defaults to empty env — see api-modules-ops.ts:defaultRun.
|
|
408
715
|
// Per-call `req.env` overrides merge on top below.
|
|
@@ -413,3 +720,42 @@ const defaultSpawnFn: SpawnFn = (req) => {
|
|
|
413
720
|
const proc = Bun.spawn([...req.cmd], spawnOpts);
|
|
414
721
|
return proc as unknown as SupervisedProc;
|
|
415
722
|
};
|
|
723
|
+
|
|
724
|
+
/**
 * Convert a depcheck `MissingDependencyWire` into the `ModuleStartError`
 * stored on `ModuleState` (§6.5).
 *
 * The seven known fields are copied across under the same names and `at` is
 * stamped from the injected clock. Fields are listed explicitly rather than
 * spread, so any additional properties on the wire object are not carried
 * over.
 *
 * @param wire The wire-format missing-dependency description.
 * @param now Clock returning epoch milliseconds; used for the `at` timestamp.
 * @returns The start-error record with an ISO `at` timestamp.
 */
function startErrorFromWire(wire: MissingDependencyWire, now: () => number): ModuleStartError {
  const { error_type, error_description, binary, why, docs_url, install, sysadmin_hint } = wire;
  const at = new Date(now()).toISOString();
  return { error_type, error_description, binary, why, docs_url, install, sysadmin_hint, at };
}
|
|
744
|
+
|
|
745
|
+
/**
 * Production group-aware kill (hub#88).
 *
 * Sends `signal` to the whole process group rooted at `pid` by signalling the
 * negated pid, so descendants of a wrapped start command receive it along
 * with the wrapper. If that fails with `ESRCH` (no such group), the signal is
 * retried against the bare `pid` so it still lands when a process with that
 * pid exists.
 *
 * `pid` must be an integer greater than 1. Negating anything else is unsafe:
 * 0 would address the caller's own process group and 1 would become `-1`,
 * which addresses every process the caller is allowed to signal. Such values
 * are rejected before any signal is sent.
 *
 * @param pid Process-group leader to signal.
 * @param signal Signal name or number.
 * @throws RangeError when `pid` is not an integer greater than 1.
 * @throws Whatever `process.kill` throws, other than the `ESRCH` from the
 *   group send (which triggers the bare-pid retry instead).
 */
export const defaultKillGroup: KillFn = (pid, signal) => {
  if (!Number.isInteger(pid) || pid <= 1) {
    throw new RangeError(`defaultKillGroup: refusing to signal invalid pid ${pid}`);
  }
  try {
    process.kill(-pid, signal);
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code !== "ESRCH") throw err;
    process.kill(pid, signal);
  }
};
|