@openparachute/hub 0.6.2 → 0.6.3-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,39 @@
1
- import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
- import { type SpawnRequest, type SupervisedProc, Supervisor } from "../supervisor.ts";
1
+ import { describe, expect, test } from "bun:test";
2
+ import { Socket } from "node:net";
3
+ import {
4
+ type KillFn,
5
+ type SpawnRequest,
6
+ type SupervisedProc,
7
+ Supervisor,
8
+ defaultKillGroup,
9
+ } from "../supervisor.ts";
10
+
11
+ /**
12
+ * A `killFn` stub that records every (pid, signal) it receives and forwards
13
+ * the signal to the matching fake proc's own `kill` (so fakes that model
14
+ * "only SIGKILL terminates me" still work). Mirrors how production's
15
+ * `defaultKillGroup` would signal the process group, but stays in-process +
16
+ * deterministic. Tests assert on `.calls` to prove a group send (negative
17
+ * pid) happened. `register` wires a fake's pid → its `kill`.
18
+ */
19
+ function makeKillRecorder(): {
20
+ killFn: KillFn;
21
+ calls: Array<{ pid: number; signal: NodeJS.Signals | number }>;
22
+ register: (proc: FakeProc) => void;
23
+ } {
24
+ const calls: Array<{ pid: number; signal: NodeJS.Signals | number }> = [];
25
+ const byPid = new Map<number, FakeProc>();
26
+ return {
27
+ calls,
28
+ register: (proc) => byPid.set(proc.pid, proc),
29
+ killFn: (pid, signal) => {
30
+ calls.push({ pid, signal });
31
+ // Production signals the group via the negative pid; the supervisor
32
+ // passes the positive leader pid, so map back to the fake by |pid|.
33
+ byPid.get(Math.abs(pid))?.kill(signal);
34
+ },
35
+ };
36
+ }
3
37
 
4
38
  /**
5
39
  * Fake subprocess with controllable exited promise + injectable stdout
@@ -99,12 +133,20 @@ function tick(ms = 10): Promise<void> {
99
133
  return new Promise((r) => setTimeout(r, ms));
100
134
  }
101
135
 
136
+ /**
137
+ * No-op kill seam for tests that exercise `stop()`/`restart()` only as
138
+ * lifecycle cleanup and don't assert on the signal. Keeps the default
139
+ * `defaultKillGroup` (which calls the real `process.kill`) from firing a
140
+ * signal at a fake pid that could collide with a real process on the host.
141
+ */
142
+ const noopKill: KillFn = () => {};
143
+
102
144
  describe("Supervisor.start + status transitions", () => {
103
145
  test("transitions starting → running after spawn", async () => {
104
146
  const proc = makeFakeProc(123);
105
147
  const spawner = makeQueueSpawner();
106
148
  spawner.enqueue(proc);
107
- const sup = new Supervisor({ spawnFn: spawner.spawn });
149
+ const sup = new Supervisor({ spawnFn: spawner.spawn, killFn: noopKill });
108
150
 
109
151
  const state = await sup.start({ short: "vault", cmd: ["bun", "vault.ts"] });
110
152
  expect(state.status).toBe("running");
@@ -123,7 +165,7 @@ describe("Supervisor.start + status transitions", () => {
123
165
  const proc = makeFakeProc(123);
124
166
  const spawner = makeQueueSpawner();
125
167
  spawner.enqueue(proc);
126
- const sup = new Supervisor({ spawnFn: spawner.spawn });
168
+ const sup = new Supervisor({ spawnFn: spawner.spawn, killFn: noopKill });
127
169
 
128
170
  await sup.start({ short: "vault", cmd: ["bun", "vault.ts"] });
129
171
  await sup.start({ short: "vault", cmd: ["bun", "vault.ts"] });
@@ -146,6 +188,7 @@ describe("Supervisor restart-on-crash", () => {
146
188
 
147
189
  const sup = new Supervisor({
148
190
  spawnFn: spawner.spawn,
191
+ killFn: noopKill,
149
192
  restartDelayMs: 0,
150
193
  sleep: () => Promise.resolve(),
151
194
  });
@@ -177,6 +220,7 @@ describe("Supervisor restart-on-crash", () => {
177
220
  const outputs: string[] = [];
178
221
  const sup = new Supervisor({
179
222
  spawnFn: spawner.spawn,
223
+ killFn: noopKill,
180
224
  maxRestarts: 3,
181
225
  restartDelayMs: 0,
182
226
  sleep: () => Promise.resolve(),
@@ -206,6 +250,7 @@ describe("Supervisor restart-on-crash", () => {
206
250
  let nowVal = 1_000_000;
207
251
  const sup = new Supervisor({
208
252
  spawnFn: spawner.spawn,
253
+ killFn: noopKill,
209
254
  maxRestarts: 2,
210
255
  restartWindowMs: 5_000,
211
256
  restartDelayMs: 0,
@@ -241,8 +286,11 @@ describe("Supervisor.stop", () => {
241
286
  const proc = makeFakeProc(101);
242
287
  const spawner = makeQueueSpawner();
243
288
  spawner.enqueue(proc);
289
+ const killer = makeKillRecorder();
290
+ killer.register(proc);
244
291
  const sup = new Supervisor({
245
292
  spawnFn: spawner.spawn,
293
+ killFn: killer.killFn,
246
294
  restartDelayMs: 0,
247
295
  sleep: () => Promise.resolve(),
248
296
  });
@@ -254,6 +302,9 @@ describe("Supervisor.stop", () => {
254
302
  const stopPromise = sup.stop("vault");
255
303
  expect(proc.killed).toBe(true);
256
304
  expect(proc.killSignal).toBe("SIGTERM");
305
+ // The signal is sent through the group-aware killFn seam (hub#88), with
306
+ // the child's leader pid.
307
+ expect(killer.calls).toEqual([{ pid: 101, signal: "SIGTERM" }]);
257
308
 
258
309
  proc.closeStreams();
259
310
  proc.resolveExit(0);
@@ -278,9 +329,12 @@ describe("Supervisor.stop", () => {
278
329
  };
279
330
  const spawner = makeQueueSpawner();
280
331
  spawner.enqueue(proc);
332
+ const killer = makeKillRecorder();
333
+ killer.register(proc);
281
334
  const outputs: string[] = [];
282
335
  const sup = new Supervisor({
283
336
  spawnFn: spawner.spawn,
337
+ killFn: killer.killFn,
284
338
  restartDelayMs: 0,
285
339
  sleep: () => Promise.resolve(),
286
340
  killTimeoutMs: 5, // Short timeout so the test doesn't pause for 5s.
@@ -291,8 +345,13 @@ describe("Supervisor.stop", () => {
291
345
  proc.closeStreams();
292
346
  await sup.stop("vault");
293
347
 
294
- // SIGTERM first, then SIGKILL after the timeout.
348
+ // SIGTERM first, then SIGKILL after the timeout — both via the group-aware
349
+ // killFn seam with the leader pid.
295
350
  expect(signals).toEqual(["SIGTERM", "SIGKILL"]);
351
+ expect(killer.calls).toEqual([
352
+ { pid: 101, signal: "SIGTERM" },
353
+ { pid: 101, signal: "SIGKILL" },
354
+ ]);
296
355
  expect(outputs.some((l) => l.includes("escalating to SIGKILL"))).toBe(true);
297
356
  expect(sup.get("vault")?.status).toBe("stopped");
298
357
  });
@@ -311,8 +370,11 @@ describe("Supervisor.stop", () => {
311
370
  const signals: (NodeJS.Signals | number | undefined)[] = [];
312
371
  const spawner = makeQueueSpawner();
313
372
  spawner.enqueue(proc);
373
+ const killer = makeKillRecorder();
374
+ killer.register(proc);
314
375
  const sup = new Supervisor({
315
376
  spawnFn: spawner.spawn,
377
+ killFn: killer.killFn,
316
378
  restartDelayMs: 0,
317
379
  sleep: () => Promise.resolve(),
318
380
  killTimeoutMs: 1000, // Plenty of headroom for the 5ms simulated exit.
@@ -332,10 +394,176 @@ describe("Supervisor.stop", () => {
332
394
  // Microtask ordering guarantees they both run before await returns.)
333
395
  expect(exitObservedBeforeReturn).toBe(true);
334
396
  expect(signals).toEqual(["SIGTERM"]);
397
+ expect(killer.calls).toEqual([{ pid: 101, signal: "SIGTERM" }]);
335
398
  expect(sup.get("vault")?.status).toBe("stopped");
336
399
  });
337
400
  });
338
401
 
402
+ describe("Supervisor process-group reaping (hub#88 — EADDRINUSE-on-restart regression)", () => {
403
+ /** Grab a free loopback port by opening + immediately closing a server. */
404
+ async function freeEphemeralPort(): Promise<number> {
405
+ const probe = Bun.serve({ port: 0, hostname: "127.0.0.1", fetch: () => new Response("x") });
406
+ // `port` is `number | undefined` in Bun's types, but a live Bun.serve()
407
+ // always has a bound port — assert it so the test stays type-clean.
408
+ const port = probe.port as number;
409
+ probe.stop(true);
410
+ return port;
411
+ }
412
+
413
+ /** Connect-probe loopback:port — true if something is accepting. */
414
+ function portListening(port: number): Promise<boolean> {
415
+ return new Promise((resolve) => {
416
+ const socket = new Socket();
417
+ let settled = false;
418
+ const done = (v: boolean) => {
419
+ if (settled) return;
420
+ settled = true;
421
+ socket.destroy();
422
+ resolve(v);
423
+ };
424
+ socket.setTimeout(500);
425
+ socket.once("connect", () => done(true));
426
+ socket.once("timeout", () => done(false));
427
+ socket.once("error", () => done(false));
428
+ socket.connect(port, "127.0.0.1");
429
+ });
430
+ }
431
+
432
+ /** Poll until `pred()` is true or the deadline passes. Returns the outcome. */
433
+ async function waitFor(pred: () => Promise<boolean>, timeoutMs: number): Promise<boolean> {
434
+ const deadline = Date.now() + timeoutMs;
435
+ while (Date.now() < deadline) {
436
+ if (await pred()) return true;
437
+ await tick(25);
438
+ }
439
+ return pred();
440
+ }
441
+
442
+ test("stop signals the whole process group, not just the leader (deterministic seam check)", async () => {
443
+ // The load-bearing contract: the supervisor hands `killFn` the child's
444
+ // LEADER pid, and production's `defaultKillGroup` translates that into a
445
+ // group send (`process.kill(-pid)`). A faithful stub records what `killFn`
446
+ // received; we assert the supervisor signalled with the leader pid so the
447
+ // group (wrapper + grandchildren) is reaped together. This is the
448
+ // deterministic counterpart to the real-process round-trip below.
449
+ const leader = makeFakeProc(4242);
450
+ const spawner = makeQueueSpawner();
451
+ spawner.enqueue(leader);
452
+ const killer = makeKillRecorder();
453
+ killer.register(leader);
454
+ const sup = new Supervisor({
455
+ spawnFn: spawner.spawn,
456
+ killFn: killer.killFn,
457
+ restartDelayMs: 0,
458
+ sleep: () => Promise.resolve(),
459
+ });
460
+ await sup.start({ short: "wrapped", cmd: ["sh", "-c", "tsx server.ts"] });
461
+
462
+ const stopP = sup.stop("wrapped");
463
+ leader.closeStreams();
464
+ leader.resolveExit(0);
465
+ await stopP;
466
+
467
+ // killFn received the LEADER pid (4242) — production's defaultKillGroup
468
+ // turns this into `process.kill(-4242)`, reaping the wrapper's whole group
469
+ // incl. the tsx grandchild that would otherwise hold the port (hub#88).
470
+ expect(killer.calls).toEqual([{ pid: 4242, signal: "SIGTERM" }]);
471
+ });
472
+
473
+ test("defaultKillGroup sends to the negative (group) pid, falling back to bare pid on ESRCH", () => {
474
+ // Drive the real defaultKillGroup against a stubbed process.kill so we can
475
+ // assert the group-vs-bare-pid syscall shape without signalling anything.
476
+ const realKill = process.kill;
477
+ const calls: Array<{ pid: number; signal: string | number }> = [];
478
+ try {
479
+ // Case 1: group send succeeds → only the negative-pid call happens.
480
+ (process as { kill: typeof process.kill }).kill = ((
481
+ pid: number,
482
+ signal?: string | number,
483
+ ) => {
484
+ calls.push({ pid, signal: signal ?? 0 });
485
+ return true;
486
+ }) as typeof process.kill;
487
+ defaultKillGroup(555, "SIGTERM");
488
+ expect(calls).toEqual([{ pid: -555, signal: "SIGTERM" }]);
489
+
490
+ // Case 2: group send throws ESRCH (no group / pre-detached child) →
491
+ // fall back to a bare-pid send. Mirrors lifecycle.defaultKill.
492
+ calls.length = 0;
493
+ (process as { kill: typeof process.kill }).kill = ((
494
+ pid: number,
495
+ signal?: string | number,
496
+ ) => {
497
+ calls.push({ pid, signal: signal ?? 0 });
498
+ if (pid < 0) {
499
+ const err = new Error("no such process") as NodeJS.ErrnoException;
500
+ err.code = "ESRCH";
501
+ throw err;
502
+ }
503
+ return true;
504
+ }) as typeof process.kill;
505
+ defaultKillGroup(777, "SIGKILL");
506
+ expect(calls).toEqual([
507
+ { pid: -777, signal: "SIGKILL" },
508
+ { pid: 777, signal: "SIGKILL" },
509
+ ]);
510
+
511
+ // Case 3: a non-ESRCH error propagates (we never swallow EPERM etc.).
512
+ (process as { kill: typeof process.kill }).kill = (() => {
513
+ const err = new Error("operation not permitted") as NodeJS.ErrnoException;
514
+ err.code = "EPERM";
515
+ throw err;
516
+ }) as typeof process.kill;
517
+ expect(() => defaultKillGroup(888, "SIGTERM")).toThrow("operation not permitted");
518
+ } finally {
519
+ process.kill = realKill;
520
+ }
521
+ });
522
+
523
+ test("a wrapped startCmd's grandchild is reaped on restart → fresh spawn re-binds the same port (no EADDRINUSE)", async () => {
524
+ const port = await freeEphemeralPort();
525
+ // Wrapper (the leader the supervisor spawns) backgrounds a *grandchild*
526
+ // bun listener that holds the port, then `wait`s on it. Pre-hub#88, the
527
+ // supervisor killed only the leader (`sh`) — the bun grandchild kept the
528
+ // socket, so the restart's fresh spawn hit EADDRINUSE. With group-spawn
529
+ // (`detached: true`) + group-kill (`defaultKillGroup`), the whole group
530
+ // dies and the port frees.
531
+ const listener = [
532
+ "bun",
533
+ "-e",
534
+ `Bun.serve({ port: ${port}, hostname: "127.0.0.1", fetch: () => new Response("ok") }); setInterval(() => {}, 1e9);`,
535
+ ];
536
+ const wrapper = ["sh", "-c", `${listener.map((a) => `'${a}'`).join(" ")} & wait`];
537
+
538
+ // Real spawnFn + real defaultKillGroup (no seams) — this is the whole point.
539
+ const sup = new Supervisor({ restartDelayMs: 50 });
540
+ try {
541
+ await sup.start({ short: "wrapped", cmd: wrapper });
542
+
543
+ // Grandchild binds the port.
544
+ expect(await waitFor(() => portListening(port), 8000)).toBe(true);
545
+
546
+ // Restart: stop (group-kill reaps the grandchild) → wait for the port
547
+ // to FREE → fresh spawn re-binds it. If the grandchild leaked, the
548
+ // fresh listener would EADDRINUSE-crash and the port would either stay
549
+ // held by the orphan or flap — the round-trip below would fail.
550
+ const restarted = await sup.restart("wrapped");
551
+ expect(restarted?.status).toBe("running");
552
+
553
+ // The port answers again under the fresh spawn — proves no EADDRINUSE.
554
+ expect(await waitFor(() => portListening(port), 8000)).toBe(true);
555
+
556
+ // The discriminating signal: after a final stop, the port FREES. If the
557
+ // group-kill failed to reach the bun grandchild, an orphan would keep
558
+ // the socket and this would never go quiet.
559
+ await sup.stop("wrapped");
560
+ expect(await waitFor(async () => !(await portListening(port)), 8000)).toBe(true);
561
+ } finally {
562
+ await sup.stop("wrapped").catch(() => {});
563
+ }
564
+ }, 20_000);
565
+ });
566
+
339
567
  describe("Supervisor.restart", () => {
340
568
  test("stops the current process and spawns fresh", async () => {
341
569
  const first = makeFakeProc(101);
@@ -346,6 +574,7 @@ describe("Supervisor.restart", () => {
346
574
 
347
575
  const sup = new Supervisor({
348
576
  spawnFn: spawner.spawn,
577
+ killFn: noopKill,
349
578
  restartDelayMs: 0,
350
579
  sleep: () => Promise.resolve(),
351
580
  });
@@ -376,6 +605,7 @@ describe("Supervisor output multiplexing", () => {
376
605
  const outputs: string[] = [];
377
606
  const sup = new Supervisor({
378
607
  spawnFn: spawner.spawn,
608
+ killFn: noopKill,
379
609
  output: (line) => outputs.push(line),
380
610
  });
381
611
  await sup.start({ short: "vault", cmd: ["bun", "vault.ts"] });
@@ -400,6 +630,7 @@ describe("Supervisor output multiplexing", () => {
400
630
  const outputs: string[] = [];
401
631
  const sup = new Supervisor({
402
632
  spawnFn: spawner.spawn,
633
+ killFn: noopKill,
403
634
  output: (line) => outputs.push(line),
404
635
  });
405
636
  await sup.start({ short: "scribe", cmd: ["bun", "scribe.ts"] });
@@ -427,6 +658,7 @@ describe("Supervisor output multiplexing", () => {
427
658
  const outputs: string[] = [];
428
659
  const sup = new Supervisor({
429
660
  spawnFn: spawner.spawn,
661
+ killFn: noopKill,
430
662
  output: (line) => outputs.push(line),
431
663
  });
432
664
  await sup.start({ short: "vault", cmd: ["bun", "vault.ts"] });
@@ -450,6 +682,239 @@ describe("Supervisor output multiplexing", () => {
450
682
  });
451
683
  });
452
684
 
685
+ describe("Supervisor per-module log ring buffer (§6.5)", () => {
686
+ test("replays output emitted BEFORE the reader connects (boot/crash-line capture)", async () => {
687
+ const proc = makeFakeProc(101);
688
+ const spawner = makeQueueSpawner();
689
+ spawner.enqueue(proc);
690
+ const sup = new Supervisor({ spawnFn: spawner.spawn, killFn: noopKill });
691
+ await sup.start({ short: "vault", cmd: ["bun", "vault.ts"] });
692
+
693
+ // Lines arrive (incl. a "crash cause") BEFORE anyone taps the logs.
694
+ proc.emitStdout("booting vault\n");
695
+ proc.emitStderr("FATAL: port already in use\n");
696
+ await tick(20);
697
+
698
+ // Reader connects AFTER the fact — the buffer must still have them.
699
+ const logs = sup.logs("vault");
700
+ expect(logs).toEqual(["[vault] booting vault\n", "[vault] FATAL: port already in use\n"]);
701
+
702
+ proc.closeStreams();
703
+ sup.stop("vault");
704
+ proc.resolveExit(0);
705
+ });
706
+
707
+ test("buffer is bounded — oldest lines drop once the byte cap is exceeded", async () => {
708
+ const proc = makeFakeProc(101);
709
+ const spawner = makeQueueSpawner();
710
+ spawner.enqueue(proc);
711
+ // Tiny cap so a handful of lines overflows it. Each prefixed line below is
712
+ // well over a few bytes, so the cap evicts oldest-first.
713
+ const sup = new Supervisor({ spawnFn: spawner.spawn, killFn: noopKill, logBufferBytes: 40 });
714
+ await sup.start({ short: "vault", cmd: ["bun", "vault.ts"] });
715
+
716
+ proc.emitStdout("line-1\n");
717
+ proc.emitStdout("line-2\n");
718
+ proc.emitStdout("line-3\n");
719
+ proc.emitStdout("line-4\n");
720
+ await tick(20);
721
+
722
+ const logs = sup.logs("vault") ?? [];
723
+ // The oldest line(s) were evicted; the newest survives.
724
+ expect(logs.at(-1)).toBe("[vault] line-4\n");
725
+ expect(logs).not.toContain("[vault] line-1\n");
726
+ // Total buffered bytes stay at/under the cap (modulo the always-kept tail).
727
+ const totalBytes = logs.reduce((n, l) => n + Buffer.byteLength(l), 0);
728
+ expect(totalBytes).toBeLessThanOrEqual(40);
729
+
730
+ proc.closeStreams();
731
+ sup.stop("vault");
732
+ proc.resolveExit(0);
733
+ });
734
+
735
+ test("logs() returns undefined for a not-supervised module (404 contract)", () => {
736
+ const sup = new Supervisor({ spawnFn: () => makeFakeProc(0), killFn: noopKill });
737
+ expect(sup.logs("nope")).toBeUndefined();
738
+ });
739
+
740
+ test("buffer survives a crash-respawn so the crash cause stays replayable", async () => {
741
+ const first = makeFakeProc(101);
742
+ const second = makeFakeProc(102);
743
+ const spawner = makeQueueSpawner();
744
+ spawner.enqueue(first);
745
+ spawner.enqueue(second);
746
+ const sup = new Supervisor({
747
+ spawnFn: spawner.spawn,
748
+ killFn: noopKill,
749
+ restartDelayMs: 0,
750
+ sleep: () => Promise.resolve(),
751
+ });
752
+ await sup.start({ short: "vault", cmd: ["bun", "vault.ts"] });
753
+
754
+ first.emitStderr("crash cause: boom\n");
755
+ await tick(20);
756
+ // Crash → supervisor respawns within budget (reuses the same entry/buffer).
757
+ first.closeStreams();
758
+ first.resolveExit(1);
759
+ await tick();
760
+
761
+ second.emitStdout("recovered\n");
762
+ await tick(20);
763
+
764
+ const logs = sup.logs("vault") ?? [];
765
+ // Both the pre-crash cause and the post-respawn line are present.
766
+ expect(logs).toContain("[vault] crash cause: boom\n");
767
+ expect(logs).toContain("[vault] recovered\n");
768
+
769
+ second.closeStreams();
770
+ sup.stop("vault");
771
+ second.resolveExit(0);
772
+ });
773
+ });
774
+
775
+ describe("Supervisor port-readiness + structured start-error (§6.5)", () => {
776
+ const reqWithPort = (short: string, port: number): SpawnRequest => ({
777
+ short,
778
+ cmd: ["parachute-vault", "serve"],
779
+ env: { PORT: String(port) },
780
+ });
781
+
782
+ test("(a) module that never binds its port → started-but-unbound start-error, status stays running", async () => {
783
+ const proc = makeFakeProc(101);
784
+ const spawner = makeQueueSpawner();
785
+ spawner.enqueue(proc);
786
+ // portListening always false → the readiness gate times out. Short window +
787
+ // instant sleep so the test doesn't actually wait.
788
+ const sup = new Supervisor({
789
+ spawnFn: spawner.spawn,
790
+ killFn: noopKill,
791
+ portListening: async () => false,
792
+ startReadyMs: 50,
793
+ startReadyPollMs: 5,
794
+ sleep: () => Promise.resolve(),
795
+ });
796
+ const state = await sup.start(reqWithPort("vault", 1940));
797
+
798
+ // Process IS up (we don't break the running enum — proxy-state/SPA read it)...
799
+ expect(state.status).toBe("running");
800
+ // ...but the structured start-error explains it's not actually listening.
801
+ expect(state.startError?.error_type).toBe("started_but_unbound");
802
+ expect(state.startError?.error_description).toContain("1940");
803
+ expect(state.startError?.at).toMatch(/^\d{4}-\d{2}-\d{2}T/);
804
+
805
+ proc.closeStreams();
806
+ sup.stop("vault");
807
+ proc.resolveExit(0);
808
+ });
809
+
810
+ test("(b) module that binds promptly → no start-error, clean running", async () => {
811
+ const proc = makeFakeProc(102);
812
+ const spawner = makeQueueSpawner();
813
+ spawner.enqueue(proc);
814
+ let probes = 0;
815
+ const sup = new Supervisor({
816
+ spawnFn: spawner.spawn,
817
+ killFn: noopKill,
818
+ // First probe says "not yet", second says "bound" — exercises the poll
819
+ // loop rather than an instant hit.
820
+ portListening: async () => ++probes >= 2,
821
+ startReadyMs: 1000,
822
+ startReadyPollMs: 5,
823
+ sleep: () => Promise.resolve(),
824
+ });
825
+ const state = await sup.start(reqWithPort("vault", 1940));
826
+
827
+ expect(state.status).toBe("running");
828
+ expect(state.startError).toBeUndefined();
829
+ expect(probes).toBeGreaterThanOrEqual(2);
830
+
831
+ proc.closeStreams();
832
+ sup.stop("vault");
833
+ proc.resolveExit(0);
834
+ });
835
+
836
+ test("(c) preflight MissingDependencyError → structured start-error, NO spawn", async () => {
837
+ const spawner = makeQueueSpawner();
838
+ // No proc enqueued — if start() tried to spawn, the queue spawner throws.
839
+ const sup = new Supervisor({
840
+ spawnFn: spawner.spawn,
841
+ killFn: noopKill,
842
+ which: () => null, // binary not resolvable → preflight throws
843
+ portListening: async () => true,
844
+ startReadyMs: 50,
845
+ sleep: () => Promise.resolve(),
846
+ });
847
+ const state = await sup.start(reqWithPort("vault", 1940));
848
+
849
+ // Doomed spawn aborted: status crashed, structured missing-dependency error.
850
+ expect(state.status).toBe("crashed");
851
+ expect(state.startError?.error_type).toBe("missing_dependency");
852
+ expect(state.startError?.binary).toBe("parachute-vault");
853
+ // Crucially, the supervisor never called the spawner.
854
+ expect(spawner.calls).toHaveLength(0);
855
+ });
856
+
857
+ test("a clean re-start clears a prior started-but-unbound start-error", async () => {
858
+ const first = makeFakeProc(201);
859
+ const second = makeFakeProc(202);
860
+ const spawner = makeQueueSpawner();
861
+ spawner.enqueue(first);
862
+ spawner.enqueue(second);
863
+ let bound = false;
864
+ const sup = new Supervisor({
865
+ spawnFn: spawner.spawn,
866
+ killFn: noopKill,
867
+ portListening: async () => bound,
868
+ startReadyMs: 30,
869
+ startReadyPollMs: 5,
870
+ sleep: () => Promise.resolve(),
871
+ });
872
+
873
+ // First start never binds → start-error recorded.
874
+ const s1 = await sup.start(reqWithPort("vault", 1940));
875
+ expect(s1.startError?.error_type).toBe("started_but_unbound");
876
+
877
+ // Restart with the port now binding → the stale error clears.
878
+ bound = true;
879
+ const restartP = sup.restart("vault");
880
+ first.closeStreams();
881
+ first.resolveExit(0);
882
+ const s2 = await restartP;
883
+ expect(s2?.status).toBe("running");
884
+ expect(s2?.startError).toBeUndefined();
885
+
886
+ second.closeStreams();
887
+ sup.stop("vault");
888
+ second.resolveExit(0);
889
+ });
890
+
891
+ test("readiness gate is skipped when the request carries no PORT", async () => {
892
+ const proc = makeFakeProc(101);
893
+ const spawner = makeQueueSpawner();
894
+ spawner.enqueue(proc);
895
+ let probed = false;
896
+ const sup = new Supervisor({
897
+ spawnFn: spawner.spawn,
898
+ killFn: noopKill,
899
+ portListening: async () => {
900
+ probed = true;
901
+ return false;
902
+ },
903
+ startReadyMs: 1000,
904
+ sleep: () => Promise.resolve(),
905
+ });
906
+ // No env.PORT → nothing to probe; start returns running, no error, no probe.
907
+ const state = await sup.start({ short: "cli-only", cmd: ["parachute-vault", "serve"] });
908
+ expect(state.status).toBe("running");
909
+ expect(state.startError).toBeUndefined();
910
+ expect(probed).toBe(false);
911
+
912
+ proc.closeStreams();
913
+ sup.stop("cli-only");
914
+ proc.resolveExit(0);
915
+ });
916
+ });
917
+
453
918
  describe("Supervisor.list + get", () => {
454
919
  test("list returns snapshot of all supervised modules", async () => {
455
920
  const vault = makeFakeProc(101);
@@ -457,7 +922,7 @@ describe("Supervisor.list + get", () => {
457
922
  const spawner = makeQueueSpawner();
458
923
  spawner.enqueue(vault);
459
924
  spawner.enqueue(scribe);
460
- const sup = new Supervisor({ spawnFn: spawner.spawn });
925
+ const sup = new Supervisor({ spawnFn: spawner.spawn, killFn: noopKill });
461
926
 
462
927
  await sup.start({ short: "vault", cmd: ["bun", "vault.ts"] });
463
928
  await sup.start({ short: "scribe", cmd: ["bun", "scribe.ts"] });