@openparachute/hub 0.6.4-rc.6 → 0.6.4-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openparachute/hub",
3
- "version": "0.6.4-rc.6",
3
+ "version": "0.6.4-rc.7",
4
4
  "description": "parachute — the local hub for the Parachute ecosystem (discovery, ports, lifecycle, soon OAuth).",
5
5
  "license": "AGPL-3.0",
6
6
  "publishConfig": {
@@ -40,6 +40,9 @@ describe("install", () => {
40
40
  const calls: string[][] = [];
41
41
  const logs: string[] = [];
42
42
  const code = await install("vault", {
43
+ // --interactive: opt back into vault's full `init` (#579). The light
44
+ // default skips it; this test exercises the interactive path.
45
+ interactive: true,
43
46
  runner: async (cmd) => {
44
47
  calls.push([...cmd]);
45
48
  return 0;
@@ -67,6 +70,8 @@ describe("install", () => {
67
70
  try {
68
71
  const logs: string[] = [];
69
72
  const code = await install("vault", {
73
+ // --interactive: this test asserts init wrote the authoritative entry.
74
+ interactive: true,
70
75
  runner: async (cmd) => {
71
76
  if (cmd[0] === "parachute-vault") {
72
77
  upsertService(
@@ -133,6 +138,8 @@ describe("install", () => {
133
138
  const calls: string[][] = [];
134
139
  const logs: string[] = [];
135
140
  const code = await install("vault", {
141
+ // --interactive: this test asserts init still ran after the bun quirk.
142
+ interactive: true,
136
143
  runner: async (cmd) => {
137
144
  calls.push([...cmd]);
138
145
  // `bun add -g` exits 1; `parachute-vault init` succeeds.
@@ -796,12 +803,14 @@ describe("install", () => {
796
803
  }
797
804
  });
798
805
 
799
- test("linked vault still runs init and defers to init's manifest write", async () => {
806
+ test("linked vault still runs init and defers to init's manifest write (--interactive)", async () => {
800
807
  const { path, cleanup } = makeTempPath();
801
808
  try {
802
809
  const calls: string[][] = [];
803
810
  const logs: string[] = [];
804
811
  const code = await install("vault", {
812
+ // --interactive: this test asserts vault's own init wrote the entry.
813
+ interactive: true,
805
814
  runner: async (cmd) => {
806
815
  calls.push([...cmd]);
807
816
  if (cmd[0] === "parachute-vault") {
@@ -1722,6 +1731,330 @@ describe("install", () => {
1722
1731
  });
1723
1732
  });
1724
1733
 
1734
+ describe("#579 / #580 item 1 — light manual install + guidance", () => {
1735
test("default vault install skips the interactive init (no parachute-vault init runs)", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    // Every command handed to the runner, and every line handed to the logger.
    const invocations: string[][] = [];
    const output: string[] = [];

    // `interactive` is deliberately omitted: this is the light default path.
    const exitCode = await install("vault", {
      runner: async (argv) => {
        invocations.push([...argv]);
        return 0;
      },
      manifestPath,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      log: (line) => output.push(line),
    });

    expect(exitCode).toBe(0);
    // Only the package install ran; vault's own `init` was never invoked.
    expect(invocations).toEqual([["bun", "add", "-g", "@openparachute/vault"]]);
    expect(invocations).not.toContainEqual(["parachute-vault", "init"]);
    // The skip itself is announced in the log output.
    const text = output.join("\n");
    expect(text).toMatch(/skipping parachute-vault init/);
  } finally {
    removeTemp();
  }
});
1762
+
1763
test("default vault install still starts the module under the supervisor", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    // Shorts passed to the injected startService, in call order.
    const started: string[] = [];

    const exitCode = await install("vault", {
      runner: async () => 0,
      manifestPath,
      startService: async (short) => {
        started.push(short);
        return 0;
      },
      isLinked: () => false,
      portProbe: async () => false,
      log: () => {},
    });

    expect(exitCode).toBe(0);
    // The light path skips the interview, not the start: vault is started exactly once.
    expect(started).toEqual(["vault"]);
  } finally {
    removeTemp();
  }
});
1785
+
1786
test("guidance block prints the admin URL + extras on a supervised box (loopback)", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    const output: string[] = [];

    const exitCode = await install("vault", {
      runner: async () => 0,
      manifestPath,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      log: (line) => output.push(line),
      // Injected context: hub unit installed, no expose state, hub on port 1939.
      guidanceCtx: { hubUnitInstalled: true, exposeState: undefined, hubPort: 1939 },
    });

    expect(exitCode).toBe(0);

    const text = output.join("\n");
    // Checked in order: heading, loopback admin URL, the MCP one-liner, the
    // --interactive pointer, and the "mint a token in the admin UI" pointer.
    const expectedPatterns = [
      /Manage \+ create vaults in the admin UI/,
      /http:\/\/127\.0\.0\.1:1939\/admin\//,
      /parachute-vault mcp-install/,
      /--interactive/,
      /Mint an API token.*admin UI/,
    ];
    for (const pattern of expectedPatterns) {
      expect(text).toMatch(pattern);
    }
  } finally {
    removeTemp();
  }
});
1812
+
1813
test("guidance uses the exposed public FQDN when the hub is exposed", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    const output: string[] = [];

    await install("vault", {
      runner: async () => 0,
      manifestPath,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      log: (line) => output.push(line),
      guidanceCtx: {
        hubUnitInstalled: true,
        // Expose state carrying a canonical FQDN.
        exposeState: {
          version: 1,
          layer: "public",
          mode: "path",
          canonicalFqdn: "friends.parachute.computer",
          port: 1939,
          funnel: false,
          entries: [],
        },
        hubPort: 1939,
      },
    });

    const text = output.join("\n");
    // The https FQDN URL is printed and the loopback address never appears.
    expect(text).toMatch(/https:\/\/friends\.parachute\.computer\/admin\//);
    expect(text).not.toMatch(/127\.0\.0\.1/);
  } finally {
    removeTemp();
  }
});
1845
+
1846
test("no guidance block on a non-supervised box (no hub unit)", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    const output: string[] = [];

    await install("vault", {
      runner: async () => 0,
      manifestPath,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      log: (line) => output.push(line),
      // Injected context: no hub unit installed.
      guidanceCtx: { hubUnitInstalled: false, exposeState: undefined, hubPort: 1939 },
    });

    // The guidance heading must be absent from everything that was logged.
    const text = output.join("\n");
    expect(text).not.toMatch(/Manage \+ create vaults in the admin UI/);
  } finally {
    removeTemp();
  }
});
1865
+
1866
test("--interactive runs vault init and suppresses the guidance block", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    const invocations: string[][] = [];
    const output: string[] = [];

    const exitCode = await install("vault", {
      interactive: true,
      runner: async (argv) => {
        invocations.push([...argv]);
        return 0;
      },
      manifestPath,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      log: (line) => output.push(line),
      // Hub unit reported as installed, yet --interactive hands the next-steps
      // surface to the service's own init, so the light guidance must stay quiet.
      guidanceCtx: { hubUnitInstalled: true, exposeState: undefined, hubPort: 1939 },
    });

    expect(exitCode).toBe(0);
    // vault's own init was invoked through the runner.
    expect(invocations).toContainEqual(["parachute-vault", "init"]);
    const text = output.join("\n");
    expect(text).not.toMatch(/Manage \+ create vaults in the admin UI/);
  } finally {
    removeTemp();
  }
});
1893
+
1894
test("scribe (no interactive init) is unaffected — no skip log, no vault guidance", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  // Config dir is the parent directory of the temp manifest path.
  const configDir = join(manifestPath, "..");
  try {
    const output: string[] = [];

    const exitCode = await install("scribe", {
      runner: async () => 0,
      manifestPath,
      configDir,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      scribeAvailability: { kind: "not-tty" },
      log: (line) => output.push(line),
      guidanceCtx: { hubUnitInstalled: true, exposeState: undefined, hubPort: 1939 },
    });

    expect(exitCode).toBe(0);

    // Neither a "skipping" announcement nor the vault guidance heading appears.
    const text = output.join("\n");
    expect(text).not.toMatch(/skipping/);
    expect(text).not.toMatch(/Manage \+ create vaults in the admin UI/);
  } finally {
    removeTemp();
  }
});
1918
+ });
1919
+
1920
+ describe("#580 item 3 — install-time stale-unit sweep", () => {
1921
test("sweeps stale per-module units before starting on a supervised box", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    const output: string[] = [];
    // How many times install invoked the injected sweep.
    let sweepCount = 0;

    const exitCode = await install("vault", {
      runner: async () => 0,
      manifestPath,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      log: (line) => output.push(line),
      guidanceCtx: { hubUnitInstalled: true, exposeState: undefined, hubPort: 1939 },
      // Fake sweep: reports one disabled launchd unit for vault.
      disableStaleModuleUnits: () => {
        sweepCount++;
        return {
          actions: [
            {
              short: "vault",
              kind: "launchd",
              unit: "computer.parachute.vault",
              result: "disabled",
              messages: [" ✓ Disabled stale computer.parachute.vault"],
            },
          ],
        };
      },
    });

    expect(exitCode).toBe(0);
    expect(sweepCount).toBe(1);
    // The summary line names the count and the swept unit.
    const text = output.join("\n");
    expect(text).toMatch(
      /Swept 1 stale per-module autostart unit\(s\).*computer\.parachute\.vault/,
    );
  } finally {
    removeTemp();
  }
});
1958
+
1959
test("does NOT sweep on a non-supervised box (no hub unit)", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    let sweepCount = 0;

    await install("vault", {
      runner: async () => 0,
      manifestPath,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      log: () => {},
      // Injected context: no hub unit installed.
      guidanceCtx: { hubUnitInstalled: false, exposeState: undefined, hubPort: 1939 },
      disableStaleModuleUnits: () => {
        sweepCount++;
        return { actions: [] };
      },
    });

    // Without a supervised hub the per-module unit is the legitimate
    // lifecycle, so the injected sweep must never have been called.
    expect(sweepCount).toBe(0);
  } finally {
    removeTemp();
  }
});
1983
+
1984
test("does NOT sweep under --no-start (caller owns the process model)", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    let sweepCount = 0;

    await install("vault", {
      runner: async () => 0,
      manifestPath,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      // The option under test: start is suppressed by the caller.
      noStart: true,
      log: () => {},
      guidanceCtx: { hubUnitInstalled: true, exposeState: undefined, hubPort: 1939 },
      disableStaleModuleUnits: () => {
        sweepCount++;
        return { actions: [] };
      },
    });

    // Hub unit is reported installed, yet the sweep stays untouched.
    expect(sweepCount).toBe(0);
  } finally {
    removeTemp();
  }
});
2007
+
2008
test("does NOT sweep under --no-create (wizard defers the start; N2)", async () => {
  // Companion to the --no-start guard. `noCreate` (the wizard's install path)
  // suppresses the start as well, and the real sweep touches launchctl /
  // systemctl on a live box — so it must not fire when the module is not about
  // to be started. A silent regression would have the wizard disabling
  // operator units mid-init.
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    let sweepCount = 0;

    await install("vault", {
      runner: async () => 0,
      manifestPath,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      noCreate: true,
      log: () => {},
      guidanceCtx: { hubUnitInstalled: true, exposeState: undefined, hubPort: 1939 },
      disableStaleModuleUnits: () => {
        sweepCount++;
        return { actions: [] };
      },
    });

    expect(sweepCount).toBe(0);
  } finally {
    removeTemp();
  }
});
2036
+
2037
test("a clean no-op sweep (nothing stale) logs nothing extra", async () => {
  const { path: manifestPath, cleanup: removeTemp } = makeTempPath();
  try {
    const output: string[] = [];

    await install("vault", {
      runner: async () => 0,
      manifestPath,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async () => false,
      log: (line) => output.push(line),
      guidanceCtx: { hubUnitInstalled: true, exposeState: undefined, hubPort: 1939 },
      // Fake sweep that found nothing to disable.
      disableStaleModuleUnits: () => ({ actions: [] }),
    });

    // With zero actions there must be no "Swept … stale per-module" summary.
    const text = output.join("\n");
    expect(text).not.toMatch(/Swept .* stale per-module/);
  } finally {
    removeTemp();
  }
});
2056
+ });
2057
+
1725
2058
  describe("hub#573 — install auto-start converges on supervised detection", () => {
1726
2059
  test("the default start opts opt into real supervisor detection + the migrate offer", () => {
1727
2060
  const log = () => {};
@@ -178,6 +178,203 @@ describe("Supervisor.start + status transitions", () => {
178
178
  });
179
179
  });
180
180
 
181
+ describe("Supervisor port-squatter detection (#580 item 4)", () => {
182
test("foreign pid on the module port → port_squatter error, no spawn", async () => {
  // Nothing is enqueued on purpose; per the original note, the queue spawner
  // throws if `start` attempts a spawn.
  const queue = makeQueueSpawner();
  const supervisor = new Supervisor({
    spawnFn: queue.spawn,
    killFn: noopKill,
    // Port 1940 is reported as held by pid 1921, which the supervisor never spawned.
    pidOnPort: (port) => {
      if (port === 1940) return 1921;
      return undefined;
    },
    ownerOfPid: (pid) => {
      if (pid === 1921) return "bun /x/vault/src/server.ts";
      return undefined;
    },
  });

  const result = await supervisor.start({
    short: "vault",
    cmd: ["bun", "vault.ts"],
    env: { PORT: "1940" },
  });

  // No spawn was attempted and the module is reported as crashed, pid-less.
  expect(queue.calls).toHaveLength(0);
  expect(result.status).toBe("crashed");
  expect(result.pid).toBeUndefined();

  // The structured error names the port, the holder pid, its cmdline and the fix.
  expect(result.startError?.error_type).toBe("port_squatter");
  const description = result.startError?.error_description;
  expect(description).toContain("port 1940 is held by pid 1921");
  expect(description).toContain("bun /x/vault/src/server.ts");
  expect(description).toContain("kill 1921 && parachute start vault");
});
209
+
210
test("squatter message omits cmdline when ownerOfPid can't read it", async () => {
  const queue = makeQueueSpawner();
  const supervisor = new Supervisor({
    spawnFn: queue.spawn,
    killFn: noopKill,
    // Every port is reported as held by pid 4242; its cmdline is unreadable.
    pidOnPort: () => 4242,
    ownerOfPid: () => undefined,
  });

  const result = await supervisor.start({
    short: "vault",
    cmd: ["bun", "vault.ts"],
    env: { PORT: "1940" },
  });

  expect(result.startError?.error_type).toBe("port_squatter");
  const description = result.startError?.error_description;
  expect(description).toContain("port 1940 is held by pid 4242");
  expect(description).toContain("kill 4242 && parachute start vault");
  // Without a cmdline there is no parenthetical in the message.
  expect(description).not.toContain("(");
});
230
+
231
test("free port → no squatter error, module spawns normally", async () => {
  const child = makeFakeProc(500);
  const queue = makeQueueSpawner();
  queue.enqueue(child);

  const supervisor = new Supervisor({
    spawnFn: queue.spawn,
    killFn: noopKill,
    // No holder reported: the port is free (or detection is unavailable).
    pidOnPort: () => undefined,
  });

  const result = await supervisor.start({
    short: "vault",
    cmd: ["bun", "vault.ts"],
    env: { PORT: "1940" },
  });

  // Exactly one spawn, module running, no start error recorded.
  expect(queue.calls).toHaveLength(1);
  expect(result.status).toBe("running");
  expect(result.startError).toBeUndefined();

  // Teardown: close streams, stop the module, then resolve the fake exit.
  child.closeStreams();
  supervisor.stop("vault");
  child.resolveExit(0);
});
254
+
255
test("port held by one of OUR OWN children is not a squatter", async () => {
  // vault is started first and spawns as pid 700 (its own port, 1940, is
  // reported free). scribe's port (1943) then reports pid 700 as its holder.
  // That holder is a supervised child, not a foreign process, so scribe must
  // still spawn rather than being flagged as squatted.
  const vaultChild = makeFakeProc(700);
  const scribeChild = makeFakeProc(701);
  const queue = makeQueueSpawner();
  queue.enqueue(vaultChild);
  queue.enqueue(scribeChild);

  const supervisor = new Supervisor({
    spawnFn: queue.spawn,
    killFn: noopKill,
    pidOnPort: (port) => {
      if (port === 1943) return 700;
      return undefined;
    },
  });

  await supervisor.start({ short: "vault", cmd: ["bun", "vault.ts"], env: { PORT: "1940" } });
  const scribeState = await supervisor.start({
    short: "scribe",
    cmd: ["bun", "scribe.ts"],
    env: { PORT: "1943" },
  });

  // Both children were spawned and scribe carries no squatter error.
  expect(queue.calls).toHaveLength(2);
  expect(scribeState.status).toBe("running");
  expect(scribeState.startError).toBeUndefined();

  // Teardown in the original order: streams, stops, then exits.
  vaultChild.closeStreams();
  scribeChild.closeStreams();
  supervisor.stop("vault");
  supervisor.stop("scribe");
  vaultChild.resolveExit(0);
  scribeChild.resolveExit(0);
});
291
+
292
test("a CRASHED child's stale pid does NOT vouch for a port holder (N1 liveness)", async () => {
  // Scenario: vault spawns as pid 800 and then exits with code 1 under
  // maxRestarts: 1, after which the test expects status `crashed`. Per the
  // original note, the entry still remembers pid 800. When a later `start`
  // finds pid 800 holding :1940, that must be reported as a squatter — a dead
  // child's stale pid must not excuse the holder.
  const crashedChild = makeFakeProc(800);
  const queue = makeQueueSpawner();
  queue.enqueue(crashedChild);

  // Flipped to true after the crash so pid 800 starts "holding" :1940.
  let portHeld = false;

  const supervisor = new Supervisor({
    spawnFn: queue.spawn,
    killFn: noopKill,
    maxRestarts: 1,
    restartDelayMs: 0,
    sleep: () => Promise.resolve(),
    pidOnPort: (port) => {
      if (!portHeld || port !== 1940) return undefined;
      return 800;
    },
    ownerOfPid: () => "bun /x/vault/src/server.ts",
  });

  await supervisor.start({ short: "vault", cmd: ["bun", "vault.ts"], env: { PORT: "1940" } });

  // Exit with a failure code and let the supervisor observe it.
  crashedChild.closeStreams();
  crashedChild.resolveExit(1);
  await tick();
  expect(supervisor.get("vault")?.status).toBe("crashed");

  // From here on pid 800 holds the port; the re-start must not treat it as ours.
  portHeld = true;
  const secondAttempt = await supervisor.start({
    short: "vault",
    cmd: ["bun", "vault.ts"],
    env: { PORT: "1940" },
  });

  expect(secondAttempt.status).toBe("crashed");
  expect(secondAttempt.startError?.error_type).toBe("port_squatter");
  expect(secondAttempt.startError?.error_description).toContain("port 1940 is held by pid 800");
  // Still only the first spawn: the squatter check aborted before re-spawning.
  expect(queue.calls).toHaveLength(1);
});
332
+
333
test("no declared PORT → squatter check skipped (request without env.PORT)", async () => {
  const child = makeFakeProc(900);
  const queue = makeQueueSpawner();
  queue.enqueue(child);

  // Set if the supervisor ever asks who holds a port.
  let probeWasCalled = false;

  const supervisor = new Supervisor({
    spawnFn: queue.spawn,
    killFn: noopKill,
    pidOnPort: () => {
      probeWasCalled = true;
      return 1;
    },
  });

  // The request carries no env at all, hence no PORT to probe.
  const result = await supervisor.start({ short: "vault", cmd: ["bun", "vault.ts"] });

  expect(probeWasCalled).toBe(false);
  expect(result.status).toBe("running");

  // Teardown: close streams, stop the module, then resolve the fake exit.
  child.closeStreams();
  supervisor.stop("vault");
  child.resolveExit(0);
});
356
+
357
test("stub-spawner path defaults to no squatter (existing fake-proc tests unaffected)", async () => {
  const child = makeFakeProc(123);
  const queue = makeQueueSpawner();
  queue.enqueue(child);

  // No `pidOnPort` is injected here. With a stub spawner the supervisor is
  // expected to default to "no squatter", so a request with a PORT still spawns.
  const supervisor = new Supervisor({ spawnFn: queue.spawn, killFn: noopKill });

  const result = await supervisor.start({
    short: "vault",
    cmd: ["bun", "vault.ts"],
    env: { PORT: "1940" },
  });

  expect(queue.calls).toHaveLength(1);
  expect(result.status).toBe("running");

  // Teardown: close streams, stop the module, then resolve the fake exit.
  child.closeStreams();
  supervisor.stop("vault");
  child.resolveExit(0);
});
376
+ });
377
+
181
378
  describe("Supervisor restart-on-crash", () => {
182
379
  test("restarts a crashed module within the budget", async () => {
183
380
  const first = makeFakeProc(101);
package/src/cli.ts CHANGED
@@ -449,11 +449,14 @@ async function main(argv: string[]): Promise<number> {
449
449
  return 1;
450
450
  }
451
451
  const noStart = keyExtract.rest.includes("--no-start");
452
- const installArgs = keyExtract.rest.filter((a) => a !== "--no-start");
452
+ const interactive = keyExtract.rest.includes("--interactive");
453
+ const installArgs = keyExtract.rest.filter(
454
+ (a) => a !== "--no-start" && a !== "--interactive",
455
+ );
453
456
  const service = installArgs[0];
454
457
  if (!service) {
455
458
  console.error(
456
- "usage: parachute install <service|all> [--channel rc|latest] [--tag <name>] [--no-start]",
459
+ "usage: parachute install <service|all> [--channel rc|latest] [--tag <name>] [--no-start] [--interactive]",
457
460
  );
458
461
  console.error(
459
462
  " parachute install scribe [--scribe-provider <name>] [--scribe-key <key>]",
@@ -467,6 +470,7 @@ async function main(argv: string[]): Promise<number> {
467
470
  installOpts.channel = channelExtract.value;
468
471
  }
469
472
  if (noStart) installOpts.noStart = true;
473
+ if (interactive) installOpts.interactive = true;
470
474
  if (providerExtract.value) installOpts.scribeProvider = providerExtract.value;
471
475
  if (keyExtract.value) installOpts.scribeKey = keyExtract.value;
472
476
  const mod = await loadCommand("install", () => import("./commands/install.ts"));
@@ -4,6 +4,9 @@ import { dirname, join } from "node:path";
4
4
  import { autoWireScribeAuth } from "../auto-wire.ts";
5
5
  import { bunGlobalPrefixes, isLinked as defaultIsLinkedShared } from "../bun-link.ts";
6
6
  import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
7
+ import { type ExposeState, readExposeState } from "../expose-state.ts";
8
+ import { HUB_DEFAULT_PORT, readHubPort } from "../hub-control.ts";
9
+ import { type HubUnitDeps, defaultHubUnitDeps, isHubUnitInstalled } from "../hub-unit.ts";
7
10
  import {
8
11
  type ModuleManifest,
9
12
  ModuleManifestError,
@@ -26,6 +29,11 @@ import {
26
29
  synthesizeManifestForKnownModule,
27
30
  } from "../service-spec.ts";
28
31
  import { findService, readManifest, upsertService } from "../services-manifest.ts";
32
+ import {
33
+ type DisableStaleModuleUnitsOpts,
34
+ type DisableStaleModuleUnitsResult,
35
+ disableStaleModuleUnits as defaultDisableStaleModuleUnits,
36
+ } from "../stale-module-units.ts";
29
37
  import { WELL_KNOWN_PATH } from "../well-known.ts";
30
38
  import { type LifecycleOpts, start as lifecycleStart } from "./lifecycle.ts";
31
39
  import { migrateNotice } from "./migrate.ts";
@@ -215,6 +223,60 @@ export interface InstallOpts {
215
223
  * leave it false so today's behavior is unchanged.
216
224
  */
217
225
  noCreate?: boolean;
226
+ /**
227
+ * `parachute install vault --interactive` (#579 / #580 item 1): opt back into
228
+ * the FULL interactive module setup — the service's own `spec.init` (vault's
229
+ * vault-name prompt, "install as MCP in Claude Code?", "mint an API token?")
230
+ * and, for vault, its self-registered standalone daemon.
231
+ *
232
+ * Default: false. The manual `parachute install <svc>` path is now LIGHT
233
+ * (matching `parachute init`'s Step 2.5): install the package, seed/register
234
+ * services.json, start under the supervisor, and print a short guidance block
235
+ * pointing at the admin UI + the optional extras (`parachute-vault
236
+ * mcp-install`, token minting in the UI). No interactive interview, no
237
+ * vault-side daemon registration that would race the supervisor for :1940.
238
+ *
239
+ * The old "drag me through the full init" behavior is opt-in via this flag.
240
+ * When `true` AND the spec ships an `init` command, install runs `spec.init`
241
+ * as it did pre-#579. When `false` (the default) for a module whose `init`
242
+ * would otherwise run an interview, install SKIPS `spec.init` (the
243
+ * `noCreate`-equivalent quiet path) and emits the guidance block instead.
244
+ *
245
+ * Orthogonal to `noCreate` (which `parachute init` uses to ALSO skip the
246
+ * post-install start). The light manual path still starts the module under
247
+ * the supervisor; only the interactive interview is suppressed.
248
+ */
249
+ interactive?: boolean;
250
+ /**
251
+ * Test seam for the supervised-hub probe + admin-URL resolution that drive
252
+ * the light-install guidance block. Production reads the real expose-state /
253
+ * hub-port / hub-unit deps; tests inject deterministic values so the guidance
254
+ * assertions don't depend on the operator's live box.
255
+ */
256
+ guidanceCtx?: {
257
+ /** Is a hub unit installed (→ supervised box)? Defaults to the real probe. */
258
+ hubUnitInstalled?: boolean;
259
+ /** Hub-unit deps for the real `isHubUnitInstalled` probe. */
260
+ hubUnitDeps?: HubUnitDeps;
261
+ /** Live expose state (→ public admin URL). Defaults to `readExposeState()`. */
262
+ exposeState?: ExposeState | undefined;
263
+ /** Hub loopback port for the admin URL fallback. Defaults to `readHubPort()`. */
264
+ hubPort?: number | undefined;
265
+ };
266
+ /**
267
+ * Test seam for the install-time stale-unit sweep (#580 item 3). Production
268
+ * wires `disableStaleModuleUnits` (the #522 migrate/teardown sweep, reused
269
+ * verbatim — known-module shorts only, hub + cloudflared skipped, idempotent,
270
+ * non-fatal). Tests inject a fake so no real launchctl/systemctl runs and the
271
+ * sweep's invocation (and logged actions) can be asserted.
272
+ *
273
+ * The sweep fires only when a supervised hub is present (the same
274
+ * `guidanceCtx.hubUnitInstalled` discriminant) and the module is being
275
+ * started — a leftover standalone `parachute-<short>` unit (KeepAlive /
276
+ * RunAtLoad) would otherwise keep an unsupervised module bound to the port,
277
+ * crash-looping the supervisor's own child (the #580 field signature).
278
+ */
279
+ disableStaleModuleUnits?: (opts?: DisableStaleModuleUnitsOpts) => DisableStaleModuleUnitsResult;
218
280
  /**
219
281
  * `parachute install scribe` only: pre-pick the transcription provider so
220
282
  * the prompt doesn't fire. Validated against scribe's known providers — an
@@ -586,6 +648,78 @@ export function defaultStartLifecycleOpts(ctx: {
586
648
  };
587
649
  }
588
650
 
651
/**
 * Tolerant wrapper around `readExposeState()`.
 *
 * Any exception thrown by the read (the original note cites a malformed state
 * file) is swallowed and reported as `undefined`, so the light-install
 * guidance can fall back to the loopback admin URL instead of aborting the
 * install midway. The original note says this mirrors init's tolerant read of
 * the same file.
 *
 * @returns whatever `readExposeState()` returns, or `undefined` if it threw.
 */
function safeReadExposeState(): ExposeState | undefined {
  let state: ExposeState | undefined;
  try {
    state = readExposeState();
  } catch {
    // Deliberate best-effort: a failed read simply means "no expose state".
    state = undefined;
  }
  return state;
}
663
+
664
/**
 * Pick the admin URL that the light-install guidance block points at.
 *
 * Resolution order:
 *   1. If the expose state carries a non-empty `canonicalFqdn`, use the public
 *      `https://<fqdn>/admin/` URL.
 *   2. Otherwise use loopback, `http://127.0.0.1:<port>/admin/`, where the port
 *      is `hubPort` or `HUB_DEFAULT_PORT` when `hubPort` is null/undefined.
 *
 * Per the original note this is a deliberately thin local copy of
 * `init.ts:resolveAdminUrl`, kept here so the install command does not pull in
 * the wizard module graph.
 *
 * @param exposeState live expose state, or `undefined` when not exposed/unreadable.
 * @param hubPort hub loopback port, or `undefined` to use the default.
 * @returns the admin URL, always ending in `/admin/`.
 */
function resolveGuidanceAdminUrl(
  exposeState: ExposeState | undefined,
  hubPort: number | undefined,
): string {
  const publicHost = exposeState?.canonicalFqdn;
  if (!publicHost) {
    // Not exposed (or no FQDN recorded): fall back to the loopback address.
    const loopbackPort = hubPort ?? HUB_DEFAULT_PORT;
    return `http://127.0.0.1:${loopbackPort}/admin/`;
  }
  return `https://${publicHost}/admin/`;
}
681
+
682
/**
 * Build the post-install guidance lines for the LIGHT manual install path (#579).
 *
 * Instead of the old interactive interview (vault name, MCP install, token
 * minting), the light path prints a short pointer to the admin UI, where
 * vaults are managed and created, plus one-liners for the optional extras and
 * a reminder of how to opt back into the full interactive setup.
 *
 * Vault-only on purpose (N4): per the original note, vault is the only module
 * that ships an interactive `spec.init` today, so it is the only one whose
 * dropped interview needs replacing. If a future module takes the same
 * light-path skip, add its arm here — or, once per-module text diverges, move
 * the guidance onto the ServiceSpec shape (e.g. a
 * `lightInstallGuidance?: (adminUrl) => string[]` field) so each module owns
 * its own block.
 *
 * @param short module short name (only `"vault"` produces output).
 * @param adminUrl admin URL to print on its own line.
 * @returns the lines to log, in order; an empty array for every other module.
 */
export function buildLightInstallGuidance(short: string, adminUrl: string): string[] {
  // Every module other than vault stays silent here.
  if (short !== "vault") {
    return [];
  }

  // Where the operator goes next.
  const whereToManage = [
    "",
    "Vault is installed and running under the hub supervisor.",
    "Manage + create vaults in the admin UI:",
    ` ${adminUrl}`,
  ];

  // The extras the interview used to force up front, now opt-in.
  const optionalExtras = [
    "",
    "Optional, when you want them (not needed to start):",
    " • Connect a vault to Claude Code: parachute-vault mcp-install",
    " • Mint an API token for other MCP clients: do it from the admin UI (Tokens).",
  ];

  // How to get the old full-interview behaviour back.
  const optBackIn = [
    "",
    "Run the full interactive setup instead with: parachute install vault --interactive",
  ];

  return [...whereToManage, ...optionalExtras, ...optBackIn];
}
722
+
589
723
  export async function install(input: string, opts: InstallOpts = {}): Promise<number> {
590
724
  const runner = opts.runner ?? defaultRunner;
591
725
  const manifestPath = opts.manifestPath ?? SERVICES_MANIFEST_PATH;
@@ -766,7 +900,35 @@ export async function install(input: string, opts: InstallOpts = {}): Promise<nu
766
900
  ? spec.manifestName
767
901
  : manifest.name;
768
902
 
769
- if (spec.init && !opts.noCreate) {
903
+ // Whether to run the module's interactive `spec.init` (#579 / #580 item 1).
904
+ //
905
+ // The manual `parachute install <svc>` path is now LIGHT by default: we do
906
+ // NOT drag the operator through `spec.init`'s interview (for vault: vault-name
907
+ // prompt, "install as MCP?", "mint a token?", and a self-registered standalone
908
+ // daemon that would race the supervisor for :1940). The operator installs the
909
+ // module and manages it from the admin UI. `spec.init` runs ONLY when the
910
+ // caller explicitly opts back in with `--interactive` (and isn't in the
911
+ // `noCreate` quiet path the wizard uses). Modules without a `spec.init` are
912
+ // unaffected — there's no interview to suppress.
913
+ const runInteractiveInit = spec.init !== undefined && opts.interactive === true && !opts.noCreate;
914
+ if (runInteractiveInit && spec.init) {
915
+ // Reviewer surprise 2 / #580: the interactive path runs the module's OWN
916
+ // init, which (for vault today) registers a standalone platform daemon
917
+ // (launchd KeepAlive / systemd Restart=always). On a SUPERVISED hub that
918
+ // daemon races the supervisor for the module's port — the exact #580
919
+ // EADDRINUSE-crash-loop condition the light path avoids by not running init.
920
+ // Warn so an operator who reaches for --interactive on a supervised box
921
+ // knows to pass the daemon-off flag (or prefer the light default).
922
+ const supervisedForWarn =
923
+ opts.guidanceCtx?.hubUnitInstalled ??
924
+ (opts.guidanceCtx !== undefined || manifestPath === SERVICES_MANIFEST_PATH
925
+ ? isHubUnitInstalled(opts.guidanceCtx?.hubUnitDeps ?? defaultHubUnitDeps)
926
+ : false);
927
+ if (supervisedForWarn) {
928
+ log(
929
+ `⚠ --interactive runs ${short}'s own setup, which may register a standalone daemon. On a supervised hub that daemon races the supervisor for ${short}'s port (#580). Prefer the light default, or pass --no-autostart through to ${short}'s init.`,
930
+ );
931
+ }
770
932
  // Forward --vault-name from the InstallOpts when set so `parachute setup`
771
933
  // (and any future programmatic caller) can pre-answer the name prompt.
772
934
  const initCmd =
@@ -781,6 +943,15 @@ export async function install(input: string, opts: InstallOpts = {}): Promise<nu
781
943
  }
782
944
  } else if (spec.init && opts.noCreate) {
783
945
  log(`(skipping ${spec.init.join(" ")} — --no-create: module installed, no instance created)`);
946
+ } else if (spec.init) {
947
+ // Light path: the module ships an interactive init but the operator didn't
948
+ // ask for it. Skip the interview; the guidance block at the end of install
949
+ // tells them where to manage + create instances. The supervisor (started
950
+ // below) owns the lifecycle, so vault's own daemon registration is
951
+ // deliberately NOT triggered here — that's the :1940 race #580 fixed.
952
+ log(
953
+ `(skipping ${spec.init.join(" ")} — manage ${short} from the admin UI; re-run with --interactive for the full setup)`,
954
+ );
784
955
  }
785
956
 
786
957
  // Hub-as-port-authority (#53): pick the service's port now and reflect it
@@ -903,6 +1074,40 @@ export async function install(input: string, opts: InstallOpts = {}): Promise<nu
903
1074
  const notice = migrateNotice(configDir, now());
904
1075
  if (notice) log(notice);
905
1076
 
1077
+ // Install-time stale-unit sweep (#580 item 3 / #522 part 2). Before we start
1078
+ // the module under the supervisor, disable any leftover STANDALONE per-module
1079
+ // autostart unit (a pre-supervisor `parachute-<short>.service` with
1080
+ // Restart=always, or a `computer.parachute.<short>` LaunchAgent with
1081
+ // KeepAlive). Such a unit keeps RESPAWNING an unsupervised module that binds
1082
+ // the module's port; the supervised child then EADDRINUSE-crash-loops and
1083
+ // lands `crashed` — the recurring field signature in #580 / #522. Reuses the
1084
+ // exact #522 migrate/teardown sweep (`disableStaleModuleUnits`): known-module
1085
+ // shorts only, hub + cloudflared explicitly skipped, idempotent (already-
1086
+ // disabled/absent = silent no-op), non-fatal (a failed disable warns +
1087
+ // continues). Gated on a supervised hub being present — on a non-supervised
1088
+ // box the per-module unit IS the legitimate lifecycle and we must not touch
1089
+ // it. Only runs on the start path (skipped under --no-start / --no-create).
1090
+ const willStart = !opts.noStart && !opts.noCreate;
1091
+ if (willStart) {
1092
+ const gctx = opts.guidanceCtx;
1093
+ const sweepAllowed =
1094
+ opts.disableStaleModuleUnits !== undefined || manifestPath === SERVICES_MANIFEST_PATH;
1095
+ const supervisedForSweep =
1096
+ gctx?.hubUnitInstalled ?? isHubUnitInstalled(gctx?.hubUnitDeps ?? defaultHubUnitDeps);
1097
+ if (sweepAllowed && supervisedForSweep) {
1098
+ const sweep = opts.disableStaleModuleUnits ?? defaultDisableStaleModuleUnits;
1099
+ const result = sweep({ log: (l) => log(l) });
1100
+ const disabled = result.actions.filter((a) => a.result === "disabled");
1101
+ if (disabled.length > 0) {
1102
+ log(
1103
+ `Swept ${disabled.length} stale per-module autostart unit(s) so the supervisor owns the port(s): ${disabled
1104
+ .map((a) => a.unit)
1105
+ .join(", ")}.`,
1106
+ );
1107
+ }
1108
+ }
1109
+ }
1110
+
906
1111
  // Auto-start: vault and notes' inits historically left a daemon running, but
907
1112
  // scribe (and any service without a daemon-launching init) didn't — so
908
1113
  // launch-day `install scribe` ended with a silent install and the user
@@ -932,6 +1137,44 @@ export async function install(input: string, opts: InstallOpts = {}): Promise<nu
932
1137
  for (const line of footer) log(line);
933
1138
  }
934
1139
 
1140
+ // Light-install guidance block (#579 / #580 item 1). When we suppressed the
1141
+ // module's interactive init (light path: it ships an init, the operator
1142
+ // didn't pass --interactive, and this isn't the wizard's noCreate path),
1143
+ // replace the absent interview with a short pointer to the admin UI + the
1144
+ // optional extras. Skipped for --interactive (the service's own footer
1145
+ // covers it) and for noCreate (the wizard prints its own admin URL).
1146
+ //
1147
+ // INFORMATIONAL, independent of the start path (N3): this block is *guidance*,
1148
+ // not an action, so it deliberately does NOT gate on `willStart` /
1149
+ // `!opts.noStart` the way the stale-unit sweep above does. Even under
1150
+ // `--no-start` (CI / piped installs) the operator still benefits from "here's
1151
+ // where to manage it once it's up" — the admin URL + extras are equally true
1152
+ // whether or not THIS invocation started the daemon.
1153
+ //
1154
+ // The supervised-hub probe + admin-URL resolution touch real on-disk state
1155
+ // (the hub plist / expose-state / hub-port file). Gate the production probe
1156
+ // on `manifestPath === SERVICES_MANIFEST_PATH` — the same isolation gate the
1157
+ // well-known regen uses — so a test driving install against a tempdir
1158
+ // manifestPath never reads the operator's real `~/.parachute`. Tests opt into
1159
+ // the guidance assertions by passing `guidanceCtx` explicitly.
1160
+ const guidanceProbeAllowed =
1161
+ opts.guidanceCtx !== undefined || manifestPath === SERVICES_MANIFEST_PATH;
1162
+ if (spec.init && !opts.interactive && !opts.noCreate && guidanceProbeAllowed) {
1163
+ const gctx = opts.guidanceCtx;
1164
+ const supervised =
1165
+ gctx?.hubUnitInstalled ?? isHubUnitInstalled(gctx?.hubUnitDeps ?? defaultHubUnitDeps);
1166
+ // Only emit the "managed under the supervisor" guidance when there's a
1167
+ // supervised hub to manage it through. On a non-supervised box (no hub
1168
+ // unit) the admin UI may not be reachable, so we stay quiet and let the
1169
+ // generic install output stand — the operator can run --interactive.
1170
+ if (supervised) {
1171
+ const exposeState = gctx && "exposeState" in gctx ? gctx.exposeState : safeReadExposeState();
1172
+ const hubPort = gctx && "hubPort" in gctx ? gctx.hubPort : readHubPort(configDir);
1173
+ const adminUrl = resolveGuidanceAdminUrl(exposeState, hubPort);
1174
+ for (const line of buildLightInstallGuidance(short, adminUrl)) log(line);
1175
+ }
1176
+ }
1177
+
935
1178
  // Final registration check — the service may have written its own
936
1179
  // authoritative entry during init or first boot, replacing the seed (or
937
1180
  // filling a gap when the service had no seedEntry). Re-read at exit so the
package/src/help.ts CHANGED
@@ -42,7 +42,7 @@ export function installHelp(): string {
42
42
  return `parachute install — install and register a Parachute service
43
43
 
44
44
  Usage:
45
- parachute install <service> [--channel rc|latest] [--tag <name>] [--no-start]
45
+ parachute install <service> [--channel rc|latest] [--tag <name>] [--no-start] [--interactive]
46
46
  parachute install all [--channel rc|latest] [--tag <name>] [--no-start]
47
47
  parachute install scribe [--scribe-provider <name>] [--scribe-key <key>]
48
48
 
@@ -52,7 +52,10 @@ Services:
52
52
 
53
53
  What it does:
54
54
  1. bun add -g @openparachute/<service>[@<tag>]
55
- 2. run any service-specific init (e.g. \`parachute-vault init\`)
55
+ 2. register + start the module under the hub supervisor (LIGHT by default —
56
+ no interactive interview; for vault: no vault-name / MCP / token prompts
57
+ and no competing standalone daemon). Pass \`--interactive\` to run the
58
+ service's own full setup (e.g. \`parachute-vault init\`) instead.
56
59
  3. assign a canonical port (1939–1949) and reflect it in
57
60
  \`~/.parachute/services.json\` — the single source of truth at boot
58
61
  (services follow a 4-tier resolvePort ladder; services.json wins).
@@ -73,6 +76,15 @@ Flags:
73
76
  Skipped if the package is already \`bun link\`-ed locally.
74
77
  --no-start skip the post-install daemon start. For piped / CI
75
78
  installs that own their own process model.
79
+ --interactive run the module's full interactive setup instead of
80
+ the light default. For vault: the vault-name /
81
+ "install MCP in Claude Code?" / "mint a token?"
82
+ interview + its own standalone daemon registration.
83
+ On a supervised hub that standalone daemon can RACE
84
+ the supervisor for the module's port (EADDRINUSE
85
+ crash-loop, #580) — prefer the light default + manage
86
+ from the admin UI unless you specifically want the
87
+ old interview.
76
88
  --scribe-provider <name> set scribe's transcription provider non-interactively.
77
89
  Known: parakeet-mlx (default), onnx-asr, whisper, groq, openai.
78
90
  Skips the interactive picker.
@@ -89,7 +101,8 @@ Environment:
89
101
  and \`--tag\`. Defaults to \`latest\` when unset.
90
102
 
91
103
  Examples:
92
- parachute install vault # installs, runs init, starts vault
104
+ parachute install vault # light: installs + starts vault, points you at the admin UI
105
+ parachute install vault --interactive # full interactive vault init (name / MCP / token prompts)
93
106
  parachute install surface # installs surface (auto-bootstraps Notes)
94
107
  parachute install notes # back-compat: legacy notes-daemon (Phase 2 deprecating)
95
108
  parachute install scribe # installs, prompts for provider, starts scribe
package/src/supervisor.ts CHANGED
@@ -34,14 +34,55 @@
34
34
  * child state to disk (transient — re-derived from services.json on every boot).
35
35
  */
36
36
 
37
+ import { spawnSync } from "node:child_process";
37
38
  import {
38
39
  MissingDependencyError,
39
40
  type MissingDependencyWire,
40
41
  ensureExecutable,
41
42
  rethrowIfMissing,
42
43
  } from "@openparachute/depcheck";
44
+ import { defaultPidOnPort } from "./hub-control.ts";
43
45
  import { type PortListeningFn, defaultPortListening } from "./port-probe.ts";
44
46
 
47
/**
 * Which pid (if any) holds a TCP LISTEN on `port`. Production wires
 * `hub-control.ts:defaultPidOnPort` (an `lsof -ti :<port> -sTCP:LISTEN`
 * shell-out, macOS + Linux); a box without `lsof` / on an unsupported platform
 * returns undefined → the squatter check degrades gracefully (falls back to the
 * existing started-but-unbound error). Injectable so tests stay deterministic.
 */
export type PidOnPortFn = (port: number) => number | undefined;

/**
 * Best-effort command line of a pid (the squatter-surfacing detail). Returns
 * undefined when it can't be read; the message then omits the cmdline.
 */
export type OwnerProbeFn = (pid: number) => string | undefined;

/**
 * Production `ownerOfPid`: `ps -o command= -p <pid>` → the process's full argv
 * (one line). Mirrors `migrate-cutover.ts:defaultOwnerOfPid` (inlined rather
 * than imported to keep the supervisor off the heavy command-module graph).
 *
 * @param pid - Process id to look up. Anything that is not a positive integer
 *   (NaN, 0, negative, fractional) is rejected up front without shelling out.
 * @returns The first non-blank, trimmed line `ps` printed, or undefined on any
 *   failure (no `ps`, pid gone, permission, timeout, empty output), so the
 *   squatter message simply omits the command line.
 */
export const defaultOwnerOfPid: OwnerProbeFn = (pid) => {
  // A value that cannot be a real pid never needs a `ps` round-trip.
  if (!Number.isSafeInteger(pid) || pid <= 0) return undefined;
  try {
    const result = spawnSync("ps", ["-o", "command=", "-p", String(pid)], {
      encoding: "utf8",
      timeout: 2000,
    });
    // A null status (spawn failure / timeout kill) also lands here.
    if (result.status !== 0) return undefined;
    // `find` yields either a non-empty trimmed line or undefined — exactly the
    // contract, so no further emptiness check is needed.
    return result.stdout
      .split("\n")
      .map((s) => s.trim())
      .find((s) => s.length > 0);
  } catch {
    // Best-effort probe: never let a lookup failure escape to the caller.
    return undefined;
  }
};
85
+
45
86
  export type ModuleStatus = "starting" | "running" | "stopped" | "crashed" | "restarting";
46
87
 
47
88
  /**
@@ -221,6 +262,29 @@ export interface SupervisorOpts {
221
262
  * Tests exercising the missing-binary branch inject `which: () => null`.
222
263
  */
223
264
  readonly which?: (cmd: string) => string | null;
265
+ /**
266
+ * Pre-spawn port-squatter detection (#580 item 4). Returns the pid holding a
267
+ * TCP LISTEN on the module's port, or undefined when the port is free /
268
+ * undetectable. Before spawning a module, the supervisor checks whether the
269
+ * declared port is already held by a pid it does NOT own (not one of its live
270
+ * children). If so it records a structured `port_squatter` start-error with
271
+ * an actionable message and DOES NOT spawn — so a rogue process holding the
272
+ * port (the #580 field signature: a bare `vault/src/server.ts` outside the
273
+ * supervisor on :1940) surfaces in `status` instead of the supervised child
274
+ * EADDRINUSE-crash-looping into a bare `supervisor: crashed`.
275
+ *
276
+ * Detection ONLY — never auto-kills (that's an operator's unrelated process).
277
+ * Defaults to `hub-control.ts:defaultPidOnPort` on the production path; the
278
+ * stub-spawner test path defaults to "no squatter" (returns undefined) so
279
+ * existing fake-proc tests are unaffected unless they inject this explicitly.
280
+ */
281
+ readonly pidOnPort?: PidOnPortFn;
282
+ /**
283
+ * Best-effort cmdline probe for the squatter pid (the actionable message
284
+ * detail). Defaults to {@link defaultOwnerOfPid} on the production path; the
285
+ * stub-spawner test path defaults to "unknown" (returns undefined).
286
+ */
287
+ readonly ownerOfPid?: OwnerProbeFn;
224
288
  }
225
289
 
226
290
  /**
@@ -336,6 +400,12 @@ export class Supervisor {
336
400
  lateBindWatchMs: opts.lateBindWatchMs ?? DEFAULT_LATE_BIND_WATCH_MS,
337
401
  lateBindPollMs: opts.lateBindPollMs ?? DEFAULT_LATE_BIND_POLL_MS,
338
402
  which: opts.which ?? (isProductionPath ? Bun.which : () => "/stub/bin/preflight-skipped"),
403
+ // Squatter detection (#580 item 4): real probes on the production path;
404
+ // the stub-spawner test path defaults to "no squatter / unknown owner" so
405
+ // fake-proc tests (which never hold a real port) aren't tripped. Tests
406
+ // opt in by injecting `pidOnPort` / `ownerOfPid`.
407
+ pidOnPort: opts.pidOnPort ?? (isProductionPath ? defaultPidOnPort : () => undefined),
408
+ ownerOfPid: opts.ownerOfPid ?? (isProductionPath ? defaultOwnerOfPid : () => undefined),
339
409
  };
340
410
  }
341
411
 
@@ -389,6 +459,25 @@ export class Supervisor {
389
459
  }
390
460
  }
391
461
 
462
+ // Pre-spawn port-squatter detection (#580 item 4). If the module's declared
463
+ // port is already held by a process the supervisor does NOT own (not one of
464
+ // its live children), spawning would EADDRINUSE-crash-loop the child into a
465
+ // bare `supervisor: crashed` with no clue why. Detect the foreign holder and
466
+ // record a structured, actionable `port_squatter` start-error INSTEAD of
467
+ // spawning — the operator sees the offending pid + cmdline + a copy-paste
468
+ // recovery in `status` / the SPA. Detection only: we never kill someone
469
+ // else's process (it may be the operator's unrelated dev server).
470
+ const squatter = this.detectPortSquatter(entry);
471
+ if (squatter) {
472
+ entry.state = {
473
+ ...entry.state,
474
+ status: "crashed",
475
+ pid: undefined,
476
+ startError: squatter,
477
+ };
478
+ return entry.state;
479
+ }
480
+
392
481
  // Belt-and-suspenders for a spawn that slips past the preflight (binary
393
482
  // removed between check + spawn, or a path that didn't preflight): a
394
483
  // not-found spawn throw becomes the same structured MissingDependencyError
@@ -428,6 +517,65 @@ export class Supervisor {
428
517
  return entry.state;
429
518
  }
430
519
 
520
/**
 * Pids of the supervisor's own children that are still alive. The squatter
 * check uses this set to tell "the port holder is one of ours" (our
 * just-spawned child, or a sibling) apart from a foreign process.
 *
 * Liveness guard (N1): a module contributes its pid only while its
 * `state.status` is `running` or `starting`. The recorded `proc` outlives the
 * process (it is not cleared on exit), so the pid of a `crashed` /
 * `restarting` / `stopped` module is stale — the OS may have recycled it —
 * and must not vouch for whoever holds the port now. A module that was never
 * spawned (no `proc`) contributes nothing.
 */
private supervisedPids(): Set<number> {
  const live = new Set<number>();
  for (const mod of this.modules.values()) {
    const { status } = mod.state;
    const childAlive = status === "running" || status === "starting";
    if (childAlive) {
      const childPid = mod.proc?.pid;
      // Guard against a missing or nonsensical pid on the process handle.
      if (typeof childPid === "number" && childPid > 0) {
        live.add(childPid);
      }
    }
  }
  return live;
}
544
+
545
/**
 * Pre-spawn port-squatter check (#580 item 4).
 *
 * Detection only — the holder is never killed; a foreign pid on a module port
 * may be the operator's unrelated process. The ownership test follows the
 * precedent of `migrate-cutover.ts:sweepOrphanOnPort`: "is the holder one of
 * my live children's pids?".
 *
 * @param entry - The module about to be spawned; its port is read from
 *   `entry.req.env.PORT`.
 * @returns A structured `port_squatter` start-error when the declared port is
 *   held by a pid the supervisor does not own. Returns undefined when no usable
 *   port is declared, when `pidOnPort` reports nothing (port free, or
 *   detection unavailable on this platform), or when the holder is one of the
 *   supervisor's own live children.
 */
private detectPortSquatter(entry: ModuleEntry): ModuleStartError | undefined {
  const declared = entry.req.env?.PORT;
  const port = declared ? Number(declared) : Number.NaN;
  const hasUsablePort = Number.isFinite(port) && port > 0;
  if (!hasUsablePort) return undefined; // No declared port.

  const holder = this.opts.pidOnPort(port);
  // Nothing listening (or nothing detectable), or the listener is our child.
  if (holder === undefined || this.supervisedPids().has(holder)) return undefined;

  // The command line is optional detail; fall back to the bare pid without it.
  const cmdline = this.opts.ownerOfPid(holder);
  let who = `pid ${holder}`;
  if (cmdline) who += ` (${cmdline})`;

  const short = entry.req.short;
  const error_description = `port ${port} is held by ${who} outside the supervisor — kill it and retry: kill ${holder} && parachute start ${short}`;
  const at = new Date(this.opts.now()).toISOString();
  return { error_type: "port_squatter", error_description, at };
}
578
+
431
579
  /**
432
580
  * Poll the module's port until it binds or `startReadyMs` elapses (§6.5).
433
581
  * Skipped when the gate is disabled (stub-spawner test path) or the request