@openparachute/hub 0.6.4-rc.9 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/expose-supervisor-version.test.ts +104 -0
- package/src/__tests__/hub-unit.test.ts +181 -0
- package/src/__tests__/init.test.ts +401 -0
- package/src/__tests__/install.test.ts +90 -0
- package/src/__tests__/migrate-cutover.test.ts +1 -0
- package/src/commands/expose-supervisor.ts +45 -0
- package/src/commands/init.ts +63 -1
- package/src/commands/install.ts +42 -1
- package/src/hub-unit.ts +255 -0
|
@@ -341,6 +341,96 @@ describe("install", () => {
|
|
|
341
341
|
}
|
|
342
342
|
});
|
|
343
343
|
|
|
344
|
+
test("names the squatter holding the canonical port when the walk assigns a fallback (#590)", async () => {
  // Scenario from field bug #590 (item 2): something already listens on
  // vault's canonical port 1940, so the install-time port walk has to hand out
  // a fallback. The warning must now identify the holder (pid + command line)
  // and hint that it may be a stale daemon. The holder is only reported, never
  // killed; the lookup goes through the injected pidOnPort / ownerOfPid seams.
  const { path: manifestPath, configDir, cleanup: removeTemp } = makeTempPath();
  try {
    const captured: string[] = [];
    const exitCode = await install("vault", {
      runner: async () => 0,
      manifestPath,
      configDir,
      startService: async () => 0,
      isLinked: () => false,
      // 1940 is the only occupied port, so the walk lands on an in-range fallback.
      portProbe: async (port) => port === 1940,
      // A foreign process (pid 1234) is the one holding 1940.
      pidOnPort: (port) => (port === 1940 ? 1234 : undefined),
      ownerOfPid: (pid) => (pid === 1234 ? "bun /opt/vault/src/server.ts" : undefined),
      log: (line) => captured.push(line),
    });
    expect(exitCode).toBe(0);

    const output = captured.join("\n");
    // The pre-existing fallback warning is still emitted…
    expect(output).toMatch(/canonical port 1940 is in use; assigned/);
    // …followed by the holder's identity, the stale-daemon hint and the kill suggestion.
    expect(output).toContain("pid 1234 (bun /opt/vault/src/server.ts)");
    expect(output).toMatch(/stale pre-supervisor daemon/);
    expect(output).toContain("kill 1234");

    // The manifest row must not have been given the occupied canonical port.
    const vaultRow = findService("parachute-vault", manifestPath);
    expect(vaultRow?.port).not.toBe(1940);
  } finally {
    removeTemp();
  }
});
|
|
379
|
+
|
|
380
|
+
test("squatter pid present but command line unreadable → names the pid alone (#590)", async () => {
  // The pid lookup succeeds but the command-line lookup yields nothing; the
  // message must fall back to the bare pid instead of printing "(undefined)".
  const { path: manifestPath, configDir, cleanup: removeTemp } = makeTempPath();
  try {
    const captured: string[] = [];
    const exitCode = await install("vault", {
      runner: async () => 0,
      manifestPath,
      configDir,
      startService: async () => 0,
      isLinked: () => false,
      portProbe: async (port) => port === 1940,
      pidOnPort: (port) => (port === 1940 ? 4321 : undefined),
      // Simulates a failed `ps` call or a pid that has already exited.
      ownerOfPid: () => undefined,
      log: (line) => captured.push(line),
    });
    expect(exitCode).toBe(0);

    const output = captured.join("\n");
    expect(output).toContain("held by pid 4321.");
    expect(output).not.toContain("(undefined)");
  } finally {
    removeTemp();
  }
});
|
|
403
|
+
|
|
404
|
+
test("no squatter naming when the canonical port is free (#590 — no false positive)", async () => {
  // With the canonical port available there is no fallback, so neither the
  // fallback warning nor the holder lookup may run at all.
  const { path: manifestPath, configDir, cleanup: removeTemp } = makeTempPath();
  try {
    const captured: string[] = [];
    let holderLookupRan = false;
    const exitCode = await install("vault", {
      runner: async () => 0,
      manifestPath,
      configDir,
      startService: async () => 0,
      isLinked: () => false,
      // Every port, including canonical 1940, reports as free.
      portProbe: async () => false,
      // Both seams are booby-trapped: any use would be visible in the assertions below.
      pidOnPort: () => {
        holderLookupRan = true;
        return 9999;
      },
      ownerOfPid: () => "should-not-appear",
      log: (line) => captured.push(line),
    });
    expect(exitCode).toBe(0);

    const output = captured.join("\n");
    expect(output).not.toMatch(/is in use; assigned/);
    expect(output).not.toContain("should-not-appear");
    expect(holderLookupRan).toBe(false);
  } finally {
    removeTemp();
  }
});
|
|
433
|
+
|
|
344
434
|
test("`install lens` aliases to notes with a rename notice", async () => {
|
|
345
435
|
// Transition alias for the brief Notes→Lens rename (Apr 19) that was
|
|
346
436
|
// reverted on launch eve (Apr 22). Accepted for one release cycle so
|
|
@@ -16,15 +16,18 @@
|
|
|
16
16
|
* `expose-cloudflare.ts` (cloudflared) use so the two paths can't drift.
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
|
+
import pkg from "../../package.json" with { type: "json" };
|
|
19
20
|
import { readHubPort } from "../hub-control.ts";
|
|
20
21
|
import { hubDbPath, openHubDb } from "../hub-db.ts";
|
|
21
22
|
import {
|
|
22
23
|
type EnsureHubUnitOpts,
|
|
23
24
|
type EnsureHubUnitResult,
|
|
25
|
+
type EnsureHubVersionMatchesResult,
|
|
24
26
|
HUB_UNIT_DEFAULT_PORT,
|
|
25
27
|
type HubUnitDeps,
|
|
26
28
|
defaultHubUnitDeps,
|
|
27
29
|
ensureHubUnit as ensureHubUnitImpl,
|
|
30
|
+
ensureHubVersionMatches as ensureHubVersionMatchesImpl,
|
|
28
31
|
} from "../hub-unit.ts";
|
|
29
32
|
import {
|
|
30
33
|
type DriveModuleOpDeps,
|
|
@@ -54,6 +57,17 @@ export interface ExposeSupervisorOpts {
|
|
|
54
57
|
hubUnitDeps?: HubUnitDeps;
|
|
55
58
|
/** Ensure the hub unit is up before / during expose (§3.2 / §4.3a). */
|
|
56
59
|
ensureHubUnit?: (opts: EnsureHubUnitOpts) => Promise<EnsureHubUnitResult>;
|
|
60
|
+
/**
|
|
61
|
+
* Version-check-and-restart at the expose adoption point (#590). After the
|
|
62
|
+
* hub unit is confirmed up, compare the RUNNING hub's `/health` version to the
|
|
63
|
+
* installed package version; restart the managed unit on mismatch so an expose
|
|
64
|
+
* never wires a tunnel to a stale zombie. Production wires
|
|
65
|
+
* `ensureHubVersionMatches`; tests inject a stub.
|
|
66
|
+
*/
|
|
67
|
+
ensureHubVersion?: (ctx: {
|
|
68
|
+
port: number;
|
|
69
|
+
log: (line: string) => void;
|
|
70
|
+
}) => Promise<EnsureHubVersionMatchesResult>;
|
|
57
71
|
/** Drive a per-module op against the running hub (reads operator.token). */
|
|
58
72
|
driveModuleOp?: (short: string, op: ModuleOp, deps: DriveModuleOpDeps) => Promise<ModuleOpResult>;
|
|
59
73
|
/**
|
|
@@ -83,6 +97,10 @@ export interface ExposeSupervisorOpts {
|
|
|
83
97
|
export interface ResolvedExposeSupervisor {
|
|
84
98
|
hubUnitDeps: HubUnitDeps;
|
|
85
99
|
ensureHubUnit: (opts: EnsureHubUnitOpts) => Promise<EnsureHubUnitResult>;
|
|
100
|
+
ensureHubVersion: (ctx: {
|
|
101
|
+
port: number;
|
|
102
|
+
log: (line: string) => void;
|
|
103
|
+
}) => Promise<EnsureHubVersionMatchesResult>;
|
|
86
104
|
driveModuleOp: (short: string, op: ModuleOp, deps: DriveModuleOpDeps) => Promise<ModuleOpResult>;
|
|
87
105
|
openDb: (configDir: string) => import("bun:sqlite").Database;
|
|
88
106
|
selfHealOperatorTokenIssuer: (
|
|
@@ -105,6 +123,15 @@ export function resolveExposeSupervisor(
|
|
|
105
123
|
return {
|
|
106
124
|
hubUnitDeps,
|
|
107
125
|
ensureHubUnit: opts?.ensureHubUnit ?? ensureHubUnitImpl,
|
|
126
|
+
ensureHubVersion:
|
|
127
|
+
opts?.ensureHubVersion ??
|
|
128
|
+
((ctx) =>
|
|
129
|
+
ensureHubVersionMatchesImpl({
|
|
130
|
+
installedVersion: pkg.version,
|
|
131
|
+
port: ctx.port,
|
|
132
|
+
deps: hubUnitDeps,
|
|
133
|
+
log: ctx.log,
|
|
134
|
+
})),
|
|
108
135
|
driveModuleOp: opts?.driveModuleOp ?? driveModuleOpImpl,
|
|
109
136
|
openDb: opts?.openDb ?? ((configDir) => openHubDb(hubDbPath(configDir))),
|
|
110
137
|
selfHealOperatorTokenIssuer:
|
|
@@ -145,6 +172,24 @@ export async function ensureHubUnitForExpose(
|
|
|
145
172
|
): Promise<{ ok: boolean; port: number }> {
|
|
146
173
|
const ensured = await sup.ensureHubUnit({ port, deps: sup.hubUnitDeps, log });
|
|
147
174
|
if (ensured.outcome === "already-up" || ensured.outcome === "started") {
|
|
175
|
+
// #590: the hub is up — but is it the version we installed? A zombie that
|
|
176
|
+
// merely answers /health must not become the target of a fresh tunnel.
|
|
177
|
+
// Compare + restart-on-mismatch (once). A non-unit-managed mismatch is NOT
|
|
178
|
+
// killed: surface it + fail the expose so the operator resolves it; a
|
|
179
|
+
// still-mismatched-after-restart (bun-linked branch) warns + continues.
|
|
180
|
+
try {
|
|
181
|
+
const versionResult = await sup.ensureHubVersion({ port: ensured.port, log });
|
|
182
|
+
for (const m of versionResult.messages) log(m);
|
|
183
|
+
if (
|
|
184
|
+
versionResult.outcome === "not-unit-managed" ||
|
|
185
|
+
versionResult.outcome === "restart-failed"
|
|
186
|
+
) {
|
|
187
|
+
return { ok: false, port: ensured.port };
|
|
188
|
+
}
|
|
189
|
+
} catch (err) {
|
|
190
|
+
// A version-check failure must never block expose — degrade to a note.
|
|
191
|
+
log(`note: hub version check skipped (${err instanceof Error ? err.message : String(err)})`);
|
|
192
|
+
}
|
|
148
193
|
return { ok: true, port: ensured.port };
|
|
149
194
|
}
|
|
150
195
|
for (const m of ensured.messages) log(m);
|
package/src/commands/init.ts
CHANGED
|
@@ -35,12 +35,18 @@
|
|
|
35
35
|
import { spawnSync } from "node:child_process";
|
|
36
36
|
import { join } from "node:path";
|
|
37
37
|
import { fileURLToPath } from "node:url";
|
|
38
|
+
import pkg from "../../package.json" with { type: "json" };
|
|
38
39
|
import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
|
|
39
40
|
import { type ExposeState, readExposeState } from "../expose-state.ts";
|
|
40
41
|
import { type EnsureHubOpts, HUB_DEFAULT_PORT, HUB_SVC, readHubPort } from "../hub-control.ts";
|
|
41
42
|
import { hubDbPath, openHubDb } from "../hub-db.ts";
|
|
42
43
|
import { deriveHubOrigin } from "../hub-origin.ts";
|
|
43
|
-
import {
|
|
44
|
+
import {
|
|
45
|
+
type EnsureHubVersionMatchesResult,
|
|
46
|
+
ensureHubUnit,
|
|
47
|
+
ensureHubVersionMatches,
|
|
48
|
+
installAndStartHubUnit,
|
|
49
|
+
} from "../hub-unit.ts";
|
|
44
50
|
import { issueOperatorToken, readOperatorTokenFile } from "../operator-token.ts";
|
|
45
51
|
import { type AliveFn, defaultAlive, processState } from "../process-state.ts";
|
|
46
52
|
import { findService, readManifestLenient } from "../services-manifest.ts";
|
|
@@ -81,6 +87,18 @@ export interface InitOpts {
|
|
|
81
87
|
* Design §3.3 (init row), §4.1/§4.2, appendix (c).
|
|
82
88
|
*/
|
|
83
89
|
ensureHub?: (opts: EnsureHubOpts) => Promise<{ pid: number; port: number; started: boolean }>;
|
|
90
|
+
/**
|
|
91
|
+
* Test seam: version-check-and-restart at the hub adoption point (#590).
|
|
92
|
+
* After init confirms a hub is answering on the canonical port, it compares
|
|
93
|
+
* the RUNNING hub's `/health` version against this installed package version;
|
|
94
|
+
* on mismatch it restarts the managed unit (once) so a freshly-installed hub
|
|
95
|
+
* never adopts a stale zombie. Production wires `ensureHubVersionMatches`;
|
|
96
|
+
* tests stub it to assert the call without touching launchctl / the live hub.
|
|
97
|
+
*/
|
|
98
|
+
ensureHubVersion?: (ctx: {
|
|
99
|
+
port: number;
|
|
100
|
+
log: (line: string) => void;
|
|
101
|
+
}) => Promise<EnsureHubVersionMatchesResult>;
|
|
84
102
|
/**
|
|
85
103
|
* Test seam: guarantee an operator token exists once the hub is up (design
|
|
86
104
|
* §3.1 / §3.3). Production reads `operator.token`; if absent AND a hub user
|
|
@@ -582,6 +600,18 @@ export async function init(opts: InitOpts = {}): Promise<number> {
|
|
|
582
600
|
// spawn). The `ensureHub` seam is preserved for tests (and the return shape is
|
|
583
601
|
// unchanged); only the production default flipped.
|
|
584
602
|
const ensureHub = opts.ensureHub ?? defaultEnsureHubViaUnit;
|
|
603
|
+
// #590: after the hub is confirmed up, compare its RUNNING version to the
|
|
604
|
+
// installed package version and restart the managed unit on mismatch, so a
|
|
605
|
+
// freshly-installed hub never adopts a stale zombie that merely answers
|
|
606
|
+
// /health. Injectable for tests.
|
|
607
|
+
const ensureHubVersion =
|
|
608
|
+
opts.ensureHubVersion ??
|
|
609
|
+
((ctx) =>
|
|
610
|
+
ensureHubVersionMatches({
|
|
611
|
+
installedVersion: pkg.version,
|
|
612
|
+
port: ctx.port,
|
|
613
|
+
log: ctx.log,
|
|
614
|
+
}));
|
|
585
615
|
const guaranteeOperatorToken = opts.guaranteeOperatorToken ?? defaultGuaranteeOperatorToken;
|
|
586
616
|
const readExposeStateFn = opts.readExposeStateFn ?? (() => readExposeState());
|
|
587
617
|
const isTty = opts.isTty ?? Boolean(process.stdin.isTTY && process.stdout.isTTY);
|
|
@@ -640,6 +670,38 @@ export async function init(opts: InitOpts = {}): Promise<number> {
|
|
|
640
670
|
// overridden, so the fallback is almost always correct.
|
|
641
671
|
if (hubPort === undefined) hubPort = HUB_DEFAULT_PORT;
|
|
642
672
|
|
|
673
|
+
// Step 1.25 (#590): the hub answered /health, but is it the version we just
|
|
674
|
+
// installed? A zombie LaunchAgent survives `rm -rf ~/.parachute`, so a brand-
|
|
675
|
+
// new install can adopt month-old code that merely keeps the port. Compare the
|
|
676
|
+
// RUNNING version to the installed package version; on mismatch, restart the
|
|
677
|
+
// managed unit (once) so the tunnel/wizard/vault-install downstream bind to the
|
|
678
|
+
// NEW code. A non-unit-managed hub (legacy detached pid / dev `bun run serve`)
|
|
679
|
+
// is NOT killed — we surface the mismatch + an actionable message and bail so
|
|
680
|
+
// the operator decides. A still-mismatched-after-restart (bun-linked branch)
|
|
681
|
+
// warns + continues rather than looping.
|
|
682
|
+
try {
|
|
683
|
+
const versionResult = await ensureHubVersion({ port: hubPort, log });
|
|
684
|
+
for (const m of versionResult.messages) log(m);
|
|
685
|
+
if (versionResult.outcome === "not-unit-managed") {
|
|
686
|
+
// We can't safely take over a hub we don't own. Stop here so init doesn't
|
|
687
|
+
// wire a fresh tunnel + credentials to a stale runtime (the #590 field bug).
|
|
688
|
+
log("");
|
|
689
|
+
log("Resolve the version mismatch above, then re-run `parachute init`.");
|
|
690
|
+
return 1;
|
|
691
|
+
}
|
|
692
|
+
if (versionResult.outcome === "restart-failed") {
|
|
693
|
+
log("");
|
|
694
|
+
log("The hub service manager rejected the restart command.");
|
|
695
|
+
log("Try checking the logs:");
|
|
696
|
+
log(" parachute logs hub");
|
|
697
|
+
return 1;
|
|
698
|
+
}
|
|
699
|
+
// `match` / `not-running` / `restarted` / `still-mismatched` → continue.
|
|
700
|
+
} catch (err) {
|
|
701
|
+
// A version-check failure must never block init — degrade to a note.
|
|
702
|
+
log(`note: hub version check skipped (${err instanceof Error ? err.message : String(err)})`);
|
|
703
|
+
}
|
|
704
|
+
|
|
643
705
|
// Step 1.5: guarantee an operator token exists (design §3.1 / §3.3). Under
|
|
644
706
|
// the unified model every per-module verb is an authenticated module-ops
|
|
645
707
|
// call, so the steady-state operator needs an `operator.token` on disk — the
|
package/src/commands/install.ts
CHANGED
|
@@ -5,7 +5,12 @@ import { autoWireScribeAuth } from "../auto-wire.ts";
|
|
|
5
5
|
import { bunGlobalPrefixes, isLinked as defaultIsLinkedShared } from "../bun-link.ts";
|
|
6
6
|
import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
|
|
7
7
|
import { type ExposeState, readExposeState } from "../expose-state.ts";
|
|
8
|
-
import {
|
|
8
|
+
import {
|
|
9
|
+
HUB_DEFAULT_PORT,
|
|
10
|
+
type PidOnPortFn,
|
|
11
|
+
defaultPidOnPort,
|
|
12
|
+
readHubPort,
|
|
13
|
+
} from "../hub-control.ts";
|
|
9
14
|
import { type HubUnitDeps, defaultHubUnitDeps, isHubUnitInstalled } from "../hub-unit.ts";
|
|
10
15
|
import {
|
|
11
16
|
type ModuleManifest,
|
|
@@ -34,6 +39,7 @@ import {
|
|
|
34
39
|
type DisableStaleModuleUnitsResult,
|
|
35
40
|
disableStaleModuleUnits as defaultDisableStaleModuleUnits,
|
|
36
41
|
} from "../stale-module-units.ts";
|
|
42
|
+
import { type OwnerProbeFn, defaultOwnerOfPid } from "../supervisor.ts";
|
|
37
43
|
import { WELL_KNOWN_PATH } from "../well-known.ts";
|
|
38
44
|
import { type LifecycleOpts, start as lifecycleStart } from "./lifecycle.ts";
|
|
39
45
|
import { migrateNotice } from "./migrate.ts";
|
|
@@ -301,6 +307,22 @@ export interface InstallOpts {
|
|
|
301
307
|
* unless the test populates services.json directly.
|
|
302
308
|
*/
|
|
303
309
|
portProbe?: (port: number) => Promise<boolean>;
|
|
310
|
+
/**
|
|
311
|
+
* Test seam for the install-time port-squatter naming (#590 item 2). When the
|
|
312
|
+
* canonical port walk has to assign a fallback port because the canonical one
|
|
313
|
+
* is held, this looks up the pid LISTENing on the canonical port so the
|
|
314
|
+
* warning can name the holder (`pid 1234 (bun .../vault/src/server.ts)`) — the
|
|
315
|
+
* same #581 `pidOnPort` / `ownerOfPid` seams the supervisor start-path uses,
|
|
316
|
+
* reused (not duplicated). Detection-only — never kills. Production wires
|
|
317
|
+
* `defaultPidOnPort` (`lsof -ti :<port>`); tests inject a deterministic stub.
|
|
318
|
+
*/
|
|
319
|
+
pidOnPort?: PidOnPortFn;
|
|
320
|
+
/**
|
|
321
|
+
* Test seam for the install-time port-squatter naming (#590 item 2): the
|
|
322
|
+
* best-effort command line of the squatting pid. Production wires
|
|
323
|
+
* `defaultOwnerOfPid` (`ps -o command= -p <pid>`); tests inject a stub.
|
|
324
|
+
*/
|
|
325
|
+
ownerOfPid?: OwnerProbeFn;
|
|
304
326
|
/**
|
|
305
327
|
* Test seam for reading `<packageDir>/.parachute/module.json`. Production
|
|
306
328
|
* uses the real file reader; tests inject a map from package-dir → manifest
|
|
@@ -974,6 +996,25 @@ export async function install(input: string, opts: InstallOpts = {}): Promise<nu
|
|
|
974
996
|
});
|
|
975
997
|
if (portResult.warning) {
|
|
976
998
|
log(`⚠ ${portResult.warning}`);
|
|
999
|
+
// #590 item 2: the canonical port was held, so we walked to a fallback. Name
|
|
1000
|
+
// the squatter — the supervisor start-path does this post-#581; do it here at
|
|
1001
|
+
// install-time too. Reuse the #581 pidOnPort / ownerOfPid seams (detection
|
|
1002
|
+
// only; never kill). When the holder is a foreign pid (not one of OUR rows —
|
|
1003
|
+
// which is the common case when a stale pre-supervisor daemon is squatting),
|
|
1004
|
+
// surface its pid + command line + a hint.
|
|
1005
|
+
if (canonicalPort !== undefined && portResult.source !== "canonical") {
|
|
1006
|
+
const pidOnPort = opts.pidOnPort ?? defaultPidOnPort;
|
|
1007
|
+
const ownerOfPid = opts.ownerOfPid ?? defaultOwnerOfPid;
|
|
1008
|
+
const holder = pidOnPort(canonicalPort);
|
|
1009
|
+
if (holder !== undefined) {
|
|
1010
|
+
const cmdline = ownerOfPid(holder);
|
|
1011
|
+
const who = cmdline ? `pid ${holder} (${cmdline})` : `pid ${holder}`;
|
|
1012
|
+
log(` canonical port ${canonicalPort} is held by ${who}.`);
|
|
1013
|
+
log(
|
|
1014
|
+
` This may be a stale pre-supervisor daemon. If so, stop it (kill ${holder}) and re-run \`parachute install ${entryName}\` to reclaim the canonical port.`,
|
|
1015
|
+
);
|
|
1016
|
+
}
|
|
1017
|
+
}
|
|
977
1018
|
}
|
|
978
1019
|
|
|
979
1020
|
// Find-or-seed the manifest entry. Re-read after the seed write so a silent
|
package/src/hub-unit.ts
CHANGED
|
@@ -76,6 +76,16 @@ export interface HubUnitDeps extends ManagedUnitDeps {
|
|
|
76
76
|
* uses a bounded `fetch`; tests inject a deterministic stub.
|
|
77
77
|
*/
|
|
78
78
|
probeHealth: (port: number) => Promise<boolean>;
|
|
79
|
+
/**
|
|
80
|
+
* HTTP `/health` probe that ALSO reads the JSON `version` field of the
|
|
81
|
+
* running hub (#590). Resolves to `{ ok, version }` — `ok` mirrors
|
|
82
|
+
* {@link probeHealth} (2xx), `version` is the running hub's reported version
|
|
83
|
+
* (or `undefined` when the body has no `version` field — a very old hub that
|
|
84
|
+
* predates the field; the caller treats that as a mismatch). Resolves to
|
|
85
|
+
* `null` when the hub doesn't answer at all (connection-refused / timeout).
|
|
86
|
+
* Production uses a bounded `fetch`; tests inject a deterministic stub.
|
|
87
|
+
*/
|
|
88
|
+
probeHealthVersion: (port: number) => Promise<{ ok: boolean; version?: string } | null>;
|
|
79
89
|
/** TCP connect-probe for readiness polling (reuses `defaultPortListening`). */
|
|
80
90
|
portListening: PortListeningFn;
|
|
81
91
|
/** Sleep between readiness polls (tests pin to 0). */
|
|
@@ -98,9 +108,41 @@ async function defaultProbeHealth(port: number): Promise<boolean> {
|
|
|
98
108
|
}
|
|
99
109
|
}
|
|
100
110
|
|
|
111
|
+
/**
 * Production implementation of the version-aware `/health` probe (#590).
 *
 * Issues a GET to `http://127.0.0.1:<port>/health` with a 1.5s abort timeout
 * and tries to read a `version` string out of the JSON response body.
 *
 * @param port Local TCP port the hub is expected to listen on.
 * @returns `null` when the request itself fails (any thrown error, including
 *   the timeout). Otherwise `{ ok, version? }`, where `ok` is the response's
 *   2xx flag and `version` is only present when the body is a JSON object
 *   carrying a non-empty string `version`. A missing `version` (non-JSON body,
 *   absent/empty/non-string field) is left for the caller to interpret.
 */
async function defaultProbeHealthVersion(
  port: number,
): Promise<{ ok: boolean; version?: string } | null> {
  let response: Response;
  try {
    response = await fetch(`http://127.0.0.1:${port}/health`, {
      signal: AbortSignal.timeout(1500),
    });
  } catch {
    // Nothing answered (refused / timed out / aborted): report "no hub".
    return null;
  }

  const result: { ok: boolean; version?: string } = { ok: response.ok };
  try {
    const payload: unknown = await response.json();
    if (typeof payload === "object" && payload !== null && "version" in payload) {
      const reported = (payload as { version?: unknown }).version;
      if (typeof reported === "string" && reported !== "") {
        result.version = reported;
      }
    }
  } catch {
    // Body was not parseable as JSON: keep the result version-less.
  }
  return result;
}
|
|
141
|
+
|
|
101
142
|
export const defaultHubUnitDeps: HubUnitDeps = {
|
|
102
143
|
...defaultManagedUnitDeps,
|
|
103
144
|
probeHealth: defaultProbeHealth,
|
|
145
|
+
probeHealthVersion: defaultProbeHealthVersion,
|
|
104
146
|
portListening: defaultPortListening,
|
|
105
147
|
sleep: (ms) => new Promise((r) => setTimeout(r, ms)),
|
|
106
148
|
};
|
|
@@ -158,6 +200,16 @@ export function isHubUnitInstalled(deps: HubUnitDeps): boolean {
|
|
|
158
200
|
* Is a service manager (systemd / launchd) available on this platform at all?
|
|
159
201
|
* macOS → launchctl; Linux → systemctl. A box with neither (a bare container,
|
|
160
202
|
* an init-less host) has no manager — the foreground-`serve`-only path (R19/D1).
|
|
203
|
+
*
|
|
204
|
+
* NOTE: production `deps.which` is `Bun.which`, which resolves against the
|
|
205
|
+
* process PATH. This ASSUMES `launchctl` (`/bin/launchctl`) / `systemctl` are on
|
|
206
|
+
* the PATH — true on any normal macOS / systemd box. A deliberately stripped
|
|
207
|
+
* PATH (a `nix develop` shell that omits `/bin`, a minimal CI image) would make
|
|
208
|
+
* `which` return null and misclassify a genuinely launchd-managed hub as
|
|
209
|
+
* not-unit-managed. The #590 version-check then degrades to the "stop it
|
|
210
|
+
* yourself" path rather than restarting the unit — a safe (never-kill)
|
|
211
|
+
* degradation, but worth knowing if a dev sees not-unit-managed on a box that
|
|
212
|
+
* clearly runs the hub under launchd.
|
|
161
213
|
*/
|
|
162
214
|
export function hasServiceManager(deps: HubUnitDeps): boolean {
|
|
163
215
|
if (deps.platform === "darwin") return deps.which("launchctl") !== null;
|
|
@@ -342,6 +394,209 @@ export function restartHubUnit(deps: HubUnitDeps): HubUnitManagerOpResult {
|
|
|
342
394
|
return { outcome: "ok", messages: [] };
|
|
343
395
|
}
|
|
344
396
|
|
|
397
|
+
/**
 * Result tag produced by {@link ensureHubVersionMatches} (#590).
 *
 * - `"match"`: the running hub reported exactly the installed version;
 *   nothing was done.
 * - `"not-running"`: the `/health` probe got no answer, so there was nothing
 *   to compare; nothing was done.
 * - `"restarted"`: the versions differed, the hub unit was restarted, and a
 *   follow-up probe reported the installed version.
 * - `"still-mismatched"`: the versions differed, the hub unit was restarted
 *   exactly once, and the follow-up probing ended without seeing the installed
 *   version (for example a bun-linked checkout whose package.json version
 *   trails the running code). No second restart is attempted.
 * - `"not-unit-managed"`: the versions differed but there is no service
 *   manager or no installed hub unit, so the running process is left alone and
 *   only an actionable message is returned.
 * - `"restart-failed"`: the versions differed and the hub is unit-managed, but
 *   the restart command itself did not succeed; the manager's messages are
 *   passed through.
 */
export type HubVersionOutcome =
  | "match"
  | "not-running"
  | "restarted"
  | "still-mismatched"
  | "not-unit-managed"
  | "restart-failed";
|
|
431
|
+
|
|
432
|
+
/** What {@link ensureHubVersionMatches} reports back to its caller (#590). */
export interface EnsureHubVersionMatchesResult {
  /** Which branch of the version check was taken. */
  outcome: HubVersionOutcome;
  /**
   * Version string the running hub reported. Absent or `undefined` when the
   * hub was not answering or its `/health` body carried no version.
   */
  runningVersion?: string;
  /** The installed package version the running hub was compared against. */
  installedVersion: string;
  /** Ready-to-print lines (mismatch notice, hints) for the caller to surface. */
  messages: string[];
}
|
|
441
|
+
|
|
442
|
+
/** Inputs accepted by {@link ensureHubVersionMatches} (#590). */
export interface EnsureHubVersionMatchesOpts {
  /** Installed package version; the caller supplies it from its own `package.json`. */
  installedVersion: string;
  /** Port of the hub to probe. Falls back to `HUB_UNIT_DEFAULT_PORT` when omitted. */
  port?: number;
  /** Dependency seam (probe, sleep, service manager). Falls back to `defaultHubUnitDeps`. */
  deps?: HubUnitDeps;
  /** How long, in ms, to keep re-probing after a restart. Falls back to 15000. */
  readyTimeoutMs?: number;
  /** Pause, in ms, between post-restart probes. Falls back to 250. */
  readyPollMs?: number;
  /** Receives progress lines emitted while the check runs. Silent when omitted. */
  log?: (line: string) => void;
}
|
|
455
|
+
|
|
456
|
+
/**
 * Compare the RUNNING hub's version with the INSTALLED one and, when they
 * differ, restart the managed hub unit at most once (#590).
 *
 * Motivation: a hub that merely answers `/health` may be arbitrarily stale
 * (e.g. a leftover LaunchAgent), and adopting it would bind everything
 * downstream to old code.
 *
 * Decision sequence:
 *   1. Probe `/health`. No answer → `not-running` (nothing to compare).
 *   2. Reported version equals `installedVersion` → `match`.
 *   3. Otherwise it is a mismatch (a hub reporting no version counts too):
 *      - no service manager, or no hub unit installed → `not-unit-managed`;
 *        the process is left untouched and only guidance is returned.
 *      - unit-managed → log a notice, call {@link restartHubUnit} once:
 *          - restart not ok → `restart-failed` (manager messages appended);
 *          - otherwise re-probe until the installed version shows up
 *            (`restarted`) or the readiness budget runs out
 *            (`still-mismatched`). There is never a second restart.
 *
 * When `readyPollMs` is not positive the re-probe does not wait: it probes
 * once, and if that neither matches nor hits the deadline, probes one final
 * time and settles.
 *
 * This function never decides whether an outcome is fatal; it only reports.
 * All I/O goes through {@link HubUnitDeps}.
 *
 * @param opts See {@link EnsureHubVersionMatchesOpts}.
 * @returns The outcome plus the lines the caller should surface. The notice
 *   emitted before a restart goes to `opts.log`, not into `messages`.
 */
export async function ensureHubVersionMatches(
  opts: EnsureHubVersionMatchesOpts,
): Promise<EnsureHubVersionMatchesResult> {
  const { installedVersion } = opts;
  const deps = opts.deps ?? defaultHubUnitDeps;
  const port = opts.port ?? HUB_UNIT_DEFAULT_PORT;
  const budgetMs = opts.readyTimeoutMs ?? 15_000;
  const pollMs = opts.readyPollMs ?? 250;
  const emit = opts.log ?? (() => {});

  const initial = await deps.probeHealthVersion(port);
  if (initial === null) {
    // No hub answering: starting one is the caller's job, not ours.
    return { outcome: "not-running", installedVersion, messages: [] };
  }

  const runningVersion = initial.version;
  if (runningVersion === installedVersion) {
    return { outcome: "match", runningVersion, installedVersion, messages: [] };
  }

  // From here on the versions disagree. A version-less hub lands here as well.
  const runningLabel = runningVersion ?? "an older version (no version field)";

  // Only a hub that runs under an installed unit of an available service
  // manager can be restarted by us; anything else is left for the operator.
  const unitManaged = hasServiceManager(deps) && isHubUnitInstalled(deps);
  if (!unitManaged) {
    return {
      outcome: "not-unit-managed",
      runningVersion,
      installedVersion,
      messages: [
        `⚠ the running hub is ${runningLabel} but ${installedVersion} is installed.`,
        "  The running hub is NOT managed by a Parachute service unit (a detached process or a foreground `parachute serve` / `bun src/cli.ts serve`), so it won't be restarted automatically.",
        `  Stop it yourself (find it with \`lsof -ti :${port}\` then \`kill <pid>\`, or quit the foreground \`parachute serve\` / \`bun src/cli.ts serve\` on a dev checkout), then re-run so the new code is adopted.`,
      ],
    };
  }

  emit(
    `⚠ the running hub is ${runningLabel} but ${installedVersion} is installed — restarting the hub unit to pick up the new code.`,
  );
  const restart = restartHubUnit(deps);
  if (restart.outcome !== "ok") {
    return {
      outcome: "restart-failed",
      runningVersion,
      installedVersion,
      messages: [
        `⚠ the running hub is ${runningLabel} but ${installedVersion} is installed, and the hub unit restart failed.`,
        ...restart.messages,
      ],
    };
  }

  // True when a probe answered and reported exactly the installed version.
  const reportsInstalled = (seen: { ok: boolean; version?: string } | null): boolean =>
    seen !== null && seen.version === installedVersion;

  // Terminal result: the restart brought up the installed version.
  const restarted = (): EnsureHubVersionMatchesResult => ({
    outcome: "restarted",
    runningVersion: installedVersion,
    installedVersion,
    messages: [`✓ hub unit restarted; now running ${installedVersion}.`],
  });

  // Terminal result: the single restart did not produce the installed version.
  // `last` is the most recent version seen, or the pre-restart one as fallback.
  const stillMismatched = (last: string | undefined): EnsureHubVersionMatchesResult => {
    const reports = last ? ` (reports ${last})` : "";
    const messages = [
      `⚠ restarted the hub unit, but it is still not reporting ${installedVersion}${reports}.`,
      "  This can happen with a bun-linked checkout on a feature branch whose package.json version trails the running code.",
      `  Continuing — verify with \`parachute status\` / \`curl http://127.0.0.1:${port}/health\` if the hub should be on a specific version.`,
    ];
    if (last === undefined) {
      return { outcome: "still-mismatched", installedVersion, messages };
    }
    return { outcome: "still-mismatched", runningVersion: last, installedVersion, messages };
  };

  // Re-probe within the readiness budget. Never restart again from here.
  const deadline = Date.now() + budgetMs;
  let keepPolling = true;
  while (keepPolling) {
    const seen = await deps.probeHealthVersion(port);
    if (reportsInstalled(seen)) return restarted();
    if (Date.now() >= deadline) return stillMismatched(seen?.version ?? runningVersion);
    keepPolling = pollMs > 0;
    if (keepPolling) await deps.sleep(pollMs);
  }

  // Non-positive poll interval: one last probe, then settle either way.
  const last = await deps.probeHealthVersion(port);
  return reportsInstalled(last) ? restarted() : stillMismatched(last?.version ?? runningVersion);
}
|
|
599
|
+
|
|
345
600
|
/**
|
|
346
601
|
* Run-state of the hub UNIT as reported by the platform manager (design §6.4).
|
|
347
602
|
* This is the manager's view — NOT a liveness verdict. The hub answering
|