@openparachute/hub 0.6.4 → 0.6.5-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/cloudflare-tunnel.test.ts +78 -0
- package/src/__tests__/expose-cloudflare.test.ts +253 -0
- package/src/__tests__/hub-db-liveness.test.ts +139 -0
- package/src/__tests__/hub-server.test.ts +145 -6
- package/src/__tests__/hub-unit.test.ts +110 -1
- package/src/cloudflare/tunnel.ts +70 -0
- package/src/commands/expose-cloudflare.ts +157 -2
- package/src/commands/serve.ts +14 -4
- package/src/hub-db-liveness.ts +211 -0
- package/src/hub-server.ts +1175 -1104
- package/src/hub-unit.ts +74 -27
package/src/hub-unit.ts
CHANGED
|
@@ -85,7 +85,9 @@ export interface HubUnitDeps extends ManagedUnitDeps {
|
|
|
85
85
|
* `null` when the hub doesn't answer at all (connection-refused / timeout).
|
|
86
86
|
* Production uses a bounded `fetch`; tests inject a deterministic stub.
|
|
87
87
|
*/
|
|
88
|
-
probeHealthVersion: (
|
|
88
|
+
probeHealthVersion: (
|
|
89
|
+
port: number,
|
|
90
|
+
) => Promise<{ ok: boolean; version?: string; db?: string } | null>;
|
|
89
91
|
/** TCP connect-probe for readiness polling (reuses `defaultPortListening`). */
|
|
90
92
|
portListening: PortListeningFn;
|
|
91
93
|
/** Sleep between readiness polls (tests pin to 0). */
|
|
@@ -118,27 +120,48 @@ async function defaultProbeHealth(port: number): Promise<boolean> {
|
|
|
118
120
|
*/
|
|
119
121
|
async function defaultProbeHealthVersion(
|
|
120
122
|
port: number,
|
|
121
|
-
): Promise<{ ok: boolean; version?: string } | null> {
|
|
123
|
+
): Promise<{ ok: boolean; version?: string; db?: string } | null> {
|
|
122
124
|
try {
|
|
123
125
|
const res = await fetch(`http://127.0.0.1:${port}/health`, {
|
|
124
126
|
signal: AbortSignal.timeout(1500),
|
|
125
127
|
});
|
|
126
128
|
let version: string | undefined;
|
|
129
|
+
let db: string | undefined;
|
|
127
130
|
try {
|
|
128
131
|
const body = (await res.json()) as unknown;
|
|
129
|
-
if (body && typeof body === "object"
|
|
132
|
+
if (body && typeof body === "object") {
|
|
130
133
|
const v = (body as { version?: unknown }).version;
|
|
131
134
|
if (typeof v === "string" && v.length > 0) version = v;
|
|
135
|
+
// `db` liveness verdict (#594): "ok" / "error: <class>" / "unconfigured".
|
|
136
|
+
// Threaded through so the adoption probe can treat a db-error hub as
|
|
137
|
+
// needing a restart even when its version matches.
|
|
138
|
+
const d = (body as { db?: unknown }).db;
|
|
139
|
+
if (typeof d === "string" && d.length > 0) db = d;
|
|
132
140
|
}
|
|
133
141
|
} catch {
|
|
134
|
-
// Non-JSON body → no version. Leave undefined (→ mismatch).
|
|
142
|
+
// Non-JSON body → no version/db. Leave undefined (→ mismatch / unknown db).
|
|
135
143
|
}
|
|
136
|
-
|
|
144
|
+
const out: { ok: boolean; version?: string; db?: string } = { ok: res.ok };
|
|
145
|
+
if (version !== undefined) out.version = version;
|
|
146
|
+
if (db !== undefined) out.db = db;
|
|
147
|
+
return out;
|
|
137
148
|
} catch {
|
|
138
149
|
return null;
|
|
139
150
|
}
|
|
140
151
|
}
|
|
141
152
|
|
|
153
|
+
/**
|
|
154
|
+
* True when a `/health` `db` field reports a non-recoverable liveness fault
|
|
155
|
+
* (#594) — anything starting with "error:" (e.g. "error: fatal" from the
|
|
156
|
+
* dead-handle field repro). "ok" and "unconfigured" are not faults: a
|
|
157
|
+
* pre-wizard hub with no DB rows still reports a working handle. A missing
|
|
158
|
+
* `db` field (an older hub that predates #594) reads as "unknown → don't
|
|
159
|
+
* treat as a fault" so we never restart a hub merely for lacking the field.
|
|
160
|
+
*/
|
|
161
|
+
function healthReportsDbFault(db: string | undefined): boolean {
|
|
162
|
+
return typeof db === "string" && db.startsWith("error:");
|
|
163
|
+
}
|
|
164
|
+
|
|
142
165
|
export const defaultHubUnitDeps: HubUnitDeps = {
|
|
143
166
|
...defaultManagedUnitDeps,
|
|
144
167
|
probeHealth: defaultProbeHealth,
|
|
@@ -510,13 +533,22 @@ export async function ensureHubVersionMatches(
|
|
|
510
533
|
}
|
|
511
534
|
|
|
512
535
|
const runningVersion = probe.version;
|
|
513
|
-
|
|
514
|
-
|
|
536
|
+
const dbFault = healthReportsDbFault(probe.db);
|
|
537
|
+
if (runningVersion === installedVersion && !dbFault) {
|
|
538
|
+
// Versions agree AND the DB handle is live — today's behavior, no restart.
|
|
515
539
|
return { outcome: "match", runningVersion, installedVersion, messages: [] };
|
|
516
540
|
}
|
|
517
541
|
|
|
518
|
-
//
|
|
519
|
-
|
|
542
|
+
// From here we know the running hub needs a restart: EITHER its version is
|
|
543
|
+
// stale (the #590 zombie-adoption case) OR it's reporting a dead DB handle
|
|
544
|
+
// (#594 — a hub that adopted-as-version-match but whose state dir was deleted
|
|
545
|
+
// under it; /health stays 200 while every DB route 500s). Both run through
|
|
546
|
+
// the same restart-once machinery. `runningLabel` describes whichever fault
|
|
547
|
+
// we're acting on so the operator sees an accurate reason.
|
|
548
|
+
const versionMismatch = runningVersion !== installedVersion;
|
|
549
|
+
const runningLabel = versionMismatch
|
|
550
|
+
? (runningVersion ?? "an older version (no version field)")
|
|
551
|
+
: `${runningVersion} with a dead database handle (${probe.db})`;
|
|
520
552
|
|
|
521
553
|
// Is this hub one we can restart through the manager? If there's no manager,
|
|
522
554
|
// or no unit installed, the running hub is a legacy detached pid / a dev
|
|
@@ -556,45 +588,60 @@ export async function ensureHubVersionMatches(
|
|
|
556
588
|
outcome: "restarted",
|
|
557
589
|
runningVersion: v,
|
|
558
590
|
installedVersion,
|
|
559
|
-
messages: [`✓ hub unit restarted; now running ${installedVersion}.`],
|
|
591
|
+
messages: [`✓ hub unit restarted; now running ${installedVersion} with a live database.`],
|
|
560
592
|
});
|
|
561
|
-
const stillMismatchedResult = (
|
|
562
|
-
|
|
593
|
+
const stillMismatchedResult = (
|
|
594
|
+
last: { version?: string; db?: string } | undefined,
|
|
595
|
+
): EnsureHubVersionMatchesResult => {
|
|
596
|
+
const lastVersion = last?.version;
|
|
597
|
+
const reports = lastVersion ? ` (reports ${lastVersion})` : "";
|
|
598
|
+
const dbStillBad = healthReportsDbFault(last?.db);
|
|
563
599
|
return {
|
|
564
600
|
outcome: "still-mismatched",
|
|
565
|
-
...(
|
|
601
|
+
...(lastVersion !== undefined ? { runningVersion: lastVersion } : {}),
|
|
566
602
|
installedVersion,
|
|
567
|
-
messages:
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
603
|
+
messages: dbStillBad
|
|
604
|
+
? [
|
|
605
|
+
`⚠ restarted the hub unit, but its database still reports a fault (${last?.db}).`,
|
|
606
|
+
" The state directory may still be missing or the database file corrupted.",
|
|
607
|
+
` Check it with \`curl http://127.0.0.1:${port}/health\` and ensure ~/.parachute exists.`,
|
|
608
|
+
]
|
|
609
|
+
: [
|
|
610
|
+
`⚠ restarted the hub unit, but it is still not reporting ${installedVersion}${reports}.`,
|
|
611
|
+
" This can happen with a bun-linked checkout on a feature branch whose package.json version trails the running code.",
|
|
612
|
+
` Continuing — verify with \`parachute status\` / \`curl http://127.0.0.1:${port}/health\` if the hub should be on a specific version.`,
|
|
613
|
+
],
|
|
572
614
|
};
|
|
573
615
|
};
|
|
574
616
|
|
|
575
|
-
//
|
|
576
|
-
//
|
|
577
|
-
//
|
|
578
|
-
|
|
617
|
+
// A re-probe counts as "healed" only when the version matches AND the DB
|
|
618
|
+
// handle is live — a restart that came back on the right version but with a
|
|
619
|
+
// still-dead handle hasn't actually fixed the #594 fault.
|
|
620
|
+
const probeHealed = (p: { version?: string; db?: string } | null): boolean =>
|
|
621
|
+
p !== null && p.version === installedVersion && !healthReportsDbFault(p.db);
|
|
622
|
+
|
|
623
|
+
// Re-probe `/health` until the hub is healed or the readiness budget elapses.
|
|
624
|
+
// Restart-loop guard: we restart AT MOST once — if it still mismatches /
|
|
625
|
+
// db-faults after this single restart (e.g. a bun-linked checkout on a
|
|
626
|
+
// branch, or a still-missing state dir), we warn + continue rather than loop.
|
|
579
627
|
const deadline = Date.now() + readyTimeoutMs;
|
|
580
628
|
for (;;) {
|
|
581
629
|
const after = await deps.probeHealthVersion(port);
|
|
582
|
-
if (after
|
|
630
|
+
if (probeHealed(after)) {
|
|
583
631
|
return restartedResult(installedVersion);
|
|
584
632
|
}
|
|
585
633
|
if (Date.now() >= deadline) {
|
|
586
|
-
|
|
587
|
-
return stillMismatchedResult(after?.version ?? runningVersion);
|
|
634
|
+
return stillMismatchedResult(after ?? { version: runningVersion });
|
|
588
635
|
}
|
|
589
636
|
if (readyPollMs > 0) await deps.sleep(readyPollMs);
|
|
590
637
|
else break;
|
|
591
638
|
}
|
|
592
639
|
// readyPollMs === 0 fast-path: one more probe, then settle.
|
|
593
640
|
const finalProbe = await deps.probeHealthVersion(port);
|
|
594
|
-
if (finalProbe
|
|
641
|
+
if (probeHealed(finalProbe)) {
|
|
595
642
|
return restartedResult(installedVersion);
|
|
596
643
|
}
|
|
597
|
-
return stillMismatchedResult(finalProbe?.version ?? runningVersion);
|
|
644
|
+
return stillMismatchedResult(finalProbe ?? { version: runningVersion });
|
|
598
645
|
}
|
|
599
646
|
|
|
600
647
|
/**
|