@openparachute/hub 0.5.14-rc.2 → 0.5.14-rc.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +109 -15
- package/package.json +7 -3
- package/src/__tests__/account-home-ui.test.ts +251 -15
- package/src/__tests__/account-vault-token.test.ts +355 -0
- package/src/__tests__/admin-vaults.test.ts +70 -4
- package/src/__tests__/api-mint-token.test.ts +693 -5
- package/src/__tests__/api-modules-config.test.ts +16 -10
- package/src/__tests__/api-modules-ops.test.ts +45 -0
- package/src/__tests__/api-modules.test.ts +92 -75
- package/src/__tests__/api-ready.test.ts +135 -0
- package/src/__tests__/api-revoke-token.test.ts +384 -0
- package/src/__tests__/api-users.test.ts +7 -2
- package/src/__tests__/auth.test.ts +157 -30
- package/src/__tests__/cli.test.ts +44 -5
- package/src/__tests__/cloudflare-detect.test.ts +60 -5
- package/src/__tests__/expose-2fa-warning.test.ts +31 -17
- package/src/__tests__/expose-auth-preflight.test.ts +71 -72
- package/src/__tests__/expose-cloudflare.test.ts +582 -11
- package/src/__tests__/expose-interactive.test.ts +10 -4
- package/src/__tests__/expose-public-auto.test.ts +5 -1
- package/src/__tests__/expose.test.ts +52 -2
- package/src/__tests__/hub-server.test.ts +396 -10
- package/src/__tests__/hub.test.ts +85 -6
- package/src/__tests__/init.test.ts +928 -0
- package/src/__tests__/lifecycle.test.ts +464 -2
- package/src/__tests__/migrate.test.ts +433 -51
- package/src/__tests__/oauth-handlers.test.ts +1252 -83
- package/src/__tests__/oauth-ui.test.ts +12 -1
- package/src/__tests__/operator-token-issuer-self-heal.test.ts +412 -0
- package/src/__tests__/proxy-error-ui.test.ts +212 -0
- package/src/__tests__/proxy-state.test.ts +192 -0
- package/src/__tests__/resource-binding.test.ts +97 -0
- package/src/__tests__/scope-explanations.test.ts +77 -12
- package/src/__tests__/services-manifest.test.ts +122 -4
- package/src/__tests__/setup-wizard.test.ts +633 -53
- package/src/__tests__/status.test.ts +36 -0
- package/src/__tests__/two-factor-flow.test.ts +602 -0
- package/src/__tests__/two-factor.test.ts +183 -0
- package/src/__tests__/upgrade.test.ts +78 -1
- package/src/__tests__/users.test.ts +68 -0
- package/src/__tests__/vault-auth-status.test.ts +312 -11
- package/src/__tests__/vault-hub-origin-env.test.ts +263 -0
- package/src/__tests__/wizard.test.ts +372 -0
- package/src/account-home-ui.ts +488 -38
- package/src/account-vault-token.ts +282 -0
- package/src/admin-handlers.ts +159 -4
- package/src/admin-login-ui.ts +49 -5
- package/src/admin-vaults.ts +48 -15
- package/src/api-account.ts +14 -0
- package/src/api-mint-token.ts +132 -24
- package/src/api-modules-ops.ts +49 -11
- package/src/api-modules.ts +29 -12
- package/src/api-ready.ts +102 -0
- package/src/api-revoke-token.ts +107 -21
- package/src/api-users.ts +29 -3
- package/src/cli.ts +112 -25
- package/src/clients.ts +18 -6
- package/src/cloudflare/config.ts +10 -4
- package/src/cloudflare/detect.ts +82 -20
- package/src/commands/auth.ts +165 -24
- package/src/commands/expose-2fa-warning.ts +34 -32
- package/src/commands/expose-auth-preflight.ts +89 -78
- package/src/commands/expose-cloudflare.ts +471 -16
- package/src/commands/expose-interactive.ts +10 -11
- package/src/commands/expose-public-auto.ts +6 -4
- package/src/commands/expose.ts +8 -0
- package/src/commands/init.ts +594 -0
- package/src/commands/install.ts +33 -2
- package/src/commands/lifecycle.ts +386 -17
- package/src/commands/migrate.ts +293 -41
- package/src/commands/status.ts +22 -0
- package/src/commands/upgrade.ts +55 -11
- package/src/commands/wizard.ts +847 -0
- package/src/env-file.ts +10 -0
- package/src/help.ts +157 -15
- package/src/hub-db.ts +39 -1
- package/src/hub-server.ts +119 -13
- package/src/hub-settings.ts +11 -0
- package/src/hub.ts +82 -14
- package/src/oauth-handlers.ts +298 -21
- package/src/oauth-ui.ts +10 -0
- package/src/operator-token.ts +151 -0
- package/src/pending-login.ts +116 -0
- package/src/proxy-error-ui.ts +506 -0
- package/src/proxy-state.ts +131 -0
- package/src/rate-limit.ts +51 -0
- package/src/resource-binding.ts +134 -0
- package/src/scope-attenuation.ts +85 -0
- package/src/scope-explanations.ts +131 -14
- package/src/services-manifest.ts +112 -0
- package/src/setup-wizard.ts +738 -125
- package/src/tailscale/run.ts +28 -11
- package/src/totp.ts +201 -0
- package/src/two-factor-handlers.ts +287 -0
- package/src/two-factor-store.ts +181 -0
- package/src/two-factor-ui.ts +462 -0
- package/src/users.ts +58 -0
- package/src/vault/auth-status.ts +200 -25
- package/src/vault-hub-origin-env.ts +163 -0
- package/web/ui/dist/assets/index-BiBlvEaj.css +1 -0
- package/web/ui/dist/assets/index-CIN3mnmf.js +61 -0
- package/web/ui/dist/index.html +2 -2
- package/src/__tests__/vault-tokens-create-interactive.test.ts +0 -183
- package/src/commands/vault-tokens-create-interactive.ts +0 -143
- package/web/ui/dist/assets/index-7DtAXz7y.css +0 -1
- package/web/ui/dist/assets/index-tRmPbbC7.js +0 -61
package/src/commands/install.ts
CHANGED
|
@@ -190,6 +190,31 @@ export interface InstallOpts {
|
|
|
190
190
|
* (#45) to pre-collect the answer up front. Ignored for non-vault installs.
|
|
191
191
|
*/
|
|
192
192
|
vaultName?: string;
|
|
193
|
+
/**
|
|
194
|
+
* "Install the module, but don't create a first vault instance" (hub#168 — the
|
|
195
|
+
* wizard-parity work for Aaron's 2026-05-28 directive: "always install the
|
|
196
|
+
* vault module, but creating a vault should be optional").
|
|
197
|
+
*
|
|
198
|
+
* Default: false (today's behavior — install runs the service's `init` and
|
|
199
|
+
* starts the daemon, which for vault auto-creates a `default` row).
|
|
200
|
+
*
|
|
201
|
+
* When true:
|
|
202
|
+
* - The `bun add -g <pkg>` step still runs (puts the binary on PATH).
|
|
203
|
+
* - `spec.init` is SKIPPED. For vault this means no `parachute-vault init`
|
|
204
|
+
* → no default-vault row is created from this code path.
|
|
205
|
+
* - `lifecycle.start` is SKIPPED. The supervisor/wizard owns spawning;
|
|
206
|
+
* starting vault here would trigger its server-side auto-init (which
|
|
207
|
+
* creates a `default` vault on first boot when `listVaults().length === 0`).
|
|
208
|
+
* - services.json is still seeded (`spec.seedEntry`) + installDir stamped
|
|
209
|
+
* so subsequent supervisor spawns find the module + module.json.
|
|
210
|
+
*
|
|
211
|
+
* Intended for `parachute init` — install the module so the wizard can offer
|
|
212
|
+
* Create/Import/Skip without a follow-up bun-add round-trip, but defer
|
|
213
|
+
* vault-instance creation to whichever path the wizard's vault step takes.
|
|
214
|
+
* On the existing CLI surfaces (`parachute install vault`, `parachute setup`),
|
|
215
|
+
* leave it false so today's behavior is unchanged.
|
|
216
|
+
*/
|
|
217
|
+
noCreate?: boolean;
|
|
193
218
|
/**
|
|
194
219
|
* `parachute install scribe` only: pre-pick the transcription provider so
|
|
195
220
|
* the prompt doesn't fire. Validated against scribe's known providers — an
|
|
@@ -708,7 +733,7 @@ export async function install(input: string, opts: InstallOpts = {}): Promise<nu
|
|
|
708
733
|
? spec.manifestName
|
|
709
734
|
: manifest.name;
|
|
710
735
|
|
|
711
|
-
if (spec.init) {
|
|
736
|
+
if (spec.init && !opts.noCreate) {
|
|
712
737
|
// Forward --vault-name from the InstallOpts when set so `parachute setup`
|
|
713
738
|
// (and any future programmatic caller) can pre-answer the name prompt.
|
|
714
739
|
const initCmd =
|
|
@@ -721,6 +746,8 @@ export async function install(input: string, opts: InstallOpts = {}): Promise<nu
|
|
|
721
746
|
log(`${initCmd.join(" ")} exited ${initCode}`);
|
|
722
747
|
return initCode;
|
|
723
748
|
}
|
|
749
|
+
} else if (spec.init && opts.noCreate) {
|
|
750
|
+
log(`(skipping ${spec.init.join(" ")} — --no-create: module installed, no instance created)`);
|
|
724
751
|
}
|
|
725
752
|
|
|
726
753
|
// Hub-as-port-authority (#53): pick the service's port now and reflect it
|
|
@@ -849,7 +876,11 @@ export async function install(input: string, opts: InstallOpts = {}): Promise<nu
|
|
|
849
876
|
// wondering why nothing happened. Always end with the daemon running unless
|
|
850
877
|
// the caller opted out (CI / piped scripts). Idempotent: if the service is
|
|
851
878
|
// already up, lifecycle.start no-ops via the existing PID-file check.
|
|
852
|
-
|
|
879
|
+
//
|
|
880
|
+
// `noCreate` (hub#168) also suppresses auto-start: starting vault would
|
|
881
|
+
// trigger its server-side first-boot auto-init (creating a default vault),
|
|
882
|
+
// which is exactly what --no-create is supposed to defer.
|
|
883
|
+
if (!opts.noStart && !opts.noCreate) {
|
|
853
884
|
const startService =
|
|
854
885
|
opts.startService ??
|
|
855
886
|
((short: string) => lifecycleStart(short, { manifestPath, configDir, log }));
|
|
@@ -1,5 +1,11 @@
|
|
|
1
|
-
import { existsSync, openSync } from "node:fs";
|
|
1
|
+
import { existsSync, openSync, readFileSync } from "node:fs";
|
|
2
|
+
import { Socket } from "node:net";
|
|
2
3
|
import { join } from "node:path";
|
|
4
|
+
import {
|
|
5
|
+
MissingDependencyError,
|
|
6
|
+
ensureExecutable,
|
|
7
|
+
rethrowIfMissing,
|
|
8
|
+
} from "@openparachute/depcheck";
|
|
3
9
|
import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
|
|
4
10
|
import { readEnvFileValues } from "../env-file.ts";
|
|
5
11
|
import { readExposeState } from "../expose-state.ts";
|
|
@@ -12,8 +18,10 @@ import {
|
|
|
12
18
|
readHubPort,
|
|
13
19
|
stopHub,
|
|
14
20
|
} from "../hub-control.ts";
|
|
21
|
+
import { hubDbPath, openHubDb } from "../hub-db.ts";
|
|
15
22
|
import { HUB_ORIGIN_ENV, deriveHubOrigin } from "../hub-origin.ts";
|
|
16
23
|
import { ModuleManifestError, readModuleManifest } from "../module-manifest.ts";
|
|
24
|
+
import { type OperatorIssuerHealStatus, selfHealOperatorTokenIssuer } from "../operator-token.ts";
|
|
17
25
|
import {
|
|
18
26
|
type AliveFn,
|
|
19
27
|
clearPid,
|
|
@@ -32,7 +40,13 @@ import {
|
|
|
32
40
|
knownServices,
|
|
33
41
|
shortNameForManifest,
|
|
34
42
|
} from "../service-spec.ts";
|
|
35
|
-
import {
|
|
43
|
+
import {
|
|
44
|
+
type ServiceEntry,
|
|
45
|
+
clearStartError,
|
|
46
|
+
readManifest,
|
|
47
|
+
recordStartError,
|
|
48
|
+
} from "../services-manifest.ts";
|
|
49
|
+
import { persistVaultHubOrigin, selfHealVaultHubOrigin } from "../vault-hub-origin-env.ts";
|
|
36
50
|
|
|
37
51
|
/**
|
|
38
52
|
* Tiny seam over `Bun.spawn` for lifecycle tests. The real spawner opens the
|
|
@@ -83,6 +97,44 @@ export const defaultSpawner: Spawner = {
|
|
|
83
97
|
export type KillFn = (pid: number, signal: NodeJS.Signals | number) => void;
|
|
84
98
|
export type SleepFn = (ms: number) => Promise<void>;
|
|
85
99
|
|
|
100
|
+
/**
|
|
101
|
+
* "Is something listening on this TCP port on loopback?" seam. Pairs with the
|
|
102
|
+
* spawn-then-die settle (hub#194) to catch the *other* silent-start failure
|
|
103
|
+
* shape (hub#487): a service that lives long enough to clear the liveness
|
|
104
|
+
* check but never binds its port because the port is already held (EADDRINUSE
|
|
105
|
+
* from an orphan). The recorded pid stays alive (vault's process supervisor
|
|
106
|
+
* retries / lingers) so `alive(pid)` says "running" while `parachute status`
|
|
107
|
+
* shows it inactive because nothing answers on the port.
|
|
108
|
+
*
|
|
109
|
+
* Tests inject a deterministic stub; production uses `defaultPortListening`.
|
|
110
|
+
*/
|
|
111
|
+
export type PortListeningFn = (port: number) => Promise<boolean>;
|
|
112
|
+
|
|
113
|
+
/**
 * Connect-probe: open a TCP socket to 127.0.0.1:<port> and report whether the
 * connection is accepted. A successful connect means *something* is listening;
 * the socket is destroyed immediately afterwards. Connection refused, a 1s
 * timeout, or any socket error means nothing is bound yet.
 *
 * `node:net` rather than `Bun.connect` because the latter has no clean
 * "connection refused → false" without a custom socket handler, and the net
 * Socket's `error`/`connect` events map directly onto the boolean we want.
 *
 * The returned promise never rejects: every outcome, including a port value
 * that `connect()` refuses outright, resolves to a boolean.
 *
 * @param port TCP port to probe on the IPv4 loopback address.
 * @returns `true` if the connection was accepted, `false` otherwise.
 */
export const defaultPortListening: PortListeningFn = (port) =>
  new Promise((resolve) => {
    const socket = new Socket();
    let settled = false;
    // Single exit point: the first event wins, later ones are ignored, and the
    // socket is always torn down so the probe never leaks a handle.
    const done = (listening: boolean) => {
      if (settled) return;
      settled = true;
      socket.destroy();
      resolve(listening);
    };
    socket.setTimeout(1000);
    socket.once("connect", () => done(true));
    socket.once("timeout", () => done(false));
    socket.once("error", () => done(false));
    try {
      socket.connect(port, "127.0.0.1");
    } catch {
      // `connect` validates its arguments synchronously (an out-of-range or
      // missing port throws instead of emitting `error`). Inside a Promise
      // executor that throw would reject the probe and blow up the caller's
      // `await`; an unusable port simply means "not listening".
      done(false);
    }
  });
|
|
137
|
+
|
|
86
138
|
/**
|
|
87
139
|
* Group-aware liveness: returns true if the process group (pgid == pid)
|
|
88
140
|
* still has any member. Pairs with `defaultSpawner`'s `detached: true` —
|
|
@@ -129,6 +181,35 @@ export const defaultKill: KillFn = (pid, signal) => {
|
|
|
129
181
|
|
|
130
182
|
export const defaultSleep: SleepFn = (ms) => new Promise((r) => setTimeout(r, ms));
|
|
131
183
|
|
|
184
|
+
/**
 * Read the trailing `n` lines of a logfile, best-effort. Used to surface the
 * real boot error when a start fails — operators shouldn't have to manually
 * `tail` the log to learn *why* the daemon died.
 *
 * @param logFile Path of the logfile to read (decoded as UTF-8).
 * @param n Maximum number of trailing lines to return. Values below 1 (and
 *   NaN) yield `[]`; fractional values are truncated.
 * @returns Up to `n` trailing lines, oldest first, without line terminators.
 *   Returns `[]` for an empty file and on any read error (missing file,
 *   permissions) so the caller falls back to the generic "tail the log" hint
 *   without throwing.
 */
function readLogTail(logFile: string, n: number): string[] {
  // `slice(-0)` is `slice(0)`, which would return the *whole* file, and a
  // negative `n` would return an arbitrary suffix. Treat anything that isn't
  // "at least one line" as "no lines".
  if (!(n >= 1)) return [];
  const count = Math.trunc(n);
  try {
    const content = readFileSync(logFile, "utf8");
    // Drop the single terminating newline so it doesn't produce a phantom
    // empty last line; interior blank lines are preserved.
    const trimmed = content.replace(/\r?\n$/, "");
    if (trimmed === "") return [];
    // Accept CRLF as well as LF so lines don't carry a trailing `\r`.
    return trimmed.split(/\r?\n/).slice(-count);
  } catch {
    return [];
  }
}
|
|
201
|
+
|
|
202
|
+
/**
 * Phrases that runtimes print on a port collision. There is no structured
 * error shared across runtimes, so the common wordings are matched instead:
 * the `EADDRINUSE` code, "address already in use", and "port … in use".
 * `port` is anchored on word boundaries so words that merely contain it
 * ("import", "transport", "support") don't trigger the match.
 */
const ADDR_IN_USE_PATTERN = /EADDRINUSE|address already in use|\bport\b.*\bin use\b/i;

/**
 * Heuristic EADDRINUSE detector over a logfile tail.
 *
 * A false positive makes the caller print the port-in-use remedy on an
 * unrelated failure, so the pattern is kept tight enough not to fire on
 * ordinary words; a false negative just falls back to the generic message.
 *
 * @param logTail Trailing log lines to scan (may be empty).
 * @returns `true` if any line looks like a port-collision error.
 */
function detectAddrInUse(logTail: readonly string[]): boolean {
  return logTail.some((line) => ADDR_IN_USE_PATTERN.test(line));
}
|
|
212
|
+
|
|
132
213
|
export interface LifecycleOpts {
|
|
133
214
|
spawner?: Spawner;
|
|
134
215
|
kill?: KillFn;
|
|
@@ -160,6 +241,30 @@ export interface LifecycleOpts {
|
|
|
160
241
|
* settle.
|
|
161
242
|
*/
|
|
162
243
|
startSettleMs?: number;
|
|
244
|
+
/**
|
|
245
|
+
* Probe whether the service's port is listening, post-spawn. Pairs with the
|
|
246
|
+
* settle (hub#194) to catch the EADDRINUSE-orphan shape (hub#487): the
|
|
247
|
+
* process survives the liveness window (vault lingers / retries) but never
|
|
248
|
+
* binds because the port is already held, so `start` would otherwise report
|
|
249
|
+
* "✓ started" while `status` shows it inactive. Tests inject a stub;
|
|
250
|
+
* production uses `defaultPortListening` (a loopback TCP connect probe).
|
|
251
|
+
*/
|
|
252
|
+
portListening?: PortListeningFn;
|
|
253
|
+
/**
|
|
254
|
+
* How long `start` polls for the service to bind its port after the
|
|
255
|
+
* liveness settle passes. Default 4000ms in production — long enough to
|
|
256
|
+
* cover vault/scribe cold-boot (DB open, route registration) without making
|
|
257
|
+
* a healthy start feel laggy. Polled at `startReadyPollMs` intervals; the
|
|
258
|
+
* first time the port answers we declare success. If the window elapses
|
|
259
|
+
* with the process still alive but the port silent, we print a non-fatal
|
|
260
|
+
* warning (the daemon may still be coming up) rather than failing — only a
|
|
261
|
+
* *dead* process is a hard failure. Defaulting policy mirrors
|
|
262
|
+
* `startSettleMs`: 0 (skipped) unless `portListening` is injected or the
|
|
263
|
+
* production path (no spawner override) is active.
|
|
264
|
+
*/
|
|
265
|
+
startReadyMs?: number;
|
|
266
|
+
/** Poll interval while waiting for the port to come up. Default 200ms. */
|
|
267
|
+
startReadyPollMs?: number;
|
|
163
268
|
/**
|
|
164
269
|
* Override the hub origin passed to services as PARACHUTE_HUB_ORIGIN. If
|
|
165
270
|
* unset, `start` derives it from `expose-state.json` (when exposed) or
|
|
@@ -175,9 +280,36 @@ export interface LifecycleOpts {
|
|
|
175
280
|
* `ensureHubRunning` and `lifecycle.stop("hub")` dispatches to
|
|
176
281
|
* `stopHub`. Tests inject stubs to avoid spawning real bun processes.
|
|
177
282
|
*/
|
|
283
|
+
/**
|
|
284
|
+
* PATH-resolution seam for the start preflight (`@openparachute/depcheck`
|
|
285
|
+
* `ensureExecutable`). Production uses the real `Bun.which`; a missing
|
|
286
|
+
* startCmd binary then surfaces the friendly missing-dependency UX +
|
|
287
|
+
* persists it to services.json.
|
|
288
|
+
*
|
|
289
|
+
* Defaulting policy mirrors `startSettleMs`: when a stub `spawner` is
|
|
290
|
+
* injected (the test path) `which` defaults to a permissive resolver
|
|
291
|
+
* (`() => "<stub>"`) so existing stub-spawner tests don't trip the preflight
|
|
292
|
+
* against binaries that aren't on the test host's PATH (`parachute-vault`,
|
|
293
|
+
* `notes-serve`). Production (no spawner override) gets the real `Bun.which`.
|
|
294
|
+
* Tests that want to exercise the missing-binary branch inject `which`
|
|
295
|
+
* explicitly (e.g. `which: () => null`).
|
|
296
|
+
*/
|
|
297
|
+
which?: (cmd: string) => string | null;
|
|
178
298
|
hub?: {
|
|
179
299
|
ensureRunning?: (opts: EnsureHubOpts) => Promise<EnsureHubResult>;
|
|
180
300
|
stop?: (opts: StopHubOpts) => Promise<boolean>;
|
|
301
|
+
/**
|
|
302
|
+
* Self-heal the operator token's stale `iss` after `start hub` (hub#481).
|
|
303
|
+
* Production opens hub.db at `<configDir>/hub.db` and delegates to
|
|
304
|
+
* `selfHealOperatorTokenIssuer`. Tests inject a stub to assert the call
|
|
305
|
+
* happens — or to make it throw and prove a self-heal failure never fails
|
|
306
|
+
* `start hub`.
|
|
307
|
+
*/
|
|
308
|
+
selfHealOperatorToken?: (args: {
|
|
309
|
+
issuer: string;
|
|
310
|
+
configDir: string;
|
|
311
|
+
log: (line: string) => void;
|
|
312
|
+
}) => Promise<OperatorIssuerHealStatus>;
|
|
181
313
|
};
|
|
182
314
|
}
|
|
183
315
|
|
|
@@ -193,9 +325,42 @@ interface Resolved {
|
|
|
193
325
|
killWaitMs: number;
|
|
194
326
|
pollIntervalMs: number;
|
|
195
327
|
startSettleMs: number;
|
|
328
|
+
portListening: PortListeningFn;
|
|
329
|
+
startReadyMs: number;
|
|
330
|
+
startReadyPollMs: number;
|
|
331
|
+
which: (cmd: string) => string | null;
|
|
196
332
|
hubOrigin: string | undefined;
|
|
197
333
|
ensureHub: (opts: EnsureHubOpts) => Promise<EnsureHubResult>;
|
|
198
334
|
stopHubFn: (opts: StopHubOpts) => Promise<boolean>;
|
|
335
|
+
selfHealOperatorTokenFn: (args: {
|
|
336
|
+
issuer: string;
|
|
337
|
+
configDir: string;
|
|
338
|
+
log: (line: string) => void;
|
|
339
|
+
}) => Promise<OperatorIssuerHealStatus>;
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
/**
 * Production implementation of the operator-token self-heal seam.
 *
 * Opens the hub database located via `hubDbPath(configDir)`, hands it to
 * `selfHealOperatorTokenIssuer` together with the issuer / config dir / logger,
 * and closes the handle again whether the heal succeeds or throws.
 *
 * @param args.issuer Issuer value forwarded to the heal routine.
 * @param args.configDir Config directory used to locate hub.db; also forwarded.
 * @param args.log Line logger forwarded to the heal routine.
 * @returns The status reported by `selfHealOperatorTokenIssuer`.
 */
async function defaultSelfHealOperatorToken(args: {
  issuer: string;
  configDir: string;
  log: (line: string) => void;
}): Promise<OperatorIssuerHealStatus> {
  const { issuer, configDir, log } = args;
  const db = openHubDb(hubDbPath(configDir));
  try {
    // Awaited inside the try so the handle is only closed once the heal has
    // actually finished.
    const status = await selfHealOperatorTokenIssuer(db, { issuer, configDir, log });
    return status;
  } finally {
    db.close();
  }
}
|
|
200
365
|
|
|
201
366
|
function resolve(opts: LifecycleOpts): Resolved {
|
|
@@ -219,9 +384,26 @@ function resolve(opts: LifecycleOpts): Resolved {
|
|
|
219
384
|
// override `alive`, which re-enables the default 250ms.
|
|
220
385
|
startSettleMs:
|
|
221
386
|
opts.startSettleMs ?? (opts.spawner === undefined || opts.alive !== undefined ? 250 : 0),
|
|
387
|
+
portListening: opts.portListening ?? defaultPortListening,
|
|
388
|
+
// Same defaulting policy as startSettleMs: production (no spawner
|
|
389
|
+
// override) gets the real 4s readiness window; tests that inject a stub
|
|
390
|
+
// spawner get 0 (skipped) unless they explicitly opt in via
|
|
391
|
+
// `portListening` or `startReadyMs`, so existing stub-spawner tests don't
|
|
392
|
+
// start probing a fake port.
|
|
393
|
+
startReadyMs:
|
|
394
|
+
opts.startReadyMs ??
|
|
395
|
+
(opts.spawner === undefined || opts.portListening !== undefined ? 4000 : 0),
|
|
396
|
+
startReadyPollMs: opts.startReadyPollMs ?? 200,
|
|
397
|
+
// Same defaulting policy as startSettleMs/startReadyMs: production (no
|
|
398
|
+
// spawner override) preflights with the real Bun.which; stub-spawner tests
|
|
399
|
+
// get a permissive resolver so the preflight doesn't trip against binaries
|
|
400
|
+
// that aren't on the test host's PATH. Explicit `which` always wins.
|
|
401
|
+
which:
|
|
402
|
+
opts.which ?? (opts.spawner === undefined ? Bun.which : () => "/stub/bin/preflight-skipped"),
|
|
222
403
|
hubOrigin: resolveHubOrigin(opts.hubOrigin, configDir),
|
|
223
404
|
ensureHub: opts.hub?.ensureRunning ?? ensureHubRunning,
|
|
224
405
|
stopHubFn: opts.hub?.stop ?? stopHub,
|
|
406
|
+
selfHealOperatorTokenFn: opts.hub?.selfHealOperatorToken ?? defaultSelfHealOperatorToken,
|
|
225
407
|
};
|
|
226
408
|
}
|
|
227
409
|
|
|
@@ -452,42 +634,185 @@ export async function start(svc: string | undefined, opts: LifecycleOpts = {}):
|
|
|
452
634
|
if (entry.installDir) spawnerOpts.cwd = entry.installDir;
|
|
453
635
|
const passOpts =
|
|
454
636
|
spawnerOpts.env !== undefined || spawnerOpts.cwd !== undefined ? spawnerOpts : undefined;
|
|
637
|
+
|
|
638
|
+
// Pre-flight the startCmd binary (`@openparachute/depcheck`) so a missing
|
|
639
|
+
// executable surfaces the friendly install UX inline AND is persisted onto
|
|
640
|
+
// the services.json row, so a *later* `parachute status` (a separate
|
|
641
|
+
// invocation that only reads the manifest) + the SPA modules pane show
|
|
642
|
+
// "vault: failed to start — parachute-vault not installed" with install
|
|
643
|
+
// info, rather than a bare "failed"/orphan-timeout. The binary is `cmd[0]`
|
|
644
|
+
// (e.g. `parachute-vault` for an npm install, `bun` for a bun-linked one).
|
|
645
|
+
const startBinary = cmd[0];
|
|
646
|
+
if (startBinary) {
|
|
647
|
+
try {
|
|
648
|
+
ensureExecutable(startBinary, { which: r.which });
|
|
649
|
+
} catch (err) {
|
|
650
|
+
if (err instanceof MissingDependencyError) {
|
|
651
|
+
failures++;
|
|
652
|
+
r.log(`✗ ${short} failed to start:`);
|
|
653
|
+
for (const line of err.message.split("\n")) r.log(` ${line}`);
|
|
654
|
+
recordStartError(entry.name, err.toWire(), r.manifestPath);
|
|
655
|
+
continue;
|
|
656
|
+
}
|
|
657
|
+
throw err;
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
|
|
455
661
|
let pid: number;
|
|
456
662
|
try {
|
|
457
663
|
pid = r.spawner.spawn(cmd, logFile, passOpts);
|
|
458
664
|
} catch (err) {
|
|
665
|
+
// Belt-and-suspenders: a missing binary that slipped past the pre-flight
|
|
666
|
+
// (race) still becomes a MissingDependencyError via rethrowIfMissing.
|
|
667
|
+
if (startBinary) {
|
|
668
|
+
try {
|
|
669
|
+
rethrowIfMissing(err, startBinary);
|
|
670
|
+
} catch (missing) {
|
|
671
|
+
if (missing instanceof MissingDependencyError) {
|
|
672
|
+
failures++;
|
|
673
|
+
r.log(`✗ ${short} failed to start:`);
|
|
674
|
+
for (const line of missing.message.split("\n")) r.log(` ${line}`);
|
|
675
|
+
recordStartError(entry.name, missing.toWire(), r.manifestPath);
|
|
676
|
+
continue;
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
}
|
|
459
680
|
failures++;
|
|
460
681
|
const msg = err instanceof Error ? err.message : String(err);
|
|
461
682
|
r.log(`✗ ${short} failed to start: ${msg}`);
|
|
462
683
|
continue;
|
|
463
684
|
}
|
|
685
|
+
// A successful spawn clears any stale start-error recorded from a prior
|
|
686
|
+
// missing-dependency failure so `parachute status` doesn't keep showing it.
|
|
687
|
+
clearStartError(entry.name, r.manifestPath);
|
|
464
688
|
writePid(short, pid, r.configDir);
|
|
465
689
|
|
|
466
|
-
//
|
|
467
|
-
//
|
|
468
|
-
//
|
|
469
|
-
//
|
|
470
|
-
//
|
|
471
|
-
//
|
|
690
|
+
// Boot-readiness gating (hub#194 + hub#487). A spawn returning a pid only
|
|
691
|
+
// proves the kernel forked the process — it says nothing about whether the
|
|
692
|
+
// service survived its boot or bound its port. Two silent-start shapes:
|
|
693
|
+
//
|
|
694
|
+
// (1) spawn-then-immediately-die (hub#194): the child throws before
|
|
695
|
+
// listening (notes-serve's Bun.resolveSync failing for bun-linked
|
|
696
|
+
// installs) and exits microseconds later. Caught by the settle below.
|
|
697
|
+
//
|
|
698
|
+
// (2) alive-but-never-bound (hub#487): the port is already held by an
|
|
699
|
+
// orphan, the child hits EADDRINUSE, but its process *lingers* (or a
|
|
700
|
+
// supervisor retries) long enough to clear the liveness check. `start`
|
|
701
|
+
// would report "✓ started" while `parachute status` shows it inactive
|
|
702
|
+
// because nothing answers on the port. Aaron hit exactly this with an
|
|
703
|
+
// orphan holding vault's 1940 on a fresh EC2 box. Caught by the
|
|
704
|
+
// port-readiness poll below.
|
|
705
|
+
//
|
|
706
|
+
// On any failure we surface the tail of the logfile so the operator sees
|
|
707
|
+
// the real boot error inline, and we specifically call out EADDRINUSE with
|
|
708
|
+
// the `lsof -ti:<port>` remedy.
|
|
709
|
+
const reportStartFailure = (reason: string): void => {
|
|
710
|
+
clearPid(short, r.configDir);
|
|
711
|
+
failures++;
|
|
712
|
+
const tail = readLogTail(logFile, 20);
|
|
713
|
+
if (detectAddrInUse(tail)) {
|
|
714
|
+
r.log(
|
|
715
|
+
`✗ ${short} failed to start: port ${entry.port} is already in use. Stop the existing process first — find it with \`lsof -ti:${entry.port}\` (then \`kill <pid>\`), or run \`parachute restart ${short}\`.`,
|
|
716
|
+
);
|
|
717
|
+
} else {
|
|
718
|
+
r.log(`✗ ${short} failed to start: ${reason}`);
|
|
719
|
+
}
|
|
720
|
+
if (tail.length > 0) {
|
|
721
|
+
r.log(` ── last ${tail.length} log line(s) (${logFile}) ──`);
|
|
722
|
+
for (const line of tail) r.log(` │ ${line}`);
|
|
723
|
+
} else {
|
|
724
|
+
r.log(` Tail the log for details: tail -50 ${logFile}`);
|
|
725
|
+
}
|
|
726
|
+
};
|
|
727
|
+
|
|
472
728
|
if (r.startSettleMs > 0) {
|
|
473
729
|
await r.sleep(r.startSettleMs);
|
|
474
730
|
if (!r.alive(pid)) {
|
|
475
|
-
|
|
476
|
-
|
|
731
|
+
reportStartFailure(
|
|
732
|
+
`spawned pid ${pid} but the process exited within ${r.startSettleMs}ms.`,
|
|
733
|
+
);
|
|
734
|
+
continue;
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
// Port-readiness poll (hub#487). The process is alive; now confirm it
|
|
739
|
+
// actually bound its port before claiming success. Poll up to
|
|
740
|
+
// `startReadyMs`, re-checking liveness each iteration so a *later* death
|
|
741
|
+
// (e.g. a slow EADDRINUSE crash) is still reported as a failure. A process
|
|
742
|
+
// that stays alive but never binds within the window gets a non-fatal
|
|
743
|
+
// warning rather than a hard failure — some daemons legitimately do slow
|
|
744
|
+
// boot work, and we'd rather not flip a healthy-but-slow start to red.
|
|
745
|
+
if (r.startReadyMs > 0) {
|
|
746
|
+
const deadline = r.now() + r.startReadyMs;
|
|
747
|
+
let listening = false;
|
|
748
|
+
let died = false;
|
|
749
|
+
while (r.now() < deadline) {
|
|
750
|
+
if (!r.alive(pid)) {
|
|
751
|
+
died = true;
|
|
752
|
+
break;
|
|
753
|
+
}
|
|
754
|
+
if (await r.portListening(entry.port)) {
|
|
755
|
+
listening = true;
|
|
756
|
+
break;
|
|
757
|
+
}
|
|
758
|
+
await r.sleep(r.startReadyPollMs);
|
|
759
|
+
}
|
|
760
|
+
if (died) {
|
|
761
|
+
reportStartFailure(`spawned pid ${pid} but the process exited during startup.`);
|
|
762
|
+
continue;
|
|
763
|
+
}
|
|
764
|
+
if (!listening) {
|
|
765
|
+
// Last-chance liveness check — the loop may have exited on the
|
|
766
|
+
// deadline right as the process died.
|
|
767
|
+
if (!r.alive(pid)) {
|
|
768
|
+
reportStartFailure(`spawned pid ${pid} but the process exited during startup.`);
|
|
769
|
+
continue;
|
|
770
|
+
}
|
|
477
771
|
r.log(
|
|
478
|
-
|
|
772
|
+
`⚠ ${short} started (pid ${pid}) but port ${entry.port} isn't accepting connections yet after ${r.startReadyMs}ms.`,
|
|
479
773
|
);
|
|
480
|
-
r.log(
|
|
774
|
+
r.log(
|
|
775
|
+
` It may still be coming up — check \`parachute status\` and \`parachute logs ${short}\`.`,
|
|
776
|
+
);
|
|
777
|
+
if (r.hubOrigin) r.log(` ${HUB_ORIGIN_ENV}=${r.hubOrigin}`);
|
|
778
|
+
if (short === "vault") persistVaultHubOriginForStart(r);
|
|
481
779
|
continue;
|
|
482
780
|
}
|
|
483
781
|
}
|
|
484
782
|
|
|
485
783
|
r.log(`✓ ${short} started (pid ${pid}); logs: ${logFile}`);
|
|
486
784
|
if (r.hubOrigin) r.log(` ${HUB_ORIGIN_ENV}=${r.hubOrigin}`);
|
|
785
|
+
if (short === "vault") persistVaultHubOriginForStart(r);
|
|
487
786
|
}
|
|
488
787
|
return failures === 0 ? 0 : 1;
|
|
489
788
|
}
|
|
490
789
|
|
|
790
|
+
/**
 * Persist vault's `PARACHUTE_HUB_ORIGIN` as part of a vault `start`. Two
 * steps run, in this order:
 *
 *   1. When the resolved spawn origin (`r.hubOrigin`) is set, hand it to
 *      `persistVaultHubOrigin`. This is the long-standing happy path: an
 *      exposure is live, and the launchd / systemd daemon (which boots vault
 *      out-of-band and never sees this spawn env) needs the value in `.env`
 *      to validate hub-minted JWTs' `iss`. `persistVaultHubOrigin` skips
 *      loopback / unchanged values itself.
 *
 *   2. Always run `selfHealVaultHubOrigin` against
 *      `<configDir>/expose-state.json`. Even when `r.hubOrigin` resolved to
 *      loopback or undefined (e.g. the hub.port file outran the expose-state
 *      read, or this is a bare `restart vault` on a deploy whose `.env` was
 *      never written), this consults the expose state directly and writes the
 *      public origin when vault's persisted value is unset / loopback. That is
 *      what lets an EXISTING broken Cloudflare deploy self-correct on the next
 *      `parachute restart vault`, not only fresh exposes.
 *
 * Step 1 covers the override / freshly-resolved path; step 2 catches the gap
 * the Cloudflare 401 P0 fell through. See `vault-hub-origin-env.ts`.
 */
function persistVaultHubOriginForStart(r: Resolved): void {
  const { configDir, hubOrigin, log } = r;
  if (hubOrigin) {
    persistVaultHubOrigin(configDir, hubOrigin, log);
  }
  const exposeStatePath = join(configDir, "expose-state.json");
  selfHealVaultHubOrigin(configDir, log, exposeStatePath);
}
|
|
815
|
+
|
|
491
816
|
export async function stop(svc: string | undefined, opts: LifecycleOpts = {}): Promise<number> {
|
|
492
817
|
const r = resolve(opts);
|
|
493
818
|
if (svc === HUB_SVC) return stopHubSvc(r);
|
|
@@ -567,6 +892,12 @@ async function startHubSvc(r: Resolved): Promise<number> {
|
|
|
567
892
|
} else {
|
|
568
893
|
r.log(`hub already running (pid ${result.pid}) on port ${result.port}.`);
|
|
569
894
|
}
|
|
895
|
+
// Self-heal a stale operator-token issuer (hub#481). Runs whether the hub
|
|
896
|
+
// was freshly started OR already running — a token stamped at loopback
|
|
897
|
+
// before exposure must heal even when the hub is already up. The loopback /
|
|
898
|
+
// provenance guards live inside `selfHealOperatorTokenIssuer`, so the only
|
|
899
|
+
// gate here is "is there a real issuer to heal toward?".
|
|
900
|
+
await selfHealOperatorTokenOnStart(r);
|
|
570
901
|
return 0;
|
|
571
902
|
} catch (err) {
|
|
572
903
|
r.log(`✗ hub failed to start: ${err instanceof Error ? err.message : String(err)}`);
|
|
@@ -574,6 +905,36 @@ async function startHubSvc(r: Resolved): Promise<number> {
|
|
|
574
905
|
}
|
|
575
906
|
}
|
|
576
907
|
|
|
908
|
+
/**
 * After `start hub`, re-issue the operator token under the hub's current
 * origin if its `iss` went stale following an init-at-loopback → expose
 * transition (hub#481). Deliberately quiet: one line is logged only when a
 * rotation actually happened; every other outcome is silent.
 *
 * Nothing in here may block or fail `start hub`. The whole heal — including
 * reading its result and logging — sits inside one try/catch, so a db-open
 * error, a corrupt token, or anything else degrades to a short note.
 *
 * @param r Resolved lifecycle options; no-op when `r.hubOrigin` is unset.
 */
async function selfHealOperatorTokenOnStart(r: Resolved): Promise<void> {
  const issuer = r.hubOrigin;
  // No origin to heal toward — nothing to do.
  if (!issuer) return;

  try {
    const outcome = await r.selfHealOperatorTokenFn({
      issuer,
      configDir: r.configDir,
      log: r.log,
    });
    if (outcome.kind !== "rotated") return;
    r.log(` refreshed operator.token issuer → ${issuer} (was stale after exposure)`);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    r.log(` note: operator.token issuer self-heal skipped (${detail})`);
  }
}
|
|
937
|
+
|
|
577
938
|
/**
|
|
578
939
|
* Stop the internal hub. `stopHub` returns false when nothing was running
|
|
579
940
|
* (no pidfile, or stale pidfile cleared) — that's a clean no-op for the
|
|
@@ -659,11 +1020,19 @@ export async function logs(svc: string, opts: LogsOpts = {}): Promise<number> {
|
|
|
659
1020
|
spawn(cmd) {
|
|
660
1021
|
// Inherit env so `tail` sees PATH, etc. Bun.spawn defaults to empty
|
|
661
1022
|
// env — see api-modules-ops.ts:defaultRun.
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
1023
|
+
try {
|
|
1024
|
+
const proc = Bun.spawn([...cmd], {
|
|
1025
|
+
stdio: ["ignore", "inherit", "inherit"],
|
|
1026
|
+
env: process.env,
|
|
1027
|
+
});
|
|
1028
|
+
return proc.pid;
|
|
1029
|
+
} catch (err) {
|
|
1030
|
+
// A missing `tail` (minimal container without coreutils) surfaces
|
|
1031
|
+
// the friendly install UX instead of a raw spawn throw. The CLI
|
|
1032
|
+
// top-level catch in cli.ts renders the MissingDependencyError.
|
|
1033
|
+
rethrowIfMissing(err, "tail");
|
|
1034
|
+
throw err;
|
|
1035
|
+
}
|
|
667
1036
|
},
|
|
668
1037
|
};
|
|
669
1038
|
spawner.spawn(["tail", "-n", String(lines), "-f", path], path);
|