@openparachute/hub 0.6.2 → 0.6.3-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,17 +33,18 @@
33
33
  */
34
34
 
35
35
  import { spawnSync } from "node:child_process";
36
+ import { join } from "node:path";
37
+ import { fileURLToPath } from "node:url";
36
38
  import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
37
39
  import { type ExposeState, readExposeState } from "../expose-state.ts";
38
- import {
39
- type EnsureHubOpts,
40
- HUB_DEFAULT_PORT,
41
- HUB_SVC,
42
- ensureHubRunning,
43
- readHubPort,
44
- } from "../hub-control.ts";
40
+ import { type EnsureHubOpts, HUB_DEFAULT_PORT, HUB_SVC, readHubPort } from "../hub-control.ts";
41
+ import { hubDbPath, openHubDb } from "../hub-db.ts";
42
+ import { deriveHubOrigin } from "../hub-origin.ts";
43
+ import { ensureHubUnit, installAndStartHubUnit } from "../hub-unit.ts";
44
+ import { issueOperatorToken, readOperatorTokenFile } from "../operator-token.ts";
45
45
  import { type AliveFn, defaultAlive, processState } from "../process-state.ts";
46
46
  import { findService, readManifestLenient } from "../services-manifest.ts";
47
+ import { listUsers } from "../users.ts";
47
48
  import { type InstallOpts, install as defaultInstall } from "./install.ts";
48
49
 
49
50
  /** The three options the exposure prompt offers — also the `--expose` flag's domain. */
@@ -52,6 +53,17 @@ export type ExposeChoice = "none" | "tailnet" | "cloudflare";
52
53
  /** Where to continue setup after init finishes. CLI walks prompts in the terminal; browser opens /admin/setup. */
53
54
  export type WizardChoice = "browser" | "cli";
54
55
 
56
+ /**
57
+ * Outcome of the post-bringup operator-token guarantee (design §3.1):
58
+ * - `minted` — no token on disk + a hub user existed → minted + wrote one.
59
+ * - `present` — a token already existed on disk → left it alone.
60
+ * - `no-user` — no token + no hub user yet (fresh box pre-wizard). NOT an
61
+ * error; `auth set-password` / `auth rotate-operator` writes it later.
62
+ * - `mint-failed`— a mint was attempted but failed (DB unavailable, etc.);
63
+ * non-fatal — the wizard / `auth rotate-operator` can retry.
64
+ */
65
+ export type OperatorTokenGuaranteeStatus = "minted" | "present" | "no-user" | "mint-failed";
66
+
55
67
  export interface InitOpts {
56
68
  configDir?: string;
57
69
  manifestPath?: string;
@@ -59,10 +71,28 @@ export interface InitOpts {
59
71
  /** Test seam: `processState` liveness check. */
60
72
  alive?: AliveFn;
61
73
  /**
62
- * Test seam: `ensureHubRunning` shim. Production uses the real one;
63
- * tests pass a stub that records calls without spawning.
74
+ * Hub-bringup shim. Phase 3a cutover: production now INSTALLS + STARTS the
75
+ * hub *unit* (launchd on Mac, systemd on Linux) via `installAndStartHubUnit`
76
+ * and waits for readiness (see `defaultEnsureHubViaUnit`) — it no longer
77
+ * spawns a detached `bun hub-server.ts`. The return shape (`{ pid, port, started }`) is
78
+ * preserved so the downstream init flow (URL resolution, wizard hand-off) is
79
+ * unchanged; `pid` is `0` on the unit path (a unit-managed hub has no
80
+ * pidfile). Tests pass a stub that records the call without touching the OS.
81
+ * Design §3.3 (init row), §4.1/§4.2, appendix (c).
64
82
  */
65
83
  ensureHub?: (opts: EnsureHubOpts) => Promise<{ pid: number; port: number; started: boolean }>;
84
+ /**
85
+ * Test seam: guarantee an operator token exists once the hub is up (design
86
+ * §3.1 / §3.3). Production reads `operator.token`; if absent AND a hub user
87
+ * already exists, it mints + writes one so a later per-module verb never
88
+ * 401s. Returns a short status so init can log what happened. Tests stub it
89
+ * to assert the mint-when-absent / skip-when-present behavior without a DB.
90
+ */
91
+ guaranteeOperatorToken?: (ctx: {
92
+ configDir: string;
93
+ hubPort: number;
94
+ log: (line: string) => void;
95
+ }) => Promise<OperatorTokenGuaranteeStatus>;
66
96
  /** Test seam: expose-state reader. */
67
97
  readExposeStateFn?: () => ExposeState | undefined;
68
98
  /** Test seam: TTY check (production reads `process.stdin.isTTY`). */
@@ -244,6 +274,161 @@ async function defaultExposeCloudflare(): Promise<number> {
244
274
  return await exposePublicInteractive({ preselect: "cloudflare" });
245
275
  }
246
276
 
/**
 * Locate the hub CLI entry point (`cli.ts`) on disk.
 *
 * The path is resolved relative to this module's own URL (`import.meta.url`),
 * one directory above this file, and converted from a `file:` URL to a
 * filesystem path. It is handed to the hub unit installer as the `cliPath`
 * the unit should run.
 *
 * @returns The absolute filesystem path of `../cli.ts` relative to this module.
 */
function defaultHubCliPath(): string {
  const cliUrl = new URL("../cli.ts", import.meta.url);
  return fileURLToPath(cliUrl);
}
288
+
/**
 * Production hub bring-up for the Phase 3a cutover: make sure the hub is
 * running as a managed *unit*, never as a detached child process.
 *
 * Flow:
 *   1. Ask `ensureHubUnit` first (the lighter path). Its outcome decides:
 *      - `already-up` → nothing to do; report `started: false`.
 *      - `started`    → an existing unit was started; report `started: true`.
 *      - `no-manager` → this host cannot run a unit; throw with the messages
 *                       `ensureHubUnit` supplied. No detached-spawn fallback.
 *   2. Any other outcome falls through to `installAndStartHubUnit`, which is
 *      given the resolved config dir as `parachuteHome` and this package's
 *      `cli.ts` as the entry point.
 *   3. If that install does not end in `started`, throw with its messages
 *      (or a generic fallback naming the outcome when there are none).
 *
 * @param opts Bring-up options; `configDir`, `startPort` and `log` are read,
 *   defaulting to `CONFIG_DIR`, `HUB_DEFAULT_PORT` and a no-op logger.
 * @returns `{ pid, port, started }`. `pid` is always `0` on this path, which
 *   never learns a pid for the unit-managed hub.
 * @throws Error when no service manager is available or the unit could not be
 *   brought up.
 */
async function defaultEnsureHubViaUnit(opts: EnsureHubOpts): Promise<{
  pid: number;
  port: number;
  started: boolean;
}> {
  const parachuteHome = opts.configDir ?? CONFIG_DIR;
  const port = opts.startPort ?? HUB_DEFAULT_PORT;
  const log = opts.log ?? (() => {});

  // Lighter path first so a re-run of `init` does not reinstall the unit when
  // the hub is already answering or an installed unit merely needs starting.
  const ensured = await ensureHubUnit({ port, log });
  switch (ensured.outcome) {
    case "already-up":
      return { pid: 0, port: ensured.port, started: false };
    case "started":
      return { pid: 0, port: ensured.port, started: true };
    case "no-manager":
      // Nothing can host a unit here; surface the guidance and bail.
      throw new Error(ensured.messages.join("\n"));
    default:
      // Every remaining outcome gets a fresh install + start attempt below.
      break;
  }

  const installed = await installAndStartHubUnit({
    // The operator's resolved config dir, not the hard-coded default.
    parachuteHome,
    cliPath: defaultHubCliPath(),
    port,
    log,
  });

  // Anything short of `started` is a failure the caller reports to the user.
  if (installed.outcome !== "started") {
    throw new Error(
      installed.messages.join("\n") || `hub unit bringup failed (${installed.outcome}).`,
    );
  }
  return { pid: 0, port: installed.port, started: true };
}
360
+
/**
 * Pick the issuer (hub origin) under which init mints the operator token.
 *
 * Precedence:
 *   1. `hubOrigin` recorded in `<configDir>/expose-state.json`, when present.
 *   2. Whatever `deriveHubOrigin` computes from the recorded canonical FQDN
 *      (if any) and the hub port.
 *   3. The loopback origin `http://127.0.0.1:<hubPort>`.
 *
 * @param configDir Directory that holds `expose-state.json`.
 * @param hubPort Port the hub listens on.
 * @returns The issuer origin string.
 */
function resolveInitIssuer(configDir: string, hubPort: number): string {
  const exposeStatePath = join(configDir, "expose-state.json");
  const exposeState = readExposeState(exposeStatePath);

  const recordedOrigin = exposeState?.hubOrigin;
  if (recordedOrigin) return recordedOrigin;

  const derivedOrigin = deriveHubOrigin({ exposeFqdn: exposeState?.canonicalFqdn, hubPort });
  return derivedOrigin ?? `http://127.0.0.1:${hubPort}`;
}
375
+
/**
 * Production operator-token guarantee (design §3.1 / §3.3). Makes sure an
 * `operator.token` exists on disk once the hub is up, when that is possible:
 *
 *   - Token already on disk → leave it untouched (`present`).
 *   - No token + at least one hub user → mint one for the first listed user
 *     and write it under `ctx.configDir` (`minted`).
 *   - No token + no hub user yet (fresh box, init runs before first-admin is
 *     created) → `no-user`. NOT an error. Nothing here writes the on-disk
 *     token in that case; today it is written only by `parachute auth
 *     set-password` / `auth rotate-operator`.
 *   - Opening the hub DB, listing users, or minting failed → log a warning and
 *     return `mint-failed`. Non-fatal: `auth rotate-operator` can retry.
 *
 * @param ctx.configDir Directory holding the operator token file and hub DB.
 * @param ctx.hubPort Hub port, used to resolve the token issuer.
 * @param ctx.log Sink for the one-line success / warning message.
 * @returns What happened, as an `OperatorTokenGuaranteeStatus`.
 */
async function defaultGuaranteeOperatorToken(ctx: {
  configDir: string;
  hubPort: number;
  log: (line: string) => void;
}): Promise<OperatorTokenGuaranteeStatus> {
  const existing = await readOperatorTokenFile(ctx.configDir);
  if (existing) return "present";

  // Open the DB INSIDE the try: "DB unavailable" is a documented non-fatal
  // case, so an open failure must become `mint-failed`, not an exception that
  // aborts init.
  let db: ReturnType<typeof openHubDb> | undefined;
  try {
    db = openHubDb(hubDbPath(ctx.configDir));
    const owner = listUsers(db)[0];
    if (!owner) {
      // Fresh box: no first-admin yet, so there is nobody to mint for.
      // Nothing to do here, and definitely not an error.
      return "no-user";
    }
    const issued = await issueOperatorToken(db, owner.id, {
      dir: ctx.configDir,
      issuer: resolveInitIssuer(ctx.configDir, ctx.hubPort),
    });
    ctx.log(`✓ Operator token written to ${issued.path} (mode 0600).`);
    return "minted";
  } catch (err) {
    ctx.log(
      `⚠ Couldn't mint an operator token (${
        err instanceof Error ? err.message : String(err)
      }); run \`parachute auth rotate-operator\` later if a CLI command reports a missing token.`,
    );
    return "mint-failed";
  } finally {
    // `db` is still undefined when the open itself failed.
    db?.close();
  }
}
431
+
247
432
  /**
248
433
  * Default impl for the vault-module install step (hub#168 Cut 1). Calls
249
434
  * install("vault", { noCreate: true, noStart: true, …}) with a quiet log
@@ -347,7 +532,11 @@ export async function init(opts: InitOpts = {}): Promise<number> {
347
532
  const manifestPath = opts.manifestPath ?? SERVICES_MANIFEST_PATH;
348
533
  const log = opts.log ?? ((line) => console.log(line));
349
534
  const alive = opts.alive ?? defaultAlive;
350
- const ensureHub = opts.ensureHub ?? ensureHubRunning;
535
+ // Phase 3a cutover: production installs + starts the hub UNIT (not a detached
536
+ // spawn). The `ensureHub` seam is preserved for tests (and the return shape is
537
+ // unchanged); only the production default flipped.
538
+ const ensureHub = opts.ensureHub ?? defaultEnsureHubViaUnit;
539
+ const guaranteeOperatorToken = opts.guaranteeOperatorToken ?? defaultGuaranteeOperatorToken;
351
540
  const readExposeStateFn = opts.readExposeStateFn ?? (() => readExposeState());
352
541
  const isTty = opts.isTty ?? Boolean(process.stdin.isTTY && process.stdout.isTTY);
353
542
  const prompt = opts.prompt ?? defaultPrompt;
@@ -363,17 +552,32 @@ export async function init(opts: InitOpts = {}): Promise<number> {
363
552
  log("");
364
553
 
365
554
  // Step 1: hub running?
555
+ // NB: under the Phase 3a unit-managed hub there is no pidfile, so
556
+ // `processState(HUB_SVC)` reports not-running on EVERY init re-run even when
557
+ // the hub is live. We therefore don't decide the "already running" message
558
+ // from `processState` here — `ensureHub` probes `/health` and reports the
559
+ // truth via `result.started` (false ⇒ already up, true ⇒ we started it). Only
560
+ // when `processState` finds a real (legacy detached) pidfile do we report the
561
+ // pid directly without a bringup call.
366
562
  const hubState = processState(HUB_SVC, configDir, alive);
367
563
  let hubPort: number | undefined;
368
564
  if (hubState.status === "running") {
369
565
  hubPort = readHubPort(configDir);
370
566
  log(`✓ Hub already running (pid ${hubState.pid}${hubPort ? `, port ${hubPort}` : ""}).`);
371
567
  } else {
372
- log("Hub not running — starting it now…");
373
568
  try {
374
569
  const result = await ensureHub({ configDir, log: () => {} });
375
570
  hubPort = result.port;
376
- log(`✓ Hub started (pid ${result.pid}, port ${result.port}).`);
571
+ if (result.started) {
572
+ // Genuinely installed/started the unit. A unit-managed hub has no
573
+ // meaningful CLI-visible pid, so report only the port (no misleading
574
+ // `pid 0` sentinel).
575
+ log(`✓ Hub unit started (port ${result.port}).`);
576
+ } else {
577
+ // The hub was already answering `/health` — `ensureHub` touched
578
+ // nothing. Honest re-run messaging: no "starting it now", no `pid 0`.
579
+ log(`✓ Hub already running (port ${result.port}).`);
580
+ }
377
581
  } catch (err) {
378
582
  log(`✗ Hub failed to start: ${err instanceof Error ? err.message : String(err)}`);
379
583
  log("");
@@ -389,6 +593,15 @@ export async function init(opts: InitOpts = {}): Promise<number> {
389
593
  // overridden, so the fallback is almost always correct.
390
594
  if (hubPort === undefined) hubPort = HUB_DEFAULT_PORT;
391
595
 
596
+ // Step 1.5: guarantee an operator token exists (design §3.1 / §3.3). Under
597
+ // the unified model every per-module verb is an authenticated module-ops
598
+ // call, so the steady-state operator needs an `operator.token` on disk — the
599
+ // mint-on-init guarantee closes the bootstrap so a later verb never 401s.
600
+ // On a fresh box (no first-admin yet) this is a no-op (`no-user`): the on-disk
601
+ // token is written later by `parachute auth set-password` / `auth rotate-operator`.
602
+ // Non-fatal either way — init continues to the wizard regardless.
603
+ await guaranteeOperatorToken({ configDir, hubPort, log });
604
+
392
605
  // Step 2: exposure chain. Skipped when already exposed, in non-TTY,
393
606
  // or when --no-expose-prompt was passed. `--expose <choice>` jumps
394
607
  // straight to the corresponding chain without asking.