@openparachute/hub 0.5.14-rc.9 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/README.md +23 -0
  2. package/package.json +7 -3
  3. package/src/__tests__/account-home-ui.test.ts +251 -15
  4. package/src/__tests__/account-vault-token.test.ts +355 -0
  5. package/src/__tests__/admin-vaults.test.ts +70 -4
  6. package/src/__tests__/api-mint-token.test.ts +30 -21
  7. package/src/__tests__/api-modules-ops.test.ts +45 -0
  8. package/src/__tests__/api-users.test.ts +7 -2
  9. package/src/__tests__/auth.test.ts +157 -30
  10. package/src/__tests__/cli.test.ts +44 -5
  11. package/src/__tests__/expose-2fa-warning.test.ts +31 -17
  12. package/src/__tests__/expose-auth-preflight.test.ts +71 -72
  13. package/src/__tests__/expose-cloudflare.test.ts +482 -14
  14. package/src/__tests__/expose.test.ts +52 -2
  15. package/src/__tests__/hub-server.test.ts +97 -0
  16. package/src/__tests__/hub.test.ts +85 -6
  17. package/src/__tests__/init.test.ts +102 -1
  18. package/src/__tests__/lifecycle.test.ts +464 -2
  19. package/src/__tests__/oauth-handlers.test.ts +1252 -83
  20. package/src/__tests__/oauth-ui.test.ts +12 -1
  21. package/src/__tests__/operator-token-issuer-self-heal.test.ts +412 -0
  22. package/src/__tests__/resource-binding.test.ts +97 -0
  23. package/src/__tests__/scope-explanations.test.ts +41 -12
  24. package/src/__tests__/services-manifest.test.ts +122 -4
  25. package/src/__tests__/setup-wizard.test.ts +335 -15
  26. package/src/__tests__/status.test.ts +36 -0
  27. package/src/__tests__/two-factor-flow.test.ts +602 -0
  28. package/src/__tests__/two-factor.test.ts +183 -0
  29. package/src/__tests__/upgrade.test.ts +78 -1
  30. package/src/__tests__/users.test.ts +68 -0
  31. package/src/__tests__/vault-auth-status.test.ts +47 -6
  32. package/src/__tests__/vault-hub-origin-env.test.ts +263 -0
  33. package/src/account-home-ui.ts +488 -38
  34. package/src/account-vault-token.ts +282 -0
  35. package/src/admin-handlers.ts +159 -4
  36. package/src/admin-login-ui.ts +49 -5
  37. package/src/admin-vaults.ts +48 -15
  38. package/src/api-account.ts +14 -0
  39. package/src/api-modules-ops.ts +49 -11
  40. package/src/api-users.ts +29 -3
  41. package/src/cli.ts +26 -21
  42. package/src/clients.ts +18 -6
  43. package/src/cloudflare/config.ts +10 -4
  44. package/src/cloudflare/detect.ts +39 -44
  45. package/src/commands/auth.ts +165 -24
  46. package/src/commands/expose-2fa-warning.ts +34 -32
  47. package/src/commands/expose-auth-preflight.ts +89 -78
  48. package/src/commands/expose-cloudflare.ts +370 -12
  49. package/src/commands/expose.ts +8 -0
  50. package/src/commands/init.ts +33 -2
  51. package/src/commands/lifecycle.ts +386 -17
  52. package/src/commands/status.ts +22 -0
  53. package/src/commands/upgrade.ts +55 -11
  54. package/src/commands/wizard.ts +8 -4
  55. package/src/env-file.ts +10 -0
  56. package/src/help.ts +3 -1
  57. package/src/hub-db.ts +39 -1
  58. package/src/hub-server.ts +52 -0
  59. package/src/hub.ts +82 -14
  60. package/src/oauth-handlers.ts +298 -21
  61. package/src/oauth-ui.ts +10 -0
  62. package/src/operator-token.ts +151 -0
  63. package/src/pending-login.ts +116 -0
  64. package/src/rate-limit.ts +51 -0
  65. package/src/resource-binding.ts +134 -0
  66. package/src/scope-explanations.ts +46 -18
  67. package/src/services-manifest.ts +112 -0
  68. package/src/setup-wizard.ts +77 -7
  69. package/src/tailscale/run.ts +28 -11
  70. package/src/totp.ts +201 -0
  71. package/src/two-factor-handlers.ts +287 -0
  72. package/src/two-factor-store.ts +181 -0
  73. package/src/two-factor-ui.ts +462 -0
  74. package/src/users.ts +58 -0
  75. package/src/vault/auth-status.ts +71 -19
  76. package/src/vault-hub-origin-env.ts +163 -0
  77. package/web/ui/dist/assets/index-BiBlvEaj.css +1 -0
  78. package/web/ui/dist/assets/index-CIN3mnmf.js +61 -0
  79. package/web/ui/dist/index.html +2 -2
  80. package/src/__tests__/vault-tokens-create-interactive.test.ts +0 -183
  81. package/src/commands/vault-tokens-create-interactive.ts +0 -143
  82. package/web/ui/dist/assets/index-7DtAXz7y.css +0 -1
  83. package/web/ui/dist/assets/index-tRmPbbC7.js +0 -61
@@ -1,3 +1,4 @@
1
+ import { spawnSync } from "node:child_process";
1
2
  import { mkdirSync, openSync } from "node:fs";
2
3
  import { dirname } from "node:path";
3
4
  import { DEFAULT_TUNNEL_NAME, cloudflaredPathsFor, writeConfig } from "../cloudflare/config.ts";
@@ -27,6 +28,12 @@ import {
27
28
  routeDns,
28
29
  } from "../cloudflare/tunnel.ts";
29
30
  import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "../config.ts";
31
+ import {
32
+ EXPOSE_STATE_PATH,
33
+ type ExposeState,
34
+ clearExposeState,
35
+ writeExposeState,
36
+ } from "../expose-state.ts";
30
37
  import {
31
38
  type EnsureHubOpts,
32
39
  HUB_DEFAULT_PORT,
@@ -34,12 +41,14 @@ import {
34
41
  readHubPort,
35
42
  } from "../hub-control.ts";
36
43
  import { deriveHubOrigin } from "../hub-origin.ts";
37
- import { type AliveFn, defaultAlive } from "../process-state.ts";
44
+ import { type AliveFn, defaultAlive, processState } from "../process-state.ts";
38
45
  import { readManifest } from "../services-manifest.ts";
39
46
  import { type Runner, defaultRunner } from "../tailscale/run.ts";
47
+ import { persistVaultHubOrigin } from "../vault-hub-origin-env.ts";
40
48
  import type { VaultAuthStatus } from "../vault/auth-status.ts";
41
49
  import { WELL_KNOWN_DIR } from "../well-known.ts";
42
50
  import { printPublic2FAWarning } from "./expose-2fa-warning.ts";
51
+ import { restart } from "./lifecycle.ts";
43
52
 
44
53
  const AUTH_DOC_URL =
45
54
  "https://github.com/ParachuteComputer/parachute-vault/blob/main/docs/auth-model.md";
@@ -94,14 +103,171 @@ const defaultKill: KillFn = (pid, signal) => {
94
103
  process.kill(pid, signal);
95
104
  };
96
105
 
106
/**
 * Find the PIDs of every running `cloudflared` connector serving THIS tunnel.
 * "This tunnel" is identified by either the tunnel UUID or the config.yml path
 * appearing on the process command line — both are unique to Parachute's
 * connector for this tunnel, so we never touch an unrelated cloudflared the
 * operator may be running for a different tunnel.
 *
 * The motivating bug (hub#487): each `parachute expose public --cloudflare`
 * "reused the tunnel" but spawned a fresh connector (new pid) without killing
 * the prior ones, and the state file only tracked the most-recent pid. Orphan
 * connectors accumulated — multiple `cloudflared tunnel run` processes all
 * serving stale `config.yml` snapshots, so edge routing became nondeterministic
 * ("silent fails"). Sweeping by UUID/config-path catches the orphans that the
 * single-pid state record misses (prior runs that crashed mid-rewrite, or a
 * connector the operator started by hand for this tunnel).
 *
 * Injectable so tests assert the sweep without a live `pgrep`.
 */
export type ConnectorPidsFn = (tunnelUuid: string, configPath: string) => number[];

/**
 * Pure parser for a `"<pid> <full command line>"` process listing.
 *
 * @param listing     Raw stdout of the process listing, one process per line.
 * @param tunnelUuid  Tunnel UUID to look for on the command line.
 * @param configPath  Tunnel config path to look for on the command line.
 * @param selfPid     PID of the current process; never returned.
 * @returns PIDs whose command line names this tunnel, in listing order.
 */
function parseConnectorPids(
  listing: string,
  tunnelUuid: string,
  configPath: string,
  selfPid: number,
): number[] {
  // An empty token would make `includes("")` true for EVERY cloudflared
  // process, turning the surgical kill into a blanket one. Drop empty tokens;
  // with nothing to match on we match nothing.
  const tokens = [tunnelUuid, configPath].filter((token) => token.length > 0);
  if (tokens.length === 0) return [];

  const pids: number[] = [];
  for (const rawLine of listing.split("\n")) {
    const match = /^(\d+)\s+(.*)$/.exec(rawLine.trim());
    if (!match) continue;
    const [, pidText = "", cmdline = ""] = match;
    const pid = Number.parseInt(pidText, 10);
    if (!Number.isInteger(pid) || pid <= 0 || pid === selfPid) continue;
    // Surgical match: only connectors that name this tunnel's UUID or its
    // config path. A bare `cloudflared` (e.g. `--version`, `tunnel list`)
    // or a connector for a *different* tunnel won't match either token.
    if (tokens.some((token) => cmdline.includes(token))) pids.push(pid);
  }
  return pids;
}

export const defaultConnectorPids: ConnectorPidsFn = (tunnelUuid, configPath) => {
  try {
    // We need "<pid> <full command line>" for every process whose command line
    // matches "cloudflared". The flag that prints the full command line
    // differs by platform: BSD/macOS pgrep prints it for `-f -l`, but Linux
    // (procps-ng) prints only the process NAME for `-l` and needs `-a`
    // (--list-full). Using `-fl` on Linux would hide the UUID/config path and
    // the filter below would never match, so the sweep would find nothing.
    // Windows is out of scope (mirrors hub#287's lsof assumption).
    // Any failure → [] (caller falls back to state-tracked pid).
    const listFlags = process.platform === "linux" ? "-fa" : "-fl";
    const result = spawnSync("pgrep", [listFlags, "cloudflared"], {
      encoding: "utf8",
      timeout: 2000,
    });
    if (result.status !== 0 || typeof result.stdout !== "string") return [];
    return parseConnectorPids(result.stdout, tunnelUuid, configPath, process.pid);
  } catch {
    return [];
  }
};
160
+
161
/**
 * Resolve a hostname to its A/AAAA addresses. Returns [] when the name doesn't
 * resolve (NXDOMAIN, SERVFAIL, no records yet) or when the lookup does not
 * finish within the time budget — the signal the DNS self-diagnosis keys on.
 * Injectable so tests drive each case (unresolved / Cloudflare /
 * non-Cloudflare) deterministically.
 */
export type ResolveHostFn = (hostname: string) => Promise<string[]>;

export const defaultResolveHost: ResolveHostFn = async (hostname) => {
  // Upper bound on how long the (non-fatal) diagnosis may hold up the expose.
  // The call site relies on this lookup being fast; a stalled system resolver
  // must not block the command.
  const lookupTimeoutMs = 3000;
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    // Bun.dns ships with the runtime; `family: 0` asks for both IPv4 and IPv6
    // in a single call, so a partially-propagated name still surfaces an
    // address.
    const lookup = Bun.dns.lookup(hostname, { family: 0 });
    const deadline = new Promise<never>((_resolve, reject) => {
      timer = setTimeout(() => reject(new Error("DNS lookup timed out")), lookupTimeoutMs);
    });
    // Promise.race attaches handlers to both promises, so a lookup that
    // rejects after the deadline does not surface as an unhandled rejection.
    const records = await Promise.race([lookup, deadline]);
    return records.map((r) => r.address).filter((a) => typeof a === "string" && a.length > 0);
  } catch {
    // Best-effort: any failure (including the timeout) reads as "unresolved".
    return [];
  } finally {
    // Always clear the timer so it never keeps the process alive.
    if (timer !== undefined) clearTimeout(timer);
  }
};
180
+
181
/**
 * Cloudflare's published anycast IPv4 ranges (the proxy edge). A proxied
 * (orange-cloud) record — which is what `cloudflared tunnel route dns` creates
 * — resolves to one of these. If the hostname resolves to something *outside*
 * these ranges, it's almost certainly shadowed: a Pages project, an A record,
 * or a grey-cloud CNAME pointing elsewhere.
 * Source: https://www.cloudflare.com/ips/ (stable for years).
 */
const CLOUDFLARE_V4_RANGES: ReadonlyArray<readonly [string, number]> = [
  ["173.245.48.0", 20],
  ["103.21.244.0", 22],
  ["103.22.200.0", 22],
  ["103.31.4.0", 22],
  ["141.101.64.0", 18],
  ["108.162.192.0", 18],
  ["190.93.240.0", 20],
  ["188.114.96.0", 20],
  ["197.234.240.0", 22],
  ["198.41.128.0", 17],
  ["162.158.0.0", 15],
  ["104.16.0.0", 13],
  ["104.24.0.0", 14],
  ["172.64.0.0", 13],
  ["131.0.72.0", 22],
];

/**
 * Cloudflare's published IPv6 ranges, stored as [top 32 bits, prefix length].
 * Every published prefix is /32 or shorter, so the first two hextets of an
 * address are enough to classify it. Source: https://www.cloudflare.com/ips/.
 */
const CLOUDFLARE_V6_RANGES: ReadonlyArray<readonly [number, number]> = [
  [0x2400cb00, 32],
  [0x26064700, 32],
  [0x2803f800, 32],
  [0x2405b500, 32],
  [0x24058100, 32],
  [0x2a0698c0, 29],
  [0x2c0ff248, 32],
];

const IPV4_OCTET = /^\d{1,3}$/;
const IPV6_HEXTET = /^[0-9a-f]{1,4}$/;
const IPV4_MAPPED_V6 = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/;

/**
 * Parse a dotted-quad IPv4 address into an unsigned 32-bit integer.
 * Returns undefined for anything that is not exactly four decimal octets in
 * 0..255. Octets are validated strictly, so trailing garbage such as "1x" is
 * rejected rather than silently truncated the way `parseInt` would.
 */
function ipv4ToInt(ip: string): number | undefined {
  const parts = ip.split(".");
  if (parts.length !== 4) return undefined;
  let n = 0;
  for (const part of parts) {
    if (!IPV4_OCTET.test(part)) return undefined;
    const octet = Number(part);
    if (octet > 255) return undefined;
    n = n * 256 + octet;
  }
  return n;
}

/**
 * Top 32 bits (first two hextets) of a lower-cased IPv6 address, or undefined
 * if those hextets are malformed. An empty hextet comes from `::` compression
 * and stands for zero.
 */
function ipv6Top32(addr: string): number | undefined {
  const [first = "", second = ""] = addr.split(":");
  let top = 0;
  for (const hextet of [first, second]) {
    if (hextet !== "" && !IPV6_HEXTET.test(hextet)) return undefined;
    top = top * 0x10000 + (hextet === "" ? 0 : Number.parseInt(hextet, 16));
  }
  return top;
}

/** True when `value` and `base` agree on their leading `bits` bits (of 32). */
function sharesPrefix(value: number, base: number, bits: number): boolean {
  // Plain division instead of `<<` / `&` keeps everything unsigned.
  const span = 2 ** (32 - bits);
  return Math.floor(value / span) === Math.floor(base / span);
}

/** True when a single resolved address falls inside a Cloudflare range. */
function isCloudflareAddress(address: string): boolean {
  const normalized = address.trim().toLowerCase();
  // IPv4-mapped IPv6 (`::ffff:a.b.c.d`) carries an IPv4 address; classify it
  // as such instead of failing the IPv6 prefix check.
  const mappedV4 = IPV4_MAPPED_V6.exec(normalized)?.[1];
  if (mappedV4 !== undefined || !normalized.includes(":")) {
    const ip = ipv4ToInt(mappedV4 ?? normalized);
    if (ip === undefined) return false;
    return CLOUDFLARE_V4_RANGES.some(([base, bits]) => {
      const baseInt = ipv4ToInt(base);
      return baseInt !== undefined && sharesPrefix(ip, baseInt, bits);
    });
  }
  const top = ipv6Top32(normalized);
  if (top === undefined) return false;
  return CLOUDFLARE_V6_RANGES.some(([base, bits]) => sharesPrefix(top, base, bits));
}

/**
 * True if any resolved address belongs to Cloudflare's edge.
 *
 * @param addresses Resolved IPv4/IPv6 address strings; unparseable entries are
 *   ignored.
 * @returns true as soon as one address is inside a Cloudflare range; false for
 *   an empty list.
 */
export function looksLikeCloudflare(addresses: readonly string[]): boolean {
  return addresses.some(isCloudflareAddress);
}
239
+
97
240
  export interface ExposeCloudflareOpts {
98
241
  runner?: Runner;
99
242
  spawner?: CloudflaredSpawner;
100
243
  alive?: AliveFn;
101
244
  kill?: KillFn;
245
+ /**
246
+ * Find every running cloudflared connector PID serving this tunnel (by UUID
247
+ * or config-path match). Used to sweep orphan connectors before spawning a
248
+ * fresh one (hub#487). Tests inject a stub; production uses
249
+ * `defaultConnectorPids` (a filtered `pgrep -fl cloudflared`).
250
+ */
251
+ connectorPids?: ConnectorPidsFn;
252
+ /**
253
+ * Resolve a hostname to its addresses, for the post-route DNS self-diagnosis
254
+ * (hub#487). Returns the resolved IPs (empty when NXDOMAIN / not yet live).
255
+ * Best-effort and non-fatal — a failure to resolve never blocks the expose.
256
+ * Tests inject a stub; production uses `defaultResolveHost` (Bun DNS).
257
+ */
258
+ resolveHost?: ResolveHostFn;
102
259
  log?: (line: string) => void;
103
260
  manifestPath?: string;
104
261
  statePath?: string;
262
+ /**
263
+ * Path to `expose-state.json` — the shared cross-provider expose record the
264
+ * Tailscale path also writes (`expose.ts`). Distinct from `statePath`
265
+ * (cloudflared-state.json, the per-tunnel process record). The cloudflare
266
+ * up-path writes this so downstream consumers (`resolveAdminUrl` in init,
267
+ * `resolveHubOrigin` in lifecycle / auth) see the public URL instead of
268
+ * loopback; the off-path clears it. Defaults to `EXPOSE_STATE_PATH`.
269
+ */
270
+ exposeStatePath?: string;
105
271
  /**
106
272
  * Tunnel name targeted by this invocation. Defaults to `parachute` —
107
273
  * the canonical single-tunnel name. Override to run multiple tunnels on
@@ -164,6 +330,14 @@ export interface ExposeCloudflareOpts {
164
330
  * `<vaultHome>/config.yaml` from disk. (#186)
165
331
  */
166
332
  vaultAuthStatus?: VaultAuthStatus;
333
+ /**
334
+ * Restart a hub-dependent service so it re-reads the new public hub origin.
335
+ * Mirrors the Tailscale path's `restartService` seam (`expose.ts`). Defaults
336
+ * to lifecycle `restart`; tests inject a fake to assert the call without
337
+ * spawning a real daemon. Only invoked for vault (the only `iss`-validating
338
+ * service) and only when it's already running.
339
+ */
340
+ restartService?: (short: string) => Promise<number>;
167
341
  }
168
342
 
169
343
  interface Resolved {
@@ -171,9 +345,12 @@ interface Resolved {
171
345
  spawner: CloudflaredSpawner;
172
346
  alive: AliveFn;
173
347
  kill: KillFn;
348
+ connectorPids: ConnectorPidsFn;
349
+ resolveHost: ResolveHostFn;
174
350
  log: (line: string) => void;
175
351
  manifestPath: string;
176
352
  statePath: string;
353
+ exposeStatePath: string;
177
354
  tunnelName: string;
178
355
  configPath: string;
179
356
  logPath: string;
@@ -186,24 +363,42 @@ interface Resolved {
186
363
  now: () => Date;
187
364
  vaultHome: string | undefined;
188
365
  vaultAuthStatus: VaultAuthStatus | undefined;
366
+ restartService: (short: string) => Promise<number>;
189
367
  }
190
368
 
191
369
  function resolve(opts: ExposeCloudflareOpts): Resolved {
192
370
  const tunnelName = opts.tunnelName ?? DEFAULT_TUNNEL_NAME;
193
- const paths = cloudflaredPathsFor(tunnelName);
371
+ const configDir = opts.configDir ?? CONFIG_DIR;
372
+ // Derive per-tunnel config/log paths from the *resolved* configDir, not the
373
+ // real `CONFIG_DIR`. When a test threads a tmp `configDir` but omits explicit
374
+ // `configPath`/`logPath`, this keeps the derived files inside the tmp dir
375
+ // instead of writing fixtures into the operator's real ~/.parachute.
376
+ const paths = cloudflaredPathsFor(tunnelName, configDir);
194
377
  return {
195
378
  runner: opts.runner ?? defaultRunner,
196
379
  spawner: opts.spawner ?? defaultCloudflaredSpawner,
197
380
  alive: opts.alive ?? defaultAlive,
198
381
  kill: opts.kill ?? defaultKill,
382
+ // Defaulting policy mirrors lifecycle's startReadyMs (hub#487): the real
383
+ // implementations shell out (`pgrep`) / hit the network (DNS). When a test
384
+ // injects a fake `spawner` but no explicit seam, fall back to inert stubs
385
+ // (no orphans found; "resolves at Cloudflare" → no DNS warning) so suites
386
+ // stay deterministic and offline. Production (no spawner override) always
387
+ // gets the real `pgrep` sweep + DNS diagnosis.
388
+ connectorPids:
389
+ opts.connectorPids ?? (opts.spawner === undefined ? defaultConnectorPids : () => []),
390
+ resolveHost:
391
+ opts.resolveHost ??
392
+ (opts.spawner === undefined ? defaultResolveHost : async () => ["104.16.0.1"]),
199
393
  log: opts.log ?? ((line) => console.log(line)),
200
394
  manifestPath: opts.manifestPath ?? SERVICES_MANIFEST_PATH,
201
395
  statePath: opts.statePath ?? CLOUDFLARED_STATE_PATH,
396
+ exposeStatePath: opts.exposeStatePath ?? EXPOSE_STATE_PATH,
202
397
  tunnelName,
203
398
  configPath: opts.configPath ?? paths.configPath,
204
399
  logPath: opts.logPath ?? paths.logPath,
205
400
  cloudflaredHome: opts.cloudflaredHome ?? DEFAULT_CLOUDFLARED_HOME,
206
- configDir: opts.configDir ?? CONFIG_DIR,
401
+ configDir,
207
402
  hubOrigin: opts.hubOrigin,
208
403
  hubEnsureOpts: opts.hubEnsureOpts ?? {},
209
404
  wellKnownDir: opts.wellKnownDir ?? WELL_KNOWN_DIR,
@@ -211,6 +406,14 @@ function resolve(opts: ExposeCloudflareOpts): Resolved {
211
406
  now: opts.now ?? (() => new Date()),
212
407
  vaultHome: opts.vaultHome,
213
408
  vaultAuthStatus: opts.vaultAuthStatus,
409
+ restartService:
410
+ opts.restartService ??
411
+ ((short: string) =>
412
+ restart(short, {
413
+ manifestPath: opts.manifestPath,
414
+ configDir,
415
+ log: opts.log ?? (() => {}),
416
+ })),
214
417
  };
215
418
  }
216
419
 
@@ -223,19 +426,65 @@ function printAuthGuidance(log: (line: string) => void, vaultUrl: string): void
223
426
  log("Pick the path that matches how you'll reach it:");
224
427
  log("");
225
428
  log(" Humans (claude.ai / ChatGPT connectors, browser):");
226
- log(" parachute auth set-password # set an owner password");
227
- log(" parachute auth 2fa enroll # (recommended) TOTP + backup codes");
429
+ log(" parachute auth set-password # set a STRONG owner password");
430
+ log(" parachute auth 2fa enroll # add a second factor (recommended)");
431
+ log(" # (or set 2FA up in the browser at /account/2fa for a scannable QR)");
228
432
  log(" then point your connector at:");
229
433
  log(` ${vaultUrl}`);
230
434
  log("");
231
- log(" Scripts / machines:");
232
- log(" parachute vault tokens create # creates a pvt_… bearer token");
233
- log(" Authorization: Bearer pvt_… # attach to every request");
435
+ log(" Scripts / machines (hub-issued JWT — set the owner password first):");
436
+ log(" parachute auth mint-token --scope vault:<name>:read # or :write");
437
+ log(" Authorization: Bearer <hub-jwt> # attach the printed token to every request");
438
+ log(" (or: Admin → Vaults → Connect mints one and shows the header for you)");
234
439
  log("");
235
- log("Neither is a prerequisite for the other. Full auth reference:");
440
+ log("The owner password gates both paths browser sign-in and minting tokens.");
441
+ log("Full auth reference:");
236
442
  log(` ${AUTH_DOC_URL}`);
237
443
  }
238
444
 
445
/**
 * Best-effort registrable-zone guess: the last two labels of the hostname
 * (`vault.example.com` → `example.com`, `gitcoin.parachute.computer` →
 * `parachute.computer`). This is a heuristic — multi-label public suffixes
 * (`foo.co.uk`) would guess `co.uk` — but it's only used to phrase the
 * `dig +short <zone> NS` remedy, where being off by a label is a harmless
 * nudge. We don't ship a full public-suffix list for one warning string.
 *
 * Empty labels (e.g. the trailing root dot of `example.com.`) are dropped for
 * every input, so short and long hostnames are normalised the same way.
 *
 * @param hostname Fully-qualified hostname being exposed.
 * @returns The last two non-empty labels joined by ".", or all of them when
 *   there are two or fewer.
 */
function guessZone(hostname: string): string {
  const labels = hostname.split(".").filter((label) => label.length > 0);
  // slice(-2) returns the whole array when it has two or fewer entries, so a
  // single expression covers both the short and the long case.
  return labels.slice(-2).join(".");
}
458
+
459
/**
 * Post-route DNS sanity check that only ever logs.
 *
 * Looks `hostname` up through `r.resolveHost` and emits one of two warnings
 * via `r.log`: one when nothing resolves, another when the addresses that do
 * resolve are not recognised by `looksLikeCloudflare`. When the name resolves
 * to Cloudflare, nothing is printed.
 *
 * The function has no error handling of its own — it relies on `resolveHost`
 * not rejecting — and it returns nothing, so it cannot influence the caller's
 * exit code.
 *
 * @param hostname Public hostname that was just routed to the tunnel.
 * @param r        Resolved options; only `resolveHost` and `log` are used.
 */
async function diagnoseDns(hostname: string, r: Resolved): Promise<void> {
  const resolved = await r.resolveHost(hostname);
  const zone = guessZone(hostname);

  let warning: string[] = [];
  if (resolved.length === 0) {
    // Symptom: the record exists at Cloudflare but nothing resolves yet.
    warning = [
      "",
      `⚠ DNS isn't live yet for ${hostname}.`,
      ` If ${zone} is a new Cloudflare zone, its nameservers may not be switched at your`,
      " registrar yet. Check with:",
      ` dig +short ${zone} NS # should list *.ns.cloudflare.com`,
      " Propagation can take minutes to hours. The tunnel itself is up — the URLs below",
      " will start working once DNS resolves.",
    ];
  } else if (!looksLikeCloudflare(resolved)) {
    // Symptom: the name resolves, but to something other than the proxy edge.
    warning = [
      "",
      `⚠ ${hostname} resolves (${resolved.join(", ")}) but not to Cloudflare's edge.`,
      ` It may be shadowed by another DNS record or a Cloudflare Pages project on ${zone}.`,
      " Ensure it's a proxied (orange-cloud) CNAME to the tunnel — check",
      ` https://dash.cloudflare.com → DNS for ${zone}. A grey-cloud / A record / Pages`,
      " binding on this hostname will 404 the tunnel at the edge.",
    ];
  }

  for (const line of warning) {
    r.log(line);
  }
}
487
+
239
488
  export async function exposeCloudflareUp(
240
489
  hostname: string,
241
490
  opts: ExposeCloudflareOpts = {},
@@ -365,6 +614,19 @@ export async function exposeCloudflareUp(
365
614
  }
366
615
  r.log("✓ DNS routed.");
367
616
 
617
+ // Post-route DNS self-diagnosis (hub#487). `cloudflared tunnel route dns`
618
+ // can succeed (the CNAME is written in Cloudflare's API) while the hostname
619
+ // is still NOT actually serving the tunnel — two shapes Aaron hit:
620
+ // (a) a "pending" zone whose nameservers aren't switched at the registrar
621
+ // yet, so the record exists in Cloudflare but nothing resolves; and
622
+ // (b) a subdomain shadowed by a Cloudflare Pages project on the same zone,
623
+ // so the edge 404s the tunnel.
624
+ // Both previously printed "✓ DNS routed" + the URLs as if fine. This check
625
+ // is best-effort and strictly NON-FATAL — it only adds a warning; it never
626
+ // changes the exit code or blocks the expose. Fast: one DNS lookup with a
627
+ // built-in timeout in `resolveHost`.
628
+ await diagnoseDns(hostname, r);
629
+
368
630
  const credsFile = credentialsPath(tunnel.id, r.cloudflaredHome);
369
631
  writeConfig(
370
632
  {
@@ -383,12 +645,28 @@ export async function exposeCloudflareUp(
383
645
  );
384
646
  r.log(`✓ Wrote ${r.configPath}`);
385
647
 
648
+ // Orphan-connector sweep (hub#487). Before spawning a fresh connector, kill
649
+ // EVERY cloudflared connector currently serving this tunnel so exactly one
650
+ // process serves the config.yml we just wrote. Pre-fix, each re-expose
651
+ // spawned a new connector without killing the prior ones (state tracked only
652
+ // the most-recent pid), so orphans accumulated and edge routing became
653
+ // nondeterministic. We union two sources:
654
+ // - the pid recorded in cloudflared-state.json (the prior `parachute`-
655
+ // spawned connector for this tunnel name), and
656
+ // - any pid found by scanning running processes for this tunnel's UUID or
657
+ // config path (catches orphans the state file lost track of — crashed
658
+ // mid-rewrite, or started by hand for this tunnel).
386
659
  const stateBefore = readCloudflaredState(r.statePath);
387
660
  const prior = findTunnelRecord(stateBefore, r.tunnelName);
388
- if (prior && r.alive(prior.pid)) {
661
+ const toKill = new Set<number>();
662
+ if (prior && r.alive(prior.pid)) toKill.add(prior.pid);
663
+ for (const pid of r.connectorPids(tunnel.id, r.configPath)) {
664
+ if (r.alive(pid)) toKill.add(pid);
665
+ }
666
+ for (const deadPid of toKill) {
389
667
  try {
390
- r.kill(prior.pid, "SIGTERM");
391
- r.log(`Stopped prior cloudflared (pid ${prior.pid}).`);
668
+ r.kill(deadPid, "SIGTERM");
669
+ r.log(`Stopped prior cloudflared connector (pid ${deadPid}).`);
392
670
  } catch {
393
671
  // Process is already gone — safe to ignore; we replace the record below.
394
672
  }
@@ -409,6 +687,67 @@ export async function exposeCloudflareUp(
409
687
  };
410
688
  writeCloudflaredState(withTunnelRecord(stateBefore, record), r.statePath);
411
689
 
690
+ // Persist the shared cross-provider expose record. Without this, the
691
+ // Tailscale path was the only one writing expose-state.json — so after a
692
+ // Cloudflare bring-up `readExposeState()` returned undefined and downstream
693
+ // consumers fell back to loopback:
694
+ // - init's `resolveAdminUrl` printed http://127.0.0.1:1939/admin/ instead
695
+ // of the public URL.
696
+ // - lifecycle's `resolveHubOrigin` (and the hub#460 vault `.env`
697
+ // PARACHUTE_HUB_ORIGIN persistence) kept the loopback origin, so vault's
698
+ // OAuth `iss` claim didn't match the public host — the "rejected on
699
+ // reconnect" P0 on Cloudflare deploys.
700
+ // Mode is "subdomain": cloudflared routes the whole FQDN at the hub catchall
701
+ // (one ingress → hub), unlike the Tailscale path's "path" routing. The single
702
+ // proxy entry mirrors the hub-catchall shape the Tailscale Funnel path plans.
703
+ const exposeState: ExposeState = {
704
+ version: 1,
705
+ layer: "public",
706
+ mode: "subdomain",
707
+ canonicalFqdn: hostname,
708
+ port: hubPort,
709
+ funnel: false,
710
+ entries: [
711
+ {
712
+ kind: "proxy",
713
+ mount: "/",
714
+ target: `http://localhost:${hubPort}`,
715
+ service: "hub",
716
+ },
717
+ ],
718
+ hubOrigin,
719
+ };
720
+ writeExposeState(exposeState, r.exposeStatePath);
721
+
722
+ // Persist the public hub origin into vault's `.env` and restart vault — the
723
+ // durable half of the OAuth issuer-mismatch fix on Cloudflare deploys.
724
+ //
725
+ // The bug (vault 401s every hub token on a Cloudflare deploy): the Tailscale
726
+ // path gets this for free because it auto-restarts vault, and that restart
727
+ // flows the freshly-written expose-state `hubOrigin` into `vault/.env` via
728
+ // lifecycle's `persistVaultHubOrigin`. The Cloudflare path wrote expose-state
729
+ // but never touched vault's `.env` or restarted it, so the launchd / systemd
730
+ // daemon kept booting vault with NO `PARACHUTE_HUB_ORIGIN` → vault fell back
731
+ // to loopback as its expected issuer → every hub-minted token (whose `iss`
732
+ // is the public origin) failed the `iss` check → 401 → "You're not signed in
733
+ // to the hub." We mirror the Tailscale path here exactly.
734
+ //
735
+ // `persistVaultHubOrigin` writes the durable `.env` (skips loopback itself,
736
+ // so a `--hub-origin http://127.0.0.1` override never bakes a dead issuer in);
737
+ // the restart makes the running vault re-read it immediately rather than
738
+ // waiting for the next reboot.
739
+ persistVaultHubOrigin(r.configDir, hubOrigin, r.log);
740
+ if (processState("vault", r.configDir, r.alive).status === "running") {
741
+ r.log("");
742
+ r.log("Restarting vault to pick up new hub origin…");
743
+ const rcode = await r.restartService("vault");
744
+ if (rcode !== 0) {
745
+ r.log(
746
+ "⚠ vault restart failed. Run manually once the issue is resolved: parachute restart vault",
747
+ );
748
+ }
749
+ }
750
+
412
751
  const baseUrl = `https://${hostname}`;
413
752
  // A well-formed vault manifest always lists at least one mount path. If
414
753
  // it's empty, something went sideways in `parachute install vault` — warn
@@ -473,12 +812,31 @@ export async function exposeCloudflareOff(opts: ExposeCloudflareOpts = {}): Prom
473
812
  } else {
474
813
  r.log(`cloudflared (pid ${record.pid}) wasn't running; clearing stale state.`);
475
814
  }
815
+ // Sweep any orphan connectors for this tunnel that the state record didn't
816
+ // track (hub#487) so `off` leaves exactly zero connectors serving it. Match
817
+ // by UUID/config-path; skip the record pid we already signalled above.
818
+ for (const orphanPid of r.connectorPids(record.tunnelUuid, record.configPath)) {
819
+ if (orphanPid === record.pid || !r.alive(orphanPid)) continue;
820
+ try {
821
+ r.kill(orphanPid, "SIGTERM");
822
+ r.log(`✓ Stopped orphan cloudflared connector (pid ${orphanPid}).`);
823
+ } catch {
824
+ // Already gone between probe and kill — fine.
825
+ }
826
+ }
476
827
  const stateAfter = withoutTunnelRecord(stateBefore, r.tunnelName);
477
828
  if (stateAfter) {
478
829
  writeCloudflaredState(stateAfter, r.statePath);
479
830
  } else {
480
831
  clearCloudflaredState(r.statePath);
481
832
  }
833
+ // Clear the shared expose-state.json when no Cloudflare tunnels remain, so
834
+ // downstream consumers stop resolving the now-dead public URL (mirrors the
835
+ // up-path write above + the Tailscale off-path's expose-state teardown). When
836
+ // other tunnels survive we leave it — a later off for the last one clears it.
837
+ if (!stateAfter) {
838
+ clearExposeState(r.exposeStatePath);
839
+ }
482
840
  r.log(` ${record.hostname} is no longer reachable through this machine.`);
483
841
  r.log(
484
842
  ` Tunnel "${record.tunnelName}" (${record.tunnelUuid}) remains defined in Cloudflare; re-running`,
@@ -24,6 +24,7 @@ import { type ServiceEntry, readManifest } from "../services-manifest.ts";
24
24
  import { type ServeEntry, bringupCommand, teardownCommand } from "../tailscale/commands.ts";
25
25
  import { getFqdn, isTailscaleInstalled } from "../tailscale/detect.ts";
26
26
  import { type Runner, defaultRunner } from "../tailscale/run.ts";
27
+ import { clearVaultHubOrigin } from "../vault-hub-origin-env.ts";
27
28
  import type { VaultAuthStatus } from "../vault/auth-status.ts";
28
29
  import {
29
30
  WELL_KNOWN_DIR,
@@ -438,6 +439,13 @@ export async function exposeOff(layer: ExposeLayer, opts: ExposeOpts = {}): Prom
438
439
  }
439
440
 
440
441
  clearExposeState(statePath);
442
+ // Drop the persisted PARACHUTE_HUB_ORIGIN from vault's `.env`. `expose up`
443
+ // (via the vault restart) persisted the public origin so the launchd /
444
+ // systemd daemon validates `iss` against it. With exposure gone, a
445
+ // local-only hub mints loopback-`iss` tokens, so a stale public origin left
446
+ // in `.env` would itself cause the mismatch on the next daemon restart.
447
+ // Reverting to vault's loopback default (`getHubOrigin`) keeps them aligned.
448
+ clearVaultHubOrigin(configDir, log);
441
449
  // Pair to the debug-only write at expose-up — clean up the inspection artifact
442
450
  // on teardown so it doesn't outlive the layer it described.
443
451
  if (existsSync(wellKnownFilePath)) {
@@ -180,6 +180,26 @@ export function looksLikeServer(platform: NodeJS.Platform, env: NodeJS.ProcessEn
180
180
  return false;
181
181
  }
182
182
 
183
/**
 * Heuristic answer to "would spawning a browser fail for lack of a display?"
 *
 * Checking for a TTY is not enough: an SSH session is a TTY without a display,
 * where `xdg-open` fails (or blocks). The rule implemented here:
 *   - any platform other than linux → never display-less (macOS / Windows are
 *     assumed to always have a window server; the rare SSH-into-a-Mac case is
 *     deliberately left on the happy path);
 *   - linux that {@link looksLikeServer} classifies as a server → display-less;
 *   - linux with neither $DISPLAY nor $WAYLAND_DISPLAY set → display-less
 *     (covers a local headless console that isn't reached over SSH).
 *
 * @param platform Node platform identifier to evaluate.
 * @param env      Environment variables to inspect.
 * @returns true when a browser spawn should be skipped.
 */
export function hasNoDisplay(platform: NodeJS.Platform, env: NodeJS.ProcessEnv): boolean {
  const onLinux = platform === "linux";
  if (!onLinux) return false;

  // Either variable being non-empty counts as "a display is available".
  const displayAdvertised = Boolean(env.DISPLAY) || Boolean(env.WAYLAND_DISPLAY);
  return looksLikeServer(platform, env) || !displayAdvertised;
}
202
+
183
203
  /**
184
204
  * Default browser-opener. Tries `open` on macOS, `xdg-open` on Linux, and
185
205
  * returns false when neither is available (Windows / WSL fallthrough +
@@ -299,7 +319,7 @@ async function promptExposeChoice(
299
319
  log("Do you want to expose it publicly so you can reach it from other devices?");
300
320
  const mark = (c: ExposeChoice) => (c === defaultChoice ? " (default)" : "");
301
321
  log(` 1) No — keep it loopback-only${mark("none")}`);
302
- log(` 2) Yes via Tailscale Funnel (private to your devices)${mark("tailnet")}`);
322
+ log(` 2) Yes, private to your tailnet (Tailscale \`serve\`)${mark("tailnet")}`);
303
323
  log(` 3) Yes via Cloudflare Tunnel (public HTTPS, your own domain)${mark("cloudflare")}`);
304
324
  log("");
305
325
 
@@ -523,6 +543,17 @@ export async function init(opts: InitOpts = {}): Promise<number> {
523
543
  log("(Open the URL above in your browser to continue.)");
524
544
  return 0;
525
545
  }
546
+ // Headless guard: a TTY isn't enough — an SSH session is a TTY but has no
547
+ // display, so `xdg-open` either fails noisily or (worse) blocks. Skip the
548
+ // spawn entirely on a server-shaped box (linux + no $DISPLAY/$WAYLAND_DISPLAY,
549
+ // or SSH) and just print the link. Aaron hit this on EC2: init tried to open
550
+ // a browser, failed with "Couldn't launch a browser," and (pre-Fix-1) showed
551
+ // the loopback URL. With Fix 1 the printed link is now the public Cloudflare
552
+ // URL. Keep spawning on a real desktop (macOS, Linux-with-display).
553
+ if (hasNoDisplay(platform, env)) {
554
+ log("(No display detected — open the URL above in a browser to continue.)");
555
+ return 0;
556
+ }
526
557
  // `choice === "browser"` (either flag-driven or the operator picked
527
558
  // browser at the prompt) goes straight to openBrowser — skip the
528
559
  // back-compat "Open in your browser now?" Y/n confirm. If choice is
@@ -554,7 +585,7 @@ async function runExposureChoice(
554
585
  ): Promise<number> {
555
586
  if (choice === "none") return 0;
556
587
  if (choice === "tailnet") {
557
- ctx.log("Setting up Tailscale Funnel…");
588
+ ctx.log("Setting up private tailnet access (Tailscale `serve`)…");
558
589
  return await ctx.exposeTailnetImpl();
559
590
  }
560
591
  // cloudflare