@openparachute/hub 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/hub-server.ts CHANGED
@@ -56,6 +56,9 @@
56
56
  * /api/connections/catalog (GET) → events/actions across installed modules (cookie-gated)
57
57
  * /admin/connections (POST/GET) → connection provision/list (cookie-gated; POST CSRF-belted)
58
58
  * /admin/connections/<id> (DELETE) → connection teardown (cookie-gated; CSRF-belted)
59
+ * /admin/connections/<id>/renew (POST) → credential renewal (H4; Bearer = the credential itself, proof of possession)
60
+ * /admin/connections/<id>/claim (POST) → claim/reconcile a directly-delivered credential → pending record (surface#113; Bearer = the credential itself)
61
+ * /admin/connections/<id>/approve (POST) → operator approval of a pending claim (cookie-gated; CSRF-belted)
59
62
  *
60
63
  * # "CSRF-belted" = strict same-origin Origin check on cookie-authed
61
64
  * # mutations (hub#632, boundary C1) — origin-check.ts
@@ -204,7 +207,12 @@ import {
204
207
  handleResetUserPassword,
205
208
  handleUpdateUserVaults,
206
209
  } from "./api-users.ts";
207
- import { buildChromeForRequest, injectChromeIntoResponse } from "./chrome-strip.ts";
210
+ import { gateUiAudience, resolveUiMount } from "./audience-gate.ts";
211
+ import {
212
+ CHROME_OPT_OUT_PREFIXES,
213
+ buildChromeForRequest,
214
+ injectChromeIntoResponse,
215
+ } from "./chrome-strip.ts";
208
216
  import { CONFIG_DIR, SERVICES_MANIFEST_PATH } from "./config.ts";
209
217
  import { applyCorsHeaders, corsPreflightResponse, isCorsAllowedRoute } from "./cors.ts";
210
218
  import { ensureCsrfToken } from "./csrf.ts";
@@ -270,6 +278,8 @@ import {
270
278
  isVaultEntry,
271
279
  vaultInstanceNameFor,
272
280
  } from "./well-known.ts";
281
+ import { type WsBridgeData, createWsBridgeHandlers } from "./ws-bridge.ts";
282
+ import { type WsConnectionTracker, defaultWsConnectionTracker } from "./ws-connection-caps.ts";
273
283
 
274
284
  interface Args {
275
285
  port: number;
@@ -478,6 +488,23 @@ async function collectInstalledModules(
478
488
  return out;
479
489
  }
480
490
 
491
/**
 * Build a resolver that maps a module's SHORT name to its loopback origin,
 * using the rows of services.json at `manifestPath`. This is the H4
 * credential-delivery seam: the Connections engine POSTs minted credentials
 * and removal payloads straight to the daemon rather than through the hub
 * proxy, so it needs the daemon's own origin.
 *
 * The short name of a row is `shortNameForManifest(name) ?? name` — the same
 * derivation `collectInstalledModules` uses — so a third-party module whose
 * row name already IS its short name resolves as well.
 *
 * The manifest is re-read on every call, so a module installed moments ago
 * is resolvable without restarting the hub.
 *
 * @param manifestPath Path to the services.json manifest.
 * @returns A function returning `http://127.0.0.1:<port>` for the first row
 *   whose short name matches, or `null` when no row matches.
 */
function makeResolveModuleOrigin(manifestPath: string): (short: string) => string | null {
  return function resolveModuleOrigin(short) {
    // First matching row wins, in manifest order.
    for (const row of readManifestLenient(manifestPath).services) {
      const rowShort = shortNameForManifest(row.name) ?? row.name;
      if (rowShort === short) return `http://127.0.0.1:${row.port}`;
    }
    return null;
  };
}
507
+
481
508
  /**
482
509
  * The trust layer a request arrived through. Hub binds `127.0.0.1:1939`, so
483
510
  * every request reaches it via one of three trusted forwarders (or directly
@@ -564,6 +591,125 @@ function isLoopbackPeer(peerAddr: string | null | undefined): boolean {
564
591
  );
565
592
  }
566
593
 
594
+ /**
595
+ * The two substrate trust headers the hub stamps on every forwarded request
596
+ * (H2, surface-runtime-primitives design §10):
597
+ *
598
+ * X-Parachute-Layer — the `layerOf` classification ("loopback" |
599
+ * "tailnet" | "public"), fail-closed to "public"
600
+ * when the peer address is unknown.
601
+ * X-Parachute-Client-IP — the resolved client IP (CF-Connecting-IP →
602
+ * X-Forwarded-For first hop → peer address; same
603
+ * precedence as rate-limit.ts `clientIpFromRequest`,
604
+ * with the peer address as the direct-caller floor).
605
+ *
606
+ * Backends (surface-host's `ctx.layer` / `ctx.clientIp`, any module reading
607
+ * trust signals) consume THESE, never raw forwarder headers — the hub is the
608
+ * only component that can see the actual peer socket, so it's the only place
609
+ * the classification can be made fail-closed.
610
+ */
611
+ export const PARACHUTE_LAYER_HEADER = "x-parachute-layer";
612
+ export const PARACHUTE_CLIENT_IP_HEADER = "x-parachute-client-ip";
613
+
614
/**
 * Pick the client IP that goes into the X-Parachute-Client-IP stamp.
 *
 * Candidates are tried in order and the first non-blank one (after trimming)
 * wins:
 *
 *   1. `CF-Connecting-IP` — set by cloudflared on every forwarded request.
 *   2. The first (leftmost) hop of `X-Forwarded-For` — set by tailscale
 *      serve/funnel and generic reverse proxies.
 *   3. `peerAddr` — the direct caller's own address.
 *
 * Known limitation (shared with the rate-limiter's keying): a DIRECT caller
 * can inject the forwarded-IP headers and misattribute itself. Only the
 * attribution string is best-effort; the layer classification is made
 * elsewhere and does not rely on this value.
 *
 * @param req The inbound request whose forwarder headers are inspected.
 * @param peerAddr The socket peer address, or null when unavailable (e.g. a
 *   unit test calling the fetch fn without a Server).
 * @returns The resolved IP string, or null when no candidate yields a
 *   non-blank value — callers omit the header in that case.
 */
export function resolveClientIp(req: Request, peerAddr: string | null): string | null {
  const candidates = [
    req.headers.get("cf-connecting-ip"),
    req.headers.get("x-forwarded-for")?.split(",")[0],
    peerAddr,
  ];
  for (const candidate of candidates) {
    const ip = candidate?.trim();
    if (ip) return ip;
  }
  return null;
}
645
+
646
/**
 * Replace any inbound substrate trust headers with the hub's own values.
 *
 * Both headers are removed first, then re-stamped from the hub's view of the
 * request. The removal is load-bearing: the client-IP header is only set
 * when an IP resolves, so without the strip a forged inbound
 * `X-Parachute-Client-IP` (or `X-Parachute-Layer: loopback`) could ride
 * through to a module that keys trust off it.
 *
 * @param headers The outgoing header bag; mutated in place.
 * @param req The inbound request, used for layer classification and client
 *   IP resolution.
 * @param peerAddr The socket peer address, or null when unknown.
 */
export function stampSubstrateTrustHeaders(
  headers: Headers,
  req: Request,
  peerAddr: string | null,
): void {
  // Strip first so nothing client-supplied survives, whatever gets set below.
  for (const name of [PARACHUTE_LAYER_HEADER, PARACHUTE_CLIENT_IP_HEADER]) {
    headers.delete(name);
  }

  const layer = layerOf(req, peerAddr);
  headers.set(PARACHUTE_LAYER_HEADER, layer);

  // The client-IP header is omitted entirely when nothing resolves.
  const resolvedIp = resolveClientIp(req, peerAddr);
  if (resolvedIp) {
    headers.set(PARACHUTE_CLIENT_IP_HEADER, resolvedIp);
  }
}
664
+
665
+ /**
666
+ * Shared bucket for connections whose client IP cannot be derived at all
667
+ * (no forwarder headers AND no peer address). Fail-closed: they all contend
668
+ * for one per-IP allotment rather than each minting a fresh bucket — the
669
+ * same posture as rate-limit.ts's UNKNOWN_IP_SENTINEL.
670
+ */
671
+ export const WS_CAP_SHARED_BUCKET = "unknown";
672
+
673
/**
 * Compute the connection-cap bucket key for a WebSocket upgrade (hub#649).
 *
 * Deliberately stricter than {@link resolveClientIp}. A cap key must not be
 * steerable by the caller: if a direct peer's forged `X-Forwarded-For` were
 * believed, rotating it would open a fresh bucket per connection and the
 * per-IP cap would never trip. Forwarded-IP headers are therefore honored
 * ONLY when the peer is loopback, which is where the hub's on-box forwarders
 * (cloudflared, tailscale serve/funnel) connect from.
 *
 *   - no usable peer address: {@link WS_CAP_SHARED_BUCKET} (fail closed —
 *     all such connections contend for one allotment).
 *   - non-loopback peer: the peer address, ignoring any injected headers.
 *   - loopback peer: `CF-Connecting-IP`, else the first `X-Forwarded-For`
 *     hop, else the loopback address itself.
 *
 * Known limitation, container deploys (Render / Fly): the platform edge
 * dials from a private non-loopback address, so every public client lands
 * in the edge peer's bucket and the per-IP cap degrades to a coarse shared
 * cap. Raise PARACHUTE_WS_MAX_PER_IP on such deploys.
 *
 * @param req The upgrade request whose forwarder headers may be consulted.
 * @param peerAddr The socket peer address, or null when unknown.
 * @returns The bucket key to pass to the connection tracker.
 */
export function wsCapBucketKey(req: Request, peerAddr: string | null): string {
  const peer = peerAddr?.trim();
  if (!peer) return WS_CAP_SHARED_BUCKET;

  // Untrusted layer: headers are ignored, the spoofer pays from its own bucket.
  if (!isLoopbackPeer(peer)) return peer;

  const forwarded =
    req.headers.get("cf-connecting-ip")?.trim() ||
    req.headers.get("x-forwarded-for")?.split(",")[0]?.trim();
  return forwarded || peer;
}
712
+
567
713
  /**
568
714
  * Forward a request to a loopback service on `127.0.0.1:<port>`. By default
569
715
  * the incoming pathname + query are preserved verbatim; pass `targetPath` to
@@ -595,10 +741,17 @@ function isLoopbackPeer(peerAddr: string | null | undefined): boolean {
595
741
  * `short` is the canonical short (`vault`/`scribe`/`notes`) — used as
596
742
  * the supervisor map key + pidfile directory key for classification.
597
743
  *
598
- * Hop-by-hop notes: WebSocket upgrades and HTTP/2 trailers don't traverse
599
- * fetch-based proxies cleanly. No on-box service uses either today; if one
600
- * eventually needs them, switch to a Node http.IncomingMessage / http.request
601
- * pair.
744
+ * `peerAddr` is the resolved peer address (`server.requestIP`), threaded so
745
+ * the substrate trust headers (below) classify the layer the same way the
746
+ * `publicExposure` cloak does — fail-closed to `public` when unknown.
747
+ *
748
+ * Hop-by-hop notes: HTTP/2 trailers don't traverse fetch-based proxies
749
+ * cleanly; no on-box service uses them today. WebSocket upgrades CANNOT
750
+ * traverse this fetch-based path either — they're handled BEFORE dispatch by
751
+ * the Bun-native upgrade bridge (H1: `maybeUpgradeWebSocket` +
752
+ * `src/ws-bridge.ts`) for modules that declare the capability; an upgrade
753
+ * request reaching this function belongs to a non-declaring mount and the
754
+ * upstream sees a plain GET.
602
755
  */
603
756
  async function proxyRequest(
604
757
  req: Request,
@@ -606,6 +759,7 @@ async function proxyRequest(
606
759
  serviceLabel: string,
607
760
  short: string,
608
761
  supervisor: Supervisor | undefined,
762
+ peerAddr: string | null,
609
763
  targetPath?: string,
610
764
  ): Promise<Response> {
611
765
  const url = new URL(req.url);
@@ -651,6 +805,11 @@ async function proxyRequest(
651
805
  if (!headers.has("x-forwarded-proto")) {
652
806
  headers.set("x-forwarded-proto", isHttpsRequest(req) ? "https" : "http");
653
807
  }
808
+ // Substrate trust headers (H2, surface-runtime design §10): stamped on
809
+ // EVERY forwarded request so module backends read trust signals from the
810
+ // substrate instead of re-deriving them from raw forwarder headers (the
811
+ // "header-absence = local trust" anti-pattern the design rejects).
812
+ stampSubstrateTrustHeaders(headers, req, peerAddr);
654
813
 
655
814
  const init: RequestInit & { duplex?: "half" } = {
656
815
  method: req.method,
@@ -761,7 +920,7 @@ async function proxyToVault(
761
920
  // vault instances share the same supervisor key under hub's current
762
921
  // single-vault-per-hub model; if multi-vault-per-hub ever ships, the
763
922
  // classifier will need a per-instance key.
764
- return proxyRequest(req, match.port, "vault", "vault", supervisor, targetPath);
923
+ return proxyRequest(req, match.port, "vault", "vault", supervisor, peerAddr, targetPath);
765
924
  }
766
925
 
767
926
  /**
@@ -805,7 +964,7 @@ async function proxyToVaultAdmin(
805
964
  if (effectivePublicExposure(entry) === "loopback" && layerOf(req, peerAddr) !== "loopback") {
806
965
  return new Response("not found", { status: 404 });
807
966
  }
808
- return proxyRequest(req, entry.port, "vault", "vault", supervisor);
967
+ return proxyRequest(req, entry.port, "vault", "vault", supervisor, peerAddr);
809
968
  }
810
969
 
811
970
  /**
@@ -911,7 +1070,7 @@ async function proxyToService(
911
1070
  // will land in "persistent" by default which is the safer choice for
912
1071
  // unknown lifecycle).
913
1072
  const short = shortNameForManifest(match.entry.name) ?? match.entry.name;
914
- return proxyRequest(req, match.port, match.entry.name, short, supervisor, targetPath);
1073
+ return proxyRequest(req, match.port, match.entry.name, short, supervisor, peerAddr, targetPath);
915
1074
  }
916
1075
 
917
1076
  /**
@@ -1038,6 +1197,15 @@ export interface HubFetchDeps {
1038
1197
  * CLI commands directly.
1039
1198
  */
1040
1199
  supervisor?: Supervisor;
1200
+ /**
1201
+ * WebSocket connection-cap accounting (hub#649). Production uses the
1202
+ * process-wide {@link defaultWsConnectionTracker} (caps from env at boot);
1203
+ * tests inject their own tracker so they neither consume nor depend on the
1204
+ * shared counters. Release pairing is structural — the acquire site stashes
1205
+ * the release closure on the upgraded socket's `data`, so a mismatched
1206
+ * tracker between fetch fn and bridge handlers is impossible.
1207
+ */
1208
+ wsConnectionTracker?: WsConnectionTracker;
1041
1209
  }
1042
1210
 
1043
1211
  /**
@@ -1445,15 +1613,230 @@ export function resolveIssuerSource(
1445
1613
 
1446
1614
  /**
1447
1615
  * Minimal structural type for the Bun `Server` handle the fetch callback
1448
- * receives as its 2nd argument. We only need `requestIP` (item E / #526) to
1449
- * resolve the peer address for `layerOf`. Typed structurally (rather than
1616
+ * receives as its 2nd argument. We need `requestIP` (item E / #526) to
1617
+ * resolve the peer address for `layerOf`, and `upgrade` (H1) to hand a
1618
+ * gated WebSocket upgrade to the bridge. Typed structurally (rather than
1450
1619
  * importing Bun's full `Server`) so tests can pass a tiny fake and so the
1451
1620
  * signature stays robust to Bun type-shape churn. Optional in the callback
1452
1621
  * because a direct unit call to the returned fetch fn may omit it — in which
1453
- * case `peerAddr` is null and `layerOf` fails closed to `public`.
1622
+ * case `peerAddr` is null and `layerOf` fails closed to `public`, and a
1623
+ * WebSocket upgrade is refused (503 — no server to upgrade on).
1454
1624
  */
1455
1625
  interface PeerIpResolver {
1456
1626
  requestIP(req: Request): { address: string } | null;
1627
+ /**
1628
+ * Bun `Server.upgrade` — present on the real server, optional on fakes.
1629
+ * Typed with the bridge's data payload (Bun's own signature takes
1630
+ * `data: unknown`; method bivariance keeps the real Server assignable).
1631
+ */
1632
+ upgrade?(req: Request, options: { data: WsBridgeData }): boolean;
1633
+ }
1634
+
1635
/**
 * True when the request asks for a WebSocket upgrade.
 *
 * The `Upgrade` header is the discriminator. Its value is a comma-separated
 * list of protocol tokens, and RFC 6455 §4.1 only requires that the list
 * INCLUDE the `websocket` keyword (compared case-insensitively) — so the
 * header is tokenized rather than compared as a whole. A request sending
 * `Upgrade: h2c, websocket` is therefore routed to the upgrade bridge
 * instead of falling through to the fetch-based proxy.
 *
 * This is only a cheap router predicate, not a validator: Bun's
 * `server.upgrade` re-checks the full handshake (key, version, Connection
 * token) before committing a socket.
 *
 * @param req The inbound request.
 * @returns `true` if any `Upgrade` token equals `websocket`, else `false`
 *   (including when the header is absent or empty).
 */
export function isWebSocketUpgrade(req: Request): boolean {
  const upgrade = req.headers.get("upgrade");
  if (!upgrade) return false;
  return upgrade.split(",").some((token) => token.trim().toLowerCase() === "websocket");
}
1644
+
1645
+ /**
1646
+ * Hop-by-hop + WS-handshake headers never forwarded on the upstream connect:
1647
+ * the Bun WebSocket client re-mints its own handshake (key/version/
1648
+ * extensions), and forwarding the originals would corrupt it.
1649
+ */
1650
+ const WS_HOP_BY_HOP_HEADERS = [
1651
+ "host",
1652
+ "connection",
1653
+ "upgrade",
1654
+ "keep-alive",
1655
+ "proxy-authorization",
1656
+ "te",
1657
+ "trailer",
1658
+ "transfer-encoding",
1659
+ "sec-websocket-key",
1660
+ "sec-websocket-version",
1661
+ "sec-websocket-extensions",
1662
+ "sec-websocket-accept",
1663
+ // Subprotocol negotiation is NOT forwarded in v1 (see ws-bridge.ts header).
1664
+ "sec-websocket-protocol",
1665
+ ] as const;
1666
+
1667
+ /** The verdict of {@link maybeUpgradeWebSocket}. */
1668
+ type WsUpgradeVerdict =
1669
+ | { kind: "upgraded" }
1670
+ | { kind: "response"; response: Response }
1671
+ | { kind: "pass" };
1672
+
1673
/**
 * Build a `response` verdict carrying a JSON `{ error, error_description }`
 * body with the given status (plus any extra response headers).
 */
function wsJsonRefusal(
  status: number,
  error: string,
  errorDescription: string,
  extraHeaders: Record<string, string> = {},
): WsUpgradeVerdict {
  return {
    kind: "response",
    response: new Response(JSON.stringify({ error, error_description: errorDescription }), {
      status,
      headers: { "content-type": "application/json", ...extraHeaders },
    }),
  };
}

/**
 * H1 — the WebSocket upgrade bridge's routing + gating half (the frame
 * piping lives in `src/ws-bridge.ts`).
 *
 * For an `Upgrade: websocket` request, in order:
 *
 *   1. Resolve the service mount (generic service lookup first, then vault
 *      mounts). No mount → `pass`, and normal dispatch handles the request.
 *   2. Gate BEFORE upgrading: the `publicExposure: "loopback"` cloak (plain
 *      404, indistinguishable from not-installed), then the optional
 *      per-UI audience gate (H3).
 *   3. Capability check, DENY BY DEFAULT: the module must declare
 *      `websocket: true` on its services.json row OR in its module
 *      manifest. A manifest read that throws counts as "not declared".
 *      No declaration → 426.
 *   4. No `server.upgrade` available (e.g. a direct unit call) → 503.
 *   5. Connection caps (hub#649): acquired in the same synchronous stretch
 *      as the upgrade — there must be NO `await` between `tryAcquire` and
 *      `server.upgrade`, or the check could race the commit. Over-cap →
 *      generic 429; the counts go to the hub log only, never the client.
 *   6. `server.upgrade(req, { data })` with the upstream URL, the forwarded
 *      headers (client headers minus hop-by-hop/handshake headers, plus the
 *      H2 substrate trust stamps) and the cap-release closure.
 *
 * The cap slot is released inline whenever no socket ends up existing: when
 * `server.upgrade` returns false AND when it throws. Otherwise the bridge's
 * close handler would never fire and the slot would leak for the life of
 * the process.
 *
 * @param req The inbound upgrade request.
 * @param server The Bun server handle, if any; needed for `upgrade`.
 * @param deps Manifest path, peer address, module-manifest reader, optional
 *   audience gate, and the connection-cap tracker.
 * @returns `upgraded` when the socket was handed to the bridge, `response`
 *   with the refusal to send, or `pass` when no service mount matched.
 * @throws Re-throws anything `server.upgrade` throws, after releasing the
 *   acquired cap slot.
 */
async function maybeUpgradeWebSocket(
  req: Request,
  server: PeerIpResolver | undefined,
  deps: {
    manifestPath: string;
    peerAddr: string | null;
    readModuleManifestFn: (installDir: string) => Promise<ModuleManifest | null>;
    /** H3 — gate the upgrade on the mount's audience BEFORE upgrading. */
    gateAudience?: (pathname: string) => Promise<Response | null>;
    /** hub#649 — per-IP + total connection-cap accounting. */
    wsConnectionTracker: WsConnectionTracker;
  },
): Promise<WsUpgradeVerdict> {
  const services = readManifestLenient(deps.manifestPath).services;
  const url = new URL(req.url);
  const match =
    findServiceUpstream(services, url.pathname) ?? findVaultUpstream(services, url.pathname);
  if (!match) return { kind: "pass" };

  // Layer cloak first — a loopback-only module must look not-installed from
  // any other layer, for upgrades exactly as for HTTP.
  if (
    effectivePublicExposure(match.entry) === "loopback" &&
    layerOf(req, deps.peerAddr) !== "loopback"
  ) {
    return { kind: "response", response: new Response("not found", { status: 404 }) };
  }

  // Audience gate (H3) — runs before the upgrade so an unauthorized client
  // never gets a socket.
  if (deps.gateAudience) {
    const gated = await deps.gateAudience(url.pathname);
    if (gated) return { kind: "response", response: gated };
  }

  // Capability — deny by default. The services.json row wins; the module
  // manifest is consulted only when the row doesn't declare it.
  let declared = match.entry.websocket === true;
  if (!declared && match.entry.installDir) {
    try {
      const manifest = await deps.readModuleManifestFn(match.entry.installDir);
      declared = manifest?.websocket === true;
    } catch {
      declared = false; // unreadable/malformed manifest → deny (fail closed)
    }
  }
  if (!declared) {
    return wsJsonRefusal(
      426,
      "websocket_not_supported",
      `module "${match.entry.name}" does not declare WebSocket support`,
      { upgrade: "websocket" },
    );
  }

  if (!server?.upgrade) {
    return wsJsonRefusal(
      503,
      "service_unavailable",
      "websocket upgrade unavailable on this server",
    );
  }

  // Upstream URL — same path semantics as the HTTP proxy (stripPrefix honored).
  const stripPrefix = stripPrefixFor(match.entry);
  const targetPath = stripPrefix ? url.pathname.slice(match.mount.length) || "/" : url.pathname;
  const upstreamUrl = `ws://127.0.0.1:${match.port}${targetPath}${url.search}`;

  // Upstream headers: the client's own (cookie / authorization ride through
  // so the daemon can authenticate the connection) minus hop-by-hop +
  // handshake headers, plus the H2 substrate trust stamps.
  const headers = new Headers(req.headers);
  for (const h of WS_HOP_BY_HOP_HEADERS) headers.delete(h);
  stampSubstrateTrustHeaders(headers, req, deps.peerAddr);
  const upstreamHeaders: Record<string, string> = {};
  headers.forEach((value, key) => {
    upstreamHeaders[key] = value;
  });

  // Connection caps (hub#649) — the LAST gate. Everything from here to
  // `server.upgrade` must stay await-free. Last on purpose: earlier refusals
  // keep their precise statuses, and the counters only ever hold slots for
  // connections that would actually bridge.
  const capKey = wsCapBucketKey(req, deps.peerAddr);
  const acquired = deps.wsConnectionTracker.tryAcquire(capKey);
  if (!acquired.ok) {
    // Operator-facing pressure signal: which cap, which bucket, how full.
    // None of this reaches the client — the 429 body is deliberately generic.
    console.warn(
      `[ws-caps] refused upgrade for ${url.pathname}: ${
        acquired.reason === "per_ip_cap" ? `per-IP cap (ip=${capKey})` : `total cap (ip=${capKey})`
      }; total=${deps.wsConnectionTracker.totalCount} ip_count=${deps.wsConnectionTracker.countFor(
        capKey,
      )}`,
    );
    return wsJsonRefusal(
      429,
      "too_many_connections",
      "WebSocket connection limit reached; try again later",
    );
  }

  try {
    const upgraded = server.upgrade(req, {
      data: { upstreamUrl, upstreamHeaders, releaseCap: acquired.release },
    });
    if (upgraded) return { kind: "upgraded" };
  } catch (err) {
    // The upgrade blew up before handing over a socket: give the slot back
    // so a throwing upgrade can't permanently shrink the caps, then let the
    // caller's error handling see the original failure.
    acquired.release();
    throw err;
  }

  // No socket was created, so the bridge's close handler will never fire —
  // release the slot inline.
  acquired.release();
  return wsJsonRefusal(
    400,
    "upgrade_failed",
    "WebSocket handshake was malformed or could not be completed",
  );
}
1458
1841
 
1459
1842
  /**
@@ -1584,6 +1967,49 @@ export function hubFetch(
1584
1967
  // error detail"). A transient SQLITE_BUSY is classified non-fatal and just
1585
1968
  // surfaces a 503 the next request clears — it never kills the hub.
1586
1969
  try {
1970
+ // H1 — WebSocket upgrade bridge. Runs before normal dispatch: an
1971
+ // `Upgrade: websocket` request targeting a declared service mount is
1972
+ // gated (publicExposure cloak + audience gate) and, if it passes,
1973
+ // upgraded into the Bun-native bridge (src/ws-bridge.ts) instead of
1974
+ // the fetch-based proxy (which cannot forward upgrades). Upgrade
1975
+ // requests that match no service mount fall through to normal dispatch
1976
+ // unchanged — no hub-owned route speaks WebSocket.
1977
+ if (isWebSocketUpgrade(req)) {
1978
+ const verdict = await maybeUpgradeWebSocket(req, server, {
1979
+ manifestPath,
1980
+ peerAddr,
1981
+ readModuleManifestFn: deps?.readModuleManifest ?? defaultReadModuleManifest,
1982
+ wsConnectionTracker: deps?.wsConnectionTracker ?? defaultWsConnectionTracker,
1983
+ // H3 — the audience gate runs BEFORE the upgrade, same posture as
1984
+ // the HTTP dispatch below: a WS endpoint under a hub-users surface
1985
+ // never hands a socket to an anonymous caller, while `surface`
1986
+ // audiences pass through (the backed surface authenticates the
1987
+ // socket itself — e.g. the docs editor's collab WS rides this).
1988
+ // (The publicExposure cloak already ran inside
1989
+ // maybeUpgradeWebSocket before this hook.)
1990
+ gateAudience: async (wsPathname) => {
1991
+ const wsUiMatch = resolveUiMount(
1992
+ readManifestLenient(manifestPath).services,
1993
+ wsPathname,
1994
+ );
1995
+ if (!wsUiMatch) return null;
1996
+ return gateUiAudience(req, wsUiMatch.audience, wsUiMatch.ui, {
1997
+ db: getDb?.(),
1998
+ knownIssuers: () => oauthDeps(req).hubBoundOrigins(),
1999
+ });
2000
+ },
2001
+ });
2002
+ if (verdict.kind === "upgraded") {
2003
+ // Bun's contract after a successful `server.upgrade()` is to
2004
+ // return undefined from fetch — the socket now belongs to the
2005
+ // websocket handlers. The public signature stays Response-typed
2006
+ // for the many direct (non-WS) call sites; this cast is the one
2007
+ // deliberate exception, observed only by Bun's runtime.
2008
+ return undefined as unknown as Response;
2009
+ }
2010
+ if (verdict.kind === "response") return verdict.response;
2011
+ // kind === "pass" — fall through to normal dispatch.
2012
+ }
1587
2013
  return await dispatch();
1588
2014
  } catch (err) {
1589
2015
  const klass = classifyDbError(err);
@@ -2242,6 +2668,7 @@ export function hubFetch(
2242
2668
  connectionsStorePath: deps?.connectionsStorePath ?? join(CONFIG_DIR, "connections.json"),
2243
2669
  channelOrigin,
2244
2670
  resolveVaultOrigin,
2671
+ resolveModuleOrigin: makeResolveModuleOrigin(manifestPath),
2245
2672
  // Daemon eviction — the same in-process supervisor the lifecycle
2246
2673
  // verbs drive (module-ops API); restarting vault evicts the open
2247
2674
  // store handle + re-runs selfRegister (services.json path rebuild).
@@ -2339,6 +2766,7 @@ export function hubFetch(
2339
2766
  hubOrigin: oauthDeps(req).issuer,
2340
2767
  modules,
2341
2768
  resolveVaultOrigin,
2769
+ resolveModuleOrigin: makeResolveModuleOrigin(manifestPath),
2342
2770
  channelOrigin,
2343
2771
  storePath: deps?.connectionsStorePath ?? join(CONFIG_DIR, "connections.json"),
2344
2772
  };
@@ -3018,8 +3446,54 @@ export function hubFetch(
3018
3446
  // here only after every hub-owned prefix above has had its turn — so
3019
3447
  // `/`, `/admin/*`, `/oauth/*`, `/.well-known/*`, `/hub/*`, `/vault/*`,
3020
3448
  // `/api/*` are excluded by ordering, not by an explicit denylist (#182).
3449
+ //
3450
+ // H3 — per-UI audience gate. When the path falls under a declared UI
3451
+ // sub-unit (a `uis{}` entry on the matched service row — surface-hosted
3452
+ // UI mounts like /surface/<name>/*), the sub-unit's audience is
3453
+ // enforced BEFORE forwarding: 'public' passes, 'surface' passes (the
3454
+ // backed surface authenticates every request itself), 'hub-users'
3455
+ // requires a session or a scope-satisfying Bearer, 'operator' requires
3456
+ // the first admin. Module API paths outside any uis entry are NOT
3457
+ // gated here — modules keep their own auth. Ordering nuance: when the
3458
+ // row's publicExposure cloak would fire (loopback-only, non-loopback
3459
+ // layer), the gate is SKIPPED so the 404 cloak stays indistinguishable
3460
+ // from not-installed (a 401 here would leak the route's existence) —
3461
+ // which also means a 'surface'/'public' mount on a loopback-only row
3462
+ // stays unreachable from tailnet/funnel: exposure is orthogonal to
3463
+ // audience.
3464
+ const uiMatch = resolveUiMount(readManifestLenient(manifestPath).services, pathname);
3465
+ if (uiMatch) {
3466
+ const cloaked =
3467
+ effectivePublicExposure(uiMatch.entry) === "loopback" &&
3468
+ layerOf(req, peerAddr) !== "loopback";
3469
+ if (!cloaked) {
3470
+ const denied = await gateUiAudience(req, uiMatch.audience, uiMatch.ui, {
3471
+ db: getDb?.(),
3472
+ knownIssuers: () => oauthDeps(req).hubBoundOrigins(),
3473
+ });
3474
+ if (denied) return denied;
3475
+ }
3476
+ }
3021
3477
  const proxied = await proxyToService(req, manifestPath, deps?.supervisor, peerAddr);
3022
- if (proxied) return decorateWithChrome(proxied, req, pathname, getDb);
3478
+ if (proxied) {
3479
+ // H5 — chrome-strip rides the gate: where the audience resolved
3480
+ // `public`, the identity chrome is disabled for that mount (public
3481
+ // readers aren't hub users). `surface` follows the same precedent —
3482
+ // a backed surface's visitors are mostly capability-link invitees,
3483
+ // NOT hub users, so the "Signed in as…" chrome would be wrong for
3484
+ // them (and the surface owns its whole page anyway). Reuses the
3485
+ // per-path opt-out mechanism the /surface/notes/ precedent
3486
+ // established, generalized to the declared audience.
3487
+ return decorateWithChrome(
3488
+ proxied,
3489
+ req,
3490
+ pathname,
3491
+ getDb,
3492
+ uiMatch !== undefined && (uiMatch.audience === "public" || uiMatch.audience === "surface")
3493
+ ? [uiMatch.mount]
3494
+ : undefined,
3495
+ );
3496
+ }
3023
3497
 
3024
3498
  // Branded fall-through 404 (closes hub#392) — the operator who mistyped
3025
3499
  // a URL sees a clear "not found" page with a path back home, not the
@@ -3047,6 +3521,14 @@ export function hubFetch(
3047
3521
  * wrapper threads in the session-aware chrome HTML and a `set-cookie`
3048
3522
  * append when a fresh CSRF cookie was minted.
3049
3523
  *
3524
+ * `extraOptOutPrefixes` (H5) generalizes the static opt-out list: the
3525
+ * dispatch passes the matched UI mount when the audience gate resolved
3526
+ * `public` or `surface` — public readers (and a backed surface's
3527
+ * capability-link invitees) aren't hub users, so the identity chrome
3528
+ * ("Signed in as…", Sign in link) must not ride their pages. Same
3529
+ * mechanism as the hardcoded `/surface/notes/` precedent, now driven by
3530
+ * the sub-unit's declared audience instead of a hub-side path list.
3531
+ *
3050
3532
  * When `getDb` isn't wired (hubFetch instantiated without state — tests,
3051
3533
  * cold-start hub minus DB), we still inject — the signed-out variant.
3052
3534
  */
@@ -3055,6 +3537,7 @@ async function decorateWithChrome(
3055
3537
  req: Request,
3056
3538
  pathname: string,
3057
3539
  getDb: HubFetchDeps["getDb"],
3540
+ extraOptOutPrefixes?: readonly string[],
3058
3541
  ): Promise<Response> {
3059
3542
  // Build chrome HTML lazily — `buildChromeForRequest` already opens the DB
3060
3543
  // for the session lookup; calling it on a response that won't be rewritten
@@ -3075,6 +3558,9 @@ async function decorateWithChrome(
3075
3558
  const out = await injectChromeIntoResponse(res, {
3076
3559
  chromeHtml,
3077
3560
  pathname,
3561
+ ...(extraOptOutPrefixes !== undefined && extraOptOutPrefixes.length > 0
3562
+ ? { optOutPrefixes: [...CHROME_OPT_OUT_PREFIXES, ...extraOptOutPrefixes] }
3563
+ : {}),
3078
3564
  });
3079
3565
  // Append set-cookie if a CSRF was minted AND the chrome was actually
3080
3566
  // injected (we know that by checking out !== res — pass-through preserves
@@ -3152,6 +3638,9 @@ if (import.meta.main) {
3152
3638
  issuer,
3153
3639
  loopbackPort: port,
3154
3640
  }),
3641
+ // H1 — the WebSocket upgrade bridge's frame-piping handlers. Connections
3642
+ // land here only after `maybeUpgradeWebSocket` gated + upgraded them.
3643
+ websocket: createWsBridgeHandlers(),
3155
3644
  });
3156
3645
  // Register PID + port from the running hub itself so any startup path
3157
3646
  // (spawn-via-`ensureHubRunning` or a direct `bun src/hub-server.ts` from