@thotischner/observability-mcp 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/analysis/history.d.ts +36 -2
  2. package/dist/analysis/history.js +60 -2
  3. package/dist/analysis/history.test.js +46 -0
  4. package/dist/audit/sinks/s3.d.ts +61 -0
  5. package/dist/audit/sinks/s3.js +179 -0
  6. package/dist/audit/sinks/s3.test.d.ts +1 -0
  7. package/dist/audit/sinks/s3.test.js +175 -0
  8. package/dist/auth/csrf.d.ts +6 -0
  9. package/dist/auth/csrf.js +4 -0
  10. package/dist/auth/csrf.test.js +22 -0
  11. package/dist/auth/lockout.d.ts +72 -0
  12. package/dist/auth/lockout.js +134 -0
  13. package/dist/auth/lockout.test.d.ts +1 -0
  14. package/dist/auth/lockout.test.js +133 -0
  15. package/dist/auth/middleware.d.ts +5 -0
  16. package/dist/auth/middleware.js +6 -1
  17. package/dist/auth/middleware.test.js +31 -0
  18. package/dist/auth/password-policy.d.ts +52 -0
  19. package/dist/auth/password-policy.js +125 -0
  20. package/dist/auth/password-policy.test.d.ts +1 -0
  21. package/dist/auth/password-policy.test.js +111 -0
  22. package/dist/auth/policy/batch-dry-run.js +15 -0
  23. package/dist/auth/revocation.d.ts +93 -0
  24. package/dist/auth/revocation.js +193 -0
  25. package/dist/auth/revocation.test.d.ts +1 -0
  26. package/dist/auth/revocation.test.js +136 -0
  27. package/dist/auth/session.d.ts +7 -0
  28. package/dist/auth/session.js +6 -0
  29. package/dist/auth/session.test.js +21 -0
  30. package/dist/connectors/interface.d.ts +5 -1
  31. package/dist/connectors/loader.d.ts +8 -0
  32. package/dist/connectors/loader.js +49 -0
  33. package/dist/connectors/loki.d.ts +45 -1
  34. package/dist/connectors/loki.js +141 -8
  35. package/dist/connectors/loki.test.js +171 -1
  36. package/dist/connectors/manifest-hooks.test.d.ts +1 -0
  37. package/dist/connectors/manifest-hooks.test.js +206 -0
  38. package/dist/federation/registry.d.ts +27 -5
  39. package/dist/federation/registry.js +49 -4
  40. package/dist/federation/registry.test.js +79 -3
  41. package/dist/federation/upstream.d.ts +32 -6
  42. package/dist/federation/upstream.js +60 -12
  43. package/dist/federation/upstream.test.d.ts +1 -0
  44. package/dist/federation/upstream.test.js +118 -0
  45. package/dist/index.js +522 -67
  46. package/dist/metrics/self.d.ts +1 -0
  47. package/dist/metrics/self.js +8 -0
  48. package/dist/openapi.js +39 -0
  49. package/dist/openapi.test.js +1 -0
  50. package/dist/policy/redact.js +1 -1
  51. package/dist/postmortem/store.d.ts +34 -0
  52. package/dist/postmortem/store.js +113 -0
  53. package/dist/postmortem/store.test.d.ts +1 -0
  54. package/dist/postmortem/store.test.js +118 -0
  55. package/dist/scim/compliance.test.d.ts +1 -0
  56. package/dist/scim/compliance.test.js +169 -0
  57. package/dist/scim/factory.test.d.ts +1 -0
  58. package/dist/scim/factory.test.js +54 -0
  59. package/dist/scim/patch-ops.test.d.ts +1 -0
  60. package/dist/scim/patch-ops.test.js +100 -0
  61. package/dist/scim/redis-store.d.ts +38 -0
  62. package/dist/scim/redis-store.js +178 -0
  63. package/dist/scim/redis-store.test.d.ts +1 -0
  64. package/dist/scim/redis-store.test.js +138 -0
  65. package/dist/scim/routes.d.ts +27 -2
  66. package/dist/scim/routes.js +161 -15
  67. package/dist/scim/store.d.ts +40 -1
  68. package/dist/scim/store.js +23 -5
  69. package/dist/sdk/hook-wrappers.d.ts +39 -0
  70. package/dist/sdk/hook-wrappers.js +113 -0
  71. package/dist/sdk/hook-wrappers.test.d.ts +1 -0
  72. package/dist/sdk/hook-wrappers.test.js +204 -0
  73. package/dist/sdk/index.d.ts +13 -0
  74. package/dist/security/csp.d.ts +64 -0
  75. package/dist/security/csp.js +135 -0
  76. package/dist/security/csp.test.d.ts +1 -0
  77. package/dist/security/csp.test.js +97 -0
  78. package/dist/tools/detect-anomalies.d.ts +12 -1
  79. package/dist/tools/detect-anomalies.js +22 -2
  80. package/dist/tools/query-logs.d.ts +40 -0
  81. package/dist/tools/query-logs.js +69 -3
  82. package/dist/tools/topology.js +23 -5
  83. package/dist/tools/topology.test.js +45 -0
  84. package/dist/tools/validation.d.ts +13 -0
  85. package/dist/tools/validation.js +74 -0
  86. package/dist/tools/validation.test.js +54 -1
  87. package/dist/transport/transportSessionMap.d.ts +70 -0
  88. package/dist/transport/transportSessionMap.js +128 -0
  89. package/dist/transport/transportSessionMap.test.d.ts +1 -0
  90. package/dist/transport/transportSessionMap.test.js +111 -0
  91. package/dist/types.d.ts +48 -0
  92. package/dist/ui/index.html +898 -116
  93. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -19,7 +19,11 @@ import { buildSessionAttacher, buildRequireSession, } from "./auth/middleware.js
19
19
  import { buildRequirePermissionFromEngine, hasPermission, listGrantedPermissions, DEFAULT_POLICY, } from "./auth/rbac.js";
20
20
  import { resolveOidcConfig, buildOidcRuntime } from "./auth/oidc/runtime.js";
21
21
  import { registerOidcRoutes } from "./auth/oidc/endpoints.js";
22
- import { ScimStore } from "./scim/store.js";
22
+ import { RevocationStore } from "./auth/revocation.js";
23
+ import { AccountLockout, lockoutConfigFromEnv, lockoutDisabledFromEnv, } from "./auth/lockout.js";
24
+ import { resolveSessionStore } from "./transport/sessionStore.js";
25
+ import { generateNonce, enforcedCsp, reportOnlyCsp, reportingEndpointsHeader, reportToHeader, summariseViolation, cspStrictReportFromEnv, CSP_NONCE_PLACEHOLDER, } from "./security/csp.js";
26
+ import { createScimStore } from "./scim/store.js";
23
27
  import { registerScimRoutes } from "./scim/routes.js";
24
28
  import { BuiltinPolicyEngine } from "./auth/policy/engine.js";
25
29
  import { loadPolicyFromFile, writePolicyFile, PolicyLoadError, VALID_RESOURCES, VALID_ACTIONS } from "./auth/policy/loader.js";
@@ -40,10 +44,11 @@ import { getPluginLoader } from "./connectors/loader.js";
40
44
  import { resolveHubCatalogUrl, describeInstalled, mergeCatalog, fetchHubCatalog, } from "./connectors/hub.js";
41
45
  import { isValidConnectorName, installTarball } from "./connectors/install.js";
42
46
  import { PluginVerificationError } from "./connectors/verify.js";
43
- import { selfRegistry, withToolMetrics, apiRequests, mcpActiveSessions } from "./metrics/self.js";
47
+ import { selfRegistry, withToolMetrics, apiRequests, mcpActiveSessions, auditDlqDepth } from "./metrics/self.js";
44
48
  import { initOtel } from "./observability/otel.js";
45
49
  import { WebSocketServerTransport } from "./transport/websocket.js";
46
50
  import { HookRegistry } from "./sdk/hooks.js";
51
+ import { wrapToolHandler, wrapResourceHandler, wrapPromptHandler } from "./sdk/hook-wrappers.js";
47
52
  import { UpstreamClient } from "./federation/upstream.js";
48
53
  import { FederationRegistry, parseFederationEnv } from "./federation/registry.js";
49
54
  import { buildCsrfIssuer, buildCsrfEnforcer, csrfBypassFromEnv } from "./auth/csrf.js";
@@ -56,6 +61,7 @@ import { queryLogsHandler } from "./tools/query-logs.js";
56
61
  import { queryTracesHandler } from "./tools/query-traces.js";
57
62
  import { getAnomalyHistoryHandler } from "./tools/get-anomaly-history.js";
58
63
  import { generatePostmortemHandler } from "./tools/generate-postmortem.js";
64
+ import { PostmortemStore } from "./postmortem/store.js";
59
65
  import { AnomalyHistory, fromEnv as anomalyHistoryFromEnv } from "./analysis/history.js";
60
66
  import { getServiceHealthHandler, setHealthThresholds } from "./tools/get-service-health.js";
61
67
  import { detectAnomaliesHandler } from "./tools/detect-anomalies.js";
@@ -295,11 +301,20 @@ async function main() {
295
301
  return result;
296
302
  }
297
303
  }
304
+ /**
305
+ * Returns the McpServer for the given context. The companion
306
+ * `toolHandlers` map carries every tool registered for this ctx
307
+ * (post-hook-wrapping) so the in-product Playground UI (Q13) can
308
+ * invoke a tool without going through the full Streamable HTTP
309
+ * transport stack. The map is keyed by tool name; values run the
310
+ * same wrapped handler the McpServer would dispatch over MCP.
311
+ */
298
312
  function createMcpServer(ctx) {
299
313
  const mcpServer = new McpServer({
300
314
  name: "observability-mcp",
301
315
  version: SERVER_VERSION,
302
316
  });
317
+ const toolHandlers = new Map();
303
318
  // --- Register tools with Zod schemas ---
304
319
  // Product-aware registration: when the active credential is bound
305
320
  // to a Product (OMCP_KEY_PRODUCTS), `ctx.allowedTools` carries that
@@ -319,34 +334,39 @@ async function main() {
319
334
  return undefined;
320
335
  if (rest.length > 0 && typeof rest[rest.length - 1] === "function") {
321
336
  const originalHandler = rest[rest.length - 1];
322
- const wrappedHandler = async (args, extra) => {
323
- const hookCtxBase = {
324
- principal: ctx.principalId,
325
- tenant: ctx.tenant || "default",
326
- target: name,
327
- };
328
- const pre = await hookRegistry.fire("tool_pre_invoke", { ...hookCtxBase, kind: "tool_pre_invoke" }, { args });
329
- if (!pre.allow) {
330
- return {
331
- content: [{ type: "text", text: pre.reason ?? "denied by plugin hook" }],
332
- isError: true,
333
- };
334
- }
335
- const effectiveArgs = pre.payload?.args ?? args;
336
- const result = await originalHandler(effectiveArgs, extra);
337
- const post = await hookRegistry.fire("tool_post_invoke", { ...hookCtxBase, kind: "tool_post_invoke" }, { args: effectiveArgs, result });
338
- if (!post.allow) {
339
- return {
340
- content: [{ type: "text", text: post.reason ?? "denied by plugin hook" }],
341
- isError: true,
342
- };
343
- }
344
- return post.payload?.result ?? result;
345
- };
337
+ const wrappedHandler = wrapToolHandler(hookRegistry, { principal: ctx.principalId, tenant: ctx.tenant || "default", target: name }, originalHandler);
346
338
  rest[rest.length - 1] = wrappedHandler;
339
+ // Stash for the Playground endpoint — keyed by tool name. The
340
+ // wrapped handler honours pre/post hooks + the same RBAC the
341
+ // McpServer dispatch path runs. Per-ctx Map so a different
342
+ // user's allowedTools never leak.
343
+ toolHandlers.set(name, wrappedHandler);
347
344
  }
348
345
  return mcpServer.tool(name, ...rest);
349
346
  });
347
+ // Q12: resource + prompt registrations get the same hook-fan-out
348
+ // treatment so a plugin's resource_pre_fetch / resource_post_fetch /
349
+ // prompt_pre_fetch / prompt_post_fetch handlers actually fire when
350
+ // a future resource/prompt registration lands. The wrappers stay
351
+ // thin pass-throughs when no hooks are registered (the OSS default).
352
+ // Wrappers are tested in mcp-server/src/sdk/hook-wrappers.test.ts.
353
+ const registerResource = ((name, ...rest) => {
354
+ if (rest.length > 0 && typeof rest[rest.length - 1] === "function") {
355
+ const originalHandler = rest[rest.length - 1];
356
+ rest[rest.length - 1] = wrapResourceHandler(hookRegistry, { principal: ctx.principalId, tenant: ctx.tenant || "default", target: name }, originalHandler);
357
+ }
358
+ return mcpServer.resource(name, ...rest);
359
+ });
360
+ const registerPrompt = ((name, ...rest) => {
361
+ if (rest.length > 0 && typeof rest[rest.length - 1] === "function") {
362
+ const originalHandler = rest[rest.length - 1];
363
+ rest[rest.length - 1] = wrapPromptHandler(hookRegistry, { principal: ctx.principalId, tenant: ctx.tenant || "default", target: name }, originalHandler);
364
+ }
365
+ return mcpServer.prompt(name, ...rest);
366
+ });
367
+ // Suppress unused-warn — kept for the moment registrations land.
368
+ void registerResource;
369
+ void registerPrompt;
350
370
  registerTool("list_sources", [
351
371
  "List the configured observability backends (Prometheus, Loki, and any connector) and whether each is currently reachable.",
352
372
  "When to use: call this first to learn which source names exist and are healthy before passing `source` to other tools, or to debug why a query returns no data.",
@@ -547,7 +567,9 @@ async function main() {
547
567
  .describe("Optional. Detection threshold: 'low' flags only strong deviations (>3σ), 'medium' is balanced (>2σ), 'high' is most sensitive and noisier (>1.5σ). Default: 'medium'."),
548
568
  }, async (args) => {
549
569
  await enforceEntitledAccess(ctx, { tool: "detect_anomalies", source: args?.source, service: args?.service });
550
- return withToolMetrics("detect_anomalies", () => detectAnomaliesHandler(registry, args, ctx));
570
+ // P1: pass the anomaly-history sink so detected scores flow
571
+ // into the TSDB and `get_anomaly_history` returns real data.
572
+ return withToolMetrics("detect_anomalies", () => detectAnomaliesHandler(registry, args, ctx, anomalyHistory));
551
573
  });
552
574
  registerTool("get_topology", [
553
575
  "Return the infrastructure topology graph (Resources and Edges) from every topology-capable connector.",
@@ -599,16 +621,33 @@ async function main() {
599
621
  // Product-allow-list gate, so federated tools obey the same policy
600
622
  // surface as native ones.
601
623
  for (const info of federationRegistry.getNamespacedTools()) {
602
- // Upstream's inputSchema is forwarded verbatim. The SDK's
603
- // tool() overload signatures don't carry an obvious type for a
604
- // dynamic-shape schema, so we cast to `any` at the boundary and
605
- // let the upstream contract speak for the validation.
606
- registerTool(info.namespacedName, info.description || `Federated from upstream ${info.sourceName}.`, info.inputSchema ?? {}, async (args) => {
624
+ // The MCP SDK's tool() signature wants a ZodRawShape (a map of
625
+ // field-name Zod type), not a raw JSON Schema. Federated
626
+ // upstreams expose JSON Schema (the wire-format MCP uses on
627
+ // tools/list); we transcode to a permissive Zod shape so the
628
+ // SDK accepts the registration. Per-field types are `z.unknown()`
629
+ // because the upstream will validate the call args anyway; the
630
+ // local Zod check is only a "this is the field name set" gate.
631
+ // P7: this transcoding fixes the registration crash that broke
632
+ // every federation deploy before the E2E test caught it.
633
+ const upstreamProps = info.inputSchema?.properties ?? {};
634
+ // Every field is z.unknown().optional() — the SDK only uses this
635
+ // shape to know the field-name set; the upstream re-validates
636
+ // against its full JSON Schema (incl. its own `required` list)
637
+ // when the call arrives. Marking all fields optional here keeps
638
+ // calls with the upstream-defaults flowing through; without it
639
+ // the SDK rejects any call that omits a field upstream considers
640
+ // required even if the upstream would accept the omission.
641
+ const localShape = {};
642
+ for (const k of Object.keys(upstreamProps)) {
643
+ localShape[k] = z.unknown().optional();
644
+ }
645
+ registerTool(info.namespacedName, info.description || `Federated from upstream ${info.sourceName}.`, localShape, async (args) => {
607
646
  await enforceEntitledAccess(ctx, { tool: info.namespacedName });
608
647
  return withToolMetrics(info.namespacedName, () => federationRegistry.callNamespacedTool(info.namespacedName, args));
609
648
  });
610
649
  }
611
- return mcpServer;
650
+ return { mcpServer, toolHandlers };
612
651
  }
613
652
  // --- Management-plane auth (basic mode) -----------------------------------
614
653
  // Off by default. Enable with `OMCP_AUTH=basic` + `OMCP_USERS_FILE` and
@@ -688,7 +727,19 @@ async function main() {
688
727
  else if (requestedAuthMode !== "anonymous") {
689
728
  authMisconfig(`unknown OMCP_AUTH=${requestedAuthMode}`);
690
729
  }
691
- const authRuntime = { mode: authMode, session: sessionCfg, secretEphemeral, oidc: oidcRuntime };
730
+ // Session revocation blocklist (Q17). Only meaningful when sessions
731
+ // exist (basic / oidc); anonymous mode leaves it undefined so the
732
+ // middleware check is a pure no-op. OMCP_AUTH_REVOCATION_FILE persists
733
+ // the blocklist across restarts and shares it across replicas when it
734
+ // points at shared storage; unset = in-memory only.
735
+ let revocationStore;
736
+ if (authMode !== "anonymous") {
737
+ revocationStore = await RevocationStore.create({
738
+ path: process.env.OMCP_AUTH_REVOCATION_FILE?.trim() || undefined,
739
+ });
740
+ console.log(`[auth] session revocation blocklist active — backend=${revocationStore.persistent ? `file (${revocationStore.filePath})` : "memory"}, ${revocationStore.size} existing entr${revocationStore.size === 1 ? "y" : "ies"}`);
741
+ }
742
+ const authRuntime = { mode: authMode, session: sessionCfg, secretEphemeral, oidc: oidcRuntime, revocation: revocationStore };
692
743
  // --- HTTP server ---
693
744
  const app = express();
694
745
  // Trust-proxy: when set, Express will read req.ip / req.secure from
@@ -717,13 +768,43 @@ async function main() {
717
768
  app.set("trust proxy", trustProxy);
718
769
  }
719
770
  }
720
- app.use(express.json({ limit: "1mb" }));
771
+ // Parse application/json AND any *+json media type. SCIM clients
772
+ // (Entra, Okta) send `application/scim+json` per RFC 7644 §3.1 —
773
+ // without the wildcard the body silently arrives empty and every
774
+ // SCIM POST/PATCH 400s. The wildcard also future-proofs other
775
+ // structured-suffix JSON content types.
776
+ // application/csp-report is the legacy media type browsers use for CSP
777
+ // violation reports (the modern Reporting API uses application/reports+json,
778
+ // already covered by the wildcard). Without it the report body arrives empty.
779
+ app.use(express.json({ limit: "1mb", type: ["application/json", "application/*+json", "application/csp-report"] }));
780
+ // Q20 — resolve the opt-in strict Report-Only CSP toggle once at boot.
781
+ // Default off: with ~200 inline handlers the report-only policy would
782
+ // emit a [Report Only] console message per handler on every page load.
783
+ const cspStrictReport = cspStrictReportFromEnv();
784
+ if (cspStrictReport) {
785
+ console.log("[csp] strict report-only policy ON (OMCP_CSP_STRICT_REPORT) — inline-handler violations will be reported to /api/csp-violations");
786
+ }
721
787
  // Security headers
722
788
  app.use((req, res, next) => {
723
789
  res.setHeader("X-Content-Type-Options", "nosniff");
724
790
  res.setHeader("X-Frame-Options", "DENY");
725
791
  res.setHeader("X-XSS-Protection", "1; mode=block");
726
792
  res.setHeader("Referrer-Policy", "strict-origin-when-cross-origin");
793
+ // Q20 — Content-Security-Policy. A per-request nonce is minted and
794
+ // stashed on res.locals so the UI handler can stamp it into the two
795
+ // inline <script> blocks. The enforced policy keeps the UI working
796
+ // (script-src 'unsafe-inline' for the ~200 inline handlers) and is
797
+ // always on; the strict report-only policy is opt-in (it surfaces the
798
+ // inline-handler debt but is console-noisy). Both report to
799
+ // /api/csp-violations.
800
+ const nonce = generateNonce();
801
+ res.locals.cspNonce = nonce;
802
+ res.setHeader("Content-Security-Policy", enforcedCsp());
803
+ if (cspStrictReport) {
804
+ res.setHeader("Content-Security-Policy-Report-Only", reportOnlyCsp(nonce));
805
+ }
806
+ res.setHeader("Reporting-Endpoints", reportingEndpointsHeader());
807
+ res.setHeader("Report-To", reportToHeader());
727
808
  // Dynamic API responses must never be served from the browser/proxy
728
809
  // cache: after a mutation (e.g. installing a connector) the UI
729
810
  // re-fetches these GETs immediately, and a heuristically-cached stale
@@ -774,6 +855,11 @@ async function main() {
774
855
  const csrfCfg = {
775
856
  bypassBearer: csrfBypassFromEnv(),
776
857
  secureCookie: (r) => r.secure || r.headers["x-forwarded-proto"] === "https",
858
+ // CSP violation reports are unauthenticated browser POSTs that by
859
+ // construction carry no cookie + no custom header — exempt them from
860
+ // CSRF. The endpoint only records a sanitised summary, so accepting it
861
+ // cross-site is harmless.
862
+ skip: (r) => r.method === "POST" && (r.path === "/api/csp-violations" || r.originalUrl.split("?")[0] === "/api/csp-violations"),
777
863
  };
778
864
  app.use(buildCsrfIssuer(csrfCfg));
779
865
  app.use("/api", buildCsrfEnforcer(csrfCfg));
@@ -904,6 +990,36 @@ async function main() {
904
990
  .catch((err) => console.warn("AuditLog flushSinks failed:", err));
905
991
  });
906
992
  const audit = (resource, action) => buildAuditMiddleware({ audit: mgmtAudit, resource, action });
993
+ // Q20 — CSP violation report sink. Unauthenticated browser POST (exempt
994
+ // from CSRF via csrfCfg.skip), tightly rate-limited so a misbehaving or
995
+ // hostile client can't flood the audit log, and only a sanitised summary
996
+ // (directive / blocked-uri / document-uri) is recorded. Always 204 so the
997
+ // browser never retries. The report-only strict policy is what drives most
998
+ // of these today (the inline-handler debt) — they roll into mgmtAudit so an
999
+ // operator can watch the migration surface shrink.
1000
+ const cspReportRateLimit = rateLimit({
1001
+ windowMs: 60_000,
1002
+ max: 60,
1003
+ standardHeaders: true,
1004
+ legacyHeaders: false,
1005
+ message: { error: "rate limited" },
1006
+ });
1007
+ app.post("/api/csp-violations", cspReportRateLimit, (req, res) => {
1008
+ const summary = summariseViolation(req.body);
1009
+ if (summary) {
1010
+ void mgmtAudit.record({
1011
+ actor: { sub: "browser:csp" },
1012
+ tenant: "default",
1013
+ resource: "settings",
1014
+ action: "read",
1015
+ method: "POST",
1016
+ path: "/api/csp-violations",
1017
+ status: 204,
1018
+ target: `${summary.directive} blocked ${summary.blockedUri}`.slice(0, 256),
1019
+ }).catch(() => { });
1020
+ }
1021
+ res.status(204).end();
1022
+ });
907
1023
  // Plugin lifecycle hook registry — populated by the loader at boot
908
1024
  // (one entry per manifest `hooks[]` entry) and mutable at runtime
909
1025
  // when a connector is installed via /api/connectors/install. Each
@@ -938,11 +1054,11 @@ async function main() {
938
1054
  // (no tools) so the gateway boots regardless of upstream health.
939
1055
  const federationRegistry = new FederationRegistry();
940
1056
  for (const cfg of parseFederationEnv()) {
941
- const client = new UpstreamClient({
942
- name: cfg.name,
943
- url: cfg.url,
944
- bearerToken: cfg.bearerToken,
945
- });
1057
+ const client = new UpstreamClient(cfg.kind === "stdio"
1058
+ ? { transport: "stdio", name: cfg.name, command: cfg.command, args: cfg.args }
1059
+ : cfg.kind === "ws"
1060
+ ? { transport: "ws", name: cfg.name, url: cfg.url }
1061
+ : { name: cfg.name, url: cfg.url, bearerToken: cfg.bearerToken });
946
1062
  federationRegistry.add(client);
947
1063
  client.connect().catch((err) => {
948
1064
  console.warn("federation upstream %s initial connect failed: %s", cfg.name, err instanceof Error ? err.message : String(err));
@@ -1044,11 +1160,51 @@ async function main() {
1044
1160
  // this endpoint when enabled.
1045
1161
  if (process.env.METRICS_ENABLED !== "false") {
1046
1162
  app.get("/metrics", async (_req, res) => {
1163
+ // P9: refresh the audit-webhook DLQ depth before the scrape so
1164
+ // Prometheus sees the current file state rather than whatever
1165
+ // /api/audit/dlq last set. Best-effort; ENOENT or missing-env
1166
+ // resets to 0 (the dlqPath being unset is the normal state).
1167
+ try {
1168
+ const dlqPath = process.env.OMCP_AUDIT_WEBHOOK_DLQ;
1169
+ if (dlqPath) {
1170
+ const fs = await import("node:fs/promises");
1171
+ const raw = await fs.readFile(dlqPath, "utf8").catch(() => "");
1172
+ auditDlqDepth.set(raw.split("\n").filter((l) => l.trim()).length);
1173
+ }
1174
+ else {
1175
+ auditDlqDepth.set(0);
1176
+ }
1177
+ }
1178
+ catch {
1179
+ auditDlqDepth.set(0);
1180
+ }
1047
1181
  res.set("Content-Type", selfRegistry.contentType);
1048
1182
  res.end(await selfRegistry.metrics());
1049
1183
  });
1050
1184
  }
1051
- // Serve Web UI
1185
+ // Serve Web UI. The index page is served dynamically so the per-request
1186
+ // CSP nonce can be stamped into its inline <script> blocks (the rest of
1187
+ // ui/ stays on express.static). Read once at boot; if the file is
1188
+ // missing we fall through to static, which 404s like before.
1189
+ let uiHtmlTemplate = null;
1190
+ try {
1191
+ uiHtmlTemplate = readFileSync(join(__dirname, "ui", "index.html"), "utf8");
1192
+ }
1193
+ catch {
1194
+ uiHtmlTemplate = null;
1195
+ }
1196
+ if (uiHtmlTemplate) {
1197
+ const template = uiHtmlTemplate;
1198
+ const serveIndex = (_req, res) => {
1199
+ const nonce = res.locals.cspNonce ?? "";
1200
+ res.setHeader("Content-Type", "text/html; charset=utf-8");
1201
+ // Index is identity/nonce-specific — never let a proxy cache it.
1202
+ res.setHeader("Cache-Control", "no-store");
1203
+ res.send(template.split(CSP_NONCE_PLACEHOLDER).join(nonce));
1204
+ };
1205
+ app.get("/", serveIndex);
1206
+ app.get("/index.html", serveIndex);
1207
+ }
1052
1208
  app.use(express.static(join(__dirname, "ui")));
1053
1209
  // --- API endpoints for Web UI ---
1054
1210
  // List sources with health status — tenant-scoped.
@@ -1108,6 +1264,37 @@ async function main() {
1108
1264
  app.get("/api/tools/registry", (_req, res) => {
1109
1265
  res.json({ tools: REGISTERED_TOOLS });
1110
1266
  });
1267
+ // Q13: in-product Playground endpoint. Lets the operator invoke a
1268
+ // registered tool against the live gateway without spinning up a
1269
+ // separate MCP client. Re-uses the per-session ctx and the same
1270
+ // wrapped handler the McpServer dispatch path would run (so RBAC,
1271
+ // entitlements, rate-limit, audit, hook fan-out all apply
1272
+ // identically).
1273
+ app.post("/api/playground/invoke", async (req, res) => {
1274
+ const ctx = await gateCtx(req, res);
1275
+ if (!ctx)
1276
+ return;
1277
+ const body = (req.body ?? {});
1278
+ const tool = typeof body.tool === "string" ? body.tool : "";
1279
+ if (!tool) {
1280
+ res.status(400).json({ error: "tool (string) is required" });
1281
+ return;
1282
+ }
1283
+ const { toolHandlers } = createMcpServer(ctx);
1284
+ const handler = toolHandlers.get(tool);
1285
+ if (!handler) {
1286
+ res.status(404).json({ error: `tool '${tool}' is not registered (or not allowed for this credential)` });
1287
+ return;
1288
+ }
1289
+ try {
1290
+ const result = await handler(body.args ?? {}, undefined);
1291
+ res.json({ tool, result });
1292
+ }
1293
+ catch (err) {
1294
+ const message = err instanceof Error ? err.message : String(err);
1295
+ res.status(500).json({ error: message, tool });
1296
+ }
1297
+ });
1111
1298
  // Server info — version, loaded plugins, MCP protocol version, build metadata.
1112
1299
  // Used by the Web UI footer and by operators to confirm what's deployed.
1113
1300
  app.get("/api/info", async (_req, res) => {
@@ -1142,6 +1329,16 @@ async function main() {
1142
1329
  redaction: REDACTION_ENABLED,
1143
1330
  trustProxy: !!(process.env.OMCP_TRUST_PROXY && process.env.OMCP_TRUST_PROXY !== "false"),
1144
1331
  toolRatePerMin: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
1332
+ // P1: posture flags so dashboards can alert when a shipped
1333
+ // capability is configured but doing nothing useful.
1334
+ anomalyHistoryActive: anomalyHistory.isEnabled(),
1335
+ tracesCapabilityCount: registry
1336
+ .getAll()
1337
+ .filter((c) => typeof c.queryTraces === "function").length,
1338
+ pluginsVerified: !/^(0|false|no|off)$/i.test(process.env.VERIFY_PLUGINS ?? "true"),
1339
+ scimEnabled: !!process.env.OMCP_SCIM_TOKEN,
1340
+ federationUpstreams: (process.env.OMCP_FEDERATION_UPSTREAMS ?? "")
1341
+ .split(",").map((s) => s.trim()).filter(Boolean).length,
1145
1342
  },
1146
1343
  plugins: loader.list().map((p) => ({
1147
1344
  name: p.name,
@@ -1187,6 +1384,10 @@ async function main() {
1187
1384
  },
1188
1385
  permissions: listGrantedPermissions(sess.roles, policyEngineToMap(policyEngine)),
1189
1386
  exp: sess.exp,
1387
+ // The current session's revocation id. Surfaced so an admin can
1388
+ // copy it into POST /api/auth/revocations to kill a specific
1389
+ // session. Absent for legacy cookies issued before sid existed.
1390
+ sid: sess.sid,
1190
1391
  // When the user signed in via OIDC, surface the IdP issuer
1191
1392
  // URL so the UI can render an appropriate badge or link to
1192
1393
  // an IdP-side profile page. Empty / absent in basic mode.
@@ -1566,6 +1767,46 @@ async function main() {
1566
1767
  scopedTo: tenantFilter || (isAdmin ? null : callerTenant),
1567
1768
  });
1568
1769
  });
1770
+ // --- /api/audit/dlq — webhook-sink dead-letter queue surface (P9) ---
1771
+ // When the audit webhook is configured AND the receiver exhausted
1772
+ // its retry budget, entries land in the DLQ file. This endpoint
1773
+ // surfaces the count + the last N entries so operators can decide
1774
+ // whether to replay manually. Also refreshes the
1775
+ // `obsmcp_audit_webhook_dlq_depth` gauge so the /metrics scrape
1776
+ // alongside it stays accurate.
1777
+ app.get("/api/audit/dlq", need("audit", "read"), async (_req, res) => {
1778
+ const dlqPath = process.env.OMCP_AUDIT_WEBHOOK_DLQ;
1779
+ if (!dlqPath) {
1780
+ auditDlqDepth.set(0);
1781
+ res.json({ enabled: false, path: null, depth: 0, entries: [] });
1782
+ return;
1783
+ }
1784
+ try {
1785
+ const fs = await import("node:fs/promises");
1786
+ const raw = await fs.readFile(dlqPath, "utf8");
1787
+ const lines = raw.split("\n").filter((l) => l.trim());
1788
+ auditDlqDepth.set(lines.length);
1789
+ const tail = lines.slice(-50).map((l) => {
1790
+ try {
1791
+ return JSON.parse(l);
1792
+ }
1793
+ catch {
1794
+ return { _raw: l, _parseError: true };
1795
+ }
1796
+ });
1797
+ res.json({ enabled: true, path: dlqPath, depth: lines.length, entries: tail });
1798
+ }
1799
+ catch (err) {
1800
+ const code = err.code;
1801
+ if (code === "ENOENT") {
1802
+ auditDlqDepth.set(0);
1803
+ res.json({ enabled: true, path: dlqPath, depth: 0, entries: [] });
1804
+ return;
1805
+ }
1806
+ console.warn("[/api/audit/dlq] read failed:", err);
1807
+ res.status(500).json({ error: err?.message || "DLQ read failed" });
1808
+ }
1809
+ });
1569
1810
  // --- /api/usage — per-identity MCP rate-limit snapshot -----------------
1570
1811
  // Read-only view of the IdentityRateLimiter's bucket state. Gated by
1571
1812
  // need("audit","read") — the same role set that already sees the
@@ -1671,6 +1912,19 @@ async function main() {
1671
1912
  catch { /* ignore — first login will pick it up */ }
1672
1913
  }
1673
1914
  }
1915
+ // Q18 — per-username failed-login lockout with progressive backoff.
1916
+ // Complements the per-IP loginRateLimit above: that bounds a noisy
1917
+ // single source, this bounds a slow / distributed grind on one
1918
+ // account. Backed by the shared SessionStore so a Redis deployment
1919
+ // locks consistently across replicas (and self-cleans via TTL).
1920
+ // Basic mode only — OIDC delegates auth (and lockout) to the IdP.
1921
+ let lockout;
1922
+ if (authRuntime.mode === "basic" && !lockoutDisabledFromEnv()) {
1923
+ const lockoutStore = await resolveSessionStore();
1924
+ const lockoutCfg = lockoutConfigFromEnv();
1925
+ lockout = new AccountLockout(lockoutStore, lockoutCfg);
1926
+ console.log(`[auth] account lockout active — ${lockoutCfg.maxFailures} failures / ${lockoutCfg.windowSeconds}s → lock ${lockoutCfg.baseLockSeconds}s (×2 up to ${lockoutCfg.maxLockSeconds}s), backend=${lockoutStore.backend}`);
1927
+ }
1674
1928
  app.post("/api/auth/login", loginRateLimit, async (req, res) => {
1675
1929
  if (authRuntime.mode !== "basic" || !sessionCfg || !usersStore) {
1676
1930
  res.status(503).json({ error: "auth mode does not accept logins" });
@@ -1684,11 +1938,57 @@ async function main() {
1684
1938
  res.status(400).json({ error: "username and password are required" });
1685
1939
  return;
1686
1940
  }
1941
+ // Gate on the lock BEFORE the (expensive) scrypt verify so a locked
1942
+ // account can't be used to burn CPU. A locked account is a 429 with
1943
+ // Retry-After, never a credential oracle — the response is identical
1944
+ // whether or not the username exists.
1945
+ if (lockout) {
1946
+ const status = await lockout.check(username);
1947
+ if (status.locked) {
1948
+ res.setHeader("Retry-After", String(status.retryAfterSeconds ?? 0));
1949
+ res.status(429).json({
1950
+ error: "account temporarily locked due to repeated failed logins",
1951
+ retryAfterSeconds: status.retryAfterSeconds,
1952
+ });
1953
+ void mgmtAudit.record({
1954
+ actor: { sub: username },
1955
+ tenant: "default",
1956
+ resource: "users",
1957
+ action: "write",
1958
+ method: "POST",
1959
+ path: "/api/auth/login",
1960
+ status: 429,
1961
+ }).catch(() => { });
1962
+ return;
1963
+ }
1964
+ }
1687
1965
  const user = authenticate(username, password, usersStore);
1688
1966
  if (!user) {
1967
+ if (lockout) {
1968
+ const after = await lockout.recordFailure(username);
1969
+ if (after.locked) {
1970
+ res.setHeader("Retry-After", String(after.retryAfterSeconds ?? 0));
1971
+ res.status(429).json({
1972
+ error: "account temporarily locked due to repeated failed logins",
1973
+ retryAfterSeconds: after.retryAfterSeconds,
1974
+ });
1975
+ void mgmtAudit.record({
1976
+ actor: { sub: username },
1977
+ tenant: "default",
1978
+ resource: "users",
1979
+ action: "write",
1980
+ method: "POST",
1981
+ path: "/api/auth/login",
1982
+ status: 429,
1983
+ }).catch(() => { });
1984
+ return;
1985
+ }
1986
+ }
1689
1987
  res.status(401).json({ error: "invalid credentials" });
1690
1988
  return;
1691
1989
  }
1990
+ if (lockout)
1991
+ await lockout.recordSuccess(user.username);
1692
1992
  const { cookie } = issueSession({ sub: user.username, name: user.name, roles: user.roles, tenant: user.tenant }, sessionCfg);
1693
1993
  const secure = req.secure || (req.headers["x-forwarded-proto"] === "https");
1694
1994
  res.setHeader("Set-Cookie", setCookieHeader(cookie, sessionCfg, { secure }));
@@ -1716,31 +2016,161 @@ async function main() {
1716
2016
  registerOidcRoutes(app, { sessionCfg, oidc: oidcRuntime });
1717
2017
  console.log("[auth] OIDC endpoints registered: /api/auth/oidc/{login,callback,logout}");
1718
2018
  }
1719
- // Phase F21: SCIM 2.0 — opt-in. OMCP_SCIM_TOKEN gates access;
1720
- // OMCP_SCIM_STORE points at the on-disk JSON (mode 0600, atomic).
1721
- // Multi-replica deployments should plug the F8 SessionStore in
1722
- // when F21b lands.
2019
// Q17 session revocation blocklist.
// POST /api/auth/revocations — gated by need("users", "delete") and recorded
// through audit("users", "write").
// Body: exactly one of
//   { sid }  — revoke a single session, or
//   { sub }  — revoke every current session of that subject,
// plus an optional free-text `reason` (truncated to 500 characters).
// Responses:
//   201 { ok: true, revocation }  on success
//   400  when neither or both of sid/sub are supplied
//   503  when no revocation store is configured
//   500  when the store call fails (previously an unhandled rejection)
app.post("/api/auth/revocations", need("users", "delete"), audit("users", "write"), async (req, res) => {
    if (!revocationStore) {
        res.status(503).json({ error: "revocation requires an auth mode (basic|oidc)" });
        return;
    }
    const body = req.body || {};
    // Blank / whitespace-only strings count as "not provided".
    const sid = typeof body.sid === "string" && body.sid.trim() ? body.sid.trim() : undefined;
    const sub = typeof body.sub === "string" && body.sub.trim() ? body.sub.trim() : undefined;
    const reason = typeof body.reason === "string" ? body.reason.slice(0, 500) : undefined;
    // Exactly one selector must be present: reject "none" and "both".
    if (Boolean(sid) === Boolean(sub)) {
        res.status(400).json({ error: "exactly one of `sid` or `sub` is required" });
        return;
    }
    // Who performed the revocation, taken from the caller's own session.
    const by = req.session?.sub;
    try {
        const entry = sid
            ? await revocationStore.revokeSession(sid, { reason, by })
            : await revocationStore.revokeSubject(sub, { reason, by });
        res.status(201).json({ ok: true, revocation: entry });
    }
    catch (err) {
        // A failing store must produce a response instead of a rejected
        // handler promise; mirror the JSON 500 shape used by other routes.
        console.warn("[/api/auth/revocations] revoke failed:", err);
        if (!res.headersSent) {
            res.status(500).json({ error: err?.message || "revocation failed" });
        }
    }
});
2044
// GET /api/auth/revocations — list the current blocklist entries.
// Same need("users", "delete") gate as the POST route. Without a
// revocation store the response is an empty list rather than an error.
app.get("/api/auth/revocations", need("users", "delete"), (_req, res) => {
    const revocations = revocationStore ? revocationStore.list() : [];
    res.json({ revocations });
});
2047
+ // Phase F21 / Q6: SCIM 2.0 — opt-in. OMCP_SCIM_TOKEN gates access.
2048
+ // The store backend is chosen by createScimStore from
2049
+ // OMCP_SCIM_BACKEND (file | redis). file (default) → OMCP_SCIM_STORE
2050
+ // on-disk JSON (mode 0600, atomic). redis → a shared snapshot so
2051
+ // multi-replica deployments stay coherent (Q6); the redis client is
2052
+ // built from OMCP_SCIM_REDIS_URL here, mirroring the session store.
1723
2053
  const scimToken = process.env.OMCP_SCIM_TOKEN?.trim();
1724
2054
  if (scimToken) {
1725
- const scimStorePath = process.env.OMCP_SCIM_STORE?.trim() || "/tmp/scim.json";
1726
- const scimStore = new ScimStore(scimStorePath);
1727
- await scimStore.load();
1728
- registerScimRoutes(app, {
1729
- store: scimStore,
1730
- bearerToken: scimToken,
1731
- audit: (ev) => void mgmtAudit.record({
1732
- actor: { sub: `scim:${ev.actor}` },
1733
- tenant: "default",
1734
- resource: "users",
1735
- action: ev.action.includes("delete") ? "delete" : "write",
1736
- method: "SCIM",
1737
- path: `/scim/v2/${ev.action}`,
1738
- status: ev.status,
1739
- target: ev.target,
1740
- }).catch(() => undefined),
1741
- });
1742
- console.log("[scim] /scim/v2/* registered (store: %s)", scimStorePath);
2055
try {
    // Backend selector: unset or blank falls back to the file backend.
    const scimBackend = process.env.OMCP_SCIM_BACKEND?.trim() || "file";
    // Builds and connects the redis client. Invoked only for the redis
    // backend, so the "redis" module is never imported otherwise.
    const openScimRedis = async () => {
        const url = process.env.OMCP_SCIM_REDIS_URL?.trim();
        if (!url) {
            throw new Error("OMCP_SCIM_BACKEND=redis requires OMCP_SCIM_REDIS_URL");
        }
        const { createClient } = await import("redis");
        const redisClient = createClient({ url });
        redisClient.on("error", (err) => {
            console.warn("[scim] redis client error: %s", err instanceof Error ? err.message : String(err));
        });
        await redisClient.connect();
        return redisClient;
    };
    const scimRedis = scimBackend === "redis" ? await openScimRedis() : undefined;
    const scimStore = await createScimStore({
        backend: scimBackend,
        path: process.env.OMCP_SCIM_STORE?.trim() || "/tmp/scim.json",
        redis: scimRedis,
        redisKey: process.env.OMCP_SCIM_REDIS_KEY?.trim(),
    });
    // Forwards each SCIM event to the management audit log. Fire-and-forget:
    // a failing audit write is swallowed and never affects the SCIM response.
    const recordScimAudit = (ev) => {
        mgmtAudit.record({
            actor: { sub: `scim:${ev.actor}` },
            tenant: "default",
            resource: "users",
            action: ev.action.includes("delete") ? "delete" : "write",
            method: "SCIM",
            path: `/scim/v2/${ev.action}`,
            status: ev.status,
            target: ev.target,
        }).catch(() => undefined);
    };
    registerScimRoutes(app, {
        store: scimStore,
        bearerToken: scimToken,
        audit: recordScimAudit,
    });
    console.log("[scim] /scim/v2/* registered (backend: %s)", scimBackend);
}
catch (err) {
    // Any failure above (missing URL, redis connect, store creation) leaves
    // SCIM unmounted; the rest of the server keeps starting.
    console.warn("[scim] enable failed (routes not mounted): %s", err instanceof Error ? err.message : String(err));
}
1743
2093
  }
2094
// Phase P6: postmortem persistence backing the /api/postmortems routes
// (list / open / regenerate / delete previously generated reports).
// The file path comes from OMCP_POSTMORTEMS_FILE and falls back to
// /tmp/postmortems.jsonl when the variable is unset or blank. Per the
// original note: to keep reports across restarts, point the variable at
// a path on a persistent volume.
const postmortemStore = new PostmortemStore((process.env.OMCP_POSTMORTEMS_FILE ?? "").trim() || "/tmp/postmortems.jsonl");
// Load existing entries before any route can be served from the store.
await postmortemStore.load();
2102
// GET /api/postmortems — summaries of the caller's tenant's reports, in the
// order postmortemStore.list() returns them (newest-first per the original
// note). Requests without a session tenant are served as tenant "default".
// Only header fields are returned; the full report is served by the /:id route.
app.get("/api/postmortems", need("services", "read"), async (req, res) => {
    const tenant = req.session?.tenant || "default";
    const summaries = postmortemStore.list(tenant).map(({ id, ts, createdBy, report }) => ({
        id,
        ts,
        createdBy,
        service: report.service,
        window: report.window,
        synopsis: report.synopsis,
    }));
    res.json({ total: summaries.length, entries: summaries });
});
2119
// GET /api/postmortems/:id — the complete stored entry for one report.
// The lookup is tenant-scoped: the id is resolved together with the
// caller's tenant ("default" without a session tenant), and a miss is a 404.
app.get("/api/postmortems/:id", need("services", "read"), async (req, res) => {
    const tenant = req.session?.tenant || "default";
    const id = String(req.params.id ?? "");
    const found = postmortemStore.get(id, tenant);
    if (found) {
        res.json(found);
        return;
    }
    res.status(404).json({ error: `Postmortem ${id} not found` });
});
2131
// POST /api/postmortems — generate a report through the generate_postmortem
// tool handler and persist it.
// Body: { service, duration? }. `service` must be a non-empty string (400
// otherwise). The tool is always invoked with format "json", whatever the
// caller sent, so the structured report can be parsed and stored.
// Responds 201 with the stored entry, or 500 when generation, parsing or
// persistence fails.
app.post("/api/postmortems", need("services", "write"), async (req, res) => {
    const payload = req.body ?? {};
    if (typeof payload.service !== "string" || !payload.service) {
        res.status(400).json({ error: "service is required" });
        return;
    }
    const session = req.session;
    const tenant = session?.tenant || "default";
    const createdBy = session?.sub || session?.name || "unknown";
    try {
        // Run the tool under the caller's tenant and identity.
        const toolCtx = { ...defaultContext(), tenant, principalId: createdBy };
        const toolArgs = { service: payload.service, duration: payload.duration, format: "json" };
        const toolResult = await generatePostmortemHandler(registry, toolArgs, toolCtx);
        // The JSON report is expected as the text of the first content part.
        const reportJson = toolResult?.content?.[0]?.text;
        if (!reportJson) {
            res.status(500).json({ error: "generate_postmortem returned no content" });
            return;
        }
        const stored = await postmortemStore.append({
            report: JSON.parse(reportJson),
            createdBy,
            tenant,
        });
        res.status(201).json(stored);
    }
    catch (e) {
        console.warn(`[postmortems] regen failed:`, e);
        res.status(500).json({ error: e?.message || "internal error" });
    }
});
2163
// DELETE /api/postmortems/:id — remove one stored report, gated by
// need("services", "delete"). The delete is tenant-scoped ("default" when
// the session carries no tenant).
// Responses:
//   204  entry removed
//   404  no entry with that id for the caller's tenant
//   500  the store failed (previously an unhandled rejection; now reported
//        in the same JSON shape the POST /api/postmortems handler uses)
app.delete("/api/postmortems/:id", need("services", "delete"), async (req, res) => {
    const sess = req.session;
    const tenant = sess?.tenant || "default";
    try {
        const ok = await postmortemStore.delete(String(req.params.id ?? ""), tenant);
        if (!ok) {
            res.status(404).json({ error: `Postmortem ${req.params.id} not found` });
            return;
        }
        res.status(204).end();
    }
    catch (e) {
        console.warn(`[postmortems] delete failed:`, e);
        if (!res.headersSent) {
            res.status(500).json({ error: e?.message || "internal error" });
        }
    }
});
1744
2174
  // Connectors currently loaded into this server (builtin + filesystem
1745
2175
  // plugins), with manifest metadata — drives the UI "Connectors" page.
1746
2176
  app.get("/api/connectors", (_req, res) => {
@@ -2377,6 +2807,31 @@ async function main() {
2377
2807
  tools: filteredTools,
2378
2808
  });
2379
2809
  });
2810
// Q21 — per-service anomaly-score sparklines for the Health tab.
// Groups the records returned by anomalyHistory.recent() into
// { [service]: [{ t, score }, ...] }, where `t` is the record timestamp in
// epoch milliseconds. Records whose `ts` does not parse are dropped.
// MUST be registered before "/api/health/:service" so this literal path is
// not captured as a service name.
// Response fields:
//   enabled      true once recent() returned at least one record
//   remoteWrite  anomalyHistory.isEnabled()
//   windowMs     anomalyHistory.windowMs
//   series       the grouped points
app.get("/api/health/anomaly-sparklines", (req, res) => {
    const sess = req.session;
    // With a session, filter by its tenant ("default" when it has none).
    // Without a session (anonymous / single-tenant mode) no filter is applied.
    const tenant = sess ? (sess.tenant || "default") : undefined;
    const records = anomalyHistory.recent({ tenant });
    // Null-prototype dictionary: service names are data, so a service called
    // "__proto__", "constructor" or "toString" must become an ordinary key.
    // On a plain {} those names resolve to inherited members, `??=` leaves
    // them in place and the following .push() throws.
    const series = Object.create(null);
    for (const r of records) {
        const t = Date.parse(r.ts);
        if (!Number.isFinite(t)) {
            continue;
        }
        (series[r.service] ??= []).push({ t, score: r.score });
    }
    res.json({
        enabled: records.length > 0,
        remoteWrite: anomalyHistory.isEnabled(),
        windowMs: anomalyHistory.windowMs,
        series,
    });
});
2380
2835
  // Health endpoint for UI dashboard
2381
2836
  app.get("/api/health/:service", async (req, res) => {
2382
2837
  try {
@@ -2550,7 +3005,7 @@ async function main() {
2550
3005
  });
2551
3006
  // Stdio transport: one server over stdin/stdout, no HTTP listener.
2552
3007
  if (STDIO) {
2553
- const server = createMcpServer(defaultContext());
3008
+ const { mcpServer: server } = createMcpServer(defaultContext());
2554
3009
  await server.connect(new StdioServerTransport());
2555
3010
  console.error(`observability-mcp running on stdio transport · connectors: ${registry
2556
3011
  .getAll()
@@ -2723,7 +3178,7 @@ async function main() {
2723
3178
  }
2724
3179
  mcpActiveSessions.set(transports.size);
2725
3180
  };
2726
- const sessionMcpServer = createMcpServer(ctx);
3181
+ const { mcpServer: sessionMcpServer } = createMcpServer(ctx);
2727
3182
  await sessionMcpServer.connect(transport);
2728
3183
  }
2729
3184
  await transport.handleRequest(req, res, req.body);
@@ -2831,7 +3286,7 @@ async function main() {
2831
3286
  }
2832
3287
  mcpActiveSessions.set(transports.size);
2833
3288
  };
2834
- const sessionMcpServer = createMcpServer(ctx);
3289
+ const { mcpServer: sessionMcpServer } = createMcpServer(ctx);
2835
3290
  await sessionMcpServer.connect(transport);
2836
3291
  }
2837
3292
  await transport.handleRequest(req, res, req.body);
@@ -2981,7 +3436,7 @@ async function main() {
2981
3436
  wss.handleUpgrade(req, socket, head, async (ws) => {
2982
3437
  try {
2983
3438
  const transport = new WebSocketServerTransport(ws);
2984
- const sessionMcpServer = createMcpServer(auth.ctx);
3439
+ const { mcpServer: sessionMcpServer } = createMcpServer(auth.ctx);
2985
3440
  await sessionMcpServer.connect(transport);
2986
3441
  }
2987
3442
  catch (err) {