npm - @thotischner/observability-mcp - Versions diffs - 1.7.0 → 1.8.1 - Mend

@thotischner/observability-mcp 1.7.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111) hide show

package/config/products.yaml.example +48 -0
package/dist/audit/log.d.ts +99 -0
package/dist/audit/log.js +180 -0
package/dist/audit/log.test.d.ts +1 -0
package/dist/audit/log.test.js +147 -0
package/dist/audit/middleware.d.ts +20 -0
package/dist/audit/middleware.js +50 -0
package/dist/auth/credentials.d.ts +18 -0
package/dist/auth/credentials.js +26 -1
package/dist/auth/credentials.test.js +26 -1
package/dist/auth/local-users.d.ts +62 -0
package/dist/auth/local-users.js +143 -0
package/dist/auth/local-users.test.d.ts +1 -0
package/dist/auth/local-users.test.js +80 -0
package/dist/auth/middleware.d.ts +48 -0
package/dist/auth/middleware.js +65 -0
package/dist/auth/middleware.test.d.ts +1 -0
package/dist/auth/middleware.test.js +90 -0
package/dist/auth/oidc/client.d.ts +73 -0
package/dist/auth/oidc/client.js +104 -0
package/dist/auth/oidc/client.test.d.ts +1 -0
package/dist/auth/oidc/client.test.js +121 -0
package/dist/auth/oidc/discovery.d.ts +38 -0
package/dist/auth/oidc/discovery.js +48 -0
package/dist/auth/oidc/discovery.test.d.ts +1 -0
package/dist/auth/oidc/discovery.test.js +68 -0
package/dist/auth/oidc/endpoints.d.ts +20 -0
package/dist/auth/oidc/endpoints.js +124 -0
package/dist/auth/oidc/endpoints.test.d.ts +7 -0
package/dist/auth/oidc/endpoints.test.js +304 -0
package/dist/auth/oidc/flow-cookie.d.ts +57 -0
package/dist/auth/oidc/flow-cookie.js +142 -0
package/dist/auth/oidc/flow-cookie.test.d.ts +1 -0
package/dist/auth/oidc/flow-cookie.test.js +0 -0
package/dist/auth/oidc/index.d.ts +7 -0
package/dist/auth/oidc/index.js +6 -0
package/dist/auth/oidc/jwks.d.ts +36 -0
package/dist/auth/oidc/jwks.js +69 -0
package/dist/auth/oidc/jwks.test.d.ts +1 -0
package/dist/auth/oidc/jwks.test.js +65 -0
package/dist/auth/oidc/jwt.d.ts +62 -0
package/dist/auth/oidc/jwt.js +113 -0
package/dist/auth/oidc/jwt.test.d.ts +1 -0
package/dist/auth/oidc/jwt.test.js +141 -0
package/dist/auth/oidc/pkce.d.ts +19 -0
package/dist/auth/oidc/pkce.js +43 -0
package/dist/auth/oidc/pkce.test.d.ts +1 -0
package/dist/auth/oidc/pkce.test.js +55 -0
package/dist/auth/oidc/runtime.d.ts +63 -0
package/dist/auth/oidc/runtime.js +129 -0
package/dist/auth/oidc/runtime.test.d.ts +1 -0
package/dist/auth/oidc/runtime.test.js +180 -0
package/dist/auth/policy/engine.d.ts +48 -0
package/dist/auth/policy/engine.js +73 -0
package/dist/auth/policy/engine.test.d.ts +1 -0
package/dist/auth/policy/engine.test.js +98 -0
package/dist/auth/policy/loader.d.ts +35 -0
package/dist/auth/policy/loader.js +100 -0
package/dist/auth/policy/opa.d.ts +69 -0
package/dist/auth/policy/opa.js +162 -0
package/dist/auth/policy/opa.test.d.ts +1 -0
package/dist/auth/policy/opa.test.js +158 -0
package/dist/auth/rbac.d.ts +40 -0
package/dist/auth/rbac.js +120 -0
package/dist/auth/rbac.test.d.ts +1 -0
package/dist/auth/rbac.test.js +121 -0
package/dist/auth/session.d.ts +66 -0
package/dist/auth/session.js +146 -0
package/dist/auth/session.test.d.ts +1 -0
package/dist/auth/session.test.js +90 -0
package/dist/catalog/loader.d.ts +67 -0
package/dist/catalog/loader.js +122 -0
package/dist/catalog/loader.test.d.ts +1 -0
package/dist/catalog/loader.test.js +108 -0
package/dist/connectors/kubernetes.d.ts +1 -0
package/dist/connectors/kubernetes.js +12 -2
package/dist/connectors/topology-vocabulary.d.ts +41 -0
package/dist/connectors/topology-vocabulary.js +120 -0
package/dist/connectors/topology-vocabulary.test.d.ts +1 -0
package/dist/connectors/topology-vocabulary.test.js +63 -0
package/dist/context.d.ts +13 -1
package/dist/context.js +5 -1
package/dist/index.js +1012 -29
package/dist/net/egress-policy.js +2 -0
package/dist/openapi.js +440 -0
package/dist/openapi.test.d.ts +1 -0
package/dist/openapi.test.js +64 -0
package/dist/policy/redact.d.ts +44 -0
package/dist/policy/redact.js +144 -0
package/dist/policy/redact.test.d.ts +1 -0
package/dist/policy/redact.test.js +172 -0
package/dist/products/loader.d.ts +84 -0
package/dist/products/loader.js +216 -0
package/dist/products/loader.test.d.ts +1 -0
package/dist/products/loader.test.js +168 -0
package/dist/quota/limiter.d.ts +72 -0
package/dist/quota/limiter.js +105 -0
package/dist/quota/limiter.test.d.ts +1 -0
package/dist/quota/limiter.test.js +119 -0
package/dist/quota/token-budget.d.ts +119 -0
package/dist/quota/token-budget.js +297 -0
package/dist/quota/token-budget.test.d.ts +1 -0
package/dist/quota/token-budget.test.js +215 -0
package/dist/tenancy/context.d.ts +45 -0
package/dist/tenancy/context.js +97 -0
package/dist/tenancy/context.test.d.ts +1 -0
package/dist/tenancy/context.test.js +72 -0
package/dist/tenancy/migration.test.d.ts +7 -0
package/dist/tenancy/migration.test.js +75 -0
package/dist/ui/index.html +1454 -88
package/package.json +20 -3

package/dist/index.js CHANGED Viewed

@@ -12,6 +12,22 @@ import { isTopologyProvider } from "./connectors/interface.js";
 import { defaultContext, principalContext } from "./context.js";
 import { enforceEntitledAccess, enterpriseGateStatus, enterpriseGateInfo, enterprisePolicyView, enterpriseCatalogView, enterpriseAuditTail, authorizeAdmin, updateRbacPolicy, updateCatalog, } from "./enterprise-gate.js";
 import { loadCredentials, credentialsConfigured, extractToken, resolveToken, } from "./auth/credentials.js";
+import { issueSession, setCookieHeader, clearCookieHeader, generateSecret, } from "./auth/session.js";
+import { readUsersFile, authenticate, } from "./auth/local-users.js";
+import { buildSessionAttacher, buildRequireSession, } from "./auth/middleware.js";
+import { buildRequirePermission, hasPermission, listGrantedPermissions, DEFAULT_POLICY, } from "./auth/rbac.js";
+import { resolveOidcConfig, buildOidcRuntime } from "./auth/oidc/runtime.js";
+import { registerOidcRoutes } from "./auth/oidc/endpoints.js";
+import { BuiltinPolicyEngine } from "./auth/policy/engine.js";
+import { loadPolicyFromFile, PolicyLoadError, VALID_RESOURCES, VALID_ACTIONS } from "./auth/policy/loader.js";
+import { OpaPolicyEngine } from "./auth/policy/opa.js";
+import { AuditLog } from "./audit/log.js";
+import { buildAuditMiddleware } from "./audit/middleware.js";
+import { readCatalogFile, CatalogStore } from "./catalog/loader.js";
+import { readProductsFile, ProductsStore, validateProduct, writeProductsFile, ProductsLoadError } from "./products/loader.js";
+import { redactValue } from "./policy/redact.js";
+import { IdentityRateLimiter, resolveToolRatePerMin } from "./quota/limiter.js";
+import { TokenBudget, estimateTokensFor, resolveDailyTokenLimit } from "./quota/token-budget.js";
 import { getPluginLoader } from "./connectors/loader.js";
 import { resolveHubCatalogUrl, describeInstalled, mergeCatalog, fetchHubCatalog, } from "./connectors/hub.js";
 import { isValidConnectorName, installTarball } from "./connectors/install.js";
@@ -41,6 +57,55 @@ const SERVER_VERSION = (() => {
         return "unknown";
     }
 })();
+/** Defensive read of a single query-string value. Express's
+ * `req.query[k]` is typed as `string | ParsedQs | (string | ParsedQs)[]`
+ * — a caller passing `?actor=a&actor=b` (or `?actor[]=a`) yields an
+ * array (or object) rather than a string, which then propagates as
+ * `[a,b]` into downstream filters that expect a string. This helper
+ * collapses those shapes to a plain string or undefined so arrays /
+ * nested objects cannot leak through.
+ *
+ * Only index 0 of an array is inspected: `["a", "b"]` yields "a",
+ * while an array whose first element is not a string (for example
+ * `[{}, "a"]`) yields undefined rather than the later string.
+ *
+ * @param {unknown} v - Raw value, typically `req.query[k]`.
+ * @returns {string | undefined} `v` itself when it is a string, `v[0]`
+ *   when `v` is an array whose first element is a string, otherwise
+ *   undefined.
+ */
+function qstr(v) {
+    if (typeof v === "string")
+        return v;
+    if (Array.isArray(v) && typeof v[0] === "string")
+        return v[0];
+    return undefined;
+}
+/** Forensic breadcrumb for redaction-bypass tool invocations.
+ *
+ * Deliberately omits the principal identifier: the credential name
+ * lives in OMCP_API_KEYS, and threading any derivative of it into the
+ * log channel re-introduces a leak surface that static analysers
+ * (rightly) flag. SIEM cross-correlation goes via the correlationId
+ * UUID — slice 2 will wire the management-plane audit chain to carry
+ * the same correlationId alongside the (chain-protected) principal,
+ * so a downstream investigator can join the two channels there.
+ *
+ * Emits one JSON-serialised object through console.error with the
+ * fields event, ts (ISO-8601 timestamp taken at call time), auth,
+ * tool, service and correlationId. A fixed `reason` string is added
+ * only when `event` is "redaction_bypass_denied". `tool` is
+ * hard-coded to "query_logs".
+ *
+ * @param {string} event - Event name written to the `event` field.
+ * @param {object} ctx - Request context; only `auth` and
+ *   `correlationId` are read.
+ * @param {object} [args] - Tool arguments; only `service` is read,
+ *   and it is logged as null when `args` or `service` is absent.
+ * @returns {void}
+ */
+function emitBypassEvent(event, ctx, args) {
+    console.error(JSON.stringify({
+        event,
+        ts: new Date().toISOString(),
+        auth: ctx.auth,
+        tool: "query_logs",
+        service: args?.service ?? null,
+        correlationId: ctx.correlationId,
+        ...(event === "redaction_bypass_denied" ? { reason: "credential_not_in_OMCP_KEY_BYPASS_REDACTION" } : {}),
+    }));
+}
+/** Bridge from the new PolicyEngine to the existing
+ *  hasPermission/buildRequirePermission signatures (which still take
+ *  a plain {role: Permission[]} map). Built-in engine exposes the
+ *  raw map directly; engines that don't (slice 4's OPA) will fall
+ *  back to a synthesized one via .list().
+ *
+ *  For a BuiltinPolicyEngine the value of `engine.raw()` is returned
+ *  as-is (no copy is made here). For any other engine a fresh object
+ *  is built with one key per entry of `engine.roles()`, each holding
+ *  the result of `engine.list([role])`.
+ *
+ *  NOTE(review): assumes `list()` returns synchronously with a
+ *  Permission[]-shaped value — confirm against the engine interface.
+ *
+ *  @param {object} engine - Policy engine exposing `roles()` and
+ *    `list(roles)`, or a BuiltinPolicyEngine exposing `raw()`.
+ *  @returns {object} Map keyed by role name.
+ */
+function policyEngineToMap(engine) {
+    if (engine instanceof BuiltinPolicyEngine)
+        return engine.raw();
+    const out = {};
+    for (const role of engine.roles()) {
+        out[role] = engine.list([role]);
+    }
+    return out;
+}
 function applyConfigToRuntime(config, registry) { // Forwards config.healthThresholds to setHealthThresholds(); `registry` is accepted but not used in this body.
     setHealthThresholds(config.healthThresholds);
 }
@@ -113,6 +178,131 @@ async function main() {
     // so we cannot share a single McpServer across HTTP sessions. Each new
     // session needs its own server. The factory captures the live registry
     // by reference so tool handlers always see the current configuration.
+    // Catalog enrichers for the MCP tool surface: wrap the standard
+    // tool-result shape ({content:[{text: json}]}) and inject .catalog
+    // metadata where it matches a known service name. No-op when the
+    // catalog is empty (the demo case) or when the payload doesn't
+    // parse as JSON. The HTTP `/api/services` + `/api/health` handlers
+    // call the loader.ts CatalogStore directly; this path mirrors that
+    // behaviour for MCP clients (Claude Desktop, the agent, ...).
+    // McpToolResult is whatever the wrapped handler returned — keep it
+    // untyped so we don't fight the SDK's narrow `content: [{type:"text",...}]`
+    // overload. We pass the value back unchanged when it doesn't parse,
+    // and otherwise mutate the parsed JSON before re-stringifying into a
+    // fresh wrapper that mirrors the handler's own shape.
+    function enrichToolServicesText(result, ctx) { // Returns a copy of `result` whose first text entry has catalog metadata attached to each listed service.
+        try {
+            const parsed = JSON.parse(result.content[0]?.text ?? "{}"); // a missing first entry / text is parsed as an empty object
+            if (parsed && Array.isArray(parsed.services)) {
+                for (const s of parsed.services) {
+                    // Scope enrichment to the caller's tenant so we don't
+                    // leak owner / on-call / SLO bytes for other tenants'
+                    // services that happen to share a name in the catalog.
+                    const entry = typeof s?.name === "string" ? catalog.get(s.name, ctx.tenant) : undefined;
+                    if (entry)
+                        s.catalog = entry; // mutates the freshly parsed object, not the caller's `result`
+                }
+            }
+            const clone = { ...result, content: result.content.map((c, i) => i === 0 ? { ...c, text: JSON.stringify(parsed) } : c) }; // only entry 0's text is replaced; later entries pass through
+            return clone;
+        }
+        catch { // unparseable text (or any other throw above): hand back the input untouched
+            return result;
+        }
+    }
+    // Documentation for redactToolText (defined further down, after
+    // chargeTokenBudget): apply PII / secret redaction to a tool result's
+    // text payload. No-op when OMCP_REDACTION=off. Adds a top-level
+    // `_redacted` field with the per-category counts so the agent (and
+    // the human) sees a hint like `{ email: 4, ipv4: 2, totalMatches: 6 }`
+    // instead of silently losing data.
+    /** Charge the estimated tokens in a tool response against the
+     *  per-identity daily budget. When the budget would be exceeded,
+     *  replace the response with a structured error payload —
+     *  the tool's data never crosses the boundary, and the agent
+     *  sees a parseable {error: "OMCP_TOKEN_BUDGET_EXCEEDED", ...}
+     *  rather than a generic failure. Anonymous principals are not
+     *  charged (the budget is per-credential).
+     *
+     *  This charges RETROACTIVELY: the tool body has already executed,
+     *  so the work is done by the time we decide to deny — the call
+     *  that flips the bucket over the cap still pays the cost; the
+     *  N+1 call denies before doing work. Pre-flight denial would
+     *  require predicting response size before the connector runs,
+     *  which isn't tractable for query_logs / query_metrics where
+     *  size is data-dependent. The trade-off is intentional: one
+     *  over-cap call per bucket roll vs an unhelpful "request denied,
+     *  size unknown" upstream.
+     *
+     *  NOTE(review): this function is only ever handed a finished
+     *  tool result, so by itself it cannot deny "before doing work";
+     *  confirm where (if anywhere) the N+1 pre-work denial happens.
+     *
+     *  @param {object} result - Tool result; `content[0].text` is
+     *    measured and, on denial, replaced with the error JSON.
+     *  @param {object} ctx - Request context; `ctx.auth` gates
+     *    charging and `ctx` is passed to identityKey().
+     *  @param {string} toolName - Echoed as `tool` in the error body.
+     *  @returns {object} `result` unchanged when ctx.auth is not
+     *    "apikey", when the budget check allows the call, or when
+     *    decision.limit is 0; otherwise a shallow copy whose first
+     *    content entry carries the serialised error body. */
+    function chargeTokenBudget(result, ctx, toolName) {
+        if (ctx.auth !== "apikey") // only API-key principals are charged
+            return result;
+        const text = result.content[0]?.text ?? "";
+        const tokens = estimateTokensFor(text);
+        const decision = tokenBudget.check(identityKey(ctx), tokens);
+        if (decision.allowed || decision.limit === 0) // NOTE(review): limit 0 presumably means "no budget configured" — confirm against TokenBudget
+            return result;
+        // A single request larger than the entire daily cap can never
+        // succeed by waiting — surface a distinct error code so the
+        // agent doesn't loop. Otherwise the wait-then-retry path is the
+        // right answer (and freedAtRetry tells the agent how much they
+        // can request after the wait).
+        const requestExceedsCap = tokens > decision.limit;
+        const errBody = {
+            error: requestExceedsCap ? "OMCP_TOKEN_REQUEST_EXCEEDS_BUDGET" : "OMCP_TOKEN_BUDGET_EXCEEDED",
+            tool: toolName,
+            used: decision.used,
+            limit: decision.limit,
+            requested: tokens,
+            retryAfterSeconds: requestExceedsCap ? 0 : decision.retryAfterSeconds, // 0 signals that waiting will not help
+            freedAtRetry: decision.freedAtRetry,
+            message: requestExceedsCap
+                ? `This single response (~${tokens} tokens) is larger than the entire daily budget (${decision.limit}). Retrying won't help — narrow the query (smaller window / lower limit / more selective filter) or raise OMCP_TOOL_DAILY_TOKENS.`
+                : `Daily token budget exceeded (${decision.used}/${decision.limit} tokens used in the trailing 24h; this call would have added ~${tokens}). Try again in ~${Math.ceil(decision.retryAfterSeconds / 3600)}h or raise OMCP_TOOL_DAILY_TOKENS.`,
+        };
+        // Preserve any additional content entries (e.g. a future
+        // tool returning [text, image]) — only the text payload of the
+        // first entry is replaced with the error JSON; everything after
+        // it passes through.
+        return {
+            ...result,
+            content: [
+                { ...result.content[0], text: JSON.stringify(errBody) },
+                ...result.content.slice(1),
+            ],
+        };
+    }
+    const REDACTION_ENABLED = String(process.env.OMCP_REDACTION ?? "on").toLowerCase() !== "off";
+    function redactToolText(result, opts = {}) { // Returns a copy of `result` with redactValue() applied to the JSON in its first text entry.
+        if (!REDACTION_ENABLED) // redaction switched off: pass through
+            return result;
+        if (opts.bypass) // caller has already decided this call may skip redaction
+            return result;
+        try {
+            const parsed = JSON.parse(result.content[0]?.text ?? "{}"); // a missing first entry / text is parsed as an empty object
+            const r = redactValue(parsed);
+            const redacted = r.value;
+            if (r.totalMatches > 0 && redacted && typeof redacted === "object") {
+                redacted._redacted = { ...r.matches, totalMatches: r.totalMatches }; // NOTE(review): if `redacted` is a top-level array, JSON.stringify drops this property
+            }
+            const clone = { ...result, content: result.content.map((c, i) => i === 0 ? { ...c, text: JSON.stringify(redacted) } : c) }; // only entry 0's text is replaced; later entries pass through
+            return clone;
+        }
+        catch { // NOTE(review): fail-open — a non-JSON payload or a throw from redactValue returns the text unredacted; confirm this is intended
+            return result;
+        }
+    }
+    function enrichToolHealthText(result, serviceName, ctx) { // Returns a copy of `result` whose first text entry carries a top-level `catalog` field for `serviceName`.
+        try {
+            const parsed = JSON.parse(result.content[0]?.text ?? "{}"); // a missing first entry / text is parsed as an empty object
+            const entry = serviceName ? catalog.get(serviceName, ctx.tenant) : undefined; // empty name skips the lookup; lookup is scoped to ctx.tenant
+            if (entry && parsed && typeof parsed === "object")
+                parsed.catalog = entry;
+            const clone = { ...result, content: result.content.map((c, i) => i === 0 ? { ...c, text: JSON.stringify(parsed) } : c) }; // only entry 0's text is replaced; later entries pass through
+            return clone;
+        }
+        catch { // unparseable text (or any other throw above): hand back the input untouched
+            return result;
+        }
+    }
     function createMcpServer(ctx) {
         const mcpServer = new McpServer({
             name: "observability-mcp",
@@ -140,7 +330,8 @@ async function main() {
                 .describe("Optional case-insensitive substring to narrow the result to matching service names (e.g. 'payment'). Omit to list every discovered service."),
         }, async (args) => {
             await enforceEntitledAccess(ctx, { tool: "list_services" });
-            return withToolMetrics("list_services", () => listServicesHandler(registry, args, ctx));
+            const result = await withToolMetrics("list_services", () => listServicesHandler(registry, args, ctx));
+            return enrichToolServicesText(result, ctx);
         });
         const metricsList = getAvailableMetricNames(registry);
         const metricNames = registry.getBySignal("metrics").flatMap(c => c.getMetrics().map(m => m.name));
@@ -172,7 +363,8 @@ async function main() {
                 .describe("Optional. Metric label to break the result down by, e.g. 'instance', 'pod', 'node'. When set, the response contains one series per distinct label value under `groups`. Default: a single aggregated series."),
         }, async (args) => {
             await enforceEntitledAccess(ctx, { tool: "query_metrics", source: args?.source, service: args?.service });
-            return withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args, ctx));
+            const result = await withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args, ctx));
+            return chargeTokenBudget(result, ctx, "query_metrics");
         });
         mcpServer.tool("query_logs", [
             "Fetch recent log entries for ONE service over a look-back window, with a pre-computed summary (error/warning counts and the most frequent error patterns).",
@@ -201,9 +393,48 @@ async function main() {
                 .positive()
                 .optional()
                 .describe("Optional. Maximum number of log entries to return (most recent first). Default: 100."),
+            bypass_redaction: z
+                .boolean()
+                .optional()
+                .describe("Optional. When true, request that PII/secret redaction be skipped for this single call. The server only honours this when the calling credential was explicitly authorised via OMCP_KEY_BYPASS_REDACTION; otherwise the request still gets redacted output. Default: false."),
         }, async (args) => {
             await enforceEntitledAccess(ctx, { tool: "query_logs", source: args?.source, service: args?.service });
-            return withToolMetrics("query_logs", () => queryLogsHandler(registry, args, ctx));
+            const result = await withToolMetrics("query_logs", () => queryLogsHandler(registry, args, ctx));
+            // Redact PII / secrets from the log payload before it crosses the
+            // MCP boundary into the agent's context. Per-call bypass kicks in
+            // only when BOTH (a) the credential is OMCP_KEY_BYPASS_REDACTION
+            // allow-listed, AND (b) the agent explicitly opted in via the
+            // bypass_redaction arg. Either alone keeps redaction on, so
+            // configuration-only and arg-only paths both fail closed.
+            const wantsBypass = args?.bypass_redaction === true;
+            const allowed = ctx.allowBypassRedaction === true;
+            const bypass = wantsBypass && allowed;
+            if (bypass || (wantsBypass && !allowed)) {
+                // Forensic trail:
+                //   1. stderr breadcrumb for SIEM tail-and-forward setups (the
+                //      log channel keeps no identifying credential reference
+                //      to avoid CodeQL taint findings — correlation goes via
+                //      the audit chain entry below).
+                //   2. management-plane audit chain entry so the bypass
+                //      invocation is tamper-evident alongside the rest of
+                //      /api/*. Persists if OMCP_MGMT_AUDIT_FILE is set.
+                emitBypassEvent(bypass ? "redaction_bypass_engaged" : "redaction_bypass_denied", ctx, args);
+                void mgmtAudit.record({
+                    actor: { sub: ctx.principalId },
+                    tenant: ctx.tenant,
+                    resource: "redaction",
+                    action: "bypass",
+                    method: "MCP",
+                    path: "/mcp/query_logs",
+                    status: bypass ? 200 : 403,
+                    target: args?.service ?? undefined,
+                }).catch(() => {
+                    // Audit record is best-effort — losing one entry must not
+                    // crash the tool call. The chain itself remains intact.
+                });
+            }
+            const redacted = redactToolText(result, { bypass });
+            return chargeTokenBudget(redacted, ctx, "query_logs");
         });
         mcpServer.tool("get_service_health", [
             "Produce a single aggregated health verdict for ONE service by combining its metrics and logs.",
@@ -216,7 +447,9 @@ async function main() {
                 .describe("Required. Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'payment-service')."),
         }, async (args) => {
             await enforceEntitledAccess(ctx, { tool: "get_service_health", service: args?.service });
-            return withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args, ctx));
+            const result = await withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args, ctx));
+            const enriched = enrichToolHealthText(result, String(args?.service ?? ""), ctx);
+            return chargeTokenBudget(enriched, ctx, "get_service_health");
         });
         mcpServer.tool("detect_anomalies", [
             "Scan one or all monitored services for abnormal behavior and return the findings ranked by severity.",
@@ -284,8 +517,113 @@ async function main() {
         });
         return mcpServer;
     }
+    // --- Management-plane auth (basic mode) -----------------------------------
+    // Off by default. Enable with `OMCP_AUTH=basic` + `OMCP_USERS_FILE` and
+    // optionally `OMCP_SESSION_SECRET`. When the secret is omitted in basic
+    // mode the server generates one for the process lifetime — sessions
+    // won't survive a restart and a warning is logged. See docs/auth-basic.md.
+    //
+    // SECURITY DEFAULT: misconfiguration in basic mode is fail-CLOSED — the
+    // process exits with a non-zero status rather than silently degrading
+    // to anonymous. Set `OMCP_AUTH_ALLOW_FALLBACK=true` to opt back into
+    // the old fall-back-to-anonymous behaviour (only sensible for the
+    // throwaway-demo case where ops can immediately see the boot log).
+    const requestedAuthMode = String(process.env.OMCP_AUTH ?? "anonymous").toLowerCase();
+    const allowFallback = String(process.env.OMCP_AUTH_ALLOW_FALLBACK ?? "false").toLowerCase() === "true";
+    function authMisconfig(reason) { // Reports an auth misconfiguration on stderr; exits the process unless fallback was opted into.
+        if (allowFallback) {
+            console.error(`[auth] ${reason} — OMCP_AUTH_ALLOW_FALLBACK=true → falling back to anonymous`);
+            return; // control returns to the caller, which carries on with startup
+        }
+        console.error(`[auth] ${reason} — refusing to start (set OMCP_AUTH_ALLOW_FALLBACK=true to override)`);
+        process.exit(1); // fail closed with a non-zero status
+    }
+    let authMode = "anonymous";
+    let sessionCfg;
+    let usersStore = null;
+    let secretEphemeral = false;
+    let oidcRuntime;
+    if (requestedAuthMode === "basic") {
+        const usersPath = process.env.OMCP_USERS_FILE;
+        if (!usersPath) {
+            authMisconfig("OMCP_AUTH=basic requires OMCP_USERS_FILE");
+        }
+        else {
+            usersStore = await readUsersFile(usersPath);
+            if (!usersStore) {
+                authMisconfig(`OMCP_USERS_FILE=${usersPath} unreadable or malformed`);
+                usersStore = null;
+            }
+            else if (usersStore.users.length === 0) {
+                authMisconfig(`OMCP_USERS_FILE=${usersPath} has no users`);
+                usersStore = null;
+            }
+            else {
+                let secret = process.env.OMCP_SESSION_SECRET;
+                if (!secret || secret.length < 32) {
+                    secret = generateSecret();
+                    secretEphemeral = true;
+                    console.warn("[auth] OMCP_SESSION_SECRET not set (or < 32 chars). Generated an ephemeral secret — " +
+                        "sessions will be invalidated on restart. Set OMCP_SESSION_SECRET to a stable value in production.");
+                }
+                sessionCfg = { secret };
+                authMode = "basic";
+                console.log(`[auth] basic mode active — ${usersStore.users.length} user(s) loaded`);
+            }
+        }
+    }
+    else if (requestedAuthMode === "oidc") {
+        const r = resolveOidcConfig(process.env);
+        if (r.error || !r.config) {
+            authMisconfig(r.error ?? "OIDC misconfigured");
+        }
+        else {
+            let secret = process.env.OMCP_SESSION_SECRET;
+            if (!secret || secret.length < 32) {
+                secret = generateSecret();
+                secretEphemeral = true;
+                console.warn("[auth] OMCP_SESSION_SECRET not set (or < 32 chars) in OIDC mode. " +
+                    "Generated an ephemeral secret — sessions and OIDC state cookies " +
+                    "will be invalidated on restart. Set OMCP_SESSION_SECRET in production.");
+            }
+            sessionCfg = { secret };
+            authMode = "oidc";
+            oidcRuntime = buildOidcRuntime(r.config);
+            console.log(`[auth] OIDC mode active — issuer=${r.config.issuer} clientId=${r.config.clientId} rolesClaim=${r.config.rolesClaim} mappedRoles=${Object.keys(r.config.roleMap).length}`);
+        }
+    }
+    else if (requestedAuthMode !== "anonymous") {
+        authMisconfig(`unknown OMCP_AUTH=${requestedAuthMode}`);
+    }
+    const authRuntime = { mode: authMode, session: sessionCfg, secretEphemeral, oidc: oidcRuntime };
     // --- HTTP server ---
     const app = express();
+    // Trust-proxy: when set, Express will read req.ip / req.secure from
+    // X-Forwarded-For + X-Forwarded-Proto. OFF by default — forging those
+    // headers behind a misconfigured deployment is the kind of mistake
+    // that gives every audit entry the same client IP. Set
+    // `OMCP_TRUST_PROXY` to:
+    //   "true"            — trust every hop (Express default-on shape)
+    //   "loopback"        — trust 127.0.0.1 / ::1 only (sensible default
+    //                       when running behind a same-host nginx)
+    //   "<n>"             — trust the last <n> hops
+    //   "<ip>,<ip>"       — explicit list (single value or comma-separated)
+    // Any falsy / unset value leaves it OFF so req.ip stays the raw
+    // socket address.
+    const trustProxy = process.env.OMCP_TRUST_PROXY;
+    if (trustProxy && trustProxy !== "false") {
+        if (trustProxy === "true") {
+            app.set("trust proxy", true);
+        }
+        else if (/^\d+$/.test(trustProxy)) {
+            app.set("trust proxy", parseInt(trustProxy, 10));
+        }
+        else {
+            // string or comma-separated IPs / "loopback" / etc — let Express's
+            // parser handle the lookup (it accepts any of the above forms).
+            app.set("trust proxy", trustProxy);
+        }
+    }
     app.use(express.json({ limit: "1mb" }));
     // Security headers
     app.use((req, res, next) => {
@@ -317,6 +655,130 @@ async function main() {
         });
         next();
     });
+    // Broad rate-limit on the whole management-plane surface. Generous
+    // enough to leave a polling UI plenty of headroom (300/min per IP),
+    // tight enough to stop unauthenticated brute-force walks of /api/*
+    // (and to keep CodeQL's missing-rate-limiting rule satisfied for
+    // every downstream route).
+    app.use("/api", rateLimit({
+        windowMs: 60_000,
+        max: 300,
+        standardHeaders: true,
+        legacyHeaders: false,
+        message: { error: "rate limited" },
+    }));
+    // Management-plane auth: attach the session payload to every request
+    // (no decision logic here — anonymous mode is a no-op). The gate is
+    // mounted explicitly on each protected route prefix further down so
+    // there is no string-match-based "is this public?" branch anywhere.
+    app.use(buildSessionAttacher(authRuntime));
+    const requireSession = buildRequireSession(authRuntime);
+    // Active policy engine — built-in DEFAULT_POLICY by default. When
+    // OMCP_RBAC_POLICY_FILE is set we load it and ALWAYS abort on
+    // failure: OMCP_AUTH_ALLOW_FALLBACK is for *auth-mode* fallback
+    // (basic → anonymous), not for the policy file. An operator who
+    // deployed a restrictive policy to TIGHTEN the default would be
+    // worse off silently inheriting the broader built-in
+    // (DEFAULT_POLICY grants admin → redaction:bypass) than crashing
+    // with a clear error. Policy file errors are unconditionally
+    // fatal so the configured intent always wins.
+    let policyEngine = new BuiltinPolicyEngine(DEFAULT_POLICY);
+    const policyFile = process.env.OMCP_RBAC_POLICY_FILE?.trim();
+    const opaUrl = process.env.OMCP_OPA_URL?.trim();
+    // OPA takes precedence over a file: an operator who wired both
+    // probably wants OPA as the live engine and uses the file as a
+    // local fallback only via OMCP_POLICY_ENGINE=builtin.
+    const enginePref = (process.env.OMCP_POLICY_ENGINE || "").toLowerCase();
+    if (opaUrl && enginePref !== "builtin") {
+        const declared = (process.env.OMCP_OPA_ROLES || "").split(",").map((s) => s.trim()).filter(Boolean);
+        policyEngine = new OpaPolicyEngine({
+            url: opaUrl,
+            packagePath: process.env.OMCP_OPA_PACKAGE || "observability/authz",
+            declaredRoles: declared.length > 0 ? declared : undefined,
+            bearerToken: process.env.OMCP_OPA_TOKEN || undefined,
+        });
+        console.log(`[auth] RBAC policy engine = OPA at ${opaUrl} (package ${process.env.OMCP_OPA_PACKAGE || "observability/authz"})`);
+        // Pre-warm: the sync RBAC gate denies on a cache miss while the
+        // first async OPA call is in flight. Hit every (role, resource,
+        // action) combination from the declared role set, plus one
+        // warmList call per role, so the very first user request gets a
+        // real decision instead of a warming-deny. With 3 roles × 10
+        // resources × 4 actions that is 120 evaluate calls (+3 list
+        // calls); OPA handles this in <1s and we keep it best-effort
+        // (any failure surfaces in the OPA logs, the engine retries on
+        // the first user-facing call anyway).
+        const opaEngine = policyEngine;
+        void (async () => {
+            const roles = opaEngine.roles();
+            if (roles.length === 0)
+                return;
+            const resources = [...VALID_RESOURCES];
+            const actions = [...VALID_ACTIONS];
+            const tasks = [];
+            for (const role of roles) {
+                for (const resource of resources)
+                    for (const action of actions) {
+                        tasks.push(opaEngine.warmEvaluate([role], resource, action));
+                    }
+                tasks.push(opaEngine.warmList([role]));
+            }
+            try {
+                const settled = await Promise.allSettled(tasks);
+                const failed = settled.filter((s) => s.status === "rejected").length;
+                if (failed === 0) {
+                    console.log(`[auth] OPA cache pre-warmed: ${settled.length} decisions cached for ${roles.length} role(s)`);
+                }
+                else {
+                    console.warn(`[auth] OPA cache pre-warmed: ${settled.length - failed}/${settled.length} ok, ${failed} failed (gates will retry on first user call)`);
+                }
+            }
+            catch { /* best-effort */ }
+        })();
+    }
+    else if (policyFile) {
+        try {
+            policyEngine = loadPolicyFromFile(policyFile);
+            console.log(`[auth] RBAC policy loaded from ${policyFile} (${policyEngine.roles().join(", ")})`);
+        }
+        catch (e) {
+            const reason = e instanceof PolicyLoadError ? e.message : String(e);
+            console.error(`[auth] OMCP_RBAC_POLICY_FILE=${policyFile}: ${reason} — refusing to start (a malformed policy file would silently revert to the more permissive built-in default, defeating the point of the override)`);
+            process.exit(1);
+        }
+    }
+    const need = (resource, action) => buildRequirePermission(authRuntime, resource, action, policyEngineToMap(policyEngine));
+    // Management-plane audit log. Records one entry per mutating /api/*
+    // request. Writes JSONL to disk when OMCP_MGMT_AUDIT_FILE is set;
+    // otherwise an in-memory ring of the last 500 entries keeps the
+    // /api/audit endpoint useful in the demo / single-user case.
+    const mgmtAudit = new AuditLog({ file: process.env.OMCP_MGMT_AUDIT_FILE });
+    await mgmtAudit.bootstrap();
+    const audit = (resource, action) => buildAuditMiddleware({ audit: mgmtAudit, resource, action });
+    // Service catalog: optional operator-curated ownership / criticality /
+    // on-call metadata, keyed on the service name list_services returns.
+    // No file ⇒ empty catalog, enrichment is a no-op (anonymous demos
+    // see no behaviour change).
+    const catalog = new CatalogStore(await readCatalogFile(process.env.OMCP_SERVICE_CATALOG_FILE));
+    const products = new ProductsStore(await readProductsFile(process.env.OMCP_PRODUCTS_FILE));
+    // Protected route prefixes. /api/me, /api/auth/*, /api/info,
+    // /api/openapi.json deliberately don't appear here — they stay public.
+    for (const prefix of [
+        "/api/sources",
+        "/api/source-types",
+        "/api/services",
+        "/api/health",
+        "/api/health-thresholds",
+        "/api/topology",
+        "/api/settings",
+        "/api/connectors",
+        "/api/enterprise",
+        "/api/hub",
+        "/api/audit",
+        "/api/usage",
+        "/api/catalog",
+        "/api/policy",
+    ]) {
+        app.use(prefix, requireSession);
+    }
     // k8s-convention liveness/readiness probes at the root of the path
     // tree, no /api prefix. Helm chart points its probes here. Cheap
     // enough to skip the request-counter middleware.
@@ -387,6 +849,23 @@ async function main() {
                 platform: process.platform,
                 arch: process.arch,
             },
+            // Governance posture — surfaces the active management-plane
+            // configuration so external dashboards / discovery probes don't
+            // need a session to learn the deployment shape. Booleans only;
+            // file paths and the session secret stay private.
+            governance: {
+                authMode: authRuntime.mode,
+                authSecretEphemeral: !!authRuntime.secretEphemeral,
+                // OIDC issuer (URL only — never the client_secret) is the
+                // single piece of state external discovery needs to know
+                // *where* the IdP lives. Empty string when mode != "oidc".
+                oidcIssuer: oidcRuntime?.cfg.issuer ?? "",
+                auditPersisted: !!process.env.OMCP_MGMT_AUDIT_FILE,
+                catalogConfigured: catalog.count() > 0 || !!process.env.OMCP_SERVICE_CATALOG_FILE,
+                redaction: REDACTION_ENABLED,
+                trustProxy: !!(process.env.OMCP_TRUST_PROXY && process.env.OMCP_TRUST_PROXY !== "false"),
+                toolRatePerMin: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
+            },
             plugins: loader.list().map((p) => ({
                 name: p.name,
                 source: p.source,
@@ -395,6 +874,269 @@ async function main() {
             })),
         });
     });
+    // Same per-IP cap for /api/me and the auth endpoints — the UI polls
+    // this on every page load to decide whether to show the login modal,
+    // so a 60/min limit per IP is generous for humans and tight for
+    // scripted abuse.
+    const authReadRateLimit = rateLimit({
+        windowMs: 60_000,
+        max: 60,
+        standardHeaders: true,
+        legacyHeaders: false,
+        message: { error: "rate limited" },
+    });
+    // Current identity for the management plane. Always public so the UI
+    // can decide whether to show a login modal even before sending its
+    // first authenticated request.
+    app.get("/api/me", authReadRateLimit, (req, res) => {
+        const { mode } = authRuntime;
+        // Anonymous deployments and callers without a session both get the
+        // same "not authenticated" shape, tagged with the active mode.
+        if (mode === "anonymous" || !req.session) {
+            res.json({ authenticated: false, mode });
+            return;
+        }
+        const session = req.session;
+        const user = {
+            sub: session.sub,
+            name: session.name,
+            email: session.email,
+            tenant: session.tenant || "default",
+            roles: session.roles ?? [],
+        };
+        const permissions = listGrantedPermissions(session.roles, policyEngineToMap(policyEngine));
+        // The IdP issuer URL is only reported for OIDC sign-ins (so the UI
+        // can render a badge or profile link); in any other mode the key
+        // is left undefined.
+        const idpIssuer = mode === "oidc" ? (oidcRuntime?.cfg.issuer ?? "") : undefined;
+        res.json({
+            authenticated: true,
+            mode,
+            user,
+            permissions,
+            exp: session.exp,
+            idpIssuer,
+        });
+    });
+    // --- /api/policy — read-only view of the RBAC policy in effect -------
+    // Useful when an operator is debugging "why did role X get a 403" and
+    // doesn't have a checkout to read DEFAULT_POLICY from source. Gated
+    // by admin-only delete-on-users so the policy schema isn't visible
+    // to non-admin sessions.
+    app.get("/api/policy", need("users", "delete"), (req, res) => {
+        const map = policyEngineToMap(policyEngine);
+        const q = req.query;
+        // Optional dry-run: ?roles=admin,operator&resource=sources&action=delete
+        // answers { allowed, reason } for the engine currently in effect.
+        if (q.resource && q.action) {
+            const { resource, action } = q;
+            const dryRoles = typeof q.roles === "string"
+                ? q.roles.split(",").map((r) => r.trim()).filter(Boolean)
+                : undefined;
+            // Probe values outside the active vocabulary get an explicit
+            // "unknown ..." reason instead of being evaluated, so a typo
+            // is not reported as a plain denial.
+            let verdict;
+            if (!VALID_RESOURCES.has(resource)) {
+                verdict = { allowed: false, reason: `unknown resource '${resource}' (valid: ${[...VALID_RESOURCES].join(", ")})` };
+            }
+            else if (!VALID_ACTIONS.has(action)) {
+                verdict = { allowed: false, reason: `unknown action '${action}' (valid: ${[...VALID_ACTIONS].join(", ")})` };
+            }
+            else {
+                verdict = policyEngine.evaluate(dryRoles, resource, action);
+            }
+            res.json({ dryRun: { roles: dryRoles ?? [], resource, action, ...verdict } });
+            return;
+        }
+        // No probe requested: return the policy map plus where it came from.
+        const engine = policyEngine.kind();
+        const roles = policyEngine.roles();
+        let note;
+        if (engine === "builtin") {
+            note = "DEFAULT_POLICY shipped with this build. Set OMCP_RBAC_POLICY_FILE to override.";
+        }
+        else {
+            note = `policy loaded from ${engine}; restart to reload.`;
+        }
+        res.json({ engine, policy: map, roles, note });
+    });
+    // --- /api/audit — management-plane audit feed -------------------------
+    // Read-only, gated by the "audit:read" permission so only viewers /
+    // operators / admins (basically anyone authenticated in the default
+    // policy) can pull it. Supports optional ?from, ?to (RFC-3339), ?actor,
+    // ?action, ?limit (default 100, capped to ring size).
+    app.get("/api/audit", need("audit", "read"), (req, res) => {
+        // Tenant scoping: a non-admin caller (no `users:delete`) sees
+        // only their own tenant's entries. Admins see everything by
+        // default but can ?tenant=acme to filter. This avoids leaking
+        // other tenants' actor / target / path bytes through the audit
+        // surface — the chain-hash protected ground truth is still
+        // process-wide; the API view is per-tenant.
+        const sess = req.session;
+        const isAdmin = hasPermission(sess?.roles, "users", "delete");
+        const callerTenant = sess?.tenant || "default";
+        const requestedTenant = qstr(req.query.tenant);
+        const tenantFilter = isAdmin ? requestedTenant : callerTenant;
+        // ?limit is forwarded only when it parses to a finite integer. A
+        // non-numeric value (e.g. ?limit=abc) parses to NaN and is replaced
+        // by undefined — the same value passed when ?limit is absent —
+        // so NaN never reaches the audit store.
+        const rawLimit = qstr(req.query.limit);
+        const parsedLimit = rawLimit ? Number.parseInt(rawLimit, 10) : Number.NaN;
+        const entries = mgmtAudit.list({
+            from: qstr(req.query.from),
+            to: qstr(req.query.to),
+            actor: qstr(req.query.actor),
+            action: qstr(req.query.action),
+            tenant: tenantFilter || undefined,
+            limit: Number.isFinite(parsedLimit) ? parsedLimit : undefined,
+        });
+        res.json({
+            entries,
+            tipHash: mgmtAudit.tipHash,
+            persisted: !!process.env.OMCP_MGMT_AUDIT_FILE,
+            // Tell the UI which tenant scope the view is currently showing
+            // so a cross-tenant admin sees an explicit "(all tenants)" hint.
+            scopedTo: tenantFilter || (isAdmin ? null : callerTenant),
+        });
+    });
+    // --- /api/usage — per-identity MCP rate-limit snapshot -----------------
+    // Read-only view of the IdentityRateLimiter's bucket state. Gated by
+    // need("audit","read") — the same role set that already sees the
+    // audit log can see who is calling what. Anonymous /mcp traffic
+    // never enters a bucket so it doesn't show up here.
+    app.get("/api/usage", need("audit", "read"), (req, res) => {
+        const sess = req.session;
+        const isAdmin = hasPermission(sess?.roles, "users", "delete");
+        const callerTenant = sess?.tenant || "default";
+        const requestedTenant = qstr(req.query.tenant);
+        const tenantFilter = isAdmin ? requestedTenant : callerTenant;
+        const actorFilter = qstr(req.query.actor);
+        // Union of identities known to either tracker. The tracker keys
+        // are composite "<tenant> <name>"; we split them back out for the
+        // response shape so the UI sees clean tenant + actor columns.
+        const idSet = new Set([
+            ...toolRateLimiter.knownIdentities(),
+            ...tokenBudget.knownIdentities(),
+        ]);
+        const now = Date.now();
+        const identities = [...idSet]
+            .map((id) => {
+            const split = splitIdentityKey(id);
+            if (tenantFilter && split.tenant !== tenantFilter)
+                return null;
+            if (actorFilter && split.actor !== actorFilter)
+                return null;
+            const r = toolRateLimiter.inspect(id, now);
+            const b = tokenBudget.inspect(id, now);
+            return {
+                actor: split.actor,
+                tenant: split.tenant,
+                count: r.count,
+                limit: r.limit,
+                windowMs: r.windowMs,
+                tokens: { used: b.used, limit: b.limit, windowMs: b.windowMs },
+            };
+        })
+            .filter((x) => x !== null);
+        res.json({
+            identities,
+            defaultLimit: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
+            windowMs: 60_000,
+            tokens: {
+                defaultLimit: resolveDailyTokenLimit(process.env.OMCP_TOOL_DAILY_TOKENS),
+                windowMs: 24 * 60 * 60 * 1000,
+            },
+            // Same scoping breadcrumb /api/audit returns: which tenant
+            // window the response is showing. null = "all tenants" (admin).
+            scopedTo: tenantFilter || (isAdmin ? null : callerTenant),
+        });
+    });
+    // --- /api/auth/* — login + logout for basic mode -----------------------
+    // Login: POST { username, password } → 200 + Set-Cookie on success, 401
+    // on bad creds, 400 on missing fields, 503 in anonymous mode (the UI
+    // shouldn't have rendered the modal at all in that case but we still
+    // answer cleanly). Logout: POST → 204 + clears the cookie.
+    const loginRateLimit = rateLimit({
+        windowMs: 60_000,
+        max: 20,
+        standardHeaders: true,
+        legacyHeaders: false,
+        message: { error: "too many login attempts, slow down" },
+    });
+    // Cached users-file mtime — on every login we stat the file and
+    // re-read when it's changed since the last check. Adding/removing
+    // a user therefore takes effect on the next login attempt, no server
+    // restart required. Cheap path: a single stat() per attempt; the
+    // rate limit caps that at 20/min/IP anyway.
+    let lastUsersMtimeMs = null;
+    // Re-read OMCP_USERS_FILE when its mtime differs from the cached one
+    // (or when no mtime has been cached yet). The in-memory store is only
+    // replaced when the file yields at least one user; otherwise, and on
+    // any stat/read error, the previously loaded store stays in place.
+    async function maybeReloadUsers() {
+        const path = process.env.OMCP_USERS_FILE;
+        if (!path)
+            return;
+        try {
+            const { stat } = await import("node:fs/promises");
+            const st = await stat(path);
+            const mtime = st.mtimeMs;
+            if (lastUsersMtimeMs === null || mtime !== lastUsersMtimeMs) {
+                const fresh = await readUsersFile(path);
+                if (fresh && fresh.users.length > 0) {
+                    // Capture whether an mtime was already cached BEFORE
+                    // overwriting it: only a real change against a known
+                    // baseline should be logged as "changed", not the very
+                    // first load after an unprimed boot.
+                    const hadBaseline = lastUsersMtimeMs !== null;
+                    usersStore = fresh;
+                    lastUsersMtimeMs = mtime;
+                    if (hadBaseline) {
+                        console.log(`[auth] OMCP_USERS_FILE changed — reloaded ${fresh.users.length} user(s)`);
+                    }
+                }
+            }
+        }
+        catch {
+            // File transiently unreadable — keep the cached store; logins
+            // will continue to work with the last known set.
+        }
+    }
+    // Prime the cache so the first login doesn't log "changed" on every boot.
+    if (authRuntime.mode === "basic") {
+        const path = process.env.OMCP_USERS_FILE;
+        if (path) {
+            try {
+                const { statSync } = await import("node:fs");
+                lastUsersMtimeMs = statSync(path).mtimeMs;
+            }
+            catch { /* ignore — first login will pick it up */ }
+        }
+    }
+    app.post("/api/auth/login", loginRateLimit, async (req, res) => {
+        if (authRuntime.mode !== "basic" || !sessionCfg || !usersStore) {
+            res.status(503).json({ error: "auth mode does not accept logins" });
+            return;
+        }
+        await maybeReloadUsers();
+        const body = (req.body || {});
+        const username = typeof body.username === "string" ? body.username.trim() : "";
+        const password = typeof body.password === "string" ? body.password : "";
+        if (!username || !password) {
+            res.status(400).json({ error: "username and password are required" });
+            return;
+        }
+        const user = authenticate(username, password, usersStore);
+        if (!user) {
+            res.status(401).json({ error: "invalid credentials" });
+            return;
+        }
+        const { cookie } = issueSession({ sub: user.username, name: user.name, roles: user.roles, tenant: user.tenant }, sessionCfg);
+        const secure = req.secure || (req.headers["x-forwarded-proto"] === "https");
+        res.setHeader("Set-Cookie", setCookieHeader(cookie, sessionCfg, { secure }));
+        res.json({
+            ok: true,
+            user: { sub: user.username, name: user.name, roles: user.roles ?? [] },
+        });
+    });
+    // Same per-IP cap as login — defends against logout-as-disruption
+    // (an attacker spamming logouts at a forged session for another tab).
+    app.post("/api/auth/logout", loginRateLimit, (req, res) => {
+        // A cookie-clearing header is only sent when sessions are in play
+        // (non-anonymous mode with a session config); the reply is 204
+        // with an empty body either way.
+        const sessionsInUse = authRuntime.mode !== "anonymous" && Boolean(sessionCfg);
+        if (sessionsInUse) {
+            const secure = req.secure || (req.headers["x-forwarded-proto"] === "https");
+            res.setHeader("Set-Cookie", clearCookieHeader(sessionCfg, { secure }));
+        }
+        res.status(204).end();
+    });
+    // OIDC code-flow endpoints (login redirect, callback, logout) — only
+    // mounted when OMCP_AUTH=oidc resolved cleanly. registerOidcRoutes is
+    // a no-op at the type level when oidcRuntime is undefined; we guard
+    // here so we don't even define the routes in basic/anonymous mode.
+    if (authRuntime.mode === "oidc" && oidcRuntime && sessionCfg) {
+        registerOidcRoutes(app, { sessionCfg, oidc: oidcRuntime });
+        console.log("[auth] OIDC endpoints registered: /api/auth/oidc/{login,callback,logout}");
+    }
     // Connectors currently loaded into this server (builtin + filesystem
     // plugins), with manifest metadata — drives the UI "Connectors" page.
     app.get("/api/connectors", (_req, res) => {
@@ -477,7 +1219,7 @@ async function main() {
     // Only catalog tarballUrls are fetched (no arbitrary URL in the body)
     // to avoid SSRF. The connector persists to PLUGINS_DIR (back it with
     // a PVC on k8s so it survives restarts).
-    app.post("/api/connectors/install", installRateLimit, async (req, res) => {
+    app.post("/api/connectors/install", installRateLimit, need("connectors", "write"), audit("connectors", "write"), async (req, res) => {
         if (process.env.ENABLE_UI_INSTALL !== "true") {
             return res.status(403).json({
                 error: "UI install is disabled. Set ENABLE_UI_INSTALL=true and PLUGIN_TRUST_ROOT to enable it.",
@@ -546,7 +1288,7 @@ async function main() {
     // ALWAYS verified against PLUGIN_TRUST_ROOT (signature + integrity),
     // so an unsigned/tampered bundle is rejected. Body is the raw tarball
     // bytes (application/octet-stream). Persists to PLUGINS_DIR.
-    app.post("/api/connectors/upload", installRateLimit, express.raw({ type: "application/octet-stream", limit: "50mb" }), async (req, res) => {
+    app.post("/api/connectors/upload", installRateLimit, need("connectors", "write"), audit("connectors", "write"), express.raw({ type: "application/octet-stream", limit: "50mb" }), async (req, res) => {
         if (process.env.ENABLE_UI_INSTALL !== "true") {
             return res.status(403).json({
                 error: "UI install is disabled. Set ENABLE_UI_INSTALL=true and PLUGIN_TRUST_ROOT to enable it.",
@@ -587,7 +1329,7 @@ async function main() {
         }
     });
     // Add a new source
-    app.post("/api/sources", installRateLimit, async (req, res) => {
+    app.post("/api/sources", installRateLimit, need("sources", "write"), audit("sources", "write"), async (req, res) => {
         const { name, type, url, enabled, auth, tls } = req.body;
         if (!name || !type || !url) {
             res.status(400).json({ error: "name, type, and url are required" });
@@ -609,8 +1351,8 @@ async function main() {
         res.status(201).json({ ok: true, source });
     });
     // Update an existing source
-    app.put("/api/sources/:name", async (req, res) => {
-        const oldName = req.params.name;
+    app.put("/api/sources/:name", need("sources", "write"), audit("sources", "write"), async (req, res) => {
+        const oldName = String(req.params.name);
         const { name, type, url, enabled, auth, tls } = req.body;
         const existing = registry.getSourceConfigs().find((s) => s.name === oldName);
         if (!existing) {
@@ -638,8 +1380,8 @@ async function main() {
         res.json({ ok: true, source });
     });
     // Delete a source
-    app.delete("/api/sources/:name", async (req, res) => {
-        const name = req.params.name;
+    app.delete("/api/sources/:name", need("sources", "delete"), audit("sources", "delete"), async (req, res) => {
+        const name = String(req.params.name);
         const existing = registry.getSourceConfigs().find((s) => s.name === name);
         if (!existing) {
             res.status(404).json({ error: `Source "${name}" not found` });
@@ -650,7 +1392,7 @@ async function main() {
         res.json({ ok: true });
     });
     // Test a source connection (without saving)
-    app.post("/api/sources/test", installRateLimit, async (req, res) => {
+    app.post("/api/sources/test", installRateLimit, need("sources", "write"), audit("sources", "write"), async (req, res) => {
         const { name, type, url, enabled, auth, tls } = req.body;
         if (!type || !url) {
             res.status(400).json({ error: "type and url are required" });
@@ -672,8 +1414,8 @@ async function main() {
         res.json(result);
     });
     // Toggle source enabled/disabled
-    app.patch("/api/sources/:name/toggle", async (req, res) => {
-        const name = req.params.name;
+    app.patch("/api/sources/:name/toggle", need("sources", "write"), audit("sources", "write"), async (req, res) => {
+        const name = String(req.params.name);
         const existing = registry.getSourceConfigs().find((s) => s.name === name);
         if (!existing) {
             res.status(404).json({ error: `Source "${name}" not found` });
@@ -694,28 +1436,193 @@ async function main() {
         }
     }
     // List discovered services
-    app.get("/api/services", async (_req, res) => {
+    app.get("/api/services", async (req, res) => {
         try {
+            const sess = req.session;
+            const callerTenant = sess?.tenant || "default";
             const result = await listServicesHandler(registry, {}, defaultContext());
-            res.json(parseToolResult(result));
+            const parsed = parseToolResult(result);
+            // Tenant-scope catalog enrichment so a viewer in tenant A
+            // doesn't accidentally see acme's owner/SLO metadata on a
+            // service that happens to share a name. Anonymous mode is
+            // session-less so callerTenant is "default" → matches
+            // entries with no tenant field too (pre-E7 behaviour).
+            if (parsed?.services) {
+                for (const s of parsed.services) {
+                    const entry = typeof s.name === "string" ? catalog.get(s.name, callerTenant) : undefined;
+                    if (entry)
+                        s.catalog = entry;
+                }
+            }
+            res.json(parsed);
         }
         catch {
             res.status(500).json({ error: "Failed to list services" });
         }
     });
+    // Read-only view of the configured catalog. Gated by the same
+    // "catalog:read" permission Phase E4 added to DEFAULT_POLICY.
+    app.get("/api/catalog", need("catalog", "read"), (req, res) => {
+        // Non-admin callers are pinned to their own tenant; callers holding
+        // users:delete see every tenant unless they pass ?tenant=X.
+        const session = req.session;
+        const isAdmin = hasPermission(session?.roles, "users", "delete");
+        const callerTenant = session?.tenant || "default";
+        const requestedTenant = qstr(req.query.tenant);
+        let tenantFilter;
+        if (isAdmin) {
+            tenantFilter = requestedTenant;
+        }
+        else {
+            tenantFilter = callerTenant;
+        }
+        const services = catalog.list(tenantFilter || undefined);
+        const count = Object.keys(services).length;
+        const configured = !!process.env.OMCP_SERVICE_CATALOG_FILE;
+        // scopedTo is null only for an admin who did not narrow the view.
+        const scopedTo = tenantFilter || (isAdmin ? null : callerTenant);
+        res.json({ services, count, configured, scopedTo });
+    });
+    // --- /api/products — MCP Products catalogue ---------------------------
+    // Same scoping / staging-visibility pattern as /api/catalog. Non-admins
+    // see only their own tenant's PUBLISHED products; admins see all
+    // tenants by default + staging.
+    app.get("/api/products", need("products", "read"), (req, res) => {
+        const sess = req.session;
+        const isAdmin = hasPermission(sess?.roles, "users", "delete");
+        const callerTenant = sess?.tenant || "default";
+        const requestedTenant = qstr(req.query.tenant);
+        const tenantFilter = isAdmin ? requestedTenant : callerTenant;
+        const includeStaging = isAdmin;
+        res.json({
+            products: products.list({ tenant: tenantFilter || undefined, includeStaging }),
+            configured: !!process.env.OMCP_PRODUCTS_FILE,
+            scopedTo: tenantFilter || (isAdmin ? null : callerTenant),
+            includesStaging: includeStaging,
+        });
+    });
+    // Upsert a product. Body is the same shape as a single entry
+    // in OMCP_PRODUCTS_FILE. The URL-path id must match the body id
+    // (defence-in-depth: the gate keys on body, the path keys the
+    // audit entry). When OMCP_PRODUCTS_FILE is set we also write the
+    // updated catalogue back to disk so the change survives a
+    // restart; without the file, the upsert is in-memory only.
+    app.put("/api/products/:id", need("products", "write"), audit("products", "write"), async (req, res) => {
+        const id = String(req.params.id);
+        const sess = req.session;
+        const isAdmin = hasPermission(sess?.roles, "users", "delete");
+        const callerTenant = sess?.tenant || "default";
+        const body = req.body;
+        if (!body || typeof body !== "object" || Array.isArray(body)) {
+            res.status(400).json({ error: "body must be a product object" });
+            return;
+        }
+        if (typeof body.id === "string" && body.id !== id) {
+            res.status(400).json({ error: `body.id '${body.id}' does not match URL path '${id}'` });
+            return;
+        }
+        // Force the id from the URL so the audit entry's target matches
+        // the persisted record even if the operator omitted it from the
+        // body.
+        const payload = { ...body, id };
+        let validated;
+        try {
+            validated = validateProduct(payload, `PUT /api/products/${id}`);
+        }
+        catch (e) {
+            // Validation failures are the caller's fault (400); anything
+            // else is unexpected and is rethrown.
+            if (e instanceof ProductsLoadError) {
+                res.status(400).json({ error: e.message });
+                return;
+            }
+            throw e;
+        }
+        // Tenant gate: non-admins can only write into their own tenant.
+        if (!isAdmin && (validated.tenant || "default") !== callerTenant) {
+            res.status(403).json({ error: "cannot write product into another tenant" });
+            return;
+        }
+        // If an existing product belongs to a different tenant, a non-
+        // admin overwrite would re-parent it — same 404-not-403 posture
+        // as cross-tenant gets.
+        const existing = products.get(id);
+        if (existing && !isAdmin && (existing.tenant || "default") !== callerTenant) {
+            res.status(404).json({ error: "not found" });
+            return;
+        }
+        const next = products.upsert(validated);
+        // `persisted` reflects whether the write to disk actually
+        // succeeded, not merely whether a products file is configured, so
+        // the caller is not told a change will survive a restart when the
+        // write failed.
+        let persisted = false;
+        const productsFile = process.env.OMCP_PRODUCTS_FILE;
+        if (productsFile) {
+            try {
+                await writeProductsFile(productsFile, next);
+                persisted = true;
+            }
+            catch (e) {
+                const reason = e instanceof Error ? e.message : String(e);
+                console.warn(`[products] PUT ${id}: failed to persist to ${productsFile}: ${reason} — in-memory state is still updated`);
+            }
+        }
+        res.json({ product: validated, persisted });
+    });
+    app.delete("/api/products/:id", need("products", "delete"), audit("products", "delete"), async (req, res) => {
+        const id = String(req.params.id);
+        const session = req.session;
+        const isAdmin = hasPermission(session?.roles, "users", "delete");
+        const callerTenant = session?.tenant || "default";
+        const existing = products.get(id);
+        // A missing product and, for a non-admin caller, a product owned
+        // by another tenant both answer with the same 404.
+        const deletable = Boolean(existing) && (isAdmin || (existing.tenant || "default") === callerTenant);
+        if (!deletable) {
+            res.status(404).json({ error: "not found" });
+            return;
+        }
+        const { file: next } = products.delete(id);
+        if (process.env.OMCP_PRODUCTS_FILE) {
+            // Best-effort write-back: a failed write is logged and the
+            // request still completes with 204.
+            try {
+                await writeProductsFile(process.env.OMCP_PRODUCTS_FILE, next);
+            }
+            catch (e) {
+                console.warn(`[products] DELETE ${id}: failed to persist to ${process.env.OMCP_PRODUCTS_FILE}: ${e.message} — in-memory state is still updated`);
+            }
+        }
+        res.status(204).end();
+    });
+    // Single product by id. Non-admins get a 404 (not 403) on a
+    // cross-tenant probe so the existence of the product isn't leaked
+    // — same posture as the rest of the tenancy layer.
+    app.get("/api/products/:id", need("products", "read"), (req, res) => {
+        const session = req.session;
+        const isAdmin = hasPermission(session?.roles, "users", "delete");
+        const callerTenant = session?.tenant || "default";
+        const id = String(req.params.id);
+        // Admins look the product up without a tenant filter; everyone
+        // else is restricted to their own tenant.
+        const product = isAdmin ? products.get(id, undefined) : products.get(id, callerTenant);
+        // Non-admins are also refused staging products, with the same 404
+        // as a product that does not exist for them.
+        const hidden = !product || (!isAdmin && product.status === "staging");
+        if (hidden) {
+            res.status(404).json({ error: "not found" });
+            return;
+        }
+        res.json(product);
+    });
     // Health endpoint for UI dashboard
     app.get("/api/health/:service", async (req, res) => {
         try {
-            const result = await getServiceHealthHandler(registry, { service: req.params.service }, defaultContext());
-            res.json(parseToolResult(result));
+            const callerTenant = req.session?.tenant || "default";
+            const service = String(req.params.service);
+            const result = await getServiceHealthHandler(registry, { service }, defaultContext());
+            const parsed = parseToolResult(result);
+            const entry = catalog.get(service, callerTenant);
+            if (entry && parsed && typeof parsed === "object")
+                parsed.catalog = entry;
+            res.json(parsed);
         }
         catch {
             res.status(500).json({ error: "Failed to get service health" });
         }
     });
     // Health for all services
-    app.get("/api/health", async (_req, res) => {
+    app.get("/api/health", async (req, res) => {
         try {
+            const callerTenant = req.session?.tenant || "default";
             const servicesResult = await listServicesHandler(registry, {}, defaultContext());
             const parsed = parseToolResult(servicesResult);
             const services = parsed?.services || [];
@@ -723,7 +1630,14 @@ async function main() {
             for (const svc of services) {
                 try {
                     const result = await getServiceHealthHandler(registry, { service: svc.name }, defaultContext());
-                    health[svc.name] = parseToolResult(result);
+                    const h = parseToolResult(result);
+                    // Same tenant scoping as /api/services to avoid the
+                    // dashboard cross-tenant catalog leak the reviewer
+                    // caught in slice 3.
+                    const entry = catalog.get(svc.name, callerTenant);
+                    if (entry && h && typeof h === "object")
+                        h.catalog = entry;
+                    health[svc.name] = h;
                 }
                 catch {
                     health[svc.name] = { error: "failed to fetch health" };
@@ -771,7 +1685,7 @@ async function main() {
         res.json(config.settings);
     });
     // Update general settings
-    app.put("/api/settings", (req, res) => {
+    app.put("/api/settings", need("settings", "write"), audit("settings", "write"), (req, res) => {
         config = { ...config, settings: { ...config.settings, ...req.body } };
         saveConfig(config);
         res.json({ ok: true, settings: config.settings });
@@ -787,7 +1701,7 @@ async function main() {
     app.get("/api/health-thresholds", (_req, res) => {
         res.json(config.healthThresholds);
     });
-    app.put("/api/health-thresholds", (req, res) => {
+    app.put("/api/health-thresholds", need("health", "write"), audit("health", "write"), (req, res) => {
         config = { ...config, healthThresholds: { ...config.healthThresholds, ...req.body } };
         applyConfigToRuntime(config, registry);
         saveConfig(config);
@@ -796,9 +1710,9 @@ async function main() {
     // --- Per-Source Metrics API ---
     // Get metrics for a source (active metrics or defaults)
     app.get("/api/sources/:name/metrics", (req, res) => {
-        const connector = registry.getByName(req.params.name);
+        const connector = registry.getByName(String(req.params.name));
         if (!connector) {
-            res.status(404).json({ error: `Source "${req.params.name}" not found` });
+            res.status(404).json({ error: `Source "${String(req.params.name)}" not found` });
             return;
         }
         res.json({
@@ -807,8 +1721,8 @@ async function main() {
         });
     });
     // Update metrics for a source
-    app.put("/api/sources/:name/metrics", async (req, res) => {
-        const name = req.params.name;
+    app.put("/api/sources/:name/metrics", need("sources", "write"), audit("sources", "write"), async (req, res) => {
+        const name = String(req.params.name);
         const sourceIdx = config.sources.findIndex((s) => s.name === name);
         if (sourceIdx === -1) {
             res.status(404).json({ error: `Source "${name}" not found` });
@@ -821,8 +1735,8 @@ async function main() {
         res.json({ ok: true });
     });
     // Reset a source's metrics to connector defaults
-    app.delete("/api/sources/:name/metrics", async (req, res) => {
-        const name = req.params.name;
+    app.delete("/api/sources/:name/metrics", need("sources", "write"), audit("sources", "write"), async (req, res) => {
+        const name = String(req.params.name);
         const sourceIdx = config.sources.findIndex((s) => s.name === name);
         if (sourceIdx === -1) {
             res.status(404).json({ error: `Source "${name}" not found` });
@@ -861,6 +1775,51 @@ async function main() {
     }, 5 * 60 * 1000);
     // Single-tenant auth gate. No credentials configured → anonymous (current
     // behaviour, fully backward compatible). Configured → gateCtx requires a valid credential.
+    // Per-identity sliding-window rate limit on the MCP HTTP transport.
+    // Each request from a named bearer-token caller increments that
+    // caller's bucket; once the per-window cap is hit the server replies
+    // 429 with a Retry-After. Anonymous /mcp traffic (no OMCP_API_KEYS
+    // configured) bypasses this — the global express-rate-limit IP gate
+    // still applies. Override via OMCP_TOOL_RATE_PER_MIN.
+    const toolRateLimiter = new IdentityRateLimiter({
+        limit: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
+    });
+    // Per-identity tracker key: tenant + NUL separator + principalId, so two
+    // credentials of the same name in different tenants don't share a bucket.
+    // splitIdentityKey reverses it (see the /api/usage row builder) for clean
+    // actor + tenant columns; a key without the separator → tenant "default".
+    const IDENTITY_KEY_SEP = "\u0000";
+    const identityKey = (ctx) => `${ctx.tenant}${IDENTITY_KEY_SEP}${ctx.principalId}`;
+    function splitIdentityKey(key) {
+        const i = key.indexOf(IDENTITY_KEY_SEP);
+        if (i < 0)
+            return { tenant: "default", actor: key };
+        return { tenant: key.slice(0, i), actor: key.slice(i + IDENTITY_KEY_SEP.length) };
+    }
+    // Token-budget: per-identity 24h rolling daily cap on tokens pulled
+    // through the MCP tool layer. Off by default (OMCP_TOOL_DAILY_TOKENS
+    // unset/zero/negative). When configured, big-data tools
+    // (query_logs / query_metrics / get_service_health) charge the
+    // estimated response size against the cap; over-cap calls return a
+    // structured OMCP_TOKEN_BUDGET_EXCEEDED payload instead of data.
+    const tokenBudget = new TokenBudget({
+        dailyLimit: resolveDailyTokenLimit(process.env.OMCP_TOOL_DAILY_TOKENS),
+        filePath: process.env.OMCP_TOKEN_BUDGET_FILE?.trim() || undefined,
+    });
+    // AWAIT bootstrap before any tool call can arrive: a void-fired
+    // bootstrap raced with /mcp requests would silently overwrite
+    // post-boot charges with the on-disk snapshot when it later
+    // resolved. The file is small (KB range) so the wait is
+    // negligible; a missing file returns immediately.
+    await tokenBudget.bootstrap();
+    // Flush on graceful shutdown so the debounce-window of pending charges
+    // isn't dropped on `kubectl rollout restart` etc. The process keeps running
+    // while we wait — the snapshot is small. Flush failures go to stderr.
+    for (const sig of ["SIGTERM", "SIGINT"]) {
+        process.once(sig, () => {
+            void tokenBudget.flushNow().catch((err) => console.error(`[token-budget] flush on ${sig} failed:`, err));
+        });
+    }
     // Auth gate: require a valid Bearer/X-API-Key on every /mcp request; resolve
     // the principal + its coarse source allow-list into the RequestContext.
     function gateCtx(req, res) {
@@ -873,7 +1832,31 @@ async function main() {
                 .json({ error: "unauthorized: valid Bearer token or X-API-Key required" });
             return null;
         }
-        return principalContext(cred.name, cred.allowedSources);
+        // Composite tenant:cred-name key so two creds with the same
+        // name in different tenants don't share a bucket.
+        const credTenant = (cred.tenant || "default");
+        const decision = toolRateLimiter.check(`${credTenant} ${cred.name}`);
+        // Standard RateLimit response headers — let well-behaved clients
+        // self-pace before they hit a 429. Emitted on BOTH allowed and
+        // denied paths so the caller always sees the live state.
+        res.setHeader("X-RateLimit-Limit", String(decision.limit));
+        res.setHeader("X-RateLimit-Remaining", String(Math.max(0, decision.limit - decision.count)));
+        res.setHeader("X-RateLimit-Window-Ms", String(decision.windowMs));
+        if (!decision.allowed) {
+            res.setHeader("Retry-After", String(decision.retryAfterSeconds));
+            res.status(429).json({
+                error: "rate limit exceeded for identity",
+                code: "OMCP_IDENTITY_RATE_LIMIT",
+                retryAfterSeconds: decision.retryAfterSeconds,
+                limit: decision.limit,
+                windowMs: decision.windowMs,
+            });
+            return null;
+        }
+        return principalContext(cred.name, cred.allowedSources, {
+            allowBypassRedaction: cred.bypassRedaction,
+            tenant: cred.tenant,
+        });
     }
     app.post("/mcp", async (req, res) => {
         const ctx = gateCtx(req, res);