@thotischner/observability-mcp 1.7.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/config/products.yaml.example +48 -0
  2. package/dist/audit/log.d.ts +99 -0
  3. package/dist/audit/log.js +180 -0
  4. package/dist/audit/log.test.d.ts +1 -0
  5. package/dist/audit/log.test.js +147 -0
  6. package/dist/audit/middleware.d.ts +20 -0
  7. package/dist/audit/middleware.js +50 -0
  8. package/dist/auth/credentials.d.ts +18 -0
  9. package/dist/auth/credentials.js +26 -1
  10. package/dist/auth/credentials.test.js +26 -1
  11. package/dist/auth/local-users.d.ts +62 -0
  12. package/dist/auth/local-users.js +143 -0
  13. package/dist/auth/local-users.test.d.ts +1 -0
  14. package/dist/auth/local-users.test.js +80 -0
  15. package/dist/auth/middleware.d.ts +48 -0
  16. package/dist/auth/middleware.js +65 -0
  17. package/dist/auth/middleware.test.d.ts +1 -0
  18. package/dist/auth/middleware.test.js +90 -0
  19. package/dist/auth/oidc/client.d.ts +73 -0
  20. package/dist/auth/oidc/client.js +104 -0
  21. package/dist/auth/oidc/client.test.d.ts +1 -0
  22. package/dist/auth/oidc/client.test.js +121 -0
  23. package/dist/auth/oidc/discovery.d.ts +38 -0
  24. package/dist/auth/oidc/discovery.js +48 -0
  25. package/dist/auth/oidc/discovery.test.d.ts +1 -0
  26. package/dist/auth/oidc/discovery.test.js +68 -0
  27. package/dist/auth/oidc/endpoints.d.ts +20 -0
  28. package/dist/auth/oidc/endpoints.js +124 -0
  29. package/dist/auth/oidc/endpoints.test.d.ts +7 -0
  30. package/dist/auth/oidc/endpoints.test.js +304 -0
  31. package/dist/auth/oidc/flow-cookie.d.ts +57 -0
  32. package/dist/auth/oidc/flow-cookie.js +142 -0
  33. package/dist/auth/oidc/flow-cookie.test.d.ts +1 -0
  34. package/dist/auth/oidc/flow-cookie.test.js +0 -0
  35. package/dist/auth/oidc/index.d.ts +7 -0
  36. package/dist/auth/oidc/index.js +6 -0
  37. package/dist/auth/oidc/jwks.d.ts +36 -0
  38. package/dist/auth/oidc/jwks.js +69 -0
  39. package/dist/auth/oidc/jwks.test.d.ts +1 -0
  40. package/dist/auth/oidc/jwks.test.js +65 -0
  41. package/dist/auth/oidc/jwt.d.ts +62 -0
  42. package/dist/auth/oidc/jwt.js +113 -0
  43. package/dist/auth/oidc/jwt.test.d.ts +1 -0
  44. package/dist/auth/oidc/jwt.test.js +141 -0
  45. package/dist/auth/oidc/pkce.d.ts +19 -0
  46. package/dist/auth/oidc/pkce.js +43 -0
  47. package/dist/auth/oidc/pkce.test.d.ts +1 -0
  48. package/dist/auth/oidc/pkce.test.js +55 -0
  49. package/dist/auth/oidc/runtime.d.ts +63 -0
  50. package/dist/auth/oidc/runtime.js +129 -0
  51. package/dist/auth/oidc/runtime.test.d.ts +1 -0
  52. package/dist/auth/oidc/runtime.test.js +180 -0
  53. package/dist/auth/policy/engine.d.ts +48 -0
  54. package/dist/auth/policy/engine.js +73 -0
  55. package/dist/auth/policy/engine.test.d.ts +1 -0
  56. package/dist/auth/policy/engine.test.js +98 -0
  57. package/dist/auth/policy/loader.d.ts +35 -0
  58. package/dist/auth/policy/loader.js +100 -0
  59. package/dist/auth/policy/opa.d.ts +69 -0
  60. package/dist/auth/policy/opa.js +162 -0
  61. package/dist/auth/policy/opa.test.d.ts +1 -0
  62. package/dist/auth/policy/opa.test.js +158 -0
  63. package/dist/auth/rbac.d.ts +40 -0
  64. package/dist/auth/rbac.js +120 -0
  65. package/dist/auth/rbac.test.d.ts +1 -0
  66. package/dist/auth/rbac.test.js +121 -0
  67. package/dist/auth/session.d.ts +66 -0
  68. package/dist/auth/session.js +146 -0
  69. package/dist/auth/session.test.d.ts +1 -0
  70. package/dist/auth/session.test.js +90 -0
  71. package/dist/catalog/loader.d.ts +67 -0
  72. package/dist/catalog/loader.js +122 -0
  73. package/dist/catalog/loader.test.d.ts +1 -0
  74. package/dist/catalog/loader.test.js +108 -0
  75. package/dist/connectors/kubernetes.d.ts +1 -0
  76. package/dist/connectors/kubernetes.js +12 -2
  77. package/dist/connectors/topology-vocabulary.d.ts +41 -0
  78. package/dist/connectors/topology-vocabulary.js +120 -0
  79. package/dist/connectors/topology-vocabulary.test.d.ts +1 -0
  80. package/dist/connectors/topology-vocabulary.test.js +63 -0
  81. package/dist/context.d.ts +13 -1
  82. package/dist/context.js +5 -1
  83. package/dist/index.js +1012 -29
  84. package/dist/net/egress-policy.js +2 -0
  85. package/dist/openapi.js +440 -0
  86. package/dist/openapi.test.d.ts +1 -0
  87. package/dist/openapi.test.js +64 -0
  88. package/dist/policy/redact.d.ts +44 -0
  89. package/dist/policy/redact.js +144 -0
  90. package/dist/policy/redact.test.d.ts +1 -0
  91. package/dist/policy/redact.test.js +172 -0
  92. package/dist/products/loader.d.ts +84 -0
  93. package/dist/products/loader.js +216 -0
  94. package/dist/products/loader.test.d.ts +1 -0
  95. package/dist/products/loader.test.js +168 -0
  96. package/dist/quota/limiter.d.ts +72 -0
  97. package/dist/quota/limiter.js +105 -0
  98. package/dist/quota/limiter.test.d.ts +1 -0
  99. package/dist/quota/limiter.test.js +119 -0
  100. package/dist/quota/token-budget.d.ts +119 -0
  101. package/dist/quota/token-budget.js +297 -0
  102. package/dist/quota/token-budget.test.d.ts +1 -0
  103. package/dist/quota/token-budget.test.js +215 -0
  104. package/dist/tenancy/context.d.ts +45 -0
  105. package/dist/tenancy/context.js +97 -0
  106. package/dist/tenancy/context.test.d.ts +1 -0
  107. package/dist/tenancy/context.test.js +72 -0
  108. package/dist/tenancy/migration.test.d.ts +7 -0
  109. package/dist/tenancy/migration.test.js +75 -0
  110. package/dist/ui/index.html +1454 -88
  111. package/package.json +20 -3
package/dist/index.js CHANGED
@@ -12,6 +12,22 @@ import { isTopologyProvider } from "./connectors/interface.js";
12
12
  import { defaultContext, principalContext } from "./context.js";
13
13
  import { enforceEntitledAccess, enterpriseGateStatus, enterpriseGateInfo, enterprisePolicyView, enterpriseCatalogView, enterpriseAuditTail, authorizeAdmin, updateRbacPolicy, updateCatalog, } from "./enterprise-gate.js";
14
14
  import { loadCredentials, credentialsConfigured, extractToken, resolveToken, } from "./auth/credentials.js";
15
+ import { issueSession, setCookieHeader, clearCookieHeader, generateSecret, } from "./auth/session.js";
16
+ import { readUsersFile, authenticate, } from "./auth/local-users.js";
17
+ import { buildSessionAttacher, buildRequireSession, } from "./auth/middleware.js";
18
+ import { buildRequirePermission, hasPermission, listGrantedPermissions, DEFAULT_POLICY, } from "./auth/rbac.js";
19
+ import { resolveOidcConfig, buildOidcRuntime } from "./auth/oidc/runtime.js";
20
+ import { registerOidcRoutes } from "./auth/oidc/endpoints.js";
21
+ import { BuiltinPolicyEngine } from "./auth/policy/engine.js";
22
+ import { loadPolicyFromFile, PolicyLoadError, VALID_RESOURCES, VALID_ACTIONS } from "./auth/policy/loader.js";
23
+ import { OpaPolicyEngine } from "./auth/policy/opa.js";
24
+ import { AuditLog } from "./audit/log.js";
25
+ import { buildAuditMiddleware } from "./audit/middleware.js";
26
+ import { readCatalogFile, CatalogStore } from "./catalog/loader.js";
27
+ import { readProductsFile, ProductsStore, validateProduct, writeProductsFile, ProductsLoadError } from "./products/loader.js";
28
+ import { redactValue } from "./policy/redact.js";
29
+ import { IdentityRateLimiter, resolveToolRatePerMin } from "./quota/limiter.js";
30
+ import { TokenBudget, estimateTokensFor, resolveDailyTokenLimit } from "./quota/token-budget.js";
15
31
  import { getPluginLoader } from "./connectors/loader.js";
16
32
  import { resolveHubCatalogUrl, describeInstalled, mergeCatalog, fetchHubCatalog, } from "./connectors/hub.js";
17
33
  import { isValidConnectorName, installTarball } from "./connectors/install.js";
@@ -41,6 +57,55 @@ const SERVER_VERSION = (() => {
41
57
  return "unknown";
42
58
  }
43
59
  })();
60
+ /** Defensive read of a single query-string value. Express's
61
+ * `req.query[k]` is typed as `string | ParsedQs | (string | ParsedQs)[]`
62
+ * — a caller passing `?actor=a&actor=b` (or `?actor[]=a`) yields an
63
+ * array (or object) rather than a string, which then propagates as
64
+ * `[a,b]` into downstream filters that expect a string. This helper
65
+ * returns the first string-shaped value or undefined; arrays / nested
66
+ * objects collapse safely instead of leaking through. */
67
+ function qstr(v) {
68
+ if (typeof v === "string")
69
+ return v;
70
+ if (Array.isArray(v) && typeof v[0] === "string")
71
+ return v[0];
72
+ return undefined;
73
+ }
74
+ /** Forensic breadcrumb for redaction-bypass tool invocations.
75
+ *
76
+ * Deliberately omits the principal identifier: the credential name
77
+ * lives in OMCP_API_KEYS, and threading any derivative of it into the
78
+ * log channel re-introduces a leak surface that static analysers
79
+ * (rightly) flag. SIEM cross-correlation goes via the correlationId
80
+ * UUID — slice 2 will wire the management-plane audit chain to carry
81
+ * the same correlationId alongside the (chain-protected) principal,
82
+ * so a downstream investigator can join the two channels there.
83
+ */
84
+ function emitBypassEvent(event, ctx, args) {
85
+ console.error(JSON.stringify({
86
+ event,
87
+ ts: new Date().toISOString(),
88
+ auth: ctx.auth,
89
+ tool: "query_logs",
90
+ service: args?.service ?? null,
91
+ correlationId: ctx.correlationId,
92
+ ...(event === "redaction_bypass_denied" ? { reason: "credential_not_in_OMCP_KEY_BYPASS_REDACTION" } : {}),
93
+ }));
94
+ }
95
+ /** Bridge from the new PolicyEngine to the existing
96
+ * hasPermission/buildRequirePermission signatures (which still take
97
+ * a plain {role: Permission[]} map). Built-in engine exposes the
98
+ * raw map directly; engines that don't (slice 4's OPA) will fall
99
+ * back to a synthesized one via .list(). */
100
+ function policyEngineToMap(engine) {
101
+ if (engine instanceof BuiltinPolicyEngine)
102
+ return engine.raw();
103
+ const out = {};
104
+ for (const role of engine.roles()) {
105
+ out[role] = engine.list([role]);
106
+ }
107
+ return out;
108
+ }
44
109
  function applyConfigToRuntime(config, registry) {
45
110
  setHealthThresholds(config.healthThresholds);
46
111
  }
@@ -113,6 +178,131 @@ async function main() {
113
178
  // so we cannot share a single McpServer across HTTP sessions. Each new
114
179
  // session needs its own server. The factory captures the live registry
115
180
  // by reference so tool handlers always see the current configuration.
181
+ // Catalog enrichers for the MCP tool surface: wrap the standard
182
+ // tool-result shape ({content:[{text: json}]}) and inject .catalog
183
+ // metadata where it matches a known service name. No-op when the
184
+ // catalog is empty (the demo case) or when the payload doesn't
185
+ // parse as JSON. The HTTP `/api/services` + `/api/health` handlers
186
+ // call the loader.ts CatalogStore directly; this path mirrors that
187
+ // behaviour for MCP clients (Claude Desktop, the agent, ...).
188
+ // McpToolResult is whatever the wrapped handler returned — keep it
189
+ // untyped so we don't fight the SDK's narrow `content: [{type:"text",...}]`
190
+ // overload. We pass the value back unchanged when it doesn't parse,
191
+ // and otherwise mutate the parsed JSON before re-stringifying into a
192
+ // fresh wrapper that mirrors the handler's own shape.
193
+ function enrichToolServicesText(result, ctx) {
194
+ try {
195
+ const parsed = JSON.parse(result.content[0]?.text ?? "{}");
196
+ if (parsed && Array.isArray(parsed.services)) {
197
+ for (const s of parsed.services) {
198
+ // Scope enrichment to the caller's tenant so we don't
199
+ // leak owner / on-call / SLO bytes for other tenants'
200
+ // services that happen to share a name in the catalog.
201
+ const entry = typeof s?.name === "string" ? catalog.get(s.name, ctx.tenant) : undefined;
202
+ if (entry)
203
+ s.catalog = entry;
204
+ }
205
+ }
206
+ const clone = { ...result, content: result.content.map((c, i) => i === 0 ? { ...c, text: JSON.stringify(parsed) } : c) };
207
+ return clone;
208
+ }
209
+ catch {
210
+ return result;
211
+ }
212
+ }
213
+ // redactToolText (defined after chargeTokenBudget below): apply PII / secret redaction to a tool result's text payload. No-op
214
+ // when OMCP_REDACTION=off. Adds a top-level `_redacted` field with
215
+ // the per-category counts so the agent (and the human) sees a hint
216
+ // like `{ email: 4, ipv4: 2, totalMatches: 6 }` instead of silently
217
+ // losing data.
218
+ /** Charge the estimated tokens in a tool response against the
219
+ * per-identity daily budget. When the budget would be exceeded,
220
+ * replace the response with a structured error payload —
221
+ * the tool's data never crosses the boundary, and the agent
222
+ * sees a parseable {error: "OMCP_TOKEN_BUDGET_EXCEEDED", ...}
223
+ * rather than a generic failure. Anonymous principals are not
224
+ * charged (the budget is per-credential).
225
+ *
226
+ * This charges RETROACTIVELY: the tool body has already executed,
227
+ * so the work is done by the time we decide to deny — the call
228
+ * that flips the bucket over the cap still pays the cost; the
229
+ * N+1 call denies before doing work. Pre-flight denial would
230
+ * require predicting response size before the connector runs,
231
+ * which isn't tractable for query_logs / query_metrics where
232
+ * size is data-dependent. The trade-off is intentional: one
233
+ * over-cap call per bucket roll vs an unhelpful "request denied,
234
+ * size unknown" upstream. */
235
+ function chargeTokenBudget(result, ctx, toolName) {
236
+ if (ctx.auth !== "apikey")
237
+ return result;
238
+ const text = result.content[0]?.text ?? "";
239
+ const tokens = estimateTokensFor(text);
240
+ const decision = tokenBudget.check(identityKey(ctx), tokens);
241
+ if (decision.allowed || decision.limit === 0)
242
+ return result;
243
+ // A single request larger than the entire daily cap can never
244
+ // succeed by waiting — surface a distinct error code so the
245
+ // agent doesn't loop. Otherwise the wait-then-retry path is the
246
+ // right answer (and freedAtRetry tells the agent how much they
247
+ // can request after the wait).
248
+ const requestExceedsCap = tokens > decision.limit;
249
+ const errBody = {
250
+ error: requestExceedsCap ? "OMCP_TOKEN_REQUEST_EXCEEDS_BUDGET" : "OMCP_TOKEN_BUDGET_EXCEEDED",
251
+ tool: toolName,
252
+ used: decision.used,
253
+ limit: decision.limit,
254
+ requested: tokens,
255
+ retryAfterSeconds: requestExceedsCap ? 0 : decision.retryAfterSeconds,
256
+ freedAtRetry: decision.freedAtRetry,
257
+ message: requestExceedsCap
258
+ ? `This single response (~${tokens} tokens) is larger than the entire daily budget (${decision.limit}). Retrying won't help — narrow the query (smaller window / lower limit / more selective filter) or raise OMCP_TOOL_DAILY_TOKENS.`
259
+ : `Daily token budget exceeded (${decision.used}/${decision.limit} tokens used in the trailing 24h; this call would have added ~${tokens}). Try again in ~${Math.ceil(decision.retryAfterSeconds / 3600)}h or raise OMCP_TOOL_DAILY_TOKENS.`,
260
+ };
261
+ // Preserve any additional content entries (e.g. a future
262
+ // tool returning [text, image]) — only the text payload of the
263
+ // first entry is replaced with the error JSON; everything after
264
+ // it passes through.
265
+ return {
266
+ ...result,
267
+ content: [
268
+ { ...result.content[0], text: JSON.stringify(errBody) },
269
+ ...result.content.slice(1),
270
+ ],
271
+ };
272
+ }
273
+ const REDACTION_ENABLED = String(process.env.OMCP_REDACTION ?? "on").toLowerCase() !== "off";
274
+ function redactToolText(result, opts = {}) {
275
+ if (!REDACTION_ENABLED)
276
+ return result;
277
+ if (opts.bypass)
278
+ return result;
279
+ try {
280
+ const parsed = JSON.parse(result.content[0]?.text ?? "{}");
281
+ const r = redactValue(parsed);
282
+ const redacted = r.value;
283
+ if (r.totalMatches > 0 && redacted && typeof redacted === "object") {
284
+ redacted._redacted = { ...r.matches, totalMatches: r.totalMatches };
285
+ }
286
+ const clone = { ...result, content: result.content.map((c, i) => i === 0 ? { ...c, text: JSON.stringify(redacted) } : c) };
287
+ return clone;
288
+ }
289
+ catch {
290
+ return result;
291
+ }
292
+ }
293
+ function enrichToolHealthText(result, serviceName, ctx) {
294
+ try {
295
+ const parsed = JSON.parse(result.content[0]?.text ?? "{}");
296
+ const entry = serviceName ? catalog.get(serviceName, ctx.tenant) : undefined;
297
+ if (entry && parsed && typeof parsed === "object")
298
+ parsed.catalog = entry;
299
+ const clone = { ...result, content: result.content.map((c, i) => i === 0 ? { ...c, text: JSON.stringify(parsed) } : c) };
300
+ return clone;
301
+ }
302
+ catch {
303
+ return result;
304
+ }
305
+ }
116
306
  function createMcpServer(ctx) {
117
307
  const mcpServer = new McpServer({
118
308
  name: "observability-mcp",
@@ -140,7 +330,8 @@ async function main() {
140
330
  .describe("Optional case-insensitive substring to narrow the result to matching service names (e.g. 'payment'). Omit to list every discovered service."),
141
331
  }, async (args) => {
142
332
  await enforceEntitledAccess(ctx, { tool: "list_services" });
143
- return withToolMetrics("list_services", () => listServicesHandler(registry, args, ctx));
333
+ const result = await withToolMetrics("list_services", () => listServicesHandler(registry, args, ctx));
334
+ return enrichToolServicesText(result, ctx);
144
335
  });
145
336
  const metricsList = getAvailableMetricNames(registry);
146
337
  const metricNames = registry.getBySignal("metrics").flatMap(c => c.getMetrics().map(m => m.name));
@@ -172,7 +363,8 @@ async function main() {
172
363
  .describe("Optional. Metric label to break the result down by, e.g. 'instance', 'pod', 'node'. When set, the response contains one series per distinct label value under `groups`. Default: a single aggregated series."),
173
364
  }, async (args) => {
174
365
  await enforceEntitledAccess(ctx, { tool: "query_metrics", source: args?.source, service: args?.service });
175
- return withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args, ctx));
366
+ const result = await withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args, ctx));
367
+ return chargeTokenBudget(result, ctx, "query_metrics");
176
368
  });
177
369
  mcpServer.tool("query_logs", [
178
370
  "Fetch recent log entries for ONE service over a look-back window, with a pre-computed summary (error/warning counts and the most frequent error patterns).",
@@ -201,9 +393,48 @@ async function main() {
201
393
  .positive()
202
394
  .optional()
203
395
  .describe("Optional. Maximum number of log entries to return (most recent first). Default: 100."),
396
+ bypass_redaction: z
397
+ .boolean()
398
+ .optional()
399
+ .describe("Optional. When true, request that PII/secret redaction be skipped for this single call. The server only honours this when the calling credential was explicitly authorised via OMCP_KEY_BYPASS_REDACTION; otherwise the request still gets redacted output. Default: false."),
204
400
  }, async (args) => {
205
401
  await enforceEntitledAccess(ctx, { tool: "query_logs", source: args?.source, service: args?.service });
206
- return withToolMetrics("query_logs", () => queryLogsHandler(registry, args, ctx));
402
+ const result = await withToolMetrics("query_logs", () => queryLogsHandler(registry, args, ctx));
403
+ // Redact PII / secrets from the log payload before it crosses the
404
+ // MCP boundary into the agent's context. Per-call bypass kicks in
405
+ // only when BOTH (a) the credential is OMCP_KEY_BYPASS_REDACTION
406
+ // allow-listed, AND (b) the agent explicitly opted in via the
407
+ // bypass_redaction arg. Either alone keeps redaction on, so
408
+ // configuration-only and arg-only paths both fail closed.
409
+ const wantsBypass = args?.bypass_redaction === true;
410
+ const allowed = ctx.allowBypassRedaction === true;
411
+ const bypass = wantsBypass && allowed;
412
+ if (bypass || (wantsBypass && !allowed)) {
413
+ // Forensic trail:
414
+ // 1. stderr breadcrumb for SIEM tail-and-forward setups (the
415
+ // log channel keeps no identifying credential reference
416
+ // to avoid CodeQL taint findings — correlation goes via
417
+ // the audit chain entry below).
418
+ // 2. management-plane audit chain entry so the bypass
419
+ // invocation is tamper-evident alongside the rest of
420
+ // /api/*. Persists if OMCP_MGMT_AUDIT_FILE is set.
421
+ emitBypassEvent(bypass ? "redaction_bypass_engaged" : "redaction_bypass_denied", ctx, args);
422
+ void mgmtAudit.record({
423
+ actor: { sub: ctx.principalId },
424
+ tenant: ctx.tenant,
425
+ resource: "redaction",
426
+ action: "bypass",
427
+ method: "MCP",
428
+ path: "/mcp/query_logs",
429
+ status: bypass ? 200 : 403,
430
+ target: args?.service ?? undefined,
431
+ }).catch(() => {
432
+ // Audit record is best-effort — losing one entry must not
433
+ // crash the tool call. The chain itself remains intact.
434
+ });
435
+ }
436
+ const redacted = redactToolText(result, { bypass });
437
+ return chargeTokenBudget(redacted, ctx, "query_logs");
207
438
  });
208
439
  mcpServer.tool("get_service_health", [
209
440
  "Produce a single aggregated health verdict for ONE service by combining its metrics and logs.",
@@ -216,7 +447,9 @@ async function main() {
216
447
  .describe("Required. Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'payment-service')."),
217
448
  }, async (args) => {
218
449
  await enforceEntitledAccess(ctx, { tool: "get_service_health", service: args?.service });
219
- return withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args, ctx));
450
+ const result = await withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args, ctx));
451
+ const enriched = enrichToolHealthText(result, String(args?.service ?? ""), ctx);
452
+ return chargeTokenBudget(enriched, ctx, "get_service_health");
220
453
  });
221
454
  mcpServer.tool("detect_anomalies", [
222
455
  "Scan one or all monitored services for abnormal behavior and return the findings ranked by severity.",
@@ -284,8 +517,113 @@ async function main() {
284
517
  });
285
518
  return mcpServer;
286
519
  }
520
+ // --- Management-plane auth (basic mode) -----------------------------------
521
+ // Off by default. Enable with `OMCP_AUTH=basic` + `OMCP_USERS_FILE` and
522
+ // optionally `OMCP_SESSION_SECRET`. When the secret is omitted in basic
523
+ // mode the server generates one for the process lifetime — sessions
524
+ // won't survive a restart and a warning is logged. See docs/auth-basic.md.
525
+ //
526
+ // SECURITY DEFAULT: misconfiguration in basic mode is fail-CLOSED — the
527
+ // process exits with a non-zero status rather than silently degrading
528
+ // to anonymous. Set `OMCP_AUTH_ALLOW_FALLBACK=true` to opt back into
529
+ // the old fall-back-to-anonymous behaviour (only sensible for the
530
+ // throwaway-demo case where ops can immediately see the boot log).
531
+ const requestedAuthMode = String(process.env.OMCP_AUTH ?? "anonymous").toLowerCase();
532
+ const allowFallback = String(process.env.OMCP_AUTH_ALLOW_FALLBACK ?? "false").toLowerCase() === "true";
533
+ function authMisconfig(reason) {
534
+ if (allowFallback) {
535
+ console.error(`[auth] ${reason} — OMCP_AUTH_ALLOW_FALLBACK=true → falling back to anonymous`);
536
+ return;
537
+ }
538
+ console.error(`[auth] ${reason} — refusing to start (set OMCP_AUTH_ALLOW_FALLBACK=true to override)`);
539
+ process.exit(1);
540
+ }
541
+ let authMode = "anonymous";
542
+ let sessionCfg;
543
+ let usersStore = null;
544
+ let secretEphemeral = false;
545
+ let oidcRuntime;
546
+ if (requestedAuthMode === "basic") {
547
+ const usersPath = process.env.OMCP_USERS_FILE;
548
+ if (!usersPath) {
549
+ authMisconfig("OMCP_AUTH=basic requires OMCP_USERS_FILE");
550
+ }
551
+ else {
552
+ usersStore = await readUsersFile(usersPath);
553
+ if (!usersStore) {
554
+ authMisconfig(`OMCP_USERS_FILE=${usersPath} unreadable or malformed`);
555
+ usersStore = null;
556
+ }
557
+ else if (usersStore.users.length === 0) {
558
+ authMisconfig(`OMCP_USERS_FILE=${usersPath} has no users`);
559
+ usersStore = null;
560
+ }
561
+ else {
562
+ let secret = process.env.OMCP_SESSION_SECRET;
563
+ if (!secret || secret.length < 32) {
564
+ secret = generateSecret();
565
+ secretEphemeral = true;
566
+ console.warn("[auth] OMCP_SESSION_SECRET not set (or < 32 chars). Generated an ephemeral secret — " +
567
+ "sessions will be invalidated on restart. Set OMCP_SESSION_SECRET to a stable value in production.");
568
+ }
569
+ sessionCfg = { secret };
570
+ authMode = "basic";
571
+ console.log(`[auth] basic mode active — ${usersStore.users.length} user(s) loaded`);
572
+ }
573
+ }
574
+ }
575
+ else if (requestedAuthMode === "oidc") {
576
+ const r = resolveOidcConfig(process.env);
577
+ if (r.error || !r.config) {
578
+ authMisconfig(r.error ?? "OIDC misconfigured");
579
+ }
580
+ else {
581
+ let secret = process.env.OMCP_SESSION_SECRET;
582
+ if (!secret || secret.length < 32) {
583
+ secret = generateSecret();
584
+ secretEphemeral = true;
585
+ console.warn("[auth] OMCP_SESSION_SECRET not set (or < 32 chars) in OIDC mode. " +
586
+ "Generated an ephemeral secret — sessions and OIDC state cookies " +
587
+ "will be invalidated on restart. Set OMCP_SESSION_SECRET in production.");
588
+ }
589
+ sessionCfg = { secret };
590
+ authMode = "oidc";
591
+ oidcRuntime = buildOidcRuntime(r.config);
592
+ console.log(`[auth] OIDC mode active — issuer=${r.config.issuer} clientId=${r.config.clientId} rolesClaim=${r.config.rolesClaim} mappedRoles=${Object.keys(r.config.roleMap).length}`);
593
+ }
594
+ }
595
+ else if (requestedAuthMode !== "anonymous") {
596
+ authMisconfig(`unknown OMCP_AUTH=${requestedAuthMode}`);
597
+ }
598
+ const authRuntime = { mode: authMode, session: sessionCfg, secretEphemeral, oidc: oidcRuntime };
287
599
  // --- HTTP server ---
288
600
  const app = express();
601
+ // Trust-proxy: when set, Express will read req.ip / req.secure from
602
+ // X-Forwarded-For + X-Forwarded-Proto. OFF by default — forging those
603
+ // headers behind a misconfigured deployment is the kind of mistake
604
+ // that gives every audit entry the same client IP. Set
605
+ // `OMCP_TRUST_PROXY` to:
606
+ // "true" — trust every hop (Express default-on shape)
607
+ // "loopback" — trust 127.0.0.1 / ::1 only (sensible default
608
+ // when running behind a same-host nginx)
609
+ // "<n>" — trust the last <n> hops
610
+ // "<ip>,<ip>" — explicit list (single value or comma-separated)
611
+ // Any falsy / unset value leaves it OFF so req.ip stays the raw
612
+ // socket address.
613
+ const trustProxy = process.env.OMCP_TRUST_PROXY;
614
+ if (trustProxy && trustProxy !== "false") {
615
+ if (trustProxy === "true") {
616
+ app.set("trust proxy", true);
617
+ }
618
+ else if (/^\d+$/.test(trustProxy)) {
619
+ app.set("trust proxy", parseInt(trustProxy, 10));
620
+ }
621
+ else {
622
+ // string or comma-separated IPs / "loopback" / etc — let Express's
623
+ // parser handle the lookup (it accepts any of the above forms).
624
+ app.set("trust proxy", trustProxy);
625
+ }
626
+ }
289
627
  app.use(express.json({ limit: "1mb" }));
290
628
  // Security headers
291
629
  app.use((req, res, next) => {
@@ -317,6 +655,130 @@ async function main() {
317
655
  });
318
656
  next();
319
657
  });
658
+ // Broad rate-limit on the whole management-plane surface. Generous
659
+ // enough to leave a polling UI plenty of headroom (300/min per IP),
660
+ // tight enough to stop unauthenticated brute-force walks of /api/*
661
+ // (and to keep CodeQL's missing-rate-limiting rule satisfied for
662
+ // every downstream route).
663
+ app.use("/api", rateLimit({
664
+ windowMs: 60_000,
665
+ max: 300,
666
+ standardHeaders: true,
667
+ legacyHeaders: false,
668
+ message: { error: "rate limited" },
669
+ }));
670
+ // Management-plane auth: attach the session payload to every request
671
+ // (no decision logic here — anonymous mode is a no-op). The gate is
672
+ // mounted explicitly on each protected route prefix further down so
673
+ // there is no string-match-based "is this public?" branch anywhere.
674
+ app.use(buildSessionAttacher(authRuntime));
675
+ const requireSession = buildRequireSession(authRuntime);
676
+ // Active policy engine — built-in DEFAULT_POLICY by default. When
677
+ // OMCP_RBAC_POLICY_FILE is set we load it and ALWAYS abort on
678
+ // failure: OMCP_AUTH_ALLOW_FALLBACK is for *auth-mode* fallback
679
+ // (basic → anonymous), not for the policy file. An operator who
680
+ // deployed a restrictive policy to TIGHTEN the default would be
681
+ // worse off silently inheriting the broader built-in
682
+ // (DEFAULT_POLICY grants admin → redaction:bypass) than crashing
683
+ // with a clear error. Policy file errors are unconditionally
684
+ // fatal so the configured intent always wins.
685
+ let policyEngine = new BuiltinPolicyEngine(DEFAULT_POLICY);
686
+ const policyFile = process.env.OMCP_RBAC_POLICY_FILE?.trim();
687
+ const opaUrl = process.env.OMCP_OPA_URL?.trim();
688
+ // OPA takes precedence over a file: an operator who wired both
689
+ // probably wants OPA as the live engine and uses the file as a
690
+ // local fallback only via OMCP_POLICY_ENGINE=builtin.
691
+ const enginePref = (process.env.OMCP_POLICY_ENGINE || "").toLowerCase();
692
+ if (opaUrl && enginePref !== "builtin") {
693
+ const declared = (process.env.OMCP_OPA_ROLES || "").split(",").map((s) => s.trim()).filter(Boolean);
694
+ policyEngine = new OpaPolicyEngine({
695
+ url: opaUrl,
696
+ packagePath: process.env.OMCP_OPA_PACKAGE || "observability/authz",
697
+ declaredRoles: declared.length > 0 ? declared : undefined,
698
+ bearerToken: process.env.OMCP_OPA_TOKEN || undefined,
699
+ });
700
+ console.log(`[auth] RBAC policy engine = OPA at ${opaUrl} (package ${process.env.OMCP_OPA_PACKAGE || "observability/authz"})`);
701
+ // Pre-warm: the sync RBAC gate denies on a cache miss while the
702
+ // first async OPA call is in flight. Hit every (role, resource,
703
+ // action) combination from the declared role set so the very
704
+ // first user request gets a real decision instead of a warming-
705
+ // deny. With 3 roles × 10 resources × 4 actions = 120 calls,
706
+ // OPA handles this in <1s and we keep it best-effort (any
707
+ // failure surfaces in the OPA logs, the engine retries on the
708
+ // first user-facing call anyway).
709
+ const opaEngine = policyEngine;
710
+ void (async () => {
711
+ const roles = opaEngine.roles();
712
+ if (roles.length === 0)
713
+ return;
714
+ const resources = [...VALID_RESOURCES];
715
+ const actions = [...VALID_ACTIONS];
716
+ const tasks = [];
717
+ for (const role of roles) {
718
+ for (const resource of resources)
719
+ for (const action of actions) {
720
+ tasks.push(opaEngine.warmEvaluate([role], resource, action));
721
+ }
722
+ tasks.push(opaEngine.warmList([role]));
723
+ }
724
+ try {
725
+ const settled = await Promise.allSettled(tasks);
726
+ const failed = settled.filter((s) => s.status === "rejected").length;
727
+ if (failed === 0) {
728
+ console.log(`[auth] OPA cache pre-warmed: ${settled.length} decisions cached for ${roles.length} role(s)`);
729
+ }
730
+ else {
731
+ console.warn(`[auth] OPA cache pre-warmed: ${settled.length - failed}/${settled.length} ok, ${failed} failed (gates will retry on first user call)`);
732
+ }
733
+ }
734
+ catch { /* best-effort */ }
735
+ })();
736
+ }
737
+ else if (policyFile) {
738
+ try {
739
+ policyEngine = loadPolicyFromFile(policyFile);
740
+ console.log(`[auth] RBAC policy loaded from ${policyFile} (${policyEngine.roles().join(", ")})`);
741
+ }
742
+ catch (e) {
743
+ const reason = e instanceof PolicyLoadError ? e.message : String(e);
744
+ console.error(`[auth] OMCP_RBAC_POLICY_FILE=${policyFile}: ${reason} — refusing to start (a malformed policy file would silently revert to the more permissive built-in default, defeating the point of the override)`);
745
+ process.exit(1);
746
+ }
747
+ }
748
+ const need = (resource, action) => buildRequirePermission(authRuntime, resource, action, policyEngineToMap(policyEngine));
749
+ // Management-plane audit log. Records one entry per mutating /api/*
750
+ // request. Writes JSONL to disk when OMCP_MGMT_AUDIT_FILE is set;
751
+ // otherwise an in-memory ring of the last 500 entries keeps the
752
+ // /api/audit endpoint useful in the demo / single-user case.
753
+ const mgmtAudit = new AuditLog({ file: process.env.OMCP_MGMT_AUDIT_FILE });
754
+ await mgmtAudit.bootstrap();
755
+ const audit = (resource, action) => buildAuditMiddleware({ audit: mgmtAudit, resource, action });
756
+ // Service catalog: optional operator-curated ownership / criticality /
757
+ // on-call metadata, keyed on the service name list_services returns.
758
+ // No file ⇒ empty catalog, enrichment is a no-op (anonymous demos
759
+ // see no behaviour change).
760
+ const catalog = new CatalogStore(await readCatalogFile(process.env.OMCP_SERVICE_CATALOG_FILE));
761
+ const products = new ProductsStore(await readProductsFile(process.env.OMCP_PRODUCTS_FILE));
762
+ // Protected route prefixes. /api/me, /api/auth/*, /api/info,
763
+ // /api/openapi.json deliberately don't appear here — they stay public.
764
+ for (const prefix of [
765
+ "/api/sources",
766
+ "/api/source-types",
767
+ "/api/services",
768
+ "/api/health",
769
+ "/api/health-thresholds",
770
+ "/api/topology",
771
+ "/api/settings",
772
+ "/api/connectors",
773
+ "/api/enterprise",
774
+ "/api/hub",
775
+ "/api/audit",
776
+ "/api/usage",
777
+ "/api/catalog",
778
+ "/api/policy",
779
+ ]) {
780
+ app.use(prefix, requireSession);
781
+ }
320
782
  // k8s-convention liveness/readiness probes at the root of the path
321
783
  // tree, no /api prefix. Helm chart points its probes here. Cheap
322
784
  // enough to skip the request-counter middleware.
@@ -387,6 +849,23 @@ async function main() {
387
849
  platform: process.platform,
388
850
  arch: process.arch,
389
851
  },
852
+ // Governance posture — surfaces the active management-plane
853
+ // configuration so external dashboards / discovery probes don't
854
+ // need a session to learn the deployment shape. Booleans only;
855
+ // file paths and the session secret stay private.
856
+ governance: {
857
+ authMode: authRuntime.mode,
858
+ authSecretEphemeral: !!authRuntime.secretEphemeral,
859
+ // OIDC issuer (URL only — never the client_secret) is the
860
+ // single piece of state external discovery needs to know
861
+ // *where* the IdP lives. Empty string when mode != "oidc".
862
+ oidcIssuer: oidcRuntime?.cfg.issuer ?? "",
863
+ auditPersisted: !!process.env.OMCP_MGMT_AUDIT_FILE,
864
+ catalogConfigured: catalog.count() > 0 || !!process.env.OMCP_SERVICE_CATALOG_FILE,
865
+ redaction: REDACTION_ENABLED,
866
+ trustProxy: !!(process.env.OMCP_TRUST_PROXY && process.env.OMCP_TRUST_PROXY !== "false"),
867
+ toolRatePerMin: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
868
+ },
390
869
  plugins: loader.list().map((p) => ({
391
870
  name: p.name,
392
871
  source: p.source,
@@ -395,6 +874,269 @@ async function main() {
395
874
  })),
396
875
  });
397
876
  });
877
+ // Same per-IP cap for /api/me and the auth endpoints — the UI polls
878
+ // this on every page load to decide whether to show the login modal,
879
+ // so a 60/min limit per IP is generous for humans and tight for
880
+ // scripted abuse.
881
+ const authReadRateLimit = rateLimit({
882
+ windowMs: 60_000,
883
+ max: 60,
884
+ standardHeaders: true,
885
+ legacyHeaders: false,
886
+ message: { error: "rate limited" },
887
+ });
888
+ // Current identity for the management plane. Always public so the UI
889
+ // can decide whether to show a login modal even before sending its
890
+ // first authenticated request.
891
+ app.get("/api/me", authReadRateLimit, (req, res) => {
892
+ if (authRuntime.mode === "anonymous") {
893
+ res.json({ authenticated: false, mode: "anonymous" });
894
+ return;
895
+ }
896
+ const sess = req.session;
897
+ if (!sess) {
898
+ res.json({ authenticated: false, mode: authRuntime.mode });
899
+ return;
900
+ }
901
+ res.json({
902
+ authenticated: true,
903
+ mode: authRuntime.mode,
904
+ user: {
905
+ sub: sess.sub,
906
+ name: sess.name,
907
+ email: sess.email,
908
+ tenant: sess.tenant || "default",
909
+ roles: sess.roles ?? [],
910
+ },
911
+ permissions: listGrantedPermissions(sess.roles, policyEngineToMap(policyEngine)),
912
+ exp: sess.exp,
913
+ // When the user signed in via OIDC, surface the IdP issuer
914
+ // URL so the UI can render an appropriate badge or link to
915
+ // an IdP-side profile page. Empty / absent in basic mode.
916
+ idpIssuer: authRuntime.mode === "oidc" ? (oidcRuntime?.cfg.issuer ?? "") : undefined,
917
+ });
918
+ });
919
+ // --- /api/policy — read-only view of the RBAC policy in effect -------
920
+ // Useful when an operator is debugging "why did role X get a 403" and
921
+ // doesn't have a checkout to read DEFAULT_POLICY from source. Gated
922
+ // by admin-only delete-on-users so the policy schema isn't visible
923
+ // to non-admin sessions.
924
+ app.get("/api/policy", need("users", "delete"), (req, res) => {
925
+ const map = policyEngineToMap(policyEngine);
926
+ // Optional dry-run: ?roles=admin,operator&resource=sources&action=delete
927
+ // returns { allowed, reason } so operators can probe the active
928
+ // engine without writing tests against a checkout.
929
+ const q = req.query;
930
+ if (q.resource && q.action) {
931
+ const dryRoles = typeof q.roles === "string" ? q.roles.split(",").map((r) => r.trim()).filter(Boolean) : undefined;
932
+ // Validate the probe values against the active vocabulary so
933
+ // an operator typo doesn't get a misleading "allowed:false
934
+ // reason: roles do not grant <typo>" reply.
935
+ if (!VALID_RESOURCES.has(q.resource)) {
936
+ res.json({ dryRun: { roles: dryRoles ?? [], resource: q.resource, action: q.action, allowed: false, reason: `unknown resource '${q.resource}' (valid: ${[...VALID_RESOURCES].join(", ")})` } });
937
+ return;
938
+ }
939
+ if (!VALID_ACTIONS.has(q.action)) {
940
+ res.json({ dryRun: { roles: dryRoles ?? [], resource: q.resource, action: q.action, allowed: false, reason: `unknown action '${q.action}' (valid: ${[...VALID_ACTIONS].join(", ")})` } });
941
+ return;
942
+ }
943
+ const result = policyEngine.evaluate(dryRoles, q.resource, q.action);
944
+ res.json({ dryRun: { roles: dryRoles ?? [], resource: q.resource, action: q.action, ...result } });
945
+ return;
946
+ }
947
+ res.json({
948
+ engine: policyEngine.kind(),
949
+ policy: map,
950
+ roles: policyEngine.roles(),
951
+ note: policyEngine.kind() === "builtin"
952
+ ? "DEFAULT_POLICY shipped with this build. Set OMCP_RBAC_POLICY_FILE to override."
953
+ : `policy loaded from ${policyEngine.kind()}; restart to reload.`,
954
+ });
955
+ });
956
+ // --- /api/audit — management-plane audit feed -------------------------
957
+ // Read-only, gated by the "audit:read" permission so only viewers /
958
+ // operators / admins (basically anyone authenticated in the default
959
+ // policy) can pull it. Supports optional ?from, ?to (RFC-3339), ?actor,
960
+ // ?action, ?limit (default 100, capped to ring size).
961
app.get("/api/audit", need("audit", "read"), (req, res) => {
    // Management-plane audit feed (read-only).
    //
    // Tenant scoping: a caller without `users:delete` (non-admin) only
    // ever sees entries of their own tenant. Admins see every tenant by
    // default and may narrow the view with ?tenant=<name>. The response
    // is a per-tenant view; tipHash is taken from the shared audit log.
    const sess = req.session;
    const isAdmin = hasPermission(sess?.roles, "users", "delete");
    const callerTenant = sess?.tenant || "default";
    const requestedTenant = qstr(req.query.tenant);
    const tenantFilter = isAdmin ? requestedTenant : callerTenant;
    // ?limit: parse once and treat a non-numeric value as "not supplied"
    // so the log's own default applies instead of forwarding NaN.
    const limitRaw = qstr(req.query.limit);
    const limitParsed = limitRaw ? Number.parseInt(limitRaw, 10) : undefined;
    const limit = Number.isNaN(limitParsed) ? undefined : limitParsed;
    const entries = mgmtAudit.list({
        from: qstr(req.query.from),
        to: qstr(req.query.to),
        actor: qstr(req.query.actor),
        action: qstr(req.query.action),
        tenant: tenantFilter || undefined,
        limit,
    });
    res.json({
        entries,
        tipHash: mgmtAudit.tipHash,
        persisted: !!process.env.OMCP_MGMT_AUDIT_FILE,
        // Tell the UI which tenant scope the view is currently showing
        // so a cross-tenant admin sees an explicit "(all tenants)" hint
        // (null) rather than a tenant name.
        scopedTo: tenantFilter || (isAdmin ? null : callerTenant),
    });
});
990
+ // --- /api/usage — per-identity MCP rate-limit snapshot -----------------
991
+ // Read-only view of the IdentityRateLimiter's bucket state. Gated by
992
+ // need("audit","read") — the same role set that already sees the
993
+ // audit log can see who is calling what. Anonymous /mcp traffic
994
+ // never enters a bucket so it doesn't show up here.
995
+ app.get("/api/usage", need("audit", "read"), (req, res) => {
996
+ const sess = req.session;
997
+ const isAdmin = hasPermission(sess?.roles, "users", "delete");
998
+ const callerTenant = sess?.tenant || "default";
999
+ const requestedTenant = qstr(req.query.tenant);
1000
+ const tenantFilter = isAdmin ? requestedTenant : callerTenant;
1001
+ const actorFilter = qstr(req.query.actor);
1002
+ // Union of identities known to either tracker. The tracker keys
1003
+ // are composite "<tenant> <name>"; we split them back out for the
1004
+ // response shape so the UI sees clean tenant + actor columns.
1005
+ const idSet = new Set([
1006
+ ...toolRateLimiter.knownIdentities(),
1007
+ ...tokenBudget.knownIdentities(),
1008
+ ]);
1009
+ const now = Date.now();
1010
+ const identities = [...idSet]
1011
+ .map((id) => {
1012
+ const split = splitIdentityKey(id);
1013
+ if (tenantFilter && split.tenant !== tenantFilter)
1014
+ return null;
1015
+ if (actorFilter && split.actor !== actorFilter)
1016
+ return null;
1017
+ const r = toolRateLimiter.inspect(id, now);
1018
+ const b = tokenBudget.inspect(id, now);
1019
+ return {
1020
+ actor: split.actor,
1021
+ tenant: split.tenant,
1022
+ count: r.count,
1023
+ limit: r.limit,
1024
+ windowMs: r.windowMs,
1025
+ tokens: { used: b.used, limit: b.limit, windowMs: b.windowMs },
1026
+ };
1027
+ })
1028
+ .filter((x) => x !== null);
1029
+ res.json({
1030
+ identities,
1031
+ defaultLimit: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
1032
+ windowMs: 60_000,
1033
+ tokens: {
1034
+ defaultLimit: resolveDailyTokenLimit(process.env.OMCP_TOOL_DAILY_TOKENS),
1035
+ windowMs: 24 * 60 * 60 * 1000,
1036
+ },
1037
+ // Same scoping breadcrumb /api/audit returns: which tenant
1038
+ // window the response is showing. null = "all tenants" (admin).
1039
+ scopedTo: tenantFilter || (isAdmin ? null : callerTenant),
1040
+ });
1041
+ });
1042
+ // --- /api/auth/* — login + logout for basic mode -----------------------
1043
+ // Login: POST { username, password } → 200 + Set-Cookie on success, 401
1044
+ // on bad creds, 400 on missing fields, 503 in anonymous mode (the UI
1045
+ // shouldn't have rendered the modal at all in that case but we still
1046
+ // answer cleanly). Logout: POST → 204 + clears the cookie.
1047
+ const loginRateLimit = rateLimit({
1048
+ windowMs: 60_000,
1049
+ max: 20,
1050
+ standardHeaders: true,
1051
+ legacyHeaders: false,
1052
+ message: { error: "too many login attempts, slow down" },
1053
+ });
1054
+ // Cached users-file mtime — on every login we stat the file and
1055
+ // re-read when it's changed since the last check. Adding/removing
1056
+ // a user therefore takes effect on the next login attempt, no server
1057
+ // restart required. Cheap path: a single stat() per attempt; the
1058
+ // rate limit caps that at 20/min/IP anyway.
1059
// mtime (ms) of OMCP_USERS_FILE as of the last successful load; null
// until the file has been stat'ed or loaded once.
let lastUsersMtimeMs = null;
// Re-reads OMCP_USERS_FILE when its mtime differs from the last one
// seen and swaps the result into `usersStore`.
//
// - No OMCP_USERS_FILE set: no-op.
// - File unreadable / stat fails: the cached store is kept.
// - File parses to no users: the cached store AND the cached mtime are
//   kept, so the next call tries again.
// - The "changed — reloaded" line is logged only for a genuine reload,
//   i.e. when an mtime had already been recorded before this call; the
//   very first load is not a "change".
async function maybeReloadUsers() {
    const path = process.env.OMCP_USERS_FILE;
    if (!path)
        return;
    try {
        const { stat } = await import("node:fs/promises");
        const { mtimeMs } = await stat(path);
        if (lastUsersMtimeMs !== null && mtimeMs === lastUsersMtimeMs)
            return;
        const fresh = await readUsersFile(path);
        if (!(fresh && fresh.users.length > 0))
            return;
        // Capture this BEFORE overwriting the cached mtime; checking it
        // afterwards would always see a non-null value.
        const isReload = lastUsersMtimeMs !== null;
        usersStore = fresh;
        lastUsersMtimeMs = mtimeMs;
        if (isReload) {
            console.log(`[auth] OMCP_USERS_FILE changed — reloaded ${fresh.users.length} user(s)`);
        }
    }
    catch {
        // File transiently unreadable — keep the cached store; logins
        // will continue to work with the last known set.
    }
}
1084
+ // Prime the cache so the first login doesn't log "changed" on every boot.
1085
+ if (authRuntime.mode === "basic") {
1086
+ const path = process.env.OMCP_USERS_FILE;
1087
+ if (path) {
1088
+ try {
1089
+ const { statSync } = await import("node:fs");
1090
+ lastUsersMtimeMs = statSync(path).mtimeMs;
1091
+ }
1092
+ catch { /* ignore — first login will pick it up */ }
1093
+ }
1094
+ }
1095
+ app.post("/api/auth/login", loginRateLimit, async (req, res) => {
1096
+ if (authRuntime.mode !== "basic" || !sessionCfg || !usersStore) {
1097
+ res.status(503).json({ error: "auth mode does not accept logins" });
1098
+ return;
1099
+ }
1100
+ await maybeReloadUsers();
1101
+ const body = (req.body || {});
1102
+ const username = typeof body.username === "string" ? body.username.trim() : "";
1103
+ const password = typeof body.password === "string" ? body.password : "";
1104
+ if (!username || !password) {
1105
+ res.status(400).json({ error: "username and password are required" });
1106
+ return;
1107
+ }
1108
+ const user = authenticate(username, password, usersStore);
1109
+ if (!user) {
1110
+ res.status(401).json({ error: "invalid credentials" });
1111
+ return;
1112
+ }
1113
+ const { cookie } = issueSession({ sub: user.username, name: user.name, roles: user.roles, tenant: user.tenant }, sessionCfg);
1114
+ const secure = req.secure || (req.headers["x-forwarded-proto"] === "https");
1115
+ res.setHeader("Set-Cookie", setCookieHeader(cookie, sessionCfg, { secure }));
1116
+ res.json({
1117
+ ok: true,
1118
+ user: { sub: user.username, name: user.name, roles: user.roles ?? [] },
1119
+ });
1120
+ });
1121
+ // Same per-IP cap as login — defends against logout-as-disruption
1122
+ // (an attacker spamming logouts at a forged session for another tab).
1123
+ app.post("/api/auth/logout", loginRateLimit, (req, res) => {
1124
+ if (authRuntime.mode === "anonymous" || !sessionCfg) {
1125
+ res.status(204).end();
1126
+ return;
1127
+ }
1128
+ const secure = req.secure || (req.headers["x-forwarded-proto"] === "https");
1129
+ res.setHeader("Set-Cookie", clearCookieHeader(sessionCfg, { secure }));
1130
+ res.status(204).end();
1131
+ });
1132
+ // OIDC code-flow endpoints (login redirect, callback, logout) — only
1133
+ // mounted when OMCP_AUTH=oidc resolved cleanly. registerOidcRoutes is
1134
+ // a no-op at the type level when oidcRuntime is undefined; we guard
1135
+ // here so we don't even define the routes in basic/anonymous mode.
1136
+ if (authRuntime.mode === "oidc" && oidcRuntime && sessionCfg) {
1137
+ registerOidcRoutes(app, { sessionCfg, oidc: oidcRuntime });
1138
+ console.log("[auth] OIDC endpoints registered: /api/auth/oidc/{login,callback,logout}");
1139
+ }
398
1140
  // Connectors currently loaded into this server (builtin + filesystem
399
1141
  // plugins), with manifest metadata — drives the UI "Connectors" page.
400
1142
  app.get("/api/connectors", (_req, res) => {
@@ -477,7 +1219,7 @@ async function main() {
477
1219
  // Only catalog tarballUrls are fetched (no arbitrary URL in the body)
478
1220
  // to avoid SSRF. The connector persists to PLUGINS_DIR (back it with
479
1221
  // a PVC on k8s so it survives restarts).
480
- app.post("/api/connectors/install", installRateLimit, async (req, res) => {
1222
+ app.post("/api/connectors/install", installRateLimit, need("connectors", "write"), audit("connectors", "write"), async (req, res) => {
481
1223
  if (process.env.ENABLE_UI_INSTALL !== "true") {
482
1224
  return res.status(403).json({
483
1225
  error: "UI install is disabled. Set ENABLE_UI_INSTALL=true and PLUGIN_TRUST_ROOT to enable it.",
@@ -546,7 +1288,7 @@ async function main() {
546
1288
  // ALWAYS verified against PLUGIN_TRUST_ROOT (signature + integrity),
547
1289
  // so an unsigned/tampered bundle is rejected. Body is the raw tarball
548
1290
  // bytes (application/octet-stream). Persists to PLUGINS_DIR.
549
- app.post("/api/connectors/upload", installRateLimit, express.raw({ type: "application/octet-stream", limit: "50mb" }), async (req, res) => {
1291
+ app.post("/api/connectors/upload", installRateLimit, need("connectors", "write"), audit("connectors", "write"), express.raw({ type: "application/octet-stream", limit: "50mb" }), async (req, res) => {
550
1292
  if (process.env.ENABLE_UI_INSTALL !== "true") {
551
1293
  return res.status(403).json({
552
1294
  error: "UI install is disabled. Set ENABLE_UI_INSTALL=true and PLUGIN_TRUST_ROOT to enable it.",
@@ -587,7 +1329,7 @@ async function main() {
587
1329
  }
588
1330
  });
589
1331
  // Add a new source
590
- app.post("/api/sources", installRateLimit, async (req, res) => {
1332
+ app.post("/api/sources", installRateLimit, need("sources", "write"), audit("sources", "write"), async (req, res) => {
591
1333
  const { name, type, url, enabled, auth, tls } = req.body;
592
1334
  if (!name || !type || !url) {
593
1335
  res.status(400).json({ error: "name, type, and url are required" });
@@ -609,8 +1351,8 @@ async function main() {
609
1351
  res.status(201).json({ ok: true, source });
610
1352
  });
611
1353
  // Update an existing source
612
- app.put("/api/sources/:name", async (req, res) => {
613
- const oldName = req.params.name;
1354
+ app.put("/api/sources/:name", need("sources", "write"), audit("sources", "write"), async (req, res) => {
1355
+ const oldName = String(req.params.name);
614
1356
  const { name, type, url, enabled, auth, tls } = req.body;
615
1357
  const existing = registry.getSourceConfigs().find((s) => s.name === oldName);
616
1358
  if (!existing) {
@@ -638,8 +1380,8 @@ async function main() {
638
1380
  res.json({ ok: true, source });
639
1381
  });
640
1382
  // Delete a source
641
- app.delete("/api/sources/:name", async (req, res) => {
642
- const name = req.params.name;
1383
+ app.delete("/api/sources/:name", need("sources", "delete"), audit("sources", "delete"), async (req, res) => {
1384
+ const name = String(req.params.name);
643
1385
  const existing = registry.getSourceConfigs().find((s) => s.name === name);
644
1386
  if (!existing) {
645
1387
  res.status(404).json({ error: `Source "${name}" not found` });
@@ -650,7 +1392,7 @@ async function main() {
650
1392
  res.json({ ok: true });
651
1393
  });
652
1394
  // Test a source connection (without saving)
653
- app.post("/api/sources/test", installRateLimit, async (req, res) => {
1395
+ app.post("/api/sources/test", installRateLimit, need("sources", "write"), audit("sources", "write"), async (req, res) => {
654
1396
  const { name, type, url, enabled, auth, tls } = req.body;
655
1397
  if (!type || !url) {
656
1398
  res.status(400).json({ error: "type and url are required" });
@@ -672,8 +1414,8 @@ async function main() {
672
1414
  res.json(result);
673
1415
  });
674
1416
  // Toggle source enabled/disabled
675
- app.patch("/api/sources/:name/toggle", async (req, res) => {
676
- const name = req.params.name;
1417
+ app.patch("/api/sources/:name/toggle", need("sources", "write"), audit("sources", "write"), async (req, res) => {
1418
+ const name = String(req.params.name);
677
1419
  const existing = registry.getSourceConfigs().find((s) => s.name === name);
678
1420
  if (!existing) {
679
1421
  res.status(404).json({ error: `Source "${name}" not found` });
@@ -694,28 +1436,193 @@ async function main() {
694
1436
  }
695
1437
  }
696
1438
  // List discovered services
697
- app.get("/api/services", async (_req, res) => {
1439
+ app.get("/api/services", async (req, res) => {
698
1440
  try {
1441
+ const sess = req.session;
1442
+ const callerTenant = sess?.tenant || "default";
699
1443
  const result = await listServicesHandler(registry, {}, defaultContext());
700
- res.json(parseToolResult(result));
1444
+ const parsed = parseToolResult(result);
1445
+ // Tenant-scope catalog enrichment so a viewer in tenant A
1446
+ // doesn't accidentally see acme's owner/SLO metadata on a
1447
+ // service that happens to share a name. Anonymous mode is
1448
+ // session-less so callerTenant is "default" → matches
1449
+ // entries with no tenant field too (pre-E7 behaviour).
1450
+ if (parsed?.services) {
1451
+ for (const s of parsed.services) {
1452
+ const entry = typeof s.name === "string" ? catalog.get(s.name, callerTenant) : undefined;
1453
+ if (entry)
1454
+ s.catalog = entry;
1455
+ }
1456
+ }
1457
+ res.json(parsed);
701
1458
  }
702
1459
  catch {
703
1460
  res.status(500).json({ error: "Failed to list services" });
704
1461
  }
705
1462
  });
1463
+ // Read-only view of the configured catalog. Gated by the same
1464
+ // "catalog:read" permission Phase E4 added to DEFAULT_POLICY.
1465
+ app.get("/api/catalog", need("catalog", "read"), (req, res) => {
1466
+ // Same scoping shape as /api/audit + /api/usage: non-admins see
1467
+ // only their own tenant's catalog entries; admins see all by
1468
+ // default and can ?tenant=X for an explicit drill-down.
1469
+ const sess = req.session;
1470
+ const isAdmin = hasPermission(sess?.roles, "users", "delete");
1471
+ const callerTenant = sess?.tenant || "default";
1472
+ const requestedTenant = qstr(req.query.tenant);
1473
+ const tenantFilter = isAdmin ? requestedTenant : callerTenant;
1474
+ const services = catalog.list(tenantFilter || undefined);
1475
+ res.json({
1476
+ services,
1477
+ count: Object.keys(services).length,
1478
+ configured: !!process.env.OMCP_SERVICE_CATALOG_FILE,
1479
+ scopedTo: tenantFilter || (isAdmin ? null : callerTenant),
1480
+ });
1481
+ });
1482
+ // --- /api/products — MCP Products catalogue ---------------------------
1483
+ // Same scoping / staging-visibility pattern as /api/catalog. Non-admins
1484
+ // see only their own tenant's PUBLISHED products; admins see all
1485
+ // tenants by default + staging.
1486
+ app.get("/api/products", need("products", "read"), (req, res) => {
1487
+ const sess = req.session;
1488
+ const isAdmin = hasPermission(sess?.roles, "users", "delete");
1489
+ const callerTenant = sess?.tenant || "default";
1490
+ const requestedTenant = qstr(req.query.tenant);
1491
+ const tenantFilter = isAdmin ? requestedTenant : callerTenant;
1492
+ const includeStaging = isAdmin;
1493
+ res.json({
1494
+ products: products.list({ tenant: tenantFilter || undefined, includeStaging }),
1495
+ configured: !!process.env.OMCP_PRODUCTS_FILE,
1496
+ scopedTo: tenantFilter || (isAdmin ? null : callerTenant),
1497
+ includesStaging: includeStaging,
1498
+ });
1499
+ });
1500
+ // Upsert a product. Body is the same shape as a single entry
1501
+ // in OMCP_PRODUCTS_FILE. The URL-path id must match the body id
1502
+ // (defence-in-depth: the gate keys on body, the path keys the
1503
+ // audit entry). When OMCP_PRODUCTS_FILE is set we also write the
1504
+ // updated catalogue back to disk so the change survives a
1505
+ // restart; without the file, the upsert is in-memory only.
1506
app.put("/api/products/:id", need("products", "write"), audit("products", "write"), async (req, res) => {
    // Upsert a single product. Responds:
    //   400 — body is not an object, body.id disagrees with the URL id,
    //         or validateProduct rejects the payload (ProductsLoadError)
    //   403 — non-admin writing into a tenant other than their own
    //   404 — non-admin overwriting a product owned by another tenant
    //   200 — { product, persisted } where `persisted` is true only if
    //         the catalogue was actually written to OMCP_PRODUCTS_FILE
    const id = String(req.params.id);
    const sess = req.session;
    const isAdmin = hasPermission(sess?.roles, "users", "delete");
    const callerTenant = sess?.tenant || "default";
    const body = req.body;
    if (!body || typeof body !== "object" || Array.isArray(body)) {
        res.status(400).json({ error: "body must be a product object" });
        return;
    }
    if (typeof body.id === "string" && body.id !== id) {
        res.status(400).json({ error: `body.id '${body.id}' does not match URL path '${id}'` });
        return;
    }
    // Force the id from the URL so the audit entry's target matches
    // the persisted record even if the operator omitted it from the
    // body.
    const payload = { ...body, id };
    let validated;
    try {
        validated = validateProduct(payload, `PUT /api/products/${id}`);
    }
    catch (e) {
        if (e instanceof ProductsLoadError) {
            res.status(400).json({ error: e.message });
            return;
        }
        throw e;
    }
    // Tenant gate: non-admins can only write into their own tenant.
    if (!isAdmin && (validated.tenant || "default") !== callerTenant) {
        res.status(403).json({ error: "cannot write product into another tenant" });
        return;
    }
    // If an existing product belongs to a different tenant, a non-
    // admin overwrite would re-parent it — same 404-not-403 posture
    // as cross-tenant gets.
    const existing = products.get(id);
    if (existing && !isAdmin && (existing.tenant || "default") !== callerTenant) {
        res.status(404).json({ error: "not found" });
        return;
    }
    const next = products.upsert(validated);
    // Write-back is best-effort: a failed write is logged and the
    // in-memory upsert stands, but the response must not claim the
    // change was persisted when it was not.
    const productsFile = process.env.OMCP_PRODUCTS_FILE;
    let persisted = false;
    if (productsFile) {
        try {
            await writeProductsFile(productsFile, next);
            persisted = true;
        }
        catch (e) {
            console.warn(`[products] PUT ${id}: failed to persist to ${productsFile}: ${e.message} — in-memory state is still updated`);
        }
    }
    res.json({ product: validated, persisted });
});
1559
+ app.delete("/api/products/:id", need("products", "delete"), audit("products", "delete"), async (req, res) => {
1560
+ const id = String(req.params.id);
1561
+ const sess = req.session;
1562
+ const isAdmin = hasPermission(sess?.roles, "users", "delete");
1563
+ const callerTenant = sess?.tenant || "default";
1564
+ const existing = products.get(id);
1565
+ if (!existing) {
1566
+ res.status(404).json({ error: "not found" });
1567
+ return;
1568
+ }
1569
+ if (!isAdmin && (existing.tenant || "default") !== callerTenant) {
1570
+ res.status(404).json({ error: "not found" });
1571
+ return;
1572
+ }
1573
+ const { file: next } = products.delete(id);
1574
+ if (process.env.OMCP_PRODUCTS_FILE) {
1575
+ try {
1576
+ await writeProductsFile(process.env.OMCP_PRODUCTS_FILE, next);
1577
+ }
1578
+ catch (e) {
1579
+ console.warn(`[products] DELETE ${id}: failed to persist to ${process.env.OMCP_PRODUCTS_FILE}: ${e.message} — in-memory state is still updated`);
1580
+ }
1581
+ }
1582
+ res.status(204).end();
1583
+ });
1584
+ // Single product by id. Non-admins get a 404 (not 403) on a
1585
+ // cross-tenant probe so the existence of the product isn't leaked
1586
+ // — same posture as the rest of the tenancy layer.
1587
+ app.get("/api/products/:id", need("products", "read"), (req, res) => {
1588
+ const sess = req.session;
1589
+ const isAdmin = hasPermission(sess?.roles, "users", "delete");
1590
+ const callerTenant = sess?.tenant || "default";
1591
+ const tenantFilter = isAdmin ? undefined : callerTenant;
1592
+ const id = String(req.params.id);
1593
+ const p = products.get(id, tenantFilter);
1594
+ if (!p) {
1595
+ res.status(404).json({ error: "not found" });
1596
+ return;
1597
+ }
1598
+ // Non-admins also don't see staging products even if they happen
1599
+ // to belong to the same tenant.
1600
+ if (!isAdmin && p.status === "staging") {
1601
+ res.status(404).json({ error: "not found" });
1602
+ return;
1603
+ }
1604
+ res.json(p);
1605
+ });
706
1606
  // Health endpoint for UI dashboard
707
1607
  app.get("/api/health/:service", async (req, res) => {
708
1608
  try {
709
- const result = await getServiceHealthHandler(registry, { service: req.params.service }, defaultContext());
710
- res.json(parseToolResult(result));
1609
+ const callerTenant = req.session?.tenant || "default";
1610
+ const service = String(req.params.service);
1611
+ const result = await getServiceHealthHandler(registry, { service }, defaultContext());
1612
+ const parsed = parseToolResult(result);
1613
+ const entry = catalog.get(service, callerTenant);
1614
+ if (entry && parsed && typeof parsed === "object")
1615
+ parsed.catalog = entry;
1616
+ res.json(parsed);
711
1617
  }
712
1618
  catch {
713
1619
  res.status(500).json({ error: "Failed to get service health" });
714
1620
  }
715
1621
  });
716
1622
  // Health for all services
717
- app.get("/api/health", async (_req, res) => {
1623
+ app.get("/api/health", async (req, res) => {
718
1624
  try {
1625
+ const callerTenant = req.session?.tenant || "default";
719
1626
  const servicesResult = await listServicesHandler(registry, {}, defaultContext());
720
1627
  const parsed = parseToolResult(servicesResult);
721
1628
  const services = parsed?.services || [];
@@ -723,7 +1630,14 @@ async function main() {
723
1630
  for (const svc of services) {
724
1631
  try {
725
1632
  const result = await getServiceHealthHandler(registry, { service: svc.name }, defaultContext());
726
- health[svc.name] = parseToolResult(result);
1633
+ const h = parseToolResult(result);
1634
+ // Same tenant scoping as /api/services to avoid the
1635
+ // dashboard cross-tenant catalog leak the reviewer
1636
+ // caught in slice 3.
1637
+ const entry = catalog.get(svc.name, callerTenant);
1638
+ if (entry && h && typeof h === "object")
1639
+ h.catalog = entry;
1640
+ health[svc.name] = h;
727
1641
  }
728
1642
  catch {
729
1643
  health[svc.name] = { error: "failed to fetch health" };
@@ -771,7 +1685,7 @@ async function main() {
771
1685
  res.json(config.settings);
772
1686
  });
773
1687
  // Update general settings
774
- app.put("/api/settings", (req, res) => {
1688
+ app.put("/api/settings", need("settings", "write"), audit("settings", "write"), (req, res) => {
775
1689
  config = { ...config, settings: { ...config.settings, ...req.body } };
776
1690
  saveConfig(config);
777
1691
  res.json({ ok: true, settings: config.settings });
@@ -787,7 +1701,7 @@ async function main() {
787
1701
  app.get("/api/health-thresholds", (_req, res) => {
788
1702
  res.json(config.healthThresholds);
789
1703
  });
790
- app.put("/api/health-thresholds", (req, res) => {
1704
+ app.put("/api/health-thresholds", need("health", "write"), audit("health", "write"), (req, res) => {
791
1705
  config = { ...config, healthThresholds: { ...config.healthThresholds, ...req.body } };
792
1706
  applyConfigToRuntime(config, registry);
793
1707
  saveConfig(config);
@@ -796,9 +1710,9 @@ async function main() {
796
1710
  // --- Per-Source Metrics API ---
797
1711
  // Get metrics for a source (active metrics or defaults)
798
1712
  app.get("/api/sources/:name/metrics", (req, res) => {
799
- const connector = registry.getByName(req.params.name);
1713
+ const connector = registry.getByName(String(req.params.name));
800
1714
  if (!connector) {
801
- res.status(404).json({ error: `Source "${req.params.name}" not found` });
1715
+ res.status(404).json({ error: `Source "${String(req.params.name)}" not found` });
802
1716
  return;
803
1717
  }
804
1718
  res.json({
@@ -807,8 +1721,8 @@ async function main() {
807
1721
  });
808
1722
  });
809
1723
  // Update metrics for a source
810
- app.put("/api/sources/:name/metrics", async (req, res) => {
811
- const name = req.params.name;
1724
+ app.put("/api/sources/:name/metrics", need("sources", "write"), audit("sources", "write"), async (req, res) => {
1725
+ const name = String(req.params.name);
812
1726
  const sourceIdx = config.sources.findIndex((s) => s.name === name);
813
1727
  if (sourceIdx === -1) {
814
1728
  res.status(404).json({ error: `Source "${name}" not found` });
@@ -821,8 +1735,8 @@ async function main() {
821
1735
  res.json({ ok: true });
822
1736
  });
823
1737
  // Reset a source's metrics to connector defaults
824
- app.delete("/api/sources/:name/metrics", async (req, res) => {
825
- const name = req.params.name;
1738
+ app.delete("/api/sources/:name/metrics", need("sources", "write"), audit("sources", "write"), async (req, res) => {
1739
+ const name = String(req.params.name);
826
1740
  const sourceIdx = config.sources.findIndex((s) => s.name === name);
827
1741
  if (sourceIdx === -1) {
828
1742
  res.status(404).json({ error: `Source "${name}" not found` });
@@ -861,6 +1775,51 @@ async function main() {
861
1775
  }, 5 * 60 * 1000);
862
1776
  // Single-tenant auth gate. No credentials configured → anonymous (current
863
1777
  // behaviour, fully backward compatible). Configured → require a valid
1778
+ // Per-identity sliding-window rate limit on the MCP HTTP transport.
1779
+ // Each request from a named bearer-token caller increments that
1780
+ // caller's bucket; once the per-window cap is hit the server replies
1781
+ // 429 with a Retry-After. Anonymous /mcp traffic (no OMCP_API_KEYS
1782
+ // configured) bypasses this — the global express-rate-limit IP gate
1783
+ // still applies. Override via OMCP_TOOL_RATE_PER_MIN.
1784
+ const toolRateLimiter = new IdentityRateLimiter({
1785
+ limit: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
1786
+ });
1787
+ // Per-identity tracker key. Composes tenant + principalId so two
1788
+ // credentials of the same name in different tenants don't share
1789
+ // a bucket. Surface-level fields in /api/usage are still split
1790
+ // back out (see the row builder there) so the UI shows clean
1791
+ // actor + tenant columns.
1792
// Separator placed between tenant and principalId inside a composite
// identity key. It is spelled as an explicit escape sequence (not a raw
// control character) so it cannot be silently stripped by editors, diff
// viewers or copy/paste, which would turn the split below into
// `indexOf("")` (always 0).
// NOTE(review): assumes NUL never appears in a tenant or principal name,
// and that any previously persisted keys use the same separator — confirm.
const IDENTITY_KEY_SEPARATOR = "\u0000";
/**
 * Build the composite per-identity key for a request context.
 *
 * @param {{ tenant: string, principalId: string }} ctx - object carrying
 *   the `tenant` and `principalId` fields that are interpolated into the key.
 * @returns {string} `tenant + IDENTITY_KEY_SEPARATOR + principalId`.
 */
const identityKey = (ctx) => `${ctx.tenant}${IDENTITY_KEY_SEPARATOR}${ctx.principalId}`;
/**
 * Inverse of `identityKey`: split a composite key back into its parts.
 *
 * The key is split at the FIRST separator, so everything after it is
 * returned as the actor unchanged. A key that contains no separator is
 * treated as a bare actor name in the "default" tenant.
 *
 * @param {string} key - composite key, or a bare actor name.
 * @returns {{ tenant: string, actor: string }}
 */
function splitIdentityKey(key) {
    const i = key.indexOf(IDENTITY_KEY_SEPARATOR);
    if (i < 0)
        return { tenant: "default", actor: key };
    return {
        tenant: key.slice(0, i),
        actor: key.slice(i + IDENTITY_KEY_SEPARATOR.length),
    };
}
1799
+ // Token-budget: per-identity 24h rolling daily cap on tokens pulled
1800
+ // through the MCP tool layer. Off by default (OMCP_TOOL_DAILY_TOKENS
1801
+ // unset/zero/negative). When configured, big-data tools
1802
+ // (query_logs / query_metrics / get_service_health) charge the
1803
+ // estimated response size against the cap; over-cap calls return a
1804
+ // structured OMCP_TOKEN_BUDGET_EXCEEDED payload instead of data.
1805
+ const tokenBudget = new TokenBudget({
1806
+ dailyLimit: resolveDailyTokenLimit(process.env.OMCP_TOOL_DAILY_TOKENS),
1807
+ filePath: process.env.OMCP_TOKEN_BUDGET_FILE?.trim() || undefined,
1808
+ });
1809
+ // AWAIT bootstrap before any tool call can arrive: a void-fired
1810
+ // bootstrap raced with /mcp requests would silently overwrite
1811
+ // post-boot charges with the on-disk snapshot when it later
1812
+ // resolved. The file is small (KB range) so the wait is
1813
+ // negligible; a missing file returns immediately.
1814
+ await tokenBudget.bootstrap();
1815
+ // Flush on graceful shutdown so the debounce-window of pending
1816
+ // charges isn't dropped on `kubectl rollout restart` etc. The
1817
+ // process keeps running while we wait — the snapshot is small.
1818
+ for (const sig of ["SIGTERM", "SIGINT"]) {
1819
+ process.once(sig, () => {
1820
+ void tokenBudget.flushNow().catch(() => { });
1821
+ });
1822
+ }
864
1823
  // Bearer/X-API-Key on every /mcp request; resolve the principal + its
865
1824
  // coarse source allow-list into the RequestContext.
866
1825
  function gateCtx(req, res) {
@@ -873,7 +1832,31 @@ async function main() {
873
1832
  .json({ error: "unauthorized: valid Bearer token or X-API-Key required" });
874
1833
  return null;
875
1834
  }
876
- return principalContext(cred.name, cred.allowedSources);
1835
+ // Composite tenant:cred-name key so two creds with the same
1836
+ // name in different tenants don't share a bucket.
1837
+ const credTenant = (cred.tenant || "default");
1838
+ const decision = toolRateLimiter.check(`${credTenant} ${cred.name}`);
1839
+ // Standard RateLimit response headers — let well-behaved clients
1840
+ // self-pace before they hit a 429. Emitted on BOTH allowed and
1841
+ // denied paths so the caller always sees the live state.
1842
+ res.setHeader("X-RateLimit-Limit", String(decision.limit));
1843
+ res.setHeader("X-RateLimit-Remaining", String(Math.max(0, decision.limit - decision.count)));
1844
+ res.setHeader("X-RateLimit-Window-Ms", String(decision.windowMs));
1845
+ if (!decision.allowed) {
1846
+ res.setHeader("Retry-After", String(decision.retryAfterSeconds));
1847
+ res.status(429).json({
1848
+ error: "rate limit exceeded for identity",
1849
+ code: "OMCP_IDENTITY_RATE_LIMIT",
1850
+ retryAfterSeconds: decision.retryAfterSeconds,
1851
+ limit: decision.limit,
1852
+ windowMs: decision.windowMs,
1853
+ });
1854
+ return null;
1855
+ }
1856
+ return principalContext(cred.name, cred.allowedSources, {
1857
+ allowBypassRedaction: cred.bypassRedaction,
1858
+ tenant: cred.tenant,
1859
+ });
877
1860
  }
878
1861
  app.post("/mcp", async (req, res) => {
879
1862
  const ctx = gateCtx(req, res);