@thotischner/observability-mcp 1.7.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/products.yaml.example +48 -0
- package/dist/analysis/history.d.ts +70 -0
- package/dist/analysis/history.js +170 -0
- package/dist/analysis/history.test.d.ts +1 -0
- package/dist/analysis/history.test.js +141 -0
- package/dist/audit/log.d.ts +108 -0
- package/dist/audit/log.js +200 -0
- package/dist/audit/log.test.d.ts +1 -0
- package/dist/audit/log.test.js +147 -0
- package/dist/audit/middleware.d.ts +20 -0
- package/dist/audit/middleware.js +50 -0
- package/dist/audit/redaction-bypass.d.ts +67 -0
- package/dist/audit/redaction-bypass.js +64 -0
- package/dist/audit/redaction-bypass.test.d.ts +1 -0
- package/dist/audit/redaction-bypass.test.js +72 -0
- package/dist/audit/sinks/types.d.ts +18 -0
- package/dist/audit/sinks/types.js +1 -0
- package/dist/audit/sinks/webhook.d.ts +45 -0
- package/dist/audit/sinks/webhook.js +111 -0
- package/dist/audit/sinks/webhook.test.d.ts +1 -0
- package/dist/audit/sinks/webhook.test.js +162 -0
- package/dist/auth/credentials.d.ts +29 -0
- package/dist/auth/credentials.js +53 -1
- package/dist/auth/credentials.test.js +46 -1
- package/dist/auth/csrf.d.ts +26 -0
- package/dist/auth/csrf.js +128 -0
- package/dist/auth/csrf.test.d.ts +1 -0
- package/dist/auth/csrf.test.js +143 -0
- package/dist/auth/local-users.d.ts +68 -0
- package/dist/auth/local-users.js +154 -0
- package/dist/auth/local-users.test.d.ts +1 -0
- package/dist/auth/local-users.test.js +121 -0
- package/dist/auth/middleware.d.ts +49 -0
- package/dist/auth/middleware.js +65 -0
- package/dist/auth/middleware.test.d.ts +1 -0
- package/dist/auth/middleware.test.js +90 -0
- package/dist/auth/oidc/client.d.ts +73 -0
- package/dist/auth/oidc/client.js +104 -0
- package/dist/auth/oidc/client.test.d.ts +1 -0
- package/dist/auth/oidc/client.test.js +121 -0
- package/dist/auth/oidc/dcr.d.ts +70 -0
- package/dist/auth/oidc/dcr.js +160 -0
- package/dist/auth/oidc/dcr.test.d.ts +1 -0
- package/dist/auth/oidc/dcr.test.js +109 -0
- package/dist/auth/oidc/discovery.d.ts +38 -0
- package/dist/auth/oidc/discovery.js +48 -0
- package/dist/auth/oidc/discovery.test.d.ts +1 -0
- package/dist/auth/oidc/discovery.test.js +68 -0
- package/dist/auth/oidc/endpoints.d.ts +20 -0
- package/dist/auth/oidc/endpoints.js +168 -0
- package/dist/auth/oidc/endpoints.test.d.ts +7 -0
- package/dist/auth/oidc/endpoints.test.js +304 -0
- package/dist/auth/oidc/flow-cookie.d.ts +57 -0
- package/dist/auth/oidc/flow-cookie.js +142 -0
- package/dist/auth/oidc/flow-cookie.test.d.ts +1 -0
- package/dist/auth/oidc/flow-cookie.test.js +0 -0
- package/dist/auth/oidc/index.d.ts +7 -0
- package/dist/auth/oidc/index.js +6 -0
- package/dist/auth/oidc/jwks.d.ts +36 -0
- package/dist/auth/oidc/jwks.js +69 -0
- package/dist/auth/oidc/jwks.test.d.ts +1 -0
- package/dist/auth/oidc/jwks.test.js +65 -0
- package/dist/auth/oidc/jwt.d.ts +62 -0
- package/dist/auth/oidc/jwt.js +113 -0
- package/dist/auth/oidc/jwt.test.d.ts +1 -0
- package/dist/auth/oidc/jwt.test.js +141 -0
- package/dist/auth/oidc/pkce.d.ts +19 -0
- package/dist/auth/oidc/pkce.js +43 -0
- package/dist/auth/oidc/pkce.test.d.ts +1 -0
- package/dist/auth/oidc/pkce.test.js +55 -0
- package/dist/auth/oidc/profiles.d.ts +22 -0
- package/dist/auth/oidc/profiles.js +95 -0
- package/dist/auth/oidc/profiles.test.d.ts +1 -0
- package/dist/auth/oidc/profiles.test.js +51 -0
- package/dist/auth/oidc/runtime.d.ts +66 -0
- package/dist/auth/oidc/runtime.js +142 -0
- package/dist/auth/oidc/runtime.test.d.ts +1 -0
- package/dist/auth/oidc/runtime.test.js +181 -0
- package/dist/auth/policy/batch-dry-run.d.ts +56 -0
- package/dist/auth/policy/batch-dry-run.js +129 -0
- package/dist/auth/policy/batch-dry-run.test.d.ts +1 -0
- package/dist/auth/policy/batch-dry-run.test.js +140 -0
- package/dist/auth/policy/engine.d.ts +64 -0
- package/dist/auth/policy/engine.js +87 -0
- package/dist/auth/policy/engine.test.d.ts +1 -0
- package/dist/auth/policy/engine.test.js +98 -0
- package/dist/auth/policy/loader.d.ts +45 -0
- package/dist/auth/policy/loader.js +137 -0
- package/dist/auth/policy/loader.test.d.ts +1 -0
- package/dist/auth/policy/loader.test.js +86 -0
- package/dist/auth/policy/opa.d.ts +69 -0
- package/dist/auth/policy/opa.js +173 -0
- package/dist/auth/policy/opa.test.d.ts +1 -0
- package/dist/auth/policy/opa.test.js +206 -0
- package/dist/auth/rbac.d.ts +62 -0
- package/dist/auth/rbac.js +162 -0
- package/dist/auth/rbac.test.d.ts +1 -0
- package/dist/auth/rbac.test.js +183 -0
- package/dist/auth/session.d.ts +66 -0
- package/dist/auth/session.js +146 -0
- package/dist/auth/session.test.d.ts +1 -0
- package/dist/auth/session.test.js +90 -0
- package/dist/catalog/loader.d.ts +67 -0
- package/dist/catalog/loader.js +122 -0
- package/dist/catalog/loader.test.d.ts +1 -0
- package/dist/catalog/loader.test.js +108 -0
- package/dist/cli/index.js +3 -0
- package/dist/cli/inspector-config.d.ts +9 -0
- package/dist/cli/inspector-config.js +28 -0
- package/dist/cli/inspector-config.test.d.ts +1 -0
- package/dist/cli/inspector-config.test.js +33 -0
- package/dist/cli/lib.d.ts +1 -1
- package/dist/cli/lib.js +1 -0
- package/dist/conformance/mcp-2025-11-25.test.d.ts +1 -0
- package/dist/conformance/mcp-2025-11-25.test.js +206 -0
- package/dist/connectors/interface.d.ts +5 -1
- package/dist/connectors/loader.js +6 -4
- package/dist/connectors/loader.test.d.ts +1 -0
- package/dist/connectors/loader.test.js +78 -0
- package/dist/connectors/prometheus.test.js +31 -13
- package/dist/connectors/registry.d.ts +13 -0
- package/dist/connectors/registry.js +30 -0
- package/dist/connectors/registry.test.js +56 -2
- package/dist/context.d.ts +45 -1
- package/dist/context.js +40 -1
- package/dist/context.test.d.ts +1 -0
- package/dist/context.test.js +58 -0
- package/dist/federation/registry.d.ts +32 -0
- package/dist/federation/registry.js +77 -0
- package/dist/federation/registry.test.d.ts +1 -0
- package/dist/federation/registry.test.js +130 -0
- package/dist/federation/upstream.d.ts +60 -0
- package/dist/federation/upstream.js +114 -0
- package/dist/index.js +2124 -73
- package/dist/middleware/ssrfGuard.d.ts +15 -0
- package/dist/middleware/ssrfGuard.js +103 -0
- package/dist/middleware/ssrfGuard.test.d.ts +1 -0
- package/dist/middleware/ssrfGuard.test.js +81 -0
- package/dist/net/egress-policy.js +2 -0
- package/dist/observability/otel.d.ts +20 -0
- package/dist/observability/otel.js +118 -0
- package/dist/observability/otel.test.d.ts +1 -0
- package/dist/observability/otel.test.js +56 -0
- package/dist/openapi.js +654 -6
- package/dist/openapi.test.d.ts +1 -0
- package/dist/openapi.test.js +98 -0
- package/dist/policy/redact.d.ts +44 -0
- package/dist/policy/redact.js +144 -0
- package/dist/policy/redact.test.d.ts +1 -0
- package/dist/policy/redact.test.js +172 -0
- package/dist/postmortem/synthesizer.d.ts +83 -0
- package/dist/postmortem/synthesizer.js +205 -0
- package/dist/postmortem/synthesizer.test.d.ts +1 -0
- package/dist/postmortem/synthesizer.test.js +141 -0
- package/dist/products/loader.d.ts +112 -0
- package/dist/products/loader.js +289 -0
- package/dist/products/loader.test.d.ts +1 -0
- package/dist/products/loader.test.js +257 -0
- package/dist/quota/charge.d.ts +28 -0
- package/dist/quota/charge.js +30 -0
- package/dist/quota/charge.test.d.ts +1 -0
- package/dist/quota/charge.test.js +83 -0
- package/dist/quota/limiter.d.ts +97 -0
- package/dist/quota/limiter.js +161 -0
- package/dist/quota/limiter.test.d.ts +1 -0
- package/dist/quota/limiter.test.js +205 -0
- package/dist/quota/token-budget.d.ts +119 -0
- package/dist/quota/token-budget.js +297 -0
- package/dist/quota/token-budget.test.d.ts +1 -0
- package/dist/quota/token-budget.test.js +215 -0
- package/dist/scim/group-role-map.d.ts +4 -0
- package/dist/scim/group-role-map.js +33 -0
- package/dist/scim/group-role-map.test.d.ts +1 -0
- package/dist/scim/group-role-map.test.js +33 -0
- package/dist/scim/routes.d.ts +15 -0
- package/dist/scim/routes.js +249 -0
- package/dist/scim/store.d.ts +37 -0
- package/dist/scim/store.js +178 -0
- package/dist/scim/store.test.d.ts +1 -0
- package/dist/scim/store.test.js +121 -0
- package/dist/scim/types.d.ts +73 -0
- package/dist/scim/types.js +29 -0
- package/dist/sdk/hooks.d.ts +77 -0
- package/dist/sdk/hooks.js +72 -0
- package/dist/sdk/hooks.test.d.ts +1 -0
- package/dist/sdk/hooks.test.js +159 -0
- package/dist/sdk/index.d.ts +2 -0
- package/dist/sdk/index.js +1 -0
- package/dist/sdk/manifest-schema.d.ts +17 -0
- package/dist/sdk/manifest-schema.js +21 -0
- package/dist/tenancy/context.d.ts +45 -0
- package/dist/tenancy/context.js +97 -0
- package/dist/tenancy/context.test.d.ts +1 -0
- package/dist/tenancy/context.test.js +72 -0
- package/dist/tenancy/migration.test.d.ts +7 -0
- package/dist/tenancy/migration.test.js +75 -0
- package/dist/tools/context-seam.test.js +6 -1
- package/dist/tools/detect-anomalies.d.ts +1 -1
- package/dist/tools/detect-anomalies.js +5 -4
- package/dist/tools/generate-postmortem.d.ts +35 -0
- package/dist/tools/generate-postmortem.js +191 -0
- package/dist/tools/get-anomaly-history.d.ts +35 -0
- package/dist/tools/get-anomaly-history.js +126 -0
- package/dist/tools/get-service-health.d.ts +1 -1
- package/dist/tools/get-service-health.js +4 -3
- package/dist/tools/list-services.d.ts +1 -1
- package/dist/tools/list-services.js +3 -2
- package/dist/tools/list-sources.d.ts +1 -1
- package/dist/tools/list-sources.js +6 -2
- package/dist/tools/query-logs.d.ts +1 -1
- package/dist/tools/query-logs.js +2 -2
- package/dist/tools/query-metrics.d.ts +1 -1
- package/dist/tools/query-metrics.js +19 -6
- package/dist/tools/query-traces.d.ts +47 -0
- package/dist/tools/query-traces.js +145 -0
- package/dist/tools/query-traces.test.d.ts +1 -0
- package/dist/tools/query-traces.test.js +110 -0
- package/dist/tools/registry-names.d.ts +35 -0
- package/dist/tools/registry-names.js +54 -0
- package/dist/tools/registry-names.test.d.ts +1 -0
- package/dist/tools/registry-names.test.js +61 -0
- package/dist/tools/topology.d.ts +3 -3
- package/dist/tools/topology.js +10 -6
- package/dist/topology/merge.d.ts +22 -0
- package/dist/topology/merge.js +178 -0
- package/dist/topology/merge.test.d.ts +1 -0
- package/dist/topology/merge.test.js +110 -0
- package/dist/transport/sessionStore.d.ts +66 -0
- package/dist/transport/sessionStore.js +138 -0
- package/dist/transport/sessionStore.test.d.ts +1 -0
- package/dist/transport/sessionStore.test.js +118 -0
- package/dist/transport/websocket.d.ts +35 -0
- package/dist/transport/websocket.js +133 -0
- package/dist/transport/websocket.test.d.ts +1 -0
- package/dist/transport/websocket.test.js +124 -0
- package/dist/types.d.ts +51 -0
- package/dist/ui/index.html +3083 -88
- package/package.json +32 -5
package/dist/index.js
CHANGED
|
@@ -9,19 +9,54 @@ import { z } from "zod";
|
|
|
9
9
|
import { loadConfig, saveConfig, DEFAULT_HEALTH_THRESHOLDS, DEFAULT_SETTINGS } from "./config/loader.js";
|
|
10
10
|
import { ConnectorRegistry, getSupportedTypes } from "./connectors/registry.js";
|
|
11
11
|
import { isTopologyProvider } from "./connectors/interface.js";
|
|
12
|
-
import { defaultContext, principalContext } from "./context.js";
|
|
12
|
+
import { defaultContext, principalContext, sessionContext, allowsTool } from "./context.js";
|
|
13
|
+
import { parseKeyTenants } from "./tenancy/context.js";
|
|
13
14
|
import { enforceEntitledAccess, enterpriseGateStatus, enterpriseGateInfo, enterprisePolicyView, enterpriseCatalogView, enterpriseAuditTail, authorizeAdmin, updateRbacPolicy, updateCatalog, } from "./enterprise-gate.js";
|
|
14
15
|
import { loadCredentials, credentialsConfigured, extractToken, resolveToken, } from "./auth/credentials.js";
|
|
16
|
+
import { issueSession, setCookieHeader, clearCookieHeader, generateSecret, } from "./auth/session.js";
|
|
17
|
+
import { readUsersFile, writeUsersFile, authenticate, } from "./auth/local-users.js";
|
|
18
|
+
import { buildSessionAttacher, buildRequireSession, } from "./auth/middleware.js";
|
|
19
|
+
import { buildRequirePermissionFromEngine, hasPermission, listGrantedPermissions, DEFAULT_POLICY, } from "./auth/rbac.js";
|
|
20
|
+
import { resolveOidcConfig, buildOidcRuntime } from "./auth/oidc/runtime.js";
|
|
21
|
+
import { registerOidcRoutes } from "./auth/oidc/endpoints.js";
|
|
22
|
+
import { ScimStore } from "./scim/store.js";
|
|
23
|
+
import { registerScimRoutes } from "./scim/routes.js";
|
|
24
|
+
import { BuiltinPolicyEngine } from "./auth/policy/engine.js";
|
|
25
|
+
import { loadPolicyFromFile, writePolicyFile, PolicyLoadError, VALID_RESOURCES, VALID_ACTIONS } from "./auth/policy/loader.js";
|
|
26
|
+
import { OpaPolicyEngine } from "./auth/policy/opa.js";
|
|
27
|
+
import { evaluateBatch, batchResultToCsv } from "./auth/policy/batch-dry-run.js";
|
|
28
|
+
import { AuditLog } from "./audit/log.js";
|
|
29
|
+
import { buildAuditMiddleware } from "./audit/middleware.js";
|
|
30
|
+
import { WebhookSink } from "./audit/sinks/webhook.js";
|
|
31
|
+
import { buildBypassBreadcrumb, buildBypassAuditParams } from "./audit/redaction-bypass.js";
|
|
32
|
+
import { readCatalogFile, CatalogStore } from "./catalog/loader.js";
|
|
33
|
+
import { readProductsFile, ProductsStore, validateProduct, writeProductsFile, ProductsLoadError } from "./products/loader.js";
|
|
34
|
+
import { REGISTERED_TOOL_NAMES, REGISTERED_TOOLS, unknownToolNames } from "./tools/registry-names.js";
|
|
35
|
+
import { redactValue } from "./policy/redact.js";
|
|
36
|
+
import { IdentityRateLimiter, resolveToolRatePerMin, parseKeyRateLimits } from "./quota/limiter.js";
|
|
37
|
+
import { TokenBudget, estimateTokensFor, resolveDailyTokenLimit } from "./quota/token-budget.js";
|
|
38
|
+
import { applyBudgetDecision } from "./quota/charge.js";
|
|
15
39
|
import { getPluginLoader } from "./connectors/loader.js";
|
|
16
40
|
import { resolveHubCatalogUrl, describeInstalled, mergeCatalog, fetchHubCatalog, } from "./connectors/hub.js";
|
|
17
41
|
import { isValidConnectorName, installTarball } from "./connectors/install.js";
|
|
18
42
|
import { PluginVerificationError } from "./connectors/verify.js";
|
|
19
43
|
import { selfRegistry, withToolMetrics, apiRequests, mcpActiveSessions } from "./metrics/self.js";
|
|
44
|
+
import { initOtel } from "./observability/otel.js";
|
|
45
|
+
import { WebSocketServerTransport } from "./transport/websocket.js";
|
|
46
|
+
import { HookRegistry } from "./sdk/hooks.js";
|
|
47
|
+
import { UpstreamClient } from "./federation/upstream.js";
|
|
48
|
+
import { FederationRegistry, parseFederationEnv } from "./federation/registry.js";
|
|
49
|
+
import { buildCsrfIssuer, buildCsrfEnforcer, csrfBypassFromEnv } from "./auth/csrf.js";
|
|
50
|
+
import { checkOutboundUrl, ssrfGuardFromEnv } from "./middleware/ssrfGuard.js";
|
|
20
51
|
import { buildOpenApiSpec } from "./openapi.js";
|
|
21
52
|
import { listSourcesHandler } from "./tools/list-sources.js";
|
|
22
53
|
import { listServicesHandler } from "./tools/list-services.js";
|
|
23
54
|
import { queryMetricsHandler } from "./tools/query-metrics.js";
|
|
24
55
|
import { queryLogsHandler } from "./tools/query-logs.js";
|
|
56
|
+
import { queryTracesHandler } from "./tools/query-traces.js";
|
|
57
|
+
import { getAnomalyHistoryHandler } from "./tools/get-anomaly-history.js";
|
|
58
|
+
import { generatePostmortemHandler } from "./tools/generate-postmortem.js";
|
|
59
|
+
import { AnomalyHistory, fromEnv as anomalyHistoryFromEnv } from "./analysis/history.js";
|
|
25
60
|
import { getServiceHealthHandler, setHealthThresholds } from "./tools/get-service-health.js";
|
|
26
61
|
import { detectAnomaliesHandler } from "./tools/detect-anomalies.js";
|
|
27
62
|
import { getTopologyHandler, getBlastRadiusHandler } from "./tools/topology.js";
|
|
@@ -41,6 +76,47 @@ const SERVER_VERSION = (() => {
|
|
|
41
76
|
return "unknown";
|
|
42
77
|
}
|
|
43
78
|
})();
|
|
79
|
+
/**
 * Defensive read of a single query-string value.
 *
 * Express types `req.query[k]` as `string | ParsedQs | (string | ParsedQs)[]`:
 * a caller passing `?actor=a&actor=b` (or `?actor[]=a`) yields an array (or
 * an object) rather than a string, which would otherwise propagate as `[a,b]`
 * into downstream filters that expect a string.
 *
 * @param {unknown} v - Raw value read from `req.query`.
 * @returns {string | undefined} `v` itself when it is a string; the first
 *   string element when `v` is an array; `undefined` for everything else, so
 *   nested objects collapse safely instead of leaking through.
 */
function qstr(v) {
    if (typeof v === "string") {
        return v;
    }
    if (Array.isArray(v)) {
        // Scan past non-string entries (nested objects) so a mixed array
        // still yields its first string-shaped value.
        return v.find((item) => typeof item === "string");
    }
    return undefined;
}
|
|
93
|
+
/**
 * Write a forensic breadcrumb for a redaction-bypass tool invocation to
 * stderr as a single JSON line.
 *
 * The breadcrumb deliberately omits the principal identifier: the
 * credential name lives in OMCP_API_KEYS, and threading any derivative of
 * it into the log channel re-introduces a leak surface that static
 * analysers (rightly) flag. SIEM cross-correlation goes via the
 * correlationId UUID — slice 2 will wire the management-plane audit chain
 * to carry the same correlationId alongside the (chain-protected)
 * principal, so a downstream investigator can join the two channels there.
 *
 * @param event - Event label forwarded to `buildBypassBreadcrumb`.
 * @param ctx - Request context forwarded to `buildBypassBreadcrumb`.
 * @param args - Tool arguments forwarded to `buildBypassBreadcrumb`.
 */
function emitBypassEvent(event, ctx, args) {
    const breadcrumb = buildBypassBreadcrumb(event, ctx, args);
    const serialized = JSON.stringify(breadcrumb);
    console.error(serialized);
}
|
|
106
|
+
/**
 * Bridge from the new PolicyEngine to the existing
 * hasPermission/buildRequirePermission signatures, which still take a plain
 * `{role: Permission[]}` map.
 *
 * @param engine - Policy engine. A `BuiltinPolicyEngine` exposes its raw map
 *   directly; any other engine (e.g. the OPA one) must provide `roles()` and
 *   `list(roles)`.
 * @returns The engine's raw map for the built-in engine, otherwise a map
 *   synthesized by calling `engine.list([role])` for every role reported by
 *   `engine.roles()`.
 */
function policyEngineToMap(engine) {
    if (engine instanceof BuiltinPolicyEngine) {
        return engine.raw();
    }
    // Object.fromEntries defines every key as an own data property, so a role
    // literally named "__proto__" becomes a normal entry instead of silently
    // swapping the map's prototype (which keyed assignment onto `{}` would do).
    return Object.fromEntries(Array.from(engine.roles(), (role) => [role, engine.list([role])]));
}
|
|
44
120
|
function applyConfigToRuntime(config, registry) {
|
|
45
121
|
setHealthThresholds(config.healthThresholds);
|
|
46
122
|
}
|
|
@@ -62,21 +138,25 @@ function getAvailableMetricNames(registry) {
|
|
|
62
138
|
}
|
|
63
139
|
/**
 * Validate a source URL: it must pass the shared SSRF guard and must not
 * point at the Google cloud-metadata hostname.
 *
 * @param {string} url - Candidate backend URL.
 * @returns {string | null} A human-readable rejection reason, or `null` when
 *   the URL is acceptable.
 */
function validateSourceUrl(url) {
    // Phase F11: delegate to the shared SSRF guard. Strict by default;
    // operators add OMCP_ALLOW_PRIVATE_BACKENDS=true to allow in-cluster
    // backends. Cloud-metadata IPs (AWS 169.254.169.254, GCE
    // fd00:ec2::254) are rejected regardless.
    const verdict = checkOutboundUrl(url, ssrfGuardFromEnv());
    if (!verdict.allow) {
        return verdict.reason ?? `URL "${url}" is rejected by the SSRF guard`;
    }
    // Extra Google-metadata-hostname check (DNS-based, not in the
    // numeric guard).
    try {
        // The URL parser keeps a trailing root dot, and
        // "metadata.google.internal." resolves to the same host, so strip
        // trailing dots before comparing.
        const host = new URL(url).hostname.toLowerCase().replace(/\.+$/, "");
        if (host === "metadata.google.internal") {
            return "Access to cloud metadata endpoints is not allowed.";
        }
    }
    catch {
        /* already caught by checkOutboundUrl */
    }
    return null;
}
|
|
81
161
|
// Hard cap for a downloaded/uploaded connector tarball (defence against
|
|
82
162
|
// a hostile or accidental huge artifact OOM-ing the server).
|
|
@@ -104,6 +184,13 @@ async function main() {
|
|
|
104
184
|
if (STDIO) {
|
|
105
185
|
console.log = (...a) => console.error(...a);
|
|
106
186
|
}
|
|
187
|
+
// OpenTelemetry self-tracing — opt-in via OMCP_OTEL_ENABLED. Init
|
|
188
|
+
// before express() so HTTP auto-instrumentation captures every
|
|
189
|
+
// /api/* and /mcp request. Skipped in stdio mode (no HTTP surface
|
|
190
|
+
// and the auto-instrumentation would emit noise per stdio call).
|
|
191
|
+
if (!STDIO) {
|
|
192
|
+
await initOtel({ serviceVersion: process.env.npm_package_version });
|
|
193
|
+
}
|
|
107
194
|
let config = loadConfig();
|
|
108
195
|
await getPluginLoader().load();
|
|
109
196
|
const registry = new ConnectorRegistry();
|
|
@@ -113,13 +200,154 @@ async function main() {
|
|
|
113
200
|
// so we cannot share a single McpServer across HTTP sessions. Each new
|
|
114
201
|
// session needs its own server. The factory captures the live registry
|
|
115
202
|
// by reference so tool handlers always see the current configuration.
|
|
203
|
+
// Catalog enrichers for the MCP tool surface: wrap the standard
// tool-result shape ({content:[{text: json}]}) and inject .catalog
// metadata where it matches a known service name. No-op when the
// catalog is empty (the demo case) or when the payload doesn't
// parse as JSON. The HTTP `/api/services` + `/api/health` handlers
// call the loader.ts CatalogStore directly; this path mirrors that
// behaviour for MCP clients (Claude Desktop, the agent, ...).
// The tool result is whatever the wrapped handler returned — it is kept
// untyped so we don't fight the SDK's narrow `content: [{type:"text",...}]`
// overload.
/**
 * Attach catalog metadata to every entry of a `list_services`-style result.
 *
 * @param result - Tool result whose first content item carries a JSON text
 *   payload with a `services` array.
 * @param ctx - Request context; `ctx.tenant` scopes the catalog lookup.
 * @returns A fresh wrapper mirroring `result` with the first item's text
 *   re-serialized after enrichment, or `result` itself, unchanged, when the
 *   first item has no string text or anything throws (payload does not
 *   parse, catalog lookup fails, ...).
 */
function enrichToolServicesText(result, ctx) {
    try {
        const [head, ...tail] = result.content;
        // Without a text payload there is nothing to enrich; pass the result
        // through rather than stamping a synthetic "{}" onto a non-text item.
        if (typeof head?.text !== "string") {
            return result;
        }
        const parsed = JSON.parse(head.text);
        if (parsed && Array.isArray(parsed.services)) {
            for (const service of parsed.services) {
                if (typeof service?.name !== "string") {
                    continue;
                }
                // Scope enrichment to the caller's tenant so we don't
                // leak owner / on-call / SLO bytes for other tenants'
                // services that happen to share a name in the catalog.
                const entry = catalog.get(service.name, ctx.tenant);
                if (entry) {
                    service.catalog = entry;
                }
            }
        }
        return { ...result, content: [{ ...head, text: JSON.stringify(parsed) }, ...tail] };
    }
    catch {
        return result;
    }
}
|
|
235
|
+
// NOTE: this comment describes redactToolText (defined further down,
// after chargeTokenBudget), not the function that immediately follows.
// Apply PII / secret redaction to a tool result's text payload. No-op
// when OMCP_REDACTION=off. Adds a top-level `_redacted` field with
// the per-category counts so the agent (and the human) sees a hint
// like `{ email: 4, ipv4: 2, totalMatches: 6 }` instead of silently
// losing data.
|
|
240
|
+
/**
 * Charge the estimated token count of a tool response against the
 * per-identity daily budget.
 *
 * When the budget would be exceeded, the response is replaced with a
 * structured error payload — the tool's data never crosses the boundary, and
 * the agent sees a parseable {error: "OMCP_TOKEN_BUDGET_EXCEEDED", ...}
 * rather than a generic failure. Anonymous principals are not charged (the
 * budget is per-credential).
 *
 * Charging is RETROACTIVE: the tool body has already executed, so the work
 * is done by the time we decide to deny — the call that flips the bucket
 * over the cap still pays the cost; the N+1 call denies before doing work.
 * Pre-flight denial would require predicting response size before the
 * connector runs, which isn't tractable for query_logs / query_metrics where
 * size is data-dependent. The trade-off is intentional: one over-cap call
 * per bucket roll vs an unhelpful "request denied, size unknown" upstream.
 *
 * @param result - Tool result; only the first content item's text is measured.
 * @param ctx - Request context; charging applies only when `ctx.auth` is "apikey".
 * @param toolName - Tool name forwarded to `applyBudgetDecision`.
 * @returns `result` untouched for non-apikey callers, otherwise whatever
 *   `applyBudgetDecision` produces for the budget decision.
 */
function chargeTokenBudget(result, ctx, toolName) {
    const isCredentialed = ctx.auth === "apikey";
    if (!isCredentialed) {
        return result;
    }
    const firstItem = result.content[0];
    const estimated = estimateTokensFor(firstItem?.text ?? "");
    const verdict = tokenBudget.check(identityKey(ctx), estimated);
    return applyBudgetDecision(result, verdict, estimated, toolName);
}
|
|
265
|
+
// Redaction is on unless OMCP_REDACTION is set to "off" (case-insensitive).
const REDACTION_ENABLED = String(process.env.OMCP_REDACTION ?? "on").toLowerCase() !== "off";
/**
 * Apply PII / secret redaction to a tool result's JSON text payload.
 *
 * @param result - Tool result whose first content item carries JSON text.
 * @param {{ bypass?: boolean }} [opts] - `bypass: true` skips redaction for
 *   this call.
 * @returns `result` itself when redaction is disabled, bypassed, the first
 *   item has no string text, or the payload does not parse; otherwise a
 *   fresh wrapper whose first item carries the redacted JSON. When anything
 *   was redacted and the payload is a plain object, a top-level `_redacted`
 *   field holds the per-category counts plus `totalMatches`, so the reader
 *   sees a hint instead of silently losing data.
 */
function redactToolText(result, opts = {}) {
    if (!REDACTION_ENABLED || opts.bypass) {
        return result;
    }
    try {
        const [head, ...tail] = result.content;
        // Without a text payload there is nothing to redact; pass the result
        // through rather than stamping a synthetic "{}" onto a non-text item.
        if (typeof head?.text !== "string") {
            return result;
        }
        const { value: redacted, matches, totalMatches } = redactValue(JSON.parse(head.text));
        // Arrays are skipped: JSON.stringify drops non-index properties, so
        // the summary would never reach the serialized output anyway.
        const canCarrySummary = redacted && typeof redacted === "object" && !Array.isArray(redacted);
        if (totalMatches > 0 && canCarrySummary) {
            redacted._redacted = { ...matches, totalMatches };
        }
        return { ...result, content: [{ ...head, text: JSON.stringify(redacted) }, ...tail] };
    }
    catch {
        return result;
    }
}
|
|
285
|
+
/**
 * Attach the catalog entry for one service to a health-style tool result.
 *
 * @param result - Tool result whose first content item carries JSON text.
 * @param serviceName - Service to look up; a falsy value skips the lookup.
 * @param ctx - Request context; `ctx.tenant` scopes the catalog lookup.
 * @returns A fresh wrapper mirroring `result` with the first item's text
 *   re-serialized (carrying `.catalog` when an entry was found and the
 *   payload is an object), or `result` itself, unchanged, when the first
 *   item has no string text or anything throws.
 */
function enrichToolHealthText(result, serviceName, ctx) {
    try {
        const [head, ...tail] = result.content;
        // Without a text payload there is nothing to enrich; pass the result
        // through rather than stamping a synthetic "{}" onto a non-text item.
        if (typeof head?.text !== "string") {
            return result;
        }
        const parsed = JSON.parse(head.text);
        // Tenant-scoped lookup, so another tenant's entry for a same-named
        // service is never attached.
        const entry = serviceName ? catalog.get(serviceName, ctx.tenant) : undefined;
        if (entry && parsed && typeof parsed === "object") {
            parsed.catalog = entry;
        }
        return { ...result, content: [{ ...head, text: JSON.stringify(parsed) }, ...tail] };
    }
    catch {
        return result;
    }
}
|
|
116
298
|
function createMcpServer(ctx) {
|
|
117
299
|
const mcpServer = new McpServer({
|
|
118
300
|
name: "observability-mcp",
|
|
119
301
|
version: SERVER_VERSION,
|
|
120
302
|
});
|
|
121
303
|
// --- Register tools with Zod schemas ---
|
|
122
|
-
|
|
304
|
+
// Product-aware registration: when the active credential is bound
|
|
305
|
+
// to a Product (OMCP_KEY_PRODUCTS), `ctx.allowedTools` carries that
|
|
306
|
+
// Product's `tools` allow-list and we skip the registration of any
|
|
307
|
+
// tool not in it. Anonymous + Product-less sessions leave
|
|
308
|
+
// allowedTools undefined and see every tool — the bypass is the
|
|
309
|
+
// back-compat path the open-source default relies on.
|
|
310
|
+
//
|
|
311
|
+
// The wrapper also wires Phase F7 hook fan-out: every tool dispatch
|
|
312
|
+
// fires tool_pre_invoke before the handler and tool_post_invoke after.
|
|
313
|
+
// Plugins can deny the call (allow:false → isError CallToolResult),
|
|
314
|
+
// mutate the args before dispatch, or mutate the result before it
|
|
315
|
+
// reaches the caller. When no hooks are registered (the default in
|
|
316
|
+
// the OSS demo) the wrapper is a thin pass-through.
|
|
317
|
+
const registerTool = ((name, ...rest) => {
|
|
318
|
+
if (!allowsTool(ctx.allowedTools, name))
|
|
319
|
+
return undefined;
|
|
320
|
+
if (rest.length > 0 && typeof rest[rest.length - 1] === "function") {
|
|
321
|
+
const originalHandler = rest[rest.length - 1];
|
|
322
|
+
const wrappedHandler = async (args, extra) => {
|
|
323
|
+
const hookCtxBase = {
|
|
324
|
+
principal: ctx.principalId,
|
|
325
|
+
tenant: ctx.tenant || "default",
|
|
326
|
+
target: name,
|
|
327
|
+
};
|
|
328
|
+
const pre = await hookRegistry.fire("tool_pre_invoke", { ...hookCtxBase, kind: "tool_pre_invoke" }, { args });
|
|
329
|
+
if (!pre.allow) {
|
|
330
|
+
return {
|
|
331
|
+
content: [{ type: "text", text: pre.reason ?? "denied by plugin hook" }],
|
|
332
|
+
isError: true,
|
|
333
|
+
};
|
|
334
|
+
}
|
|
335
|
+
const effectiveArgs = pre.payload?.args ?? args;
|
|
336
|
+
const result = await originalHandler(effectiveArgs, extra);
|
|
337
|
+
const post = await hookRegistry.fire("tool_post_invoke", { ...hookCtxBase, kind: "tool_post_invoke" }, { args: effectiveArgs, result });
|
|
338
|
+
if (!post.allow) {
|
|
339
|
+
return {
|
|
340
|
+
content: [{ type: "text", text: post.reason ?? "denied by plugin hook" }],
|
|
341
|
+
isError: true,
|
|
342
|
+
};
|
|
343
|
+
}
|
|
344
|
+
return post.payload?.result ?? result;
|
|
345
|
+
};
|
|
346
|
+
rest[rest.length - 1] = wrappedHandler;
|
|
347
|
+
}
|
|
348
|
+
return mcpServer.tool(name, ...rest);
|
|
349
|
+
});
|
|
350
|
+
registerTool("list_sources", [
|
|
123
351
|
"List the configured observability backends (Prometheus, Loki, and any connector) and whether each is currently reachable.",
|
|
124
352
|
"When to use: call this first to learn which source names exist and are healthy before passing `source` to other tools, or to debug why a query returns no data.",
|
|
125
353
|
"Behavior: read-only, no side effects. Returns one entry per source with its name, type, configured URL, signal types (metrics/logs), and a live up/down status. Never throws for an unreachable backend — the backend is reported as down instead.",
|
|
@@ -128,7 +356,7 @@ async function main() {
|
|
|
128
356
|
await enforceEntitledAccess(ctx, { tool: "list_sources" });
|
|
129
357
|
return withToolMetrics("list_sources", () => listSourcesHandler(registry, ctx));
|
|
130
358
|
});
|
|
131
|
-
|
|
359
|
+
registerTool("list_services", [
|
|
132
360
|
"Discover the service names that can be queried, aggregated across every connected backend.",
|
|
133
361
|
"When to use: call this before `query_metrics`, `query_logs`, or `get_service_health` to obtain the exact, case-sensitive service name those tools require.",
|
|
134
362
|
"Behavior: read-only, no side effects. Returns one entry per service with the service name, the source(s) it was discovered in, and which signals are available for it (metrics, logs, or both).",
|
|
@@ -140,12 +368,13 @@ async function main() {
|
|
|
140
368
|
.describe("Optional case-insensitive substring to narrow the result to matching service names (e.g. 'payment'). Omit to list every discovered service."),
|
|
141
369
|
}, async (args) => {
|
|
142
370
|
await enforceEntitledAccess(ctx, { tool: "list_services" });
|
|
143
|
-
|
|
371
|
+
const result = await withToolMetrics("list_services", () => listServicesHandler(registry, args, ctx));
|
|
372
|
+
return enrichToolServicesText(result, ctx);
|
|
144
373
|
});
|
|
145
374
|
const metricsList = getAvailableMetricNames(registry);
|
|
146
375
|
const metricNames = registry.getBySignal("metrics").flatMap(c => c.getMetrics().map(m => m.name));
|
|
147
376
|
const uniqueNames = [...new Set(metricNames)];
|
|
148
|
-
|
|
377
|
+
registerTool("query_metrics", [
|
|
149
378
|
"Fetch the raw time-series for ONE metric of ONE service over a look-back window, returned together with pre-computed summary statistics.",
|
|
150
379
|
"When to use: when you need the actual numeric values or the trend of a known metric. For a 'is this service OK?' verdict use `get_service_health`; to find which services are misbehaving use `detect_anomalies`.",
|
|
151
380
|
"Prerequisites: get the exact service name from `list_services` and choose a metric from the list at the end of this description.",
|
|
@@ -172,9 +401,10 @@ async function main() {
|
|
|
172
401
|
.describe("Optional. Metric label to break the result down by, e.g. 'instance', 'pod', 'node'. When set, the response contains one series per distinct label value under `groups`. Default: a single aggregated series."),
|
|
173
402
|
}, async (args) => {
|
|
174
403
|
await enforceEntitledAccess(ctx, { tool: "query_metrics", source: args?.source, service: args?.service });
|
|
175
|
-
|
|
404
|
+
const result = await withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args, ctx));
|
|
405
|
+
return chargeTokenBudget(result, ctx, "query_metrics");
|
|
176
406
|
});
|
|
177
|
-
|
|
407
|
+
registerTool("query_logs", [
|
|
178
408
|
"Fetch recent log entries for ONE service over a look-back window, with a pre-computed summary (error/warning counts and the most frequent error patterns).",
|
|
179
409
|
"When to use: to inspect what a service actually logged, or to investigate an error spike surfaced by `detect_anomalies` / `get_service_health`. For numeric metrics use `query_metrics` instead.",
|
|
180
410
|
"Prerequisites: get the exact service name from `list_services` (the service must expose a logs signal).",
|
|
@@ -201,11 +431,88 @@ async function main() {
|
|
|
201
431
|
.positive()
|
|
202
432
|
.optional()
|
|
203
433
|
.describe("Optional. Maximum number of log entries to return (most recent first). Default: 100."),
|
|
434
|
+
bypass_redaction: z
|
|
435
|
+
.boolean()
|
|
436
|
+
.optional()
|
|
437
|
+
.describe("Optional. When true, request that PII/secret redaction be skipped for this single call. The server only honours this when the calling credential was explicitly authorised via OMCP_KEY_BYPASS_REDACTION; otherwise the request still gets redacted output. Default: false."),
|
|
204
438
|
}, async (args) => {
|
|
205
439
|
await enforceEntitledAccess(ctx, { tool: "query_logs", source: args?.source, service: args?.service });
|
|
206
|
-
|
|
440
|
+
const result = await withToolMetrics("query_logs", () => queryLogsHandler(registry, args, ctx));
|
|
441
|
+
// Redact PII / secrets from the log payload before it crosses the
|
|
442
|
+
// MCP boundary into the agent's context. Per-call bypass kicks in
|
|
443
|
+
// only when BOTH (a) the credential is OMCP_KEY_BYPASS_REDACTION
|
|
444
|
+
// allow-listed, AND (b) the agent explicitly opted in via the
|
|
445
|
+
// bypass_redaction arg. Either alone keeps redaction on, so
|
|
446
|
+
// configuration-only and arg-only paths both fail closed.
|
|
447
|
+
const wantsBypass = args?.bypass_redaction === true;
|
|
448
|
+
const allowed = ctx.allowBypassRedaction === true;
|
|
449
|
+
const bypass = wantsBypass && allowed;
|
|
450
|
+
if (bypass || (wantsBypass && !allowed)) {
|
|
451
|
+
// Forensic trail:
|
|
452
|
+
// 1. stderr breadcrumb for SIEM tail-and-forward setups (the
|
|
453
|
+
// log channel keeps no identifying credential reference
|
|
454
|
+
// to avoid CodeQL taint findings — correlation goes via
|
|
455
|
+
// the audit chain entry below).
|
|
456
|
+
// 2. management-plane audit chain entry so the bypass
|
|
457
|
+
// invocation is tamper-evident alongside the rest of
|
|
458
|
+
// /api/*. Persists if OMCP_MGMT_AUDIT_FILE is set.
|
|
459
|
+
emitBypassEvent(bypass ? "redaction_bypass_engaged" : "redaction_bypass_denied", ctx, args);
|
|
460
|
+
void mgmtAudit.record(buildBypassAuditParams(bypass, ctx, args)).catch(() => {
|
|
461
|
+
// Audit record is best-effort — losing one entry must not
|
|
462
|
+
// crash the tool call. The chain itself remains intact.
|
|
463
|
+
});
|
|
464
|
+
}
|
|
465
|
+
const redacted = redactToolText(result, { bypass });
|
|
466
|
+
return chargeTokenBudget(redacted, ctx, "query_logs");
|
|
467
|
+
});
|
|
468
|
+
registerTool("get_anomaly_history", [
|
|
469
|
+
"Replay historical anomaly scores for a service from the TSDB the gateway writes to (omcp_anomaly_score series).",
|
|
470
|
+
"When to use: post-mortem reconstruction, trend analysis on detector noise, or pulling context for the LLM when an incident is reviewed after the fact.",
|
|
471
|
+
"Prerequisites: the operator must have OMCP_ANOMALY_HISTORY_REMOTE_WRITE configured AND a Prometheus source pointed at the same TSDB so the round-trip closes.",
|
|
472
|
+
"Behavior: read-only. Returns the time-series of scores. Empty result means either no anomalies in the window or history is disabled.",
|
|
473
|
+
"Related: `detect_anomalies` for the live scores; `query_metrics` if you want to write the PromQL by hand.",
|
|
474
|
+
].join(" "), {
|
|
475
|
+
service: z.string().describe("Service name to filter on."),
|
|
476
|
+
duration: z.string().optional().describe("Rolling window, e.g. '1h', '24h'. Default '1h'."),
|
|
477
|
+
method: z.string().optional().describe("Filter by detector method ('mad' / 'seasonality' / 'correlator'). Optional."),
|
|
478
|
+
}, async (args) => {
|
|
479
|
+
await enforceEntitledAccess(ctx, { tool: "get_anomaly_history", service: args?.service });
|
|
480
|
+
const result = await withToolMetrics("get_anomaly_history", () => getAnomalyHistoryHandler(registry, args, ctx));
|
|
481
|
+
return chargeTokenBudget(result, ctx, "get_anomaly_history");
|
|
482
|
+
});
|
|
483
|
+
registerTool("generate_postmortem", [
|
|
484
|
+
"Stitch the gateway's primitives (anomaly history, blast-radius, traces, log highlights) into a single markdown post-mortem report for one service over a given window.",
|
|
485
|
+
"When to use: after an incident, when the operator or LLM wants 'one document the on-call can read in 60 seconds' instead of poking the individual tools.",
|
|
486
|
+
"Prerequisites: anomaly history requires OMCP_ANOMALY_HISTORY_REMOTE_WRITE + a Prometheus source. Traces require Tempo / Jaeger. Blast-radius requires a topology provider.",
|
|
487
|
+
"Behavior: read-only. Returns markdown by default; pass `format='json'` for the structured shape. Output capped (timeline 20 rows, blast-radius 30 nodes, 10 traces) — JSON shape carries the full data.",
|
|
488
|
+
"Related: `get_anomaly_history`, `query_traces`, `get_blast_radius` for the underlying primitives.",
|
|
489
|
+
].join(" "), {
|
|
490
|
+
service: z.string().describe("Suspected root-cause service."),
|
|
491
|
+
duration: z.string().optional().describe("Window length, e.g. '1h', '6h'. Default '1h'."),
|
|
492
|
+
format: z.enum(["markdown", "json"]).optional().describe("'markdown' (default) or 'json'."),
|
|
493
|
+
}, async (args) => {
|
|
494
|
+
await enforceEntitledAccess(ctx, { tool: "generate_postmortem", service: args?.service });
|
|
495
|
+
const result = await withToolMetrics("generate_postmortem", () => generatePostmortemHandler(registry, args, ctx));
|
|
496
|
+
return chargeTokenBudget(result, ctx, "generate_postmortem");
|
|
497
|
+
});
|
|
498
|
+
registerTool("query_traces", [
|
|
499
|
+
"Query distributed traces for a service over a given timeframe.",
|
|
500
|
+
"Returns ranked trace summaries (duration, span count, error status) with a p50/p95 aggregate across the returned set.",
|
|
501
|
+
"When to use: investigate tail-latency outliers, walk call chains across services for a specific time window, or pull traces related to an anomaly that the metric/log tools surfaced first.",
|
|
502
|
+
"Prerequisites: get the exact service name from `list_services`. A Tempo / Jaeger / OTLP connector must be configured.",
|
|
503
|
+
"Behavior: read-only. `filter` accepts the backend's native query language (TraceQL on Tempo, tag query on Jaeger). When `errorsOnly=true`, only traces with at least one error span are returned. Default limit is 50.",
|
|
504
|
+
].join(" "), {
|
|
505
|
+
service: z.string().describe("Service name (e.g. 'payment-service')."),
|
|
506
|
+
duration: z.string().optional().describe("Rolling time window, e.g. '5m', '1h'. Default '15m'."),
|
|
507
|
+
filter: z.string().optional().describe("Backend-native filter (TraceQL on Tempo, tag query on Jaeger). Optional."),
|
|
508
|
+
limit: z.number().int().positive().optional().describe("Soft cap on returned trace summaries. Default 50."),
|
|
509
|
+
errorsOnly: z.boolean().optional().describe("If true, only traces with at least one error span."),
|
|
510
|
+
}, async (args) => {
|
|
511
|
+
await enforceEntitledAccess(ctx, { tool: "query_traces", service: args?.service });
|
|
512
|
+
const result = await withToolMetrics("query_traces", () => queryTracesHandler(registry, args, ctx));
|
|
513
|
+
return chargeTokenBudget(result, ctx, "query_traces");
|
|
207
514
|
});
|
|
208
|
-
|
|
515
|
+
registerTool("get_service_health", [
|
|
209
516
|
"Produce a single aggregated health verdict for ONE service by combining its metrics and logs.",
|
|
210
517
|
"When to use: the fastest way to answer 'is this service healthy right now and why?'. Use `query_metrics`/`query_logs` to drill into the underlying numbers, or `detect_anomalies` to scan many services at once.",
|
|
211
518
|
"Prerequisites: get the exact service name from `list_services`.",
|
|
@@ -216,9 +523,11 @@ async function main() {
|
|
|
216
523
|
.describe("Required. Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'payment-service')."),
|
|
217
524
|
}, async (args) => {
|
|
218
525
|
await enforceEntitledAccess(ctx, { tool: "get_service_health", service: args?.service });
|
|
219
|
-
|
|
526
|
+
const result = await withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args, ctx));
|
|
527
|
+
const enriched = enrichToolHealthText(result, String(args?.service ?? ""), ctx);
|
|
528
|
+
return chargeTokenBudget(enriched, ctx, "get_service_health");
|
|
220
529
|
});
|
|
221
|
-
|
|
530
|
+
registerTool("detect_anomalies", [
|
|
222
531
|
"Scan one or all monitored services for abnormal behavior and return the findings ranked by severity.",
|
|
223
532
|
"When to use: the entry point for 'is anything wrong anywhere?' triage. Once a service is flagged, follow up with `get_service_health` for the verdict or `query_metrics`/`query_logs` for the raw evidence.",
|
|
224
533
|
"Behavior: read-only, no side effects. Applies z-score analysis to metrics, detects log error-rate spikes, and correlates the two. Returns a list of anomalies, each with the affected service, metric/signal, severity, the deviation (e.g. σ and % change), and a short explanation. No anomalies yields an empty list, not an error.",
|
|
@@ -240,7 +549,7 @@ async function main() {
|
|
|
240
549
|
await enforceEntitledAccess(ctx, { tool: "detect_anomalies", source: args?.source, service: args?.service });
|
|
241
550
|
return withToolMetrics("detect_anomalies", () => detectAnomaliesHandler(registry, args, ctx));
|
|
242
551
|
});
|
|
243
|
-
|
|
552
|
+
registerTool("get_topology", [
|
|
244
553
|
"Return the infrastructure topology graph (Resources and Edges) from every topology-capable connector.",
|
|
245
554
|
"When to use: when an agent needs to reason about which workload runs on which host, who owns whom, or which scope (namespace/project/folder) a resource belongs to. Pair with `get_blast_radius` for shared-host RCA.",
|
|
246
555
|
"Behavior: read-only, no side effects. Returns `{ sources, resources, edges, total, truncated }`. Filters compose: `source` to one connector, `kind` to one resource type (e.g. 'pod', 'node', 'deployment'), `scope` to members of a namespace/folder/project. Output is capped by `limit` (default 500, max 5000) and edges referencing dropped resources are removed.",
|
|
@@ -269,7 +578,7 @@ async function main() {
|
|
|
269
578
|
await enforceEntitledAccess(ctx, { tool: "get_topology", source: args?.source });
|
|
270
579
|
return withToolMetrics("get_topology", () => getTopologyHandler(registry, args, ctx));
|
|
271
580
|
});
|
|
272
|
-
|
|
581
|
+
registerTool("get_blast_radius", [
|
|
273
582
|
"Given a resource, return who else fails if its underlying host(s) fail.",
|
|
274
583
|
"When to use: cross-cutting RCA — when several services degrade together and you suspect a shared host. Works for any RUNS_ON relationship: pod→node, vm→hypervisor, container→host.",
|
|
275
584
|
"Behavior: read-only, no side effects. Resolves `resource` to a Resource (accepts canonical id, exact name, or unique substring), determines its host(s) via RUNS_ON, then lists every other resource that runs on those hosts, bucketed by ownership root (the terminal `OWNED_BY` target — e.g. the Deployment, not the ReplicaSet). If the target is itself a host, its tenants are reported. Returns a structured error if the resource is ambiguous or unknown.",
|
|
@@ -282,10 +591,132 @@ async function main() {
|
|
|
282
591
|
await enforceEntitledAccess(ctx, { tool: "get_blast_radius" });
|
|
283
592
|
return withToolMetrics("get_blast_radius", () => getBlastRadiusHandler(registry, args, ctx));
|
|
284
593
|
});
|
|
594
|
+
// Phase F10: federated tools — every upstream MCP server's tools
|
|
595
|
+
// show up here under `<prefix>.<upstream-tool>`. The handler is a
|
|
596
|
+
// pure proxy: it forwards args verbatim and returns the upstream's
|
|
597
|
+
// CallToolResult unchanged. The wrapping registerTool() at the top
|
|
598
|
+
// of this function still fires F7 lifecycle hooks + the F1
|
|
599
|
+
// Product-allow-list gate, so federated tools obey the same policy
|
|
600
|
+
// surface as native ones.
|
|
601
|
+
for (const info of federationRegistry.getNamespacedTools()) {
|
|
602
|
+
// Upstream's inputSchema is forwarded verbatim. The SDK's
|
|
603
|
+
// tool() overload signatures don't carry an obvious type for a
|
|
604
|
+
// dynamic-shape schema, so we cast to `any` at the boundary and
|
|
605
|
+
// let the upstream contract speak for the validation.
|
|
606
|
+
registerTool(info.namespacedName, info.description || `Federated from upstream ${info.sourceName}.`, info.inputSchema ?? {}, async (args) => {
|
|
607
|
+
await enforceEntitledAccess(ctx, { tool: info.namespacedName });
|
|
608
|
+
return withToolMetrics(info.namespacedName, () => federationRegistry.callNamespacedTool(info.namespacedName, args));
|
|
609
|
+
});
|
|
610
|
+
}
|
|
285
611
|
return mcpServer;
|
|
286
612
|
}
|
|
613
|
+
// --- Management-plane auth (basic mode) -----------------------------------
|
|
614
|
+
// Off by default. Enable with `OMCP_AUTH=basic` + `OMCP_USERS_FILE` and
|
|
615
|
+
// optionally `OMCP_SESSION_SECRET`. When the secret is omitted in basic
|
|
616
|
+
// mode the server generates one for the process lifetime — sessions
|
|
617
|
+
// won't survive a restart and a warning is logged. See docs/auth-basic.md.
|
|
618
|
+
//
|
|
619
|
+
// SECURITY DEFAULT: misconfiguration in basic mode is fail-CLOSED — the
|
|
620
|
+
// process exits with a non-zero status rather than silently degrading
|
|
621
|
+
// to anonymous. Set `OMCP_AUTH_ALLOW_FALLBACK=true` to opt back into
|
|
622
|
+
// the old fall-back-to-anonymous behaviour (only sensible for the
|
|
623
|
+
// throwaway-demo case where ops can immediately see the boot log).
|
|
624
|
+
const requestedAuthMode = String(process.env.OMCP_AUTH ?? "anonymous").toLowerCase();
|
|
625
|
+
const allowFallback = String(process.env.OMCP_AUTH_ALLOW_FALLBACK ?? "false").toLowerCase() === "true";
|
|
626
|
+
function authMisconfig(reason) {
|
|
627
|
+
if (allowFallback) {
|
|
628
|
+
console.error(`[auth] ${reason} — OMCP_AUTH_ALLOW_FALLBACK=true → falling back to anonymous`);
|
|
629
|
+
return;
|
|
630
|
+
}
|
|
631
|
+
console.error(`[auth] ${reason} — refusing to start (set OMCP_AUTH_ALLOW_FALLBACK=true to override)`);
|
|
632
|
+
process.exit(1);
|
|
633
|
+
}
|
|
634
|
+
let authMode = "anonymous";
|
|
635
|
+
let sessionCfg;
|
|
636
|
+
let usersStore = null;
|
|
637
|
+
let secretEphemeral = false;
|
|
638
|
+
let oidcRuntime;
|
|
639
|
+
if (requestedAuthMode === "basic") {
|
|
640
|
+
const usersPath = process.env.OMCP_USERS_FILE;
|
|
641
|
+
if (!usersPath) {
|
|
642
|
+
authMisconfig("OMCP_AUTH=basic requires OMCP_USERS_FILE");
|
|
643
|
+
}
|
|
644
|
+
else {
|
|
645
|
+
usersStore = await readUsersFile(usersPath);
|
|
646
|
+
if (!usersStore) {
|
|
647
|
+
authMisconfig(`OMCP_USERS_FILE=${usersPath} unreadable or malformed`);
|
|
648
|
+
usersStore = null;
|
|
649
|
+
}
|
|
650
|
+
else if (usersStore.users.length === 0) {
|
|
651
|
+
authMisconfig(`OMCP_USERS_FILE=${usersPath} has no users`);
|
|
652
|
+
usersStore = null;
|
|
653
|
+
}
|
|
654
|
+
else {
|
|
655
|
+
let secret = process.env.OMCP_SESSION_SECRET;
|
|
656
|
+
if (!secret || secret.length < 32) {
|
|
657
|
+
secret = generateSecret();
|
|
658
|
+
secretEphemeral = true;
|
|
659
|
+
console.warn("[auth] OMCP_SESSION_SECRET not set (or < 32 chars). Generated an ephemeral secret — " +
|
|
660
|
+
"sessions will be invalidated on restart. Set OMCP_SESSION_SECRET to a stable value in production.");
|
|
661
|
+
}
|
|
662
|
+
sessionCfg = { secret };
|
|
663
|
+
authMode = "basic";
|
|
664
|
+
console.log(`[auth] basic mode active — ${usersStore.users.length} user(s) loaded`);
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
}
|
|
668
|
+
else if (requestedAuthMode === "oidc") {
|
|
669
|
+
const r = resolveOidcConfig(process.env);
|
|
670
|
+
if (r.error || !r.config) {
|
|
671
|
+
authMisconfig(r.error ?? "OIDC misconfigured");
|
|
672
|
+
}
|
|
673
|
+
else {
|
|
674
|
+
let secret = process.env.OMCP_SESSION_SECRET;
|
|
675
|
+
if (!secret || secret.length < 32) {
|
|
676
|
+
secret = generateSecret();
|
|
677
|
+
secretEphemeral = true;
|
|
678
|
+
console.warn("[auth] OMCP_SESSION_SECRET not set (or < 32 chars) in OIDC mode. " +
|
|
679
|
+
"Generated an ephemeral secret — sessions and OIDC state cookies " +
|
|
680
|
+
"will be invalidated on restart. Set OMCP_SESSION_SECRET in production.");
|
|
681
|
+
}
|
|
682
|
+
sessionCfg = { secret };
|
|
683
|
+
authMode = "oidc";
|
|
684
|
+
oidcRuntime = buildOidcRuntime(r.config);
|
|
685
|
+
console.log(`[auth] OIDC mode active — issuer=${r.config.issuer} clientId=${r.config.clientId} rolesClaim=${r.config.rolesClaim} mappedRoles=${Object.keys(r.config.roleMap).length}`);
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
else if (requestedAuthMode !== "anonymous") {
|
|
689
|
+
authMisconfig(`unknown OMCP_AUTH=${requestedAuthMode}`);
|
|
690
|
+
}
|
|
691
|
+
const authRuntime = { mode: authMode, session: sessionCfg, secretEphemeral, oidc: oidcRuntime };
|
|
287
692
|
// --- HTTP server ---
|
|
288
693
|
const app = express();
|
|
694
|
+
// Trust-proxy: when set, Express will read req.ip / req.secure from
|
|
695
|
+
// X-Forwarded-For + X-Forwarded-Proto. OFF by default — forging those
|
|
696
|
+
// headers behind a misconfigured deployment is the kind of mistake
|
|
697
|
+
// that gives every audit entry the same client IP. Set
|
|
698
|
+
// `OMCP_TRUST_PROXY` to:
|
|
699
|
+
// "true" — trust every hop (Express default-on shape)
|
|
700
|
+
// "loopback" — trust 127.0.0.1 / ::1 only (sensible default
|
|
701
|
+
// when running behind a same-host nginx)
|
|
702
|
+
// "<n>" — trust the last <n> hops
|
|
703
|
+
// "<ip>,<ip>" — explicit list (single value or comma-separated)
|
|
704
|
+
// Any falsy / unset value leaves it OFF so req.ip stays the raw
|
|
705
|
+
// socket address.
|
|
706
|
+
const trustProxy = process.env.OMCP_TRUST_PROXY;
|
|
707
|
+
if (trustProxy && trustProxy !== "false") {
|
|
708
|
+
if (trustProxy === "true") {
|
|
709
|
+
app.set("trust proxy", true);
|
|
710
|
+
}
|
|
711
|
+
else if (/^\d+$/.test(trustProxy)) {
|
|
712
|
+
app.set("trust proxy", parseInt(trustProxy, 10));
|
|
713
|
+
}
|
|
714
|
+
else {
|
|
715
|
+
// string or comma-separated IPs / "loopback" / etc — let Express's
|
|
716
|
+
// parser handle the lookup (it accepts any of the above forms).
|
|
717
|
+
app.set("trust proxy", trustProxy);
|
|
718
|
+
}
|
|
719
|
+
}
|
|
289
720
|
app.use(express.json({ limit: "1mb" }));
|
|
290
721
|
// Security headers
|
|
291
722
|
app.use((req, res, next) => {
|
|
@@ -317,11 +748,287 @@ async function main() {
|
|
|
317
748
|
});
|
|
318
749
|
next();
|
|
319
750
|
});
|
|
751
|
+
// Broad rate-limit on the whole management-plane surface. Generous
|
|
752
|
+
// enough to leave a polling UI plenty of headroom (300/min per IP),
|
|
753
|
+
// tight enough to stop unauthenticated brute-force walks of /api/*
|
|
754
|
+
// (and to keep CodeQL's missing-rate-limiting rule satisfied for
|
|
755
|
+
// every downstream route).
|
|
756
|
+
app.use("/api", rateLimit({
|
|
757
|
+
windowMs: 60_000,
|
|
758
|
+
max: 300,
|
|
759
|
+
standardHeaders: true,
|
|
760
|
+
legacyHeaders: false,
|
|
761
|
+
message: { error: "rate limited" },
|
|
762
|
+
}));
|
|
763
|
+
// Management-plane auth: attach the session payload to every request
|
|
764
|
+
// (no decision logic here — anonymous mode is a no-op). The gate is
|
|
765
|
+
// mounted explicitly on each protected route prefix further down so
|
|
766
|
+
// there is no string-match-based "is this public?" branch anywhere.
|
|
767
|
+
app.use(buildSessionAttacher(authRuntime));
|
|
768
|
+
const requireSession = buildRequireSession(authRuntime);
|
|
769
|
+
// Phase F11: CSRF — double-submit cookie pattern, enforced on every
|
|
770
|
+
// mutating /api/* request. The issuer runs top-of-pipe so any page
|
|
771
|
+
// render leaves a CSRF token cookie the SPA can read + echo back.
|
|
772
|
+
// Bearer-token clients (CI, agents, MCP clients) bypass by default
|
|
773
|
+
// since they can't be a browser confused-deputy.
|
|
774
|
+
const csrfCfg = {
|
|
775
|
+
bypassBearer: csrfBypassFromEnv(),
|
|
776
|
+
secureCookie: (r) => r.secure || r.headers["x-forwarded-proto"] === "https",
|
|
777
|
+
};
|
|
778
|
+
app.use(buildCsrfIssuer(csrfCfg));
|
|
779
|
+
app.use("/api", buildCsrfEnforcer(csrfCfg));
|
|
780
|
+
// Active policy engine — built-in DEFAULT_POLICY by default. When
|
|
781
|
+
// OMCP_RBAC_POLICY_FILE is set we load it and ALWAYS abort on
|
|
782
|
+
// failure: OMCP_AUTH_ALLOW_FALLBACK is for *auth-mode* fallback
|
|
783
|
+
// (basic → anonymous), not for the policy file. An operator who
|
|
784
|
+
// deployed a restrictive policy to TIGHTEN the default would be
|
|
785
|
+
// worse off silently inheriting the broader built-in
|
|
786
|
+
// (DEFAULT_POLICY grants admin → redaction:bypass) than crashing
|
|
787
|
+
// with a clear error. Policy file errors are unconditionally
|
|
788
|
+
// fatal so the configured intent always wins.
|
|
789
|
+
let policyEngine = new BuiltinPolicyEngine(DEFAULT_POLICY);
|
|
790
|
+
const policyFile = process.env.OMCP_RBAC_POLICY_FILE?.trim();
|
|
791
|
+
const opaUrl = process.env.OMCP_OPA_URL?.trim();
|
|
792
|
+
// OPA takes precedence over a file: an operator who wired both
|
|
793
|
+
// probably wants OPA as the live engine and uses the file as a
|
|
794
|
+
// local fallback only via OMCP_POLICY_ENGINE=builtin.
|
|
795
|
+
const enginePref = (process.env.OMCP_POLICY_ENGINE || "").toLowerCase();
|
|
796
|
+
if (opaUrl && enginePref !== "builtin") {
|
|
797
|
+
const declared = (process.env.OMCP_OPA_ROLES || "").split(",").map((s) => s.trim()).filter(Boolean);
|
|
798
|
+
policyEngine = new OpaPolicyEngine({
|
|
799
|
+
url: opaUrl,
|
|
800
|
+
packagePath: process.env.OMCP_OPA_PACKAGE || "observability/authz",
|
|
801
|
+
declaredRoles: declared.length > 0 ? declared : undefined,
|
|
802
|
+
bearerToken: process.env.OMCP_OPA_TOKEN || undefined,
|
|
803
|
+
});
|
|
804
|
+
console.log(`[auth] RBAC policy engine = OPA at ${opaUrl} (package ${process.env.OMCP_OPA_PACKAGE || "observability/authz"})`);
|
|
805
|
+
// Pre-warm: the sync RBAC gate denies on a cache miss while the
|
|
806
|
+
// first async OPA call is in flight. Hit every (role, resource,
|
|
807
|
+
// action) combination from the declared role set so the very
|
|
808
|
+
// first user request gets a real decision instead of a warming-
|
|
809
|
+
// deny. With 3 roles × 10 resources × 4 actions = 120 calls,
|
|
810
|
+
// OPA handles this in <1s and we keep it best-effort (any
|
|
811
|
+
// failure surfaces in the OPA logs, the engine retries on the
|
|
812
|
+
// first user-facing call anyway).
|
|
813
|
+
const opaEngine = policyEngine;
|
|
814
|
+
void (async () => {
|
|
815
|
+
const roles = opaEngine.roles();
|
|
816
|
+
if (roles.length === 0)
|
|
817
|
+
return;
|
|
818
|
+
const resources = [...VALID_RESOURCES];
|
|
819
|
+
const actions = [...VALID_ACTIONS];
|
|
820
|
+
// Tenant-aware pre-warm: the gate keys cache per
|
|
821
|
+
// (roles, resource, action, tenant) so a tenant-conditional
|
|
822
|
+
// Rego rule that fires for "acme" but not "bigco" produces a
|
|
823
|
+
// distinct cached verdict per tenant. The pre-warm iterates
|
|
824
|
+
// every known declared tenant + "default" so the first user
|
|
825
|
+
// request from a tenant'd identity gets a real decision
|
|
826
|
+
// instead of a warming-deny. OIDC tenants only known at
|
|
827
|
+
// runtime are still subject to first-request warming, but
|
|
828
|
+
// operator-set OMCP_KEY_TENANTS land here.
|
|
829
|
+
const knownTenants = new Set(["default"]);
|
|
830
|
+
// parseKeyTenants is the same parser the credentials layer
|
|
831
|
+
// uses, so the warm set is exactly what the gate will see.
|
|
832
|
+
for (const t of parseKeyTenants(process.env.OMCP_KEY_TENANTS).values()) {
|
|
833
|
+
if (t)
|
|
834
|
+
knownTenants.add(t);
|
|
835
|
+
}
|
|
836
|
+
const tenants = Array.from(knownTenants);
|
|
837
|
+
const tasks = [];
|
|
838
|
+
for (const tenant of tenants) {
|
|
839
|
+
for (const role of roles) {
|
|
840
|
+
for (const resource of resources)
|
|
841
|
+
for (const action of actions) {
|
|
842
|
+
tasks.push(opaEngine.warmEvaluate([role], resource, action, tenant));
|
|
843
|
+
}
|
|
844
|
+
tasks.push(opaEngine.warmList([role], tenant));
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
try {
|
|
848
|
+
const settled = await Promise.allSettled(tasks);
|
|
849
|
+
const failed = settled.filter((s) => s.status === "rejected").length;
|
|
850
|
+
const tlbl = tenants.length === 1 ? "1 tenant" : `${tenants.length} tenants`;
|
|
851
|
+
if (failed === 0) {
|
|
852
|
+
console.log(`[auth] OPA cache pre-warmed: ${settled.length} decisions cached for ${roles.length} role(s) × ${tlbl}`);
|
|
853
|
+
}
|
|
854
|
+
else {
|
|
855
|
+
console.warn(`[auth] OPA cache pre-warmed: ${settled.length - failed}/${settled.length} ok, ${failed} failed across ${tlbl} (gates will retry on first user call)`);
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
catch { /* best-effort */ }
|
|
859
|
+
})();
|
|
860
|
+
}
|
|
861
|
+
else if (policyFile) {
|
|
862
|
+
try {
|
|
863
|
+
policyEngine = loadPolicyFromFile(policyFile);
|
|
864
|
+
console.log(`[auth] RBAC policy loaded from ${policyFile} (${policyEngine.roles().join(", ")})`);
|
|
865
|
+
}
|
|
866
|
+
catch (e) {
|
|
867
|
+
const reason = e instanceof PolicyLoadError ? e.message : String(e);
|
|
868
|
+
console.error(`[auth] OMCP_RBAC_POLICY_FILE=${policyFile}: ${reason} — refusing to start (a malformed policy file would silently revert to the more permissive built-in default, defeating the point of the override)`);
|
|
869
|
+
process.exit(1);
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
// Use the engine-aware variant so tenant (session.tenant) flows into
|
|
873
|
+
// engine.evaluate() — required for tenant-conditional Rego rules
|
|
874
|
+
// (`input.tenant == "acme"` etc.) under OMCP_OPA_URL. Built-in /
|
|
875
|
+
// file-loaded engines ignore the tenant ctx, so the behaviour is
|
|
876
|
+
// unchanged for those deployments.
|
|
877
|
+
const need = (resource, action) => buildRequirePermissionFromEngine(authRuntime, resource, action, policyEngine);
|
|
878
|
+
// Management-plane audit log. Records one entry per mutating /api/*
|
|
879
|
+
// request. Writes JSONL to disk when OMCP_MGMT_AUDIT_FILE is set;
|
|
880
|
+
// otherwise an in-memory ring of the last 500 entries keeps the
|
|
881
|
+
// /api/audit endpoint useful in the demo / single-user case.
|
|
882
|
+
// External audit sinks — opt-in via env. Each chained entry is
|
|
883
|
+
// mirrored to every configured sink; the on-disk JSONL master
|
|
884
|
+
// remains the source of truth (the hash chain is never split).
|
|
885
|
+
const auditSinks = [];
|
|
886
|
+
if (process.env.OMCP_AUDIT_WEBHOOK_URL) {
|
|
887
|
+
auditSinks.push(new WebhookSink({
|
|
888
|
+
url: process.env.OMCP_AUDIT_WEBHOOK_URL,
|
|
889
|
+
token: process.env.OMCP_AUDIT_WEBHOOK_TOKEN,
|
|
890
|
+
deadLetterFile: process.env.OMCP_AUDIT_WEBHOOK_DLQ,
|
|
891
|
+
}));
|
|
892
|
+
console.log("AuditLog: webhook sink enabled -> %s%s", process.env.OMCP_AUDIT_WEBHOOK_URL, process.env.OMCP_AUDIT_WEBHOOK_DLQ
|
|
893
|
+
? ` (DLQ: ${process.env.OMCP_AUDIT_WEBHOOK_DLQ})`
|
|
894
|
+
: "");
|
|
895
|
+
}
|
|
896
|
+
const mgmtAudit = new AuditLog({
|
|
897
|
+
file: process.env.OMCP_MGMT_AUDIT_FILE,
|
|
898
|
+
sinks: auditSinks,
|
|
899
|
+
});
|
|
900
|
+
await mgmtAudit.bootstrap();
|
|
901
|
+
process.on("SIGTERM", () => {
|
|
902
|
+
mgmtAudit
|
|
903
|
+
.flushSinks()
|
|
904
|
+
.catch((err) => console.warn("AuditLog flushSinks failed:", err));
|
|
905
|
+
});
|
|
906
|
+
const audit = (resource, action) => buildAuditMiddleware({ audit: mgmtAudit, resource, action });
|
|
907
|
+
// Plugin lifecycle hook registry — populated by the loader at boot
|
|
908
|
+
// (one entry per manifest `hooks[]` entry) and mutable at runtime
|
|
909
|
+
// when a connector is installed via /api/connectors/install. Each
|
|
910
|
+
// tool dispatch in createMcpServer fans through this registry's
|
|
911
|
+
// tool_pre_invoke / tool_post_invoke chains; resource and prompt
|
|
912
|
+
// hooks plug into their respective seams as they ship.
|
|
913
|
+
const hookRegistry = new HookRegistry();
|
|
914
|
+
// Phase F15: anomaly-history sink — opt-in via
|
|
915
|
+
// OMCP_ANOMALY_HISTORY_REMOTE_WRITE. When configured, anomaly
|
|
916
|
+
// scores written via anomalyHistory.record() flush to the
|
|
917
|
+
// configured TSDB on a 10-second timer. The MCP tool
|
|
918
|
+
// get_anomaly_history queries them back via any Prometheus source
|
|
919
|
+
// pointed at the same TSDB.
|
|
920
|
+
//
|
|
921
|
+
// The detector-side hook that actually records per-anomaly scores
|
|
922
|
+
// is plumbed in F15b (it requires passing this instance into the
|
|
923
|
+
// detectAnomaliesHandler — minor surgery deferred). The
|
|
924
|
+
// infrastructure ships now so externally-written omcp_anomaly_score
|
|
925
|
+
// metrics are already queryable end-to-end.
|
|
926
|
+
const anomalyHistory = new AnomalyHistory(anomalyHistoryFromEnv());
|
|
927
|
+
anomalyHistory.start();
|
|
928
|
+
if (anomalyHistory.isEnabled()) {
|
|
929
|
+
console.log("AnomalyHistory: TSDB sink enabled (OMCP_ANOMALY_HISTORY_REMOTE_WRITE set)");
|
|
930
|
+
}
|
|
931
|
+
process.on("SIGTERM", () => {
|
|
932
|
+
void anomalyHistory.stop().catch(() => undefined);
|
|
933
|
+
});
|
|
934
|
+
// Federation registry — populated from OMCP_FEDERATION_UPSTREAMS at
|
|
935
|
+
// boot. Each upstream connects, fetches tools/list, and exposes its
|
|
936
|
+
// tools under `<prefix>.<upstream-tool-name>` on the gateway's
|
|
937
|
+
// surface. Failures are logged + the upstream is left in `degraded`
|
|
938
|
+
// (no tools) so the gateway boots regardless of upstream health.
|
|
939
|
+
const federationRegistry = new FederationRegistry();
|
|
940
|
+
for (const cfg of parseFederationEnv()) {
|
|
941
|
+
const client = new UpstreamClient({
|
|
942
|
+
name: cfg.name,
|
|
943
|
+
url: cfg.url,
|
|
944
|
+
bearerToken: cfg.bearerToken,
|
|
945
|
+
});
|
|
946
|
+
federationRegistry.add(client);
|
|
947
|
+
client.connect().catch((err) => {
|
|
948
|
+
console.warn("federation upstream %s initial connect failed: %s", cfg.name, err instanceof Error ? err.message : String(err));
|
|
949
|
+
});
|
|
950
|
+
}
|
|
951
|
+
if (federationRegistry.list().length > 0) {
|
|
952
|
+
console.log("federation: %d upstream(s) configured: %s", federationRegistry.list().length, federationRegistry.list().map((u) => `${u.name}=${u.url}`).join(", "));
|
|
953
|
+
}
|
|
954
|
+
process.on("SIGTERM", () => {
|
|
955
|
+
federationRegistry
|
|
956
|
+
.closeAll()
|
|
957
|
+
.catch((err) => console.warn("federation closeAll failed:", err));
|
|
958
|
+
});
|
|
959
|
+
// Service catalog: optional operator-curated ownership / criticality /
|
|
960
|
+
// on-call metadata, keyed on the service name list_services returns.
|
|
961
|
+
// No file ⇒ empty catalog, enrichment is a no-op (anonymous demos
|
|
962
|
+
// see no behaviour change).
|
|
963
|
+
const catalog = new CatalogStore(await readCatalogFile(process.env.OMCP_SERVICE_CATALOG_FILE));
|
|
964
|
+
// Hot-reload aware: passing the path lets `products.maybeReload()`
|
|
965
|
+
// pick up out-of-band edits to OMCP_PRODUCTS_FILE without a restart.
|
|
966
|
+
// Each /api/products* handler awaits maybeReload() before reading,
|
|
967
|
+
// so a `kubectl apply` of an updated ConfigMap or a git-ops edit is
|
|
968
|
+
// visible on the very next request.
|
|
969
|
+
const productsPath = process.env.OMCP_PRODUCTS_FILE;
|
|
970
|
+
const products = new ProductsStore(await readProductsFile(productsPath), { path: productsPath });
|
|
971
|
+
// Seed the mtime cursor from the file we just loaded so the first
|
|
972
|
+
// maybeReload() call doesn't redundantly re-parse the boot state.
|
|
973
|
+
await products.pinMtimeAfterWrite();
|
|
974
|
+
// Protected route prefixes. /api/me, /api/auth/*, /api/info,
|
|
975
|
+
// /api/openapi.json deliberately don't appear here — they stay public.
|
|
976
|
+
// Mount the session gate on each protected prefix, in declaration order.
[
    "/api/sources",
    "/api/source-types",
    "/api/services",
    "/api/health",
    "/api/health-thresholds",
    "/api/topology",
    "/api/settings",
    "/api/connectors",
    "/api/enterprise",
    "/api/hub",
    "/api/audit",
    "/api/usage",
    "/api/catalog",
    "/api/policy",
].forEach((routePrefix) => {
    app.use(routePrefix, requireSession);
});
|
|
320
994
|
// k8s-convention liveness/readiness probes at the root of the path
|
|
321
995
|
// tree, no /api prefix. Helm chart points its probes here. Cheap
|
|
322
996
|
// enough to skip the request-counter middleware.
|
|
323
997
|
let ready = false;
|
|
324
998
|
app.get("/healthz", (_req, res) => res.type("text").send("ok"));
|
|
999
|
+
// Procurement-time probe: the MCP spec revisions and transports the
|
|
1000
|
+
// gateway supports. Static today — kept as a separate endpoint so a
|
|
1001
|
+
// discovery tool / RFP probe / catalog scanner can resolve our
|
|
1002
|
+
// compliance posture without sending a real MCP handshake.
|
|
1003
|
+
// See docs/mcp-conformance.md for the test suite that proves it.
|
|
1004
|
+
app.get("/api/conformance", (_req, res) => {
    // Static description of the gateway's MCP conformance posture: the
    // payload is built from literals only and does not depend on the request.
    const methods = {
        // Methods listed as supported by the conformance harness.
        supported: [
            "initialize",
            "notifications/initialized",
            "ping",
            "tools/list",
            "tools/call",
        ],
        // Methods listed as optional.
        optional: [
            "resources/list",
            "resources/read",
            "prompts/list",
            "prompts/get",
            "logging/setLevel",
        ],
    };
    const payload = {
        revisions: ["2025-11-25"],
        transports: ["streamable-http", "stdio", "websocket"],
        methods,
        harnessPath: "mcp-server/src/conformance/mcp-2025-11-25.test.ts",
        docs: "docs/mcp-conformance.md",
    };
    res.json(payload);
});
|
|
325
1032
|
app.get("/readyz", (_req, res) => {
|
|
326
1033
|
if (ready)
|
|
327
1034
|
return res.type("text").send("ok");
|
|
@@ -344,11 +1051,33 @@ async function main() {
|
|
|
344
1051
|
// Serve Web UI
|
|
345
1052
|
app.use(express.static(join(__dirname, "ui")));
|
|
346
1053
|
// --- API endpoints for Web UI ---
|
|
347
|
-
// List sources with health status
|
|
348
|
-
|
|
1054
|
+
// List sources with health status — tenant-scoped.
|
|
1055
|
+
// Non-admin callers see only their own tenant's sources + globals
|
|
1056
|
+
// (untagged). Admins (users:delete) see everything, with optional
|
|
1057
|
+
// ?tenant=acme drill-down. Anonymous mode (no session) sees
|
|
1058
|
+
// everything — preserves single-tenant default. The `tenant` field
|
|
1059
|
+
// is included on every entry so the UI can render scope badges.
|
|
1060
|
+
app.get("/api/sources", async (req, res) => {
|
|
1061
|
+
const sess = req.session;
|
|
1062
|
+
const isAdmin = hasPermission(sess?.roles, "users", "delete");
|
|
1063
|
+
const callerTenant = sess?.tenant || "default";
|
|
1064
|
+
const requestedTenant = qstr(req.query.tenant);
|
|
349
1065
|
const health = await registry.healthCheckAll();
|
|
350
1066
|
const configs = registry.getSourceConfigs();
|
|
351
|
-
const
|
|
1067
|
+
const filtered = configs.filter((c) => {
|
|
1068
|
+
// Anonymous: every source.
|
|
1069
|
+
if (!sess)
|
|
1070
|
+
return true;
|
|
1071
|
+
// Admin with ?tenant=X drill-down: untagged + that tenant.
|
|
1072
|
+
if (isAdmin && requestedTenant)
|
|
1073
|
+
return !c.tenant || c.tenant === requestedTenant;
|
|
1074
|
+
// Admin no filter: every source (cross-tenant view).
|
|
1075
|
+
if (isAdmin)
|
|
1076
|
+
return true;
|
|
1077
|
+
// Non-admin: own tenant + untagged.
|
|
1078
|
+
return !c.tenant || c.tenant === callerTenant;
|
|
1079
|
+
});
|
|
1080
|
+
const sources = filtered.map((c) => {
|
|
352
1081
|
const connector = registry.getByName(c.name);
|
|
353
1082
|
return {
|
|
354
1083
|
name: c.name,
|
|
@@ -357,6 +1086,7 @@ async function main() {
|
|
|
357
1086
|
enabled: c.enabled,
|
|
358
1087
|
auth: c.auth ? { type: c.auth.type } : undefined,
|
|
359
1088
|
tls: c.tls || undefined,
|
|
1089
|
+
tenant: c.tenant,
|
|
360
1090
|
signalType: connector?.signalType || null,
|
|
361
1091
|
status: health[c.name]?.status || (c.enabled ? "down" : "disabled"),
|
|
362
1092
|
latencyMs: health[c.name]?.latencyMs || null,
|
|
@@ -369,6 +1099,15 @@ async function main() {
|
|
|
369
1099
|
app.get("/api/source-types", (_req, res) => {
|
|
370
1100
|
res.json(getSupportedTypes());
|
|
371
1101
|
});
|
|
1102
|
+
// Get the registry of MCP tools the server can advertise (name +
|
|
1103
|
+
// category + one-line summary). The Products modal uses this to
|
|
1104
|
+
// populate the tools-allowlist picker so a typo can't happen at
|
|
1105
|
+
// authoring time; the server-side typo guard (PR #343) stays as
|
|
1106
|
+
// defence-in-depth. Open to every viewer — there's nothing
|
|
1107
|
+
// sensitive in the catalogue, it's just static metadata.
|
|
1108
|
+
app.get("/api/tools/registry", (_req, res) => {
|
|
1109
|
+
res.json({ tools: REGISTERED_TOOLS });
|
|
1110
|
+
});
|
|
372
1111
|
// Server info — version, loaded plugins, MCP protocol version, build metadata.
|
|
373
1112
|
// Used by the Web UI footer and by operators to confirm what's deployed.
|
|
374
1113
|
app.get("/api/info", async (_req, res) => {
|
|
@@ -387,6 +1126,23 @@ async function main() {
|
|
|
387
1126
|
platform: process.platform,
|
|
388
1127
|
arch: process.arch,
|
|
389
1128
|
},
|
|
1129
|
+
// Governance posture — surfaces the active management-plane
|
|
1130
|
+
// configuration so external dashboards / discovery probes don't
|
|
1131
|
+
// need a session to learn the deployment shape. Booleans only;
|
|
1132
|
+
// file paths and the session secret stay private.
|
|
1133
|
+
governance: {
|
|
1134
|
+
authMode: authRuntime.mode,
|
|
1135
|
+
authSecretEphemeral: !!authRuntime.secretEphemeral,
|
|
1136
|
+
// OIDC issuer (URL only — never the client_secret) is the
|
|
1137
|
+
// single piece of state external discovery needs to know
|
|
1138
|
+
// *where* the IdP lives. Empty string when mode != "oidc".
|
|
1139
|
+
oidcIssuer: oidcRuntime?.cfg.issuer ?? "",
|
|
1140
|
+
auditPersisted: !!process.env.OMCP_MGMT_AUDIT_FILE,
|
|
1141
|
+
catalogConfigured: catalog.count() > 0 || !!process.env.OMCP_SERVICE_CATALOG_FILE,
|
|
1142
|
+
redaction: REDACTION_ENABLED,
|
|
1143
|
+
trustProxy: !!(process.env.OMCP_TRUST_PROXY && process.env.OMCP_TRUST_PROXY !== "false"),
|
|
1144
|
+
toolRatePerMin: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
|
|
1145
|
+
},
|
|
390
1146
|
plugins: loader.list().map((p) => ({
|
|
391
1147
|
name: p.name,
|
|
392
1148
|
source: p.source,
|
|
@@ -395,6 +1151,596 @@ async function main() {
|
|
|
395
1151
|
})),
|
|
396
1152
|
});
|
|
397
1153
|
});
|
|
1154
|
+
// Same per-IP cap for /api/me and the auth endpoints — the UI polls
|
|
1155
|
+
// this on every page load to decide whether to show the login modal,
|
|
1156
|
+
// so a 60/min limit per IP is generous for humans and tight for
|
|
1157
|
+
// scripted abuse.
|
|
1158
|
+
const authReadRateLimit = rateLimit({
|
|
1159
|
+
windowMs: 60_000,
|
|
1160
|
+
max: 60,
|
|
1161
|
+
standardHeaders: true,
|
|
1162
|
+
legacyHeaders: false,
|
|
1163
|
+
message: { error: "rate limited" },
|
|
1164
|
+
});
|
|
1165
|
+
// Current identity for the management plane. Always public so the UI
|
|
1166
|
+
// can decide whether to show a login modal even before sending its
|
|
1167
|
+
// first authenticated request.
|
|
1168
|
+
app.get("/api/me", authReadRateLimit, (req, res) => {
    // Reports the caller's identity for the management plane. Responds
    // `authenticated: false` in anonymous mode or when there is no session.
    const mode = authRuntime.mode;
    if (mode === "anonymous" || !req.session) {
        res.json({ authenticated: false, mode });
        return;
    }
    const session = req.session;
    const user = {
        sub: session.sub,
        name: session.name,
        email: session.email,
        tenant: session.tenant || "default",
        roles: session.roles ?? [],
    };
    const permissions = listGrantedPermissions(session.roles, policyEngineToMap(policyEngine));
    // The issuer URL is only populated in OIDC mode; in any other mode the
    // key stays undefined and is dropped from the JSON response.
    let idpIssuer;
    if (mode === "oidc") {
        idpIssuer = oidcRuntime?.cfg.issuer ?? "";
    }
    res.json({
        authenticated: true,
        mode,
        user,
        permissions,
        exp: session.exp,
        idpIssuer,
    });
});
|
|
1196
|
+
// --- /api/policy — read-only view of the RBAC policy in effect -------
|
|
1197
|
+
// Useful when an operator is debugging "why did role X get a 403" and
|
|
1198
|
+
// doesn't have a checkout to read DEFAULT_POLICY from source. Gated
|
|
1199
|
+
// by admin-only delete-on-users so the policy schema isn't visible
|
|
1200
|
+
// to non-admin sessions.
|
|
1201
|
+
app.get("/api/policy", need("users", "delete"), (req, res) => {
    // Read-only view of the active RBAC policy, plus an optional dry-run
    // probe when both ?resource= and ?action= are supplied.
    const map = policyEngineToMap(policyEngine);
    // `tenantAware` is true exactly when the engine kind is prefixed `opa:`.
    const tenantAware = policyEngine.kind().startsWith("opa:");
    // Dry-run shape: ?roles=admin,operator&resource=sources&action=delete[&tenant=acme]
    // responds with { dryRun: { roles, resource, action, tenant, ...verdict } }.
    const q = req.query;
    if (q.resource && q.action) {
        const dryRoles = typeof q.roles === "string" ? q.roles.split(",").map((r) => r.trim()).filter(Boolean) : undefined;
        // Validate probe values against the known vocabulary so a typo gets
        // an explicit "unknown resource/action" reason instead of a
        // misleading policy verdict.
        if (!VALID_RESOURCES.has(q.resource)) {
            res.json({ dryRun: { roles: dryRoles ?? [], resource: q.resource, action: q.action, allowed: false, reason: `unknown resource '${q.resource}' (valid: ${[...VALID_RESOURCES].join(", ")})` } });
            return;
        }
        if (!VALID_ACTIONS.has(q.action)) {
            res.json({ dryRun: { roles: dryRoles ?? [], resource: q.resource, action: q.action, allowed: false, reason: `unknown action '${q.action}' (valid: ${[...VALID_ACTIONS].join(", ")})` } });
            return;
        }
        const callerSess = req.session;
        // Tenant resolution: a non-blank ?tenant= override wins, otherwise
        // the caller's session tenant. The value is trimmed BEFORE the
        // fallback decision so a whitespace-only override falls back to the
        // session tenant instead of silently probing with no tenant at all.
        const overrideTenant = typeof q.tenant === "string" ? q.tenant.trim() : "";
        const probeTenant = overrideTenant || callerSess?.tenant;
        const result = policyEngine.evaluate(dryRoles, q.resource, q.action, probeTenant ? { tenant: probeTenant } : undefined);
        res.json({
            dryRun: {
                roles: dryRoles ?? [],
                resource: q.resource,
                action: q.action,
                tenant: probeTenant,
                ...result,
            },
        });
        return;
    }
    // No probe requested: return the policy map and engine metadata.
    res.json({
        engine: policyEngine.kind(),
        tenantAware,
        policy: map,
        roles: policyEngine.roles(),
        note: policyEngine.kind() === "builtin"
            ? "DEFAULT_POLICY shipped with this build. Set OMCP_RBAC_POLICY_FILE to override."
            : `policy loaded from ${policyEngine.kind()}; restart to reload.`,
    });
});
|
|
1260
|
+
// Phase F16: batch policy dry-run. Evaluates every
|
|
1261
|
+
// (subject × resource × action) cell against the active engine and
|
|
1262
|
+
// returns a matrix the UI heat-map renders. Gated identically to
|
|
1263
|
+
// the single-call dry-run on GET /api/policy. Capped at 100×100×10
|
|
1264
|
+
// cells per request — a single OPA query per cell is cheap on the
|
|
1265
|
+
// BuiltinPolicyEngine but a careless caller could hose an external
|
|
1266
|
+
// OPA, so the limit fences that. Operators get CSV via
|
|
1267
|
+
// Accept: text/csv for ticket attachments.
|
|
1268
|
+
app.post("/api/policy/dry-run-batch", need("users", "delete"), audit("policy", "read"), async (req, res) => {
    // Evaluates a subjects × resources × actions matrix against the active
    // engine. Any axis that is missing or not an array becomes an empty list.
    const payload = req.body ?? {};
    const asList = (value) => (Array.isArray(value) ? value : []);
    const matrix = {
        subjects: asList(payload.subjects),
        resources: asList(payload.resources),
        actions: asList(payload.actions),
    };
    const result = await evaluateBatch(policyEngine, matrix, VALID_RESOURCES, VALID_ACTIONS);
    // Respond with CSV when the Accept header mentions text/csv, else JSON.
    const wantsCsv = req.headers["accept"]?.toString().includes("text/csv");
    if (wantsCsv) {
        res.type("text/csv").send(batchResultToCsv(result));
        return;
    }
    res.json(result);
});
|
|
1280
|
+
// --- /api/subjects — aggregated principals catalogue ------------------
|
|
1281
|
+
// The third k8s-shaped RBAC view: who the deployment knows about.
|
|
1282
|
+
// Three independent sources, returned in three independent arrays so
|
|
1283
|
+
// the UI can table each section separately:
|
|
1284
|
+
// - users : OMCP_USERS_FILE (basic-mode local users). Password
|
|
1285
|
+
// hashes are never returned.
|
|
1286
|
+
// - apiKeys : OMCP_API_KEYS names (the bearer-token catalogue).
|
|
1287
|
+
// Tokens are never returned; only metadata (tenant,
|
|
1288
|
+
// bound product, source allow-list, bypass flag).
|
|
1289
|
+
// - oidcGroups: keys of OMCP_OIDC_ROLE_MAP — every group the
|
|
1290
|
+
// operator has explicitly mapped to an OMCP role.
|
|
1291
|
+
// Runtime-only groups (claims that arrive without an
|
|
1292
|
+
// OMCP-side mapping) are skipped on purpose; they
|
|
1293
|
+
// produce no roles by definition.
|
|
1294
|
+
// Gated identically to /api/policy.
|
|
1295
|
+
app.get("/api/subjects", need("users", "delete"), async (_req, res) => {
    // Section 1: local users from OMCP_USERS_FILE. Only username, name,
    // roles and tenant are copied into the response.
    const users = [];
    const usersPath = process.env.OMCP_USERS_FILE;
    if (usersPath) {
        try {
            const loaded = await readUsersFile(usersPath);
            const rows = loaded && Array.isArray(loaded.users) ? loaded.users : [];
            for (const u of rows) {
                users.push({
                    username: u.username,
                    name: u.name,
                    roles: u.roles ? u.roles.slice() : [],
                    tenant: u.tenant || "default",
                });
            }
        }
        catch (e) {
            // A read failure is logged and leaves whatever was collected so
            // far; it does not fail the whole endpoint.
            console.warn(`[/api/subjects] readUsersFile failed: ${e.message}`);
        }
    }
    // Section 2: API key credentials. Only the metadata fields below are
    // copied; nothing else on the credential object reaches the response.
    const apiKeys = Array.from(loadCredentials(), (cred) => ({
        name: cred.name,
        tenant: cred.tenant || "default",
        productId: cred.productId,
        bypassRedaction: !!cred.bypassRedaction,
        allowedSources: cred.allowedSources,
    }));
    // Section 3: claim → role pairs parsed from OMCP_OIDC_ROLE_MAP. Entries
    // with an empty claim or a non-string role are dropped.
    const oidcGroups = [];
    const rawRoleMap = process.env.OMCP_OIDC_ROLE_MAP;
    if (rawRoleMap) {
        try {
            const roleMap = JSON.parse(rawRoleMap);
            const isPlainMap = roleMap && typeof roleMap === "object" && !Array.isArray(roleMap);
            if (isPlainMap) {
                Object.entries(roleMap).forEach(([claim, role]) => {
                    if (claim && typeof role === "string") {
                        oidcGroups.push({ claim, role });
                    }
                });
            }
        }
        catch {
            // Deliberate best-effort: an unparseable role map yields an
            // empty oidcGroups list rather than an error response.
        }
    }
    res.json({
        users,
        apiKeys,
        oidcGroups,
        // Which setting feeds each list: the users file path, or the env
        // var name for the other two; null when that setting is unset.
        sources: {
            users: process.env.OMCP_USERS_FILE || null,
            apiKeys: process.env.OMCP_API_KEYS ? "OMCP_API_KEYS" : null,
            oidcGroups: process.env.OMCP_OIDC_ROLE_MAP ? "OMCP_OIDC_ROLE_MAP" : null,
        },
    });
});
|
|
1364
|
+
// Update a user's roles. Today this is the only binding-shape that
|
|
1365
|
+
// OMCP can actually mutate at runtime: api-key roles aren't stored
|
|
1366
|
+
// anywhere (creds carry sources / tenant / product but not roles),
|
|
1367
|
+
// and OIDC group → role mappings come from OMCP_OIDC_ROLE_MAP which
|
|
1368
|
+
// is read once at boot. The Bindings UI's api-key + oidc rows
|
|
1369
|
+
// explain the env-source path instead of offering an edit affordance.
|
|
1370
|
+
app.put("/api/users/:username/roles", need("users", "delete"), audit("users", "write"), async (req, res) => {
    // Replaces the role list of one user in OMCP_USERS_FILE.
    // Responses: 409 users file not configured, 400 malformed body,
    // 422 unknown role name, 404 file/user missing, 500 read/write failure,
    // 200 { ok, username, roles } on success.
    const username = String(req.params.username);
    const path = process.env.OMCP_USERS_FILE;
    if (!path) {
        res.status(409).json({ error: "OMCP_USERS_FILE is not configured — basic-mode user roles can't be edited via the API." });
        return;
    }
    const body = req.body;
    if (!body || !Array.isArray(body.roles) || !body.roles.every((r) => typeof r === "string")) {
        res.status(400).json({ error: "body must include { roles: string[] }" });
        return;
    }
    const requestedRoles = body.roles;
    // Reject role names the active policy engine does not list, so a typo
    // can't leave a user holding a role that grants nothing.
    const knownRoles = new Set(policyEngine.roles());
    const unknown = requestedRoles.filter((r) => !knownRoles.has(r));
    if (unknown.length > 0) {
        res.status(422).json({
            error: `unknown role name(s) for user '${username}': ${unknown.join(", ")}`,
            code: "OMCP_USER_UNKNOWN_ROLE",
            unknown,
            available: Array.from(knownRoles),
        });
        return;
    }
    // A read failure must produce a response: an uncaught rejection inside
    // an async handler would otherwise escape this function entirely.
    let file;
    try {
        file = await readUsersFile(path);
    }
    catch (e) {
        res.status(500).json({ error: `failed to read users file: ${e instanceof Error ? e.message : String(e)}` });
        return;
    }
    // Also treat a file without a `users` array as unreadable, matching the
    // shape check GET /api/subjects applies before iterating.
    if (!file || !Array.isArray(file.users)) {
        res.status(404).json({ error: `users file at ${path} is unreadable or empty` });
        return;
    }
    const idx = file.users.findIndex((u) => u.username === username);
    if (idx < 0) {
        res.status(404).json({ error: `user '${username}' not found` });
        return;
    }
    file.users[idx].roles = requestedRoles;
    try {
        await writeUsersFile(path, file);
    }
    catch (e) {
        res.status(500).json({ error: `failed to persist users file: ${e instanceof Error ? e.message : String(e)}` });
        return;
    }
    // Refresh the in-memory users store right after the write so the new
    // role set is picked up without a restart.
    await maybeReloadUsers();
    res.json({ ok: true, username, roles: requestedRoles });
});
|
|
1422
|
+
// Upsert a role in the file-backed RBAC policy. File engine only:
|
|
1423
|
+
// built-in defaults are immutable in source; OPA is the Rego
|
|
1424
|
+
// source of truth. The UI hides the affordance under non-file
|
|
1425
|
+
// engines via the [data-engine-required="file"] CSS gate; the
|
|
1426
|
+
// endpoint enforces the rule too for defence-in-depth.
|
|
1427
|
+
app.put("/api/policy/roles/:name", need("users", "delete"), audit("users", "write"), async (req, res) => {
    // Upserts one role (a list of { resource, action } permissions) in the
    // file-backed RBAC policy and hot-swaps the in-memory engine.
    // Responses: 400 bad name/body, 409 engine or file not usable,
    // 422 unknown resource/action or policy validation failure,
    // 500 persist/reload failure, 200 { ok, name, permissions } on success.
    const name = String(req.params.name);
    // Restrict the role name to a conservative character set so it is a
    // plain key when written to the policy file.
    if (!/^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$/.test(name)) {
        res.status(400).json({ error: `role name '${name}' must match [A-Za-z0-9][A-Za-z0-9._-]{0,63}` });
        return;
    }
    const policyFile = process.env.OMCP_RBAC_POLICY_FILE?.trim();
    if (!policyEngine.kind().startsWith("file:")) {
        // Distinct error codes per engine kind so a client can pick the
        // right hint without string-matching the message.
        const code = policyEngine.kind() === "builtin"
            ? "OMCP_POLICY_ENGINE_BUILTIN"
            : policyEngine.kind().startsWith("opa:")
                ? "OMCP_POLICY_ENGINE_OPA"
                : "OMCP_POLICY_ENGINE_NOT_FILE";
        res.status(409).json({
            error: `role authoring requires the file engine — current is '${policyEngine.kind()}'`,
            code,
        });
        return;
    }
    if (!policyFile) {
        res.status(409).json({
            error: "OMCP_RBAC_POLICY_FILE is not configured — role authoring writes through that file.",
            code: "OMCP_POLICY_FILE_NOT_SET",
        });
        return;
    }
    const body = req.body;
    if (!body || !Array.isArray(body.permissions)) {
        res.status(400).json({ error: "body must include { permissions: [{resource, action}] }" });
        return;
    }
    // Validate every entry and keep only the two known fields.
    const cleanPerms = [];
    for (let i = 0; i < body.permissions.length; i++) {
        const p = body.permissions[i];
        if (!p || typeof p !== "object" || typeof p.resource !== "string" || typeof p.action !== "string") {
            res.status(400).json({ error: `body.permissions[${i}] must be { resource: string, action: string }` });
            return;
        }
        if (!VALID_RESOURCES.has(p.resource)) {
            res.status(422).json({
                error: `unknown resource '${p.resource}'`,
                code: "OMCP_POLICY_UNKNOWN_RESOURCE",
                unknown: p.resource,
                available: [...VALID_RESOURCES],
            });
            return;
        }
        if (!VALID_ACTIONS.has(p.action)) {
            res.status(422).json({
                error: `unknown action '${p.action}'`,
                code: "OMCP_POLICY_UNKNOWN_ACTION",
                unknown: p.action,
                available: [...VALID_ACTIONS],
            });
            return;
        }
        cleanPerms.push({ resource: p.resource, action: p.action });
    }
    // De-duplicate exact (resource, action) pairs so re-saves don't
    // accumulate redundant entries in the file.
    const seen = new Set();
    const dedup = [];
    for (const p of cleanPerms) {
        const k = p.resource + ":" + p.action;
        if (seen.has(k))
            continue;
        seen.add(k);
        dedup.push(p);
    }
    // The existing roles can only be snapshotted through
    // BuiltinPolicyEngine.raw(). Refuse to write for any other engine
    // object: with an empty snapshot the file would be overwritten with
    // just this one role, silently dropping every other role.
    if (!(policyEngine instanceof BuiltinPolicyEngine)) {
        res.status(409).json({
            error: `role authoring requires the file engine — current is '${policyEngine.kind()}'`,
            code: "OMCP_POLICY_ENGINE_NOT_FILE",
        });
        return;
    }
    // Copy the current role → permissions map, then overlay the upsert.
    const current = {};
    for (const [r, ps] of Object.entries(policyEngine.raw())) {
        current[r] = ps.slice();
    }
    current[name] = dedup;
    try {
        await writePolicyFile(policyFile, current);
    }
    catch (e) {
        if (e instanceof PolicyLoadError) {
            res.status(422).json({ error: e.message });
            return;
        }
        res.status(500).json({ error: `failed to persist policy: ${e instanceof Error ? e.message : String(e)}` });
        return;
    }
    // Hot-swap: re-load the file just written and replace the live
    // engine's map via replace(). A reload failure is reported explicitly
    // instead of escaping the async handler after the file has changed.
    try {
        const fresh = loadPolicyFromFile(policyFile);
        if (fresh instanceof BuiltinPolicyEngine) {
            policyEngine.replace(fresh.raw());
        }
    }
    catch (e) {
        res.status(500).json({ error: `policy persisted but in-memory reload failed: ${e instanceof Error ? e.message : String(e)}` });
        return;
    }
    res.json({ ok: true, name, permissions: dedup });
});
|
|
1535
|
+
// --- /api/audit — management-plane audit feed -------------------------
|
|
1536
|
+
// Read-only, gated by the "audit:read" permission so only viewers /
|
|
1537
|
+
// operators / admins (basically anyone authenticated in the default
|
|
1538
|
+
// policy) can pull it. Supports optional ?from, ?to (RFC-3339), ?actor,
|
|
1539
|
+
// ?action, ?limit (default 100, capped to ring size).
|
|
1540
|
+
app.get("/api/audit", need("audit", "read"), (req, res) => {
    // Management-plane audit feed with optional ?from, ?to, ?actor,
    // ?action, ?tenant and ?limit filters.
    // Tenant scoping: a caller without users:delete is pinned to their own
    // tenant ("default" when the session has none); a caller with it sees
    // every tenant unless ?tenant= narrows the view.
    const sess = req.session;
    const isAdmin = hasPermission(sess?.roles, "users", "delete");
    const callerTenant = sess?.tenant || "default";
    const requestedTenant = qstr(req.query.tenant);
    const tenantFilter = isAdmin ? requestedTenant : callerTenant;
    // Only forward a limit that parsed to a number: a non-numeric ?limit=
    // makes parseInt return NaN, which must not reach the audit store.
    const rawLimit = qstr(req.query.limit);
    const parsedLimit = rawLimit ? Number.parseInt(rawLimit, 10) : NaN;
    const entries = mgmtAudit.list({
        from: qstr(req.query.from),
        to: qstr(req.query.to),
        actor: qstr(req.query.actor),
        action: qstr(req.query.action),
        tenant: tenantFilter || undefined,
        limit: Number.isNaN(parsedLimit) ? undefined : parsedLimit,
    });
    res.json({
        entries,
        tipHash: mgmtAudit.tipHash,
        persisted: !!process.env.OMCP_MGMT_AUDIT_FILE,
        // Which tenant scope this response covers; null means no tenant
        // filter was applied (admin viewing all tenants).
        scopedTo: tenantFilter || (isAdmin ? null : callerTenant),
    });
});
|
|
1569
|
+
// --- /api/usage — per-identity MCP rate-limit snapshot -----------------
|
|
1570
|
+
// Read-only view of the IdentityRateLimiter's bucket state. Gated by
|
|
1571
|
+
// need("audit","read") — the same role set that already sees the
|
|
1572
|
+
// audit log can see who is calling what. Anonymous /mcp traffic
|
|
1573
|
+
// never enters a bucket so it doesn't show up here.
|
|
1574
|
+
app.get("/api/usage", need("audit", "read"), (req, res) => {
    // Snapshot of per-identity rate-limit and token-budget state.
    // Callers without users:delete are pinned to their own tenant; callers
    // with it may narrow the view with ?tenant=. ?actor= filters by actor.
    const session = req.session;
    const admin = hasPermission(session?.roles, "users", "delete");
    const ownTenant = session?.tenant || "default";
    const wantedTenant = qstr(req.query.tenant);
    const tenantScope = admin ? wantedTenant : ownTenant;
    const actorScope = qstr(req.query.actor);
    // Union of the identity keys known to either tracker.
    const knownIds = new Set([
        ...toolRateLimiter.knownIdentities(),
        ...tokenBudget.knownIdentities(),
    ]);
    const now = Date.now();
    const identities = [];
    for (const id of knownIds) {
        // Split the tracker key into separate tenant and actor fields.
        const { tenant, actor } = splitIdentityKey(id);
        if (tenantScope && tenant !== tenantScope) {
            continue;
        }
        if (actorScope && actor !== actorScope) {
            continue;
        }
        const rate = toolRateLimiter.inspect(id, now);
        const budget = tokenBudget.inspect(id, now);
        identities.push({
            actor,
            tenant,
            count: rate.count,
            limit: rate.limit,
            windowMs: rate.windowMs,
            tokens: { used: budget.used, limit: budget.limit, windowMs: budget.windowMs },
        });
    }
    res.json({
        identities,
        defaultLimit: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
        windowMs: 60_000,
        tokens: {
            defaultLimit: resolveDailyTokenLimit(process.env.OMCP_TOOL_DAILY_TOKENS),
            windowMs: 24 * 60 * 60 * 1000,
        },
        // Which tenant scope this response covers; null means no tenant
        // filter was applied (admin viewing all tenants).
        scopedTo: tenantScope || (admin ? null : ownTenant),
    });
});
|
|
1621
|
+
// --- /api/auth/* — login + logout for basic mode -----------------------
|
|
1622
|
+
// Login: POST { username, password } → 200 + Set-Cookie on success, 401
|
|
1623
|
+
// on bad creds, 400 on missing fields, 503 in anonymous mode (the UI
|
|
1624
|
+
// shouldn't have rendered the modal at all in that case but we still
|
|
1625
|
+
// answer cleanly). Logout: POST → 204 + clears the cookie.
|
|
1626
|
+
// Throttle shared by the login and logout routes: at most 20 requests
// per 60-second window, answered with a JSON error body once exceeded.
const loginRateLimit = rateLimit({
    max: 20,
    windowMs: 60_000,
    message: { error: "too many login attempts, slow down" },
    legacyHeaders: false,
    standardHeaders: true,
});
|
|
1633
|
+
// Cached users-file mtime — on every login we stat the file and
|
|
1634
|
+
// re-read when it's changed since the last check. Adding/removing
|
|
1635
|
+
// a user therefore takes effect on the next login attempt, no server
|
|
1636
|
+
// restart required. Cheap path: a single stat() per attempt; the
|
|
1637
|
+
// rate limit caps that at 20/min/IP anyway.
|
|
1638
|
+
let lastUsersMtimeMs = null;
|
|
1639
|
+
/**
 * Re-read OMCP_USERS_FILE when its mtime differs from the last one seen.
 *
 * Does nothing when the variable is unset. A read that yields no users
 * leaves both the cached store and the recorded mtime untouched, so the
 * next call tries again. Any stat/read error is swallowed on purpose:
 * logins keep working against the last known user set.
 *
 * @returns {Promise<void>}
 */
async function maybeReloadUsers() {
    const path = process.env.OMCP_USERS_FILE;
    if (!path)
        return;
    try {
        const { stat } = await import("node:fs/promises");
        const { mtimeMs } = await stat(path);
        if (lastUsersMtimeMs !== null && mtimeMs === lastUsersMtimeMs)
            return;
        const fresh = await readUsersFile(path);
        if (!fresh || fresh.users.length === 0)
            return;
        // Remember whether a baseline existed BEFORE overwriting it. The
        // previous code tested the variable after assigning it, so the
        // check was always true and an initial (unprimed) load was
        // reported as a change.
        const hadBaseline = lastUsersMtimeMs !== null;
        usersStore = fresh;
        lastUsersMtimeMs = mtimeMs;
        if (hadBaseline) {
            console.log(`[auth] OMCP_USERS_FILE changed — reloaded ${fresh.users.length} user(s)`);
        }
    }
    catch {
        // File transiently unreadable — keep the cached store; logins
        // will continue to work with the last known set.
    }
}
|
|
1663
|
+
// Prime the cache so the first login doesn't log "changed" on every boot.
|
|
1664
|
+
// Record the users file's current mtime up front (basic mode only) so
// that a later comparison has a baseline to compare against.
const usersFileAtBoot = authRuntime.mode === "basic" ? process.env.OMCP_USERS_FILE : undefined;
if (usersFileAtBoot) {
    try {
        const fs = await import("node:fs");
        lastUsersMtimeMs = fs.statSync(usersFileAtBoot).mtimeMs;
    }
    catch {
        /* ignore — first login will pick it up */
    }
}
|
|
1674
|
+
app.post("/api/auth/login", loginRateLimit, async (req, res) => {
    // Logins exist only in basic mode, and only once both the session
    // configuration and a user store are available.
    const acceptsLogins = authRuntime.mode === "basic" && sessionCfg && usersStore;
    if (!acceptsLogins) {
        res.status(503).json({ error: "auth mode does not accept logins" });
        return;
    }
    // Pick up edits to the users file before checking credentials.
    await maybeReloadUsers();
    const { username: rawName, password: rawSecret } = req.body || {};
    // Non-string fields are treated as missing; only the username is trimmed.
    const username = typeof rawName === "string" ? rawName.trim() : "";
    const password = typeof rawSecret === "string" ? rawSecret : "";
    if (!(username && password)) {
        res.status(400).json({ error: "username and password are required" });
        return;
    }
    const user = authenticate(username, password, usersStore);
    if (!user) {
        res.status(401).json({ error: "invalid credentials" });
        return;
    }
    const claims = { sub: user.username, name: user.name, roles: user.roles, tenant: user.tenant };
    const { cookie } = issueSession(claims, sessionCfg);
    // The request counts as HTTPS when Express says so or when the
    // X-Forwarded-Proto header is exactly "https".
    const secure = req.secure || req.headers["x-forwarded-proto"] === "https";
    res.setHeader("Set-Cookie", setCookieHeader(cookie, sessionCfg, { secure }));
    res.json({
        ok: true,
        user: { sub: user.username, name: user.name, roles: user.roles ?? [] },
    });
});
|
|
1700
|
+
// Same per-IP cap as login — defends against logout-as-disruption
|
|
1701
|
+
// (an attacker spamming logouts at a forged session for another tab).
|
|
1702
|
+
app.post("/api/auth/logout", loginRateLimit, (req, res) => {
    // Always answers 204. A cookie-clearing header is attached only when
    // the server is not in anonymous mode and has a session configuration.
    const hasCookieSession = authRuntime.mode !== "anonymous" && sessionCfg;
    if (hasCookieSession) {
        const secure = req.secure || req.headers["x-forwarded-proto"] === "https";
        res.setHeader("Set-Cookie", clearCookieHeader(sessionCfg, { secure }));
    }
    res.status(204).end();
});
|
|
1711
|
+
// OIDC code-flow endpoints (login redirect, callback, logout) — only
|
|
1712
|
+
// mounted when OMCP_AUTH=oidc resolved cleanly. registerOidcRoutes is
|
|
1713
|
+
// a no-op at the type level when oidcRuntime is undefined; we guard
|
|
1714
|
+
// here so we don't even define the routes in basic/anonymous mode.
|
|
1715
|
+
// Mount the OIDC routes only when the mode is "oidc" and both the OIDC
// runtime and the session configuration are present.
const oidcReady = authRuntime.mode === "oidc" && oidcRuntime && sessionCfg;
if (oidcReady) {
    registerOidcRoutes(app, { sessionCfg, oidc: oidcRuntime });
    console.log("[auth] OIDC endpoints registered: /api/auth/oidc/{login,callback,logout}");
}
|
|
1719
|
+
// Phase F21: SCIM 2.0 — opt-in. OMCP_SCIM_TOKEN gates access;
|
|
1720
|
+
// OMCP_SCIM_STORE points at the on-disk JSON (mode 0600, atomic).
|
|
1721
|
+
// Multi-replica deployments should plug the F8 SessionStore in
|
|
1722
|
+
// when F21b lands.
|
|
1723
|
+
// SCIM is opt-in: routes are registered only when OMCP_SCIM_TOKEN holds a
// non-blank value. OMCP_SCIM_STORE selects the backing file and falls
// back to /tmp/scim.json when unset or blank.
const scimToken = process.env.OMCP_SCIM_TOKEN?.trim();
if (scimToken) {
    const scimStorePath = process.env.OMCP_SCIM_STORE?.trim() || "/tmp/scim.json";
    const scimStore = new ScimStore(scimStorePath);
    await scimStore.load();
    // Forward each SCIM event to the management audit log. Fire and
    // forget: a rejected record() is discarded and nothing is returned.
    const recordScimEvent = (ev) => {
        const entry = {
            actor: { sub: `scim:${ev.actor}` },
            tenant: "default",
            resource: "users",
            action: ev.action.includes("delete") ? "delete" : "write",
            method: "SCIM",
            path: `/scim/v2/${ev.action}`,
            status: ev.status,
            target: ev.target,
        };
        mgmtAudit.record(entry).catch(() => undefined);
    };
    registerScimRoutes(app, {
        store: scimStore,
        bearerToken: scimToken,
        audit: recordScimEvent,
    });
    console.log("[scim] /scim/v2/* registered (store: %s)", scimStorePath);
}
|
|
398
1744
|
// Connectors currently loaded into this server (builtin + filesystem
|
|
399
1745
|
// plugins), with manifest metadata — drives the UI "Connectors" page.
|
|
400
1746
|
app.get("/api/connectors", (_req, res) => {
|
|
@@ -477,7 +1823,7 @@ async function main() {
|
|
|
477
1823
|
// Only catalog tarballUrls are fetched (no arbitrary URL in the body)
|
|
478
1824
|
// to avoid SSRF. The connector persists to PLUGINS_DIR (back it with
|
|
479
1825
|
// a PVC on k8s so it survives restarts).
|
|
480
|
-
app.post("/api/connectors/install", installRateLimit, async (req, res) => {
|
|
1826
|
+
app.post("/api/connectors/install", installRateLimit, need("connectors", "write"), audit("connectors", "write"), async (req, res) => {
|
|
481
1827
|
if (process.env.ENABLE_UI_INSTALL !== "true") {
|
|
482
1828
|
return res.status(403).json({
|
|
483
1829
|
error: "UI install is disabled. Set ENABLE_UI_INSTALL=true and PLUGIN_TRUST_ROOT to enable it.",
|
|
@@ -546,7 +1892,7 @@ async function main() {
|
|
|
546
1892
|
// ALWAYS verified against PLUGIN_TRUST_ROOT (signature + integrity),
|
|
547
1893
|
// so an unsigned/tampered bundle is rejected. Body is the raw tarball
|
|
548
1894
|
// bytes (application/octet-stream). Persists to PLUGINS_DIR.
|
|
549
|
-
app.post("/api/connectors/upload", installRateLimit, express.raw({ type: "application/octet-stream", limit: "50mb" }), async (req, res) => {
|
|
1895
|
+
app.post("/api/connectors/upload", installRateLimit, need("connectors", "write"), audit("connectors", "write"), express.raw({ type: "application/octet-stream", limit: "50mb" }), async (req, res) => {
|
|
550
1896
|
if (process.env.ENABLE_UI_INSTALL !== "true") {
|
|
551
1897
|
return res.status(403).json({
|
|
552
1898
|
error: "UI install is disabled. Set ENABLE_UI_INSTALL=true and PLUGIN_TRUST_ROOT to enable it.",
|
|
@@ -586,9 +1932,13 @@ async function main() {
|
|
|
586
1932
|
rmSync(work, { recursive: true, force: true });
|
|
587
1933
|
}
|
|
588
1934
|
});
|
|
589
|
-
// Add a new source
|
|
590
|
-
|
|
591
|
-
|
|
1935
|
+
// Add a new source — tenant-aware. Non-admins can only create
|
|
1936
|
+
// sources in their own tenant; admins may set any tenant or leave
|
|
1937
|
+
// unset (global). Untagged inputs default to undefined (global) for
|
|
1938
|
+
// admins and to the caller's own tenant for non-admins, so a
|
|
1939
|
+
// tenant-bound user can't accidentally pollute the global pool.
|
|
1940
|
+
app.post("/api/sources", installRateLimit, need("sources", "write"), audit("sources", "write"), async (req, res) => {
|
|
1941
|
+
const { name, type, url, enabled, auth, tls, tenant: bodyTenant } = req.body;
|
|
592
1942
|
if (!name || !type || !url) {
|
|
593
1943
|
res.status(400).json({ error: "name, type, and url are required" });
|
|
594
1944
|
return;
|
|
@@ -598,22 +1948,40 @@ async function main() {
|
|
|
598
1948
|
res.status(400).json({ error: urlErr });
|
|
599
1949
|
return;
|
|
600
1950
|
}
|
|
1951
|
+
const sess = req.session;
|
|
1952
|
+
const isAdmin = hasPermission(sess?.roles, "users", "delete");
|
|
1953
|
+
const callerTenant = sess?.tenant || "default";
|
|
1954
|
+
const resolvedTenant = isAdmin
|
|
1955
|
+
? (typeof bodyTenant === "string" && bodyTenant ? bodyTenant : undefined)
|
|
1956
|
+
: (typeof bodyTenant === "string" && bodyTenant && bodyTenant !== callerTenant
|
|
1957
|
+
? "__deny__"
|
|
1958
|
+
: callerTenant);
|
|
1959
|
+
if (resolvedTenant === "__deny__") {
|
|
1960
|
+
res.status(403).json({ error: "cannot create source in another tenant" });
|
|
1961
|
+
return;
|
|
1962
|
+
}
|
|
601
1963
|
const existing = registry.getSourceConfigs().find((s) => s.name === name);
|
|
602
1964
|
if (existing) {
|
|
603
1965
|
res.status(409).json({ error: `Source "${name}" already exists` });
|
|
604
1966
|
return;
|
|
605
1967
|
}
|
|
606
|
-
const source = { name, type, url, enabled: enabled !== false, auth, tls };
|
|
1968
|
+
const source = { name, type, url, enabled: enabled !== false, auth, tls, tenant: resolvedTenant };
|
|
607
1969
|
await registry.addSource(source);
|
|
608
1970
|
saveConfig(config = { ...config, sources: registry.getSourceConfigs() });
|
|
609
1971
|
res.status(201).json({ ok: true, source });
|
|
610
1972
|
});
|
|
611
|
-
// Update an existing source
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
1973
|
+
// Update an existing source — tenant-aware. Non-admins editing a
|
|
1974
|
+
// cross-tenant source get the same 404 they'd get for "no such
|
|
1975
|
+
// source" (no existence leak). Admins may move a source between
|
|
1976
|
+
// tenants by setting body.tenant; non-admins cannot.
|
|
1977
|
+
app.put("/api/sources/:name", need("sources", "write"), audit("sources", "write"), async (req, res) => {
|
|
1978
|
+
const oldName = String(req.params.name);
|
|
1979
|
+
const { name, type, url, enabled, auth, tls, tenant: bodyTenant } = req.body;
|
|
615
1980
|
const existing = registry.getSourceConfigs().find((s) => s.name === oldName);
|
|
616
|
-
|
|
1981
|
+
const sess = req.session;
|
|
1982
|
+
const isAdmin = hasPermission(sess?.roles, "users", "delete");
|
|
1983
|
+
const callerTenant = sess?.tenant || "default";
|
|
1984
|
+
if (!existing || (!isAdmin && existing.tenant && existing.tenant !== callerTenant)) {
|
|
617
1985
|
res.status(404).json({ error: `Source "${oldName}" not found` });
|
|
618
1986
|
return;
|
|
619
1987
|
}
|
|
@@ -625,6 +1993,19 @@ async function main() {
|
|
|
625
1993
|
return;
|
|
626
1994
|
}
|
|
627
1995
|
}
|
|
1996
|
+
let nextTenant = existing.tenant;
|
|
1997
|
+
if (bodyTenant !== undefined) {
|
|
1998
|
+
if (!isAdmin) {
|
|
1999
|
+
// Non-admin attempting tenant reassignment — disallow.
|
|
2000
|
+
if (bodyTenant !== existing.tenant) {
|
|
2001
|
+
res.status(403).json({ error: "cannot change source tenant" });
|
|
2002
|
+
return;
|
|
2003
|
+
}
|
|
2004
|
+
}
|
|
2005
|
+
else {
|
|
2006
|
+
nextTenant = typeof bodyTenant === "string" && bodyTenant ? bodyTenant : undefined;
|
|
2007
|
+
}
|
|
2008
|
+
}
|
|
628
2009
|
const source = {
|
|
629
2010
|
name: name || oldName,
|
|
630
2011
|
type: type || existing.type,
|
|
@@ -632,16 +2013,20 @@ async function main() {
|
|
|
632
2013
|
enabled: enabled !== undefined ? enabled : existing.enabled,
|
|
633
2014
|
auth: auth !== undefined ? auth : existing.auth,
|
|
634
2015
|
tls: tls !== undefined ? tls : existing.tls,
|
|
2016
|
+
tenant: nextTenant,
|
|
635
2017
|
};
|
|
636
2018
|
await registry.updateSource(oldName, source);
|
|
637
2019
|
saveConfig(config = { ...config, sources: registry.getSourceConfigs() });
|
|
638
2020
|
res.json({ ok: true, source });
|
|
639
2021
|
});
|
|
640
|
-
// Delete a source
|
|
641
|
-
app.delete("/api/sources/:name", async (req, res) => {
|
|
642
|
-
const name = req.params.name;
|
|
2022
|
+
// Delete a source — same cross-tenant 404 posture.
|
|
2023
|
+
app.delete("/api/sources/:name", need("sources", "delete"), audit("sources", "delete"), async (req, res) => {
|
|
2024
|
+
const name = String(req.params.name);
|
|
643
2025
|
const existing = registry.getSourceConfigs().find((s) => s.name === name);
|
|
644
|
-
|
|
2026
|
+
const sess = req.session;
|
|
2027
|
+
const isAdmin = hasPermission(sess?.roles, "users", "delete");
|
|
2028
|
+
const callerTenant = sess?.tenant || "default";
|
|
2029
|
+
if (!existing || (!isAdmin && existing.tenant && existing.tenant !== callerTenant)) {
|
|
645
2030
|
res.status(404).json({ error: `Source "${name}" not found` });
|
|
646
2031
|
return;
|
|
647
2032
|
}
|
|
@@ -650,7 +2035,7 @@ async function main() {
|
|
|
650
2035
|
res.json({ ok: true });
|
|
651
2036
|
});
|
|
652
2037
|
// Test a source connection (without saving)
|
|
653
|
-
app.post("/api/sources/test", installRateLimit, async (req, res) => {
|
|
2038
|
+
app.post("/api/sources/test", installRateLimit, need("sources", "write"), audit("sources", "write"), async (req, res) => {
|
|
654
2039
|
const { name, type, url, enabled, auth, tls } = req.body;
|
|
655
2040
|
if (!type || !url) {
|
|
656
2041
|
res.status(400).json({ error: "type and url are required" });
|
|
@@ -672,10 +2057,13 @@ async function main() {
|
|
|
672
2057
|
res.json(result);
|
|
673
2058
|
});
|
|
674
2059
|
// Toggle source enabled/disabled
|
|
675
|
-
app.patch("/api/sources/:name/toggle", async (req, res) => {
|
|
676
|
-
const name = req.params.name;
|
|
2060
|
+
app.patch("/api/sources/:name/toggle", need("sources", "write"), audit("sources", "write"), async (req, res) => {
|
|
2061
|
+
const name = String(req.params.name);
|
|
677
2062
|
const existing = registry.getSourceConfigs().find((s) => s.name === name);
|
|
678
|
-
|
|
2063
|
+
const sess = req.session;
|
|
2064
|
+
const isAdmin = hasPermission(sess?.roles, "users", "delete");
|
|
2065
|
+
const callerTenant = sess?.tenant || "default";
|
|
2066
|
+
if (!existing || (!isAdmin && existing.tenant && existing.tenant !== callerTenant)) {
|
|
679
2067
|
res.status(404).json({ error: `Source "${name}" not found` });
|
|
680
2068
|
return;
|
|
681
2069
|
}
|
|
@@ -694,36 +2082,339 @@ async function main() {
|
|
|
694
2082
|
}
|
|
695
2083
|
}
|
|
696
2084
|
// List discovered services
|
|
697
|
-
app.get("/api/services", async (
|
|
2085
|
+
app.get("/api/services", async (req, res) => {
    try {
        const session = req.session;
        const tenant = session?.tenant || "default";
        // The session-derived context is handed to the handler together
        // with an empty argument object.
        const raw = await listServicesHandler(registry, {}, sessionContext(session));
        const payload = parseToolResult(raw);
        // Attach catalog metadata to each service, looked up with the
        // caller's tenant (or "default" when the session carries none).
        for (const svc of payload?.services || []) {
            if (typeof svc.name !== "string")
                continue;
            const entry = catalog.get(svc.name, tenant);
            if (entry)
                svc.catalog = entry;
        }
        res.json(payload);
    }
    catch {
        res.status(500).json({ error: "Failed to list services" });
    }
});
|
|
2112
|
+
// Read-only view of the configured catalog. Gated by the same
|
|
2113
|
+
// "catalog:read" permission Phase E4 added to DEFAULT_POLICY.
|
|
2114
|
+
app.get("/api/catalog", need("catalog", "read"), (req, res) => {
    // Admins (users:delete holders) list every tenant unless ?tenant=
    // narrows the view; other callers are limited to their own tenant.
    const session = req.session;
    const admin = hasPermission(session?.roles, "users", "delete");
    const ownTenant = session?.tenant || "default";
    const wantedTenant = qstr(req.query.tenant);
    const tenantFilter = admin ? wantedTenant : ownTenant;
    const services = catalog.list(tenantFilter || undefined);
    const count = Object.keys(services).length;
    res.json({
        services,
        count,
        configured: Boolean(process.env.OMCP_SERVICE_CATALOG_FILE),
        scopedTo: tenantFilter || (admin ? null : ownTenant),
    });
});
|
|
2131
|
+
// --- /api/products — MCP Products catalogue ---------------------------
|
|
2132
|
+
// Same scoping / staging-visibility pattern as /api/catalog. Non-admins
|
|
2133
|
+
// see only their own tenant's PUBLISHED products; admins see all
|
|
2134
|
+
// tenants by default + staging.
|
|
2135
|
+
app.get("/api/products", need("products", "read"), async (req, res) => {
    // Refresh from disk first so the listing reflects out-of-band edits.
    await products.maybeReload();
    const session = req.session;
    const admin = hasPermission(session?.roles, "users", "delete");
    const ownTenant = session?.tenant || "default";
    const wantedTenant = qstr(req.query.tenant);
    const tenantFilter = admin ? wantedTenant : ownTenant;
    // Staging products are listed for admins only.
    const includeStaging = admin;
    const listing = products.list({ tenant: tenantFilter || undefined, includeStaging });
    res.json({
        products: listing,
        configured: Boolean(process.env.OMCP_PRODUCTS_FILE),
        scopedTo: tenantFilter || (admin ? null : ownTenant),
        includesStaging: includeStaging,
    });
});
|
|
2151
|
+
// Create a new product (REST convention: POST = create, 409 on
|
|
2152
|
+
// conflict). Same tenancy + typo-guard posture as PUT. The PUT
|
|
2153
|
+
// upsert path remains for the existing UI; new integrations that
|
|
2154
|
+
// want strict create-vs-update semantics use POST.
|
|
2155
|
+
app.post("/api/products", need("products", "write"), audit("products", "write"), async (req, res) => {
    // Strict create: 400 on a malformed body, 422 on unknown tool names,
    // 403 when a non-admin targets another tenant, 409 when the id is
    // already taken, 201 on success.
    // Refresh from disk first so the conflict check sees on-disk edits.
    await products.maybeReload();
    const sess = req.session;
    const isAdmin = hasPermission(sess?.roles, "users", "delete");
    const callerTenant = sess?.tenant || "default";
    const body = req.body;
    if (!body || typeof body !== "object" || Array.isArray(body)) {
        res.status(400).json({ error: "body must be a product object" });
        return;
    }
    if (typeof body.id !== "string" || !body.id) {
        res.status(400).json({ error: "body.id is required" });
        return;
    }
    let validated;
    try {
        validated = validateProduct(body, `POST /api/products`);
    }
    catch (e) {
        // Validation failures are the caller's fault; anything else is
        // rethrown unchanged.
        if (e instanceof ProductsLoadError) {
            res.status(400).json({ error: e.message });
            return;
        }
        throw e;
    }
    // Reject allow-lists that name tools which are not registered.
    if (validated.tools && validated.tools.length > 0) {
        const unknown = unknownToolNames(validated.tools);
        if (unknown.length > 0) {
            res.status(422).json({
                error: `unknown tool name(s) in product '${validated.id}': ${unknown.join(", ")}`,
                code: "OMCP_PRODUCT_UNKNOWN_TOOL",
                unknown,
                available: REGISTERED_TOOL_NAMES,
            });
            return;
        }
    }
    // Tenant gate: non-admins may only create inside their own tenant.
    if (!isAdmin && (validated.tenant || "default") !== callerTenant) {
        res.status(403).json({ error: "cannot create product in another tenant" });
        return;
    }
    if (products.get(validated.id)) {
        res.status(409).json({ error: `product '${validated.id}' already exists; use PUT to update` });
        return;
    }
    const next = products.upsert(validated);
    const productsFile = process.env.OMCP_PRODUCTS_FILE;
    // Set only once the file write has actually succeeded. The response
    // used to report persisted: true whenever the env var was set, even
    // when the write had failed and was merely logged.
    let persisted = false;
    if (productsFile) {
        try {
            await writeProductsFile(productsFile, next);
            persisted = true;
            await products.pinMtimeAfterWrite();
        }
        catch (e) {
            console.warn(`[products] POST ${validated.id}: failed to persist to ${productsFile}: ${e.message} — in-memory state is still updated`);
        }
    }
    res.status(201).json({ product: validated, persisted });
});
|
|
2212
|
+
// Upsert a product. Body is the same shape as a single entry
|
|
2213
|
+
// in OMCP_PRODUCTS_FILE. The URL-path id must match the body id
|
|
2214
|
+
// (defence-in-depth: the gate keys on body, the path keys the
|
|
2215
|
+
// audit entry). When OMCP_PRODUCTS_FILE is set we also write the
|
|
2216
|
+
// updated catalogue back to disk so the change survives a
|
|
2217
|
+
// restart; without the file, the upsert is in-memory only.
|
|
2218
|
+
app.put("/api/products/:id", need("products", "write"), audit("products", "write"), async (req, res) => {
    // Upsert keyed by the URL id: 400 on a malformed body or an id
    // mismatch, 422 on unknown tool names, 403/404 on tenant violations,
    // 200 with the stored product otherwise.
    // Reload before mutating so a concurrent on-disk edit is not
    // overwritten by the in-memory snapshot.
    await products.maybeReload();
    const id = String(req.params.id);
    const sess = req.session;
    const isAdmin = hasPermission(sess?.roles, "users", "delete");
    const callerTenant = sess?.tenant || "default";
    const body = req.body;
    if (!body || typeof body !== "object" || Array.isArray(body)) {
        res.status(400).json({ error: "body must be a product object" });
        return;
    }
    if (typeof body.id === "string" && body.id !== id) {
        res.status(400).json({ error: `body.id '${body.id}' does not match URL path '${id}'` });
        return;
    }
    // The URL id always wins, so a body that omits the id is still
    // stored under the id named in the path.
    const payload = { ...body, id };
    let validated;
    try {
        validated = validateProduct(payload, `PUT /api/products/${id}`);
    }
    catch (e) {
        // Validation failures are the caller's fault; anything else is
        // rethrown unchanged.
        if (e instanceof ProductsLoadError) {
            res.status(400).json({ error: e.message });
            return;
        }
        throw e;
    }
    // Typo guard: reject an allow-list naming tools that are not
    // registered, and return the valid names so the mistake is obvious.
    if (validated.tools && validated.tools.length > 0) {
        const unknown = unknownToolNames(validated.tools);
        if (unknown.length > 0) {
            res.status(422).json({
                error: `unknown tool name(s) in product '${id}': ${unknown.join(", ")}`,
                code: "OMCP_PRODUCT_UNKNOWN_TOOL",
                unknown,
                available: REGISTERED_TOOL_NAMES,
            });
            return;
        }
    }
    // Tenant gate: non-admins can only write into their own tenant.
    if (!isAdmin && (validated.tenant || "default") !== callerTenant) {
        res.status(403).json({ error: "cannot write product into another tenant" });
        return;
    }
    // A non-admin overwriting another tenant's product would re-parent
    // it; answer 404 rather than 403 so its existence is not revealed.
    const existing = products.get(id);
    if (existing && !isAdmin && (existing.tenant || "default") !== callerTenant) {
        res.status(404).json({ error: "not found" });
        return;
    }
    const next = products.upsert(validated);
    const productsFile = process.env.OMCP_PRODUCTS_FILE;
    // Set only once the file write has actually succeeded. The response
    // used to report persisted: true whenever the env var was set, even
    // when the write had failed and was merely logged.
    let persisted = false;
    if (productsFile) {
        try {
            await writeProductsFile(productsFile, next);
            persisted = true;
            // Move the mtime cursor past our own write so the next
            // maybeReload() does not mistake it for an external edit.
            await products.pinMtimeAfterWrite();
        }
        catch (e) {
            console.warn(`[products] PUT ${id}: failed to persist to ${productsFile}: ${e.message} — in-memory state is still updated`);
        }
    }
    res.json({ product: validated, persisted });
});
|
|
2295
|
+
app.delete("/api/products/:id", need("products", "delete"), audit("products", "delete"), async (req, res) => {
    await products.maybeReload();
    const id = String(req.params.id);
    const session = req.session;
    const admin = hasPermission(session?.roles, "users", "delete");
    const ownTenant = session?.tenant || "default";
    const current = products.get(id);
    // A missing product and another tenant's product (for non-admins)
    // get the same 404.
    const hidden = !current || (!admin && (current.tenant || "default") !== ownTenant);
    if (hidden) {
        res.status(404).json({ error: "not found" });
        return;
    }
    const { file: remaining } = products.delete(id);
    if (process.env.OMCP_PRODUCTS_FILE) {
        try {
            await writeProductsFile(process.env.OMCP_PRODUCTS_FILE, remaining);
            await products.pinMtimeAfterWrite();
        }
        catch (err) {
            // Best effort: the in-memory delete stands even when the file
            // cannot be rewritten.
            console.warn(`[products] DELETE ${id}: failed to persist to ${process.env.OMCP_PRODUCTS_FILE}: ${err.message} — in-memory state is still updated`);
        }
    }
    res.status(204).end();
});
|
|
2322
|
+
// Single product by id. Non-admins get a 404 (not 403) on a
|
|
2323
|
+
// cross-tenant probe so the existence of the product isn't leaked
|
|
2324
|
+
// — same posture as the rest of the tenancy layer.
|
|
2325
|
+
app.get("/api/products/:id", need("products", "read"), async (req, res) => {
    await products.maybeReload();
    const session = req.session;
    const admin = hasPermission(session?.roles, "users", "delete");
    const ownTenant = session?.tenant || "default";
    // Admins look up across tenants; others only inside their own.
    const scope = admin ? undefined : ownTenant;
    const id = String(req.params.id);
    const product = products.get(id, scope);
    // Unknown ids, and staging products requested by a non-admin, get
    // the same 404.
    if (!product || (!admin && product.status === "staging")) {
        res.status(404).json({ error: "not found" });
        return;
    }
    res.json(product);
});
|
|
2345
|
+
// Agent preview — what would the /mcp tools/list response look
|
|
2346
|
+
// like for a credential bound to this product? Same RBAC + tenant
|
|
2347
|
+
// gate as the singular GET above. The body mirrors the actual
|
|
2348
|
+
// tools/list shape (name + description + category), filtered the
|
|
2349
|
+
// same way the /mcp transport filters it via allowsTool +
|
|
2350
|
+
// registerTool — so the UI's Review pane shows the exact set the
|
|
2351
|
+
// agent will see, not an approximation. Branding metadata travels
|
|
2352
|
+
// alongside so the preview can render with the product's identity.
|
|
2353
|
+
app.get("/api/products/:id/preview", need("products", "read"), async (req, res) => {
    await products.maybeReload();
    const session = req.session;
    const admin = hasPermission(session?.roles, "users", "delete");
    const ownTenant = session?.tenant || "default";
    // Admins look up across tenants; others only inside their own.
    const scope = admin ? undefined : ownTenant;
    const id = String(req.params.id);
    const product = products.get(id, scope);
    // Unknown ids, and staging products requested by a non-admin, get
    // the same 404.
    if (!product || (!admin && product.status === "staging")) {
        res.status(404).json({ error: "not found" });
        return;
    }
    // A missing or empty tools list means "no allow-list".
    const allowList = product.tools && product.tools.length > 0 ? product.tools : undefined;
    const visibleTools = REGISTERED_TOOLS.filter((tool) => allowsTool(allowList, tool.name));
    const { name, version, branding, tenant, status } = product;
    res.json({
        product: { id: product.id, name, version, branding, tenant, status },
        // true when no allow-list is in effect for this product.
        unrestricted: !allowList,
        tools: visibleTools,
    });
});
|
|
706
2380
|
// Health endpoint for UI dashboard
|
|
707
2381
|
app.get("/api/health/:service", async (req, res) => {
    try {
        const session = req.session;
        const tenant = session?.tenant || "default";
        const service = String(req.params.service);
        const raw = await getServiceHealthHandler(registry, { service }, sessionContext(session));
        const payload = parseToolResult(raw);
        // Attach catalog metadata, looked up with the caller's tenant,
        // when an entry exists and the parsed result is an object.
        const entry = catalog.get(service, tenant);
        const canAnnotate = entry && payload && typeof payload === "object";
        if (canAnnotate)
            payload.catalog = entry;
        res.json(payload);
    }
    catch {
        res.status(500).json({ error: "Failed to get service health" });
    }
});
|
|
716
2397
|
// Health for all services
|
|
717
|
-
app.get("/api/health", async (
|
|
2398
|
+
app.get("/api/health", async (req, res) => {
|
|
718
2399
|
try {
|
|
719
|
-
const
|
|
2400
|
+
const sess = req.session;
|
|
2401
|
+
const callerTenant = sess?.tenant || "default";
|
|
2402
|
+
const ctx = sessionContext(sess);
|
|
2403
|
+
const servicesResult = await listServicesHandler(registry, {}, ctx);
|
|
720
2404
|
const parsed = parseToolResult(servicesResult);
|
|
721
2405
|
const services = parsed?.services || [];
|
|
722
2406
|
const health = {};
|
|
723
2407
|
for (const svc of services) {
|
|
724
2408
|
try {
|
|
725
|
-
const result = await getServiceHealthHandler(registry, { service: svc.name },
|
|
726
|
-
|
|
2409
|
+
const result = await getServiceHealthHandler(registry, { service: svc.name }, ctx);
|
|
2410
|
+
const h = parseToolResult(result);
|
|
2411
|
+
// Same tenant scoping as /api/services to avoid the
|
|
2412
|
+
// dashboard cross-tenant catalog leak the reviewer
|
|
2413
|
+
// caught in slice 3.
|
|
2414
|
+
const entry = catalog.get(svc.name, callerTenant);
|
|
2415
|
+
if (entry && h && typeof h === "object")
|
|
2416
|
+
h.catalog = entry;
|
|
2417
|
+
health[svc.name] = h;
|
|
727
2418
|
}
|
|
728
2419
|
catch {
|
|
729
2420
|
health[svc.name] = { error: "failed to fetch health" };
|
|
@@ -739,12 +2430,18 @@ async function main() {
|
|
|
739
2430
|
// Returns the union of topology snapshots across all topology-capable
|
|
740
2431
|
// connectors (today only "kubernetes"). One JSON document so the UI can
|
|
741
2432
|
// render summary + grouped views without N round-trips.
|
|
742
|
-
app.get("/api/topology", async (
|
|
2433
|
+
app.get("/api/topology", async (req, res) => {
|
|
743
2434
|
try {
|
|
2435
|
+
const sess = req.session;
|
|
2436
|
+
const callerTenant = sess?.tenant || "default";
|
|
744
2437
|
const sources = [];
|
|
745
2438
|
const allResources = [];
|
|
746
2439
|
const allEdges = [];
|
|
747
|
-
|
|
2440
|
+
// Tenant-scoped: non-anonymous callers only see topology from
|
|
2441
|
+
// connectors their tenant can reach. Anonymous mode keeps the
|
|
2442
|
+
// global view (single-tenant default).
|
|
2443
|
+
const connectors = sess ? registry.getByTenant(callerTenant) : registry.getAll();
|
|
2444
|
+
for (const c of connectors) {
|
|
748
2445
|
if (!isTopologyProvider(c))
|
|
749
2446
|
continue;
|
|
750
2447
|
const snap = await c.getTopologySnapshot();
|
|
@@ -771,7 +2468,7 @@ async function main() {
|
|
|
771
2468
|
res.json(config.settings);
|
|
772
2469
|
});
|
|
773
2470
|
// Update general settings
|
|
774
|
-
app.put("/api/settings", (req, res) => {
|
|
2471
|
+
app.put("/api/settings", need("settings", "write"), audit("settings", "write"), (req, res) => {
|
|
775
2472
|
config = { ...config, settings: { ...config.settings, ...req.body } };
|
|
776
2473
|
saveConfig(config);
|
|
777
2474
|
res.json({ ok: true, settings: config.settings });
|
|
@@ -787,7 +2484,7 @@ async function main() {
|
|
|
787
2484
|
app.get("/api/health-thresholds", (_req, res) => {
|
|
788
2485
|
res.json(config.healthThresholds);
|
|
789
2486
|
});
|
|
790
|
-
app.put("/api/health-thresholds", (req, res) => {
|
|
2487
|
+
app.put("/api/health-thresholds", need("health", "write"), audit("health", "write"), (req, res) => {
|
|
791
2488
|
config = { ...config, healthThresholds: { ...config.healthThresholds, ...req.body } };
|
|
792
2489
|
applyConfigToRuntime(config, registry);
|
|
793
2490
|
saveConfig(config);
|
|
@@ -796,9 +2493,19 @@ async function main() {
|
|
|
796
2493
|
// --- Per-Source Metrics API ---
|
|
797
2494
|
// Get metrics for a source (active metrics or defaults)
|
|
798
2495
|
app.get("/api/sources/:name/metrics", (req, res) => {
|
|
799
|
-
const
|
|
2496
|
+
const name = String(req.params.name);
|
|
2497
|
+
const sess = req.session;
|
|
2498
|
+
const isAdmin = hasPermission(sess?.roles, "users", "delete");
|
|
2499
|
+
const callerTenant = sess?.tenant || "default";
|
|
2500
|
+
// Tenant-aware: getByNameForTenant returns undefined for both
|
|
2501
|
+
// "doesn't exist" and "cross-tenant" — same no-leak posture as
|
|
2502
|
+
// /api/sources GET/PUT/DELETE. Anonymous / admin keep the
|
|
2503
|
+
// single-tenant behaviour by falling back to getByName.
|
|
2504
|
+
const connector = (sess && !isAdmin)
|
|
2505
|
+
? registry.getByNameForTenant(name, callerTenant)
|
|
2506
|
+
: registry.getByName(name);
|
|
800
2507
|
if (!connector) {
|
|
801
|
-
res.status(404).json({ error: `Source "${
|
|
2508
|
+
res.status(404).json({ error: `Source "${name}" not found` });
|
|
802
2509
|
return;
|
|
803
2510
|
}
|
|
804
2511
|
res.json({
|
|
@@ -806,11 +2513,15 @@ async function main() {
|
|
|
806
2513
|
defaults: connector.getDefaultMetrics(),
|
|
807
2514
|
});
|
|
808
2515
|
});
|
|
809
|
-
// Update metrics for a source
|
|
810
|
-
app.put("/api/sources/:name/metrics", async (req, res) => {
|
|
811
|
-
const name = req.params.name;
|
|
2516
|
+
// Update metrics for a source — tenant-aware mutation.
|
|
2517
|
+
app.put("/api/sources/:name/metrics", need("sources", "write"), audit("sources", "write"), async (req, res) => {
|
|
2518
|
+
const name = String(req.params.name);
|
|
812
2519
|
const sourceIdx = config.sources.findIndex((s) => s.name === name);
|
|
813
|
-
|
|
2520
|
+
const sess = req.session;
|
|
2521
|
+
const isAdmin = hasPermission(sess?.roles, "users", "delete");
|
|
2522
|
+
const callerTenant = sess?.tenant || "default";
|
|
2523
|
+
const src = sourceIdx >= 0 ? config.sources[sourceIdx] : undefined;
|
|
2524
|
+
if (!src || (!isAdmin && src.tenant && src.tenant !== callerTenant)) {
|
|
814
2525
|
res.status(404).json({ error: `Source "${name}" not found` });
|
|
815
2526
|
return;
|
|
816
2527
|
}
|
|
@@ -820,11 +2531,15 @@ async function main() {
|
|
|
820
2531
|
saveConfig(config);
|
|
821
2532
|
res.json({ ok: true });
|
|
822
2533
|
});
|
|
823
|
-
// Reset a source's metrics to connector defaults
|
|
824
|
-
app.delete("/api/sources/:name/metrics", async (req, res) => {
|
|
825
|
-
const name = req.params.name;
|
|
2534
|
+
// Reset a source's metrics to connector defaults — tenant-aware.
|
|
2535
|
+
app.delete("/api/sources/:name/metrics", need("sources", "write"), audit("sources", "write"), async (req, res) => {
|
|
2536
|
+
const name = String(req.params.name);
|
|
826
2537
|
const sourceIdx = config.sources.findIndex((s) => s.name === name);
|
|
827
|
-
|
|
2538
|
+
const sess = req.session;
|
|
2539
|
+
const isAdmin = hasPermission(sess?.roles, "users", "delete");
|
|
2540
|
+
const callerTenant = sess?.tenant || "default";
|
|
2541
|
+
const src = sourceIdx >= 0 ? config.sources[sourceIdx] : undefined;
|
|
2542
|
+
if (!src || (!isAdmin && src.tenant && src.tenant !== callerTenant)) {
|
|
828
2543
|
res.status(404).json({ error: `Source "${name}" not found` });
|
|
829
2544
|
return;
|
|
830
2545
|
}
|
|
@@ -846,6 +2561,12 @@ async function main() {
|
|
|
846
2561
|
// MCP Streamable HTTP transport — stateful sessions
|
|
847
2562
|
const transports = new Map();
|
|
848
2563
|
const sessionLastActive = new Map();
|
|
2564
|
+
// Phase F9: per-session tag identifying the virtual-server slug a
|
|
2565
|
+
// session was issued under (or undefined for the root /mcp surface).
|
|
2566
|
+
// Used to prevent a session minted on /mcp/v/foo from being probed
|
|
2567
|
+
// via /mcp/v/bar — the GET/DELETE handlers refuse the cross-product
|
|
2568
|
+
// lookup.
|
|
2569
|
+
const sessionProduct = new Map();
|
|
849
2570
|
const SESSION_TTL_MS = 30 * 60 * 1000; // 30 min idle timeout
|
|
850
2571
|
// Clean up idle sessions every 5 minutes
|
|
851
2572
|
setInterval(() => {
|
|
@@ -854,6 +2575,7 @@ async function main() {
|
|
|
854
2575
|
if (now - lastActive > SESSION_TTL_MS) {
|
|
855
2576
|
transports.delete(sid);
|
|
856
2577
|
sessionLastActive.delete(sid);
|
|
2578
|
+
sessionProduct.delete(sid);
|
|
857
2579
|
console.log(`Session ${sid} expired (idle)`);
|
|
858
2580
|
}
|
|
859
2581
|
}
|
|
@@ -861,9 +2583,68 @@ async function main() {
|
|
|
861
2583
|
}, 5 * 60 * 1000);
|
|
862
2584
|
// Single-tenant auth gate. No credentials configured → anonymous (current
|
|
863
2585
|
// behaviour, fully backward compatible). Configured → require a valid
|
|
2586
|
+
// Per-identity sliding-window rate limit on the MCP HTTP transport.
|
|
2587
|
+
// Each request from a named bearer-token caller increments that
|
|
2588
|
+
// caller's bucket; once the per-window cap is hit the server replies
|
|
2589
|
+
// 429 with a Retry-After. Anonymous /mcp traffic (no OMCP_API_KEYS
|
|
2590
|
+
// configured) bypasses this — the global express-rate-limit IP gate
|
|
2591
|
+
// still applies. Override via OMCP_TOOL_RATE_PER_MIN.
|
|
2592
|
+
// Per-credential cap overrides: OMCP_KEY_RATE_PER_MIN="agent=600;ci=240"
|
|
2593
|
+
// wins over the global OMCP_TOOL_RATE_PER_MIN for the named credentials.
|
|
2594
|
+
// The bucket identity is "<tenant> <credName>"; the override map keys on
|
|
2595
|
+
// credName, so the lookup pulls the cred-name back out of the composite.
|
|
2596
|
+
const keyRateLimits = parseKeyRateLimits(process.env.OMCP_KEY_RATE_PER_MIN);
|
|
2597
|
+
const toolRateLimiter = new IdentityRateLimiter({
|
|
2598
|
+
limit: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
|
|
2599
|
+
limitFor: keyRateLimits.size === 0 ? undefined : (identity) => {
|
|
2600
|
+
// Composite identity is "<tenant> <credName>" — split on the
|
|
2601
|
+
// single space that gateCtx put there (NUL would be safer but
|
|
2602
|
+
// would break existing /api/usage actor labels; cred names are
|
|
2603
|
+
// operator-set and don't contain spaces in practice).
|
|
2604
|
+
const sp = identity.indexOf(" ");
|
|
2605
|
+
const credName = sp >= 0 ? identity.slice(sp + 1) : identity;
|
|
2606
|
+
return keyRateLimits.get(credName);
|
|
2607
|
+
},
|
|
2608
|
+
});
|
|
2609
|
+
// Per-identity tracker key. Composes tenant + principalId so two
// credentials of the same name in different tenants don't share
// a bucket. Surface-level fields in /api/usage are still split
// back out (see the row builder there) so the UI shows clean
// actor + tenant columns.
//
// The separator is spelled as an explicit escape so that the builder and
// the splitter below can never drift apart and the character survives
// copy/paste. Splitting on an empty string would always match at index 0
// and chop the first character off every actor.
// NOTE(review): confirm this is the same separator used by any keys that
// are already persisted (e.g. a token-budget snapshot) before shipping.
const IDENTITY_KEY_SEPARATOR = "\u0000";
const identityKey = (ctx) => `${ctx.tenant}${IDENTITY_KEY_SEPARATOR}${ctx.principalId}`;
/**
 * Inverse of `identityKey`.
 *
 * @param {string} key - composite key produced by `identityKey`, or a bare
 *   actor name that carries no separator.
 * @returns {{ tenant: string, actor: string }} the two halves of the key;
 *   a key without a separator is reported under the "default" tenant.
 */
function splitIdentityKey(key) {
    const i = key.indexOf(IDENTITY_KEY_SEPARATOR);
    if (i < 0)
        return { tenant: "default", actor: key };
    return { tenant: key.slice(0, i), actor: key.slice(i + IDENTITY_KEY_SEPARATOR.length) };
}
|
|
2621
|
+
// Token-budget: per-identity 24h rolling daily cap on tokens pulled
|
|
2622
|
+
// through the MCP tool layer. Off by default (OMCP_TOOL_DAILY_TOKENS
|
|
2623
|
+
// unset/zero/negative). When configured, big-data tools
|
|
2624
|
+
// (query_logs / query_metrics / get_service_health) charge the
|
|
2625
|
+
// estimated response size against the cap; over-cap calls return a
|
|
2626
|
+
// structured OMCP_TOKEN_BUDGET_EXCEEDED payload instead of data.
|
|
2627
|
+
const tokenBudget = new TokenBudget({
|
|
2628
|
+
dailyLimit: resolveDailyTokenLimit(process.env.OMCP_TOOL_DAILY_TOKENS),
|
|
2629
|
+
filePath: process.env.OMCP_TOKEN_BUDGET_FILE?.trim() || undefined,
|
|
2630
|
+
});
|
|
2631
|
+
// AWAIT bootstrap before any tool call can arrive: a void-fired
|
|
2632
|
+
// bootstrap raced with /mcp requests would silently overwrite
|
|
2633
|
+
// post-boot charges with the on-disk snapshot when it later
|
|
2634
|
+
// resolved. The file is small (KB range) so the wait is
|
|
2635
|
+
// negligible; a missing file returns immediately.
|
|
2636
|
+
await tokenBudget.bootstrap();
|
|
2637
|
+
// Flush on graceful shutdown so the debounce-window of pending
|
|
2638
|
+
// charges isn't dropped on `kubectl rollout restart` etc. The
|
|
2639
|
+
// process keeps running while we wait — the snapshot is small.
|
|
2640
|
+
for (const sig of ["SIGTERM", "SIGINT"]) {
|
|
2641
|
+
process.once(sig, () => {
|
|
2642
|
+
void tokenBudget.flushNow().catch(() => { });
|
|
2643
|
+
});
|
|
2644
|
+
}
|
|
864
2645
|
// Bearer/X-API-Key on every /mcp request; resolve the principal + its
|
|
865
2646
|
// coarse source allow-list into the RequestContext.
|
|
866
|
-
function gateCtx(req, res) {
|
|
2647
|
+
async function gateCtx(req, res) {
|
|
867
2648
|
if (!credentialsConfigured())
|
|
868
2649
|
return defaultContext();
|
|
869
2650
|
const cred = resolveToken(extractToken(req.headers), loadCredentials());
|
|
@@ -873,10 +2654,54 @@ async function main() {
|
|
|
873
2654
|
.json({ error: "unauthorized: valid Bearer token or X-API-Key required" });
|
|
874
2655
|
return null;
|
|
875
2656
|
}
|
|
876
|
-
|
|
2657
|
+
// Composite tenant:cred-name key so two creds with the same
|
|
2658
|
+
// name in different tenants don't share a bucket.
|
|
2659
|
+
const credTenant = (cred.tenant || "default");
|
|
2660
|
+
const decision = toolRateLimiter.check(`${credTenant} ${cred.name}`);
|
|
2661
|
+
// Standard RateLimit response headers — let well-behaved clients
|
|
2662
|
+
// self-pace before they hit a 429. Emitted on BOTH allowed and
|
|
2663
|
+
// denied paths so the caller always sees the live state.
|
|
2664
|
+
res.setHeader("X-RateLimit-Limit", String(decision.limit));
|
|
2665
|
+
res.setHeader("X-RateLimit-Remaining", String(Math.max(0, decision.limit - decision.count)));
|
|
2666
|
+
res.setHeader("X-RateLimit-Window-Ms", String(decision.windowMs));
|
|
2667
|
+
if (!decision.allowed) {
|
|
2668
|
+
res.setHeader("Retry-After", String(decision.retryAfterSeconds));
|
|
2669
|
+
res.status(429).json({
|
|
2670
|
+
error: "rate limit exceeded for identity",
|
|
2671
|
+
code: "OMCP_IDENTITY_RATE_LIMIT",
|
|
2672
|
+
retryAfterSeconds: decision.retryAfterSeconds,
|
|
2673
|
+
limit: decision.limit,
|
|
2674
|
+
windowMs: decision.windowMs,
|
|
2675
|
+
});
|
|
2676
|
+
return null;
|
|
2677
|
+
}
|
|
2678
|
+
// Resolve the credential's bound Product (OMCP_KEY_PRODUCTS) into
|
|
2679
|
+
// a concrete tools allow-list. Cross-tenant Products are invisible
|
|
2680
|
+
// — products.get() returns undefined when the productId belongs to
|
|
2681
|
+
// another tenant, mirroring the rest of the tenancy layer. A bound
|
|
2682
|
+
// Product whose own `tools` field is absent / empty leaves the
|
|
2683
|
+
// allow-list undefined (== unrestricted), matching the YAML
|
|
2684
|
+
// loader's "no tools key = no restriction" semantics.
|
|
2685
|
+
let allowedTools;
|
|
2686
|
+
if (cred.productId) {
|
|
2687
|
+
// Pick up out-of-band edits to OMCP_PRODUCTS_FILE before each
|
|
2688
|
+
// /mcp request — cheap (one stat), keeps the binding live.
|
|
2689
|
+
// Best-effort: if the catalogue reload fails we keep the prior
|
|
2690
|
+
// good state (the store handles that internally) rather than
|
|
2691
|
+
// failing the request.
|
|
2692
|
+
await products.maybeReload().catch(() => undefined);
|
|
2693
|
+
const p = products.get(cred.productId, credTenant);
|
|
2694
|
+
if (p && p.tools && p.tools.length > 0)
|
|
2695
|
+
allowedTools = p.tools.slice();
|
|
2696
|
+
}
|
|
2697
|
+
return principalContext(cred.name, cred.allowedSources, {
|
|
2698
|
+
allowBypassRedaction: cred.bypassRedaction,
|
|
2699
|
+
tenant: cred.tenant,
|
|
2700
|
+
allowedTools,
|
|
2701
|
+
});
|
|
877
2702
|
}
|
|
878
2703
|
app.post("/mcp", async (req, res) => {
|
|
879
|
-
const ctx = gateCtx(req, res);
|
|
2704
|
+
const ctx = await gateCtx(req, res);
|
|
880
2705
|
if (!ctx)
|
|
881
2706
|
return;
|
|
882
2707
|
const sessionId = req.headers["mcp-session-id"];
|
|
@@ -912,7 +2737,7 @@ async function main() {
|
|
|
912
2737
|
mcpActiveSessions.set(transports.size);
|
|
913
2738
|
});
|
|
914
2739
|
app.get("/mcp", async (req, res) => {
|
|
915
|
-
if (!gateCtx(req, res))
|
|
2740
|
+
if (!(await gateCtx(req, res)))
|
|
916
2741
|
return;
|
|
917
2742
|
const sessionId = req.headers["mcp-session-id"];
|
|
918
2743
|
const transport = transports.get(sessionId);
|
|
@@ -923,7 +2748,7 @@ async function main() {
|
|
|
923
2748
|
await transport.handleRequest(req, res);
|
|
924
2749
|
});
|
|
925
2750
|
app.delete("/mcp", async (req, res) => {
|
|
926
|
-
if (!gateCtx(req, res))
|
|
2751
|
+
if (!(await gateCtx(req, res)))
|
|
927
2752
|
return;
|
|
928
2753
|
const sessionId = req.headers["mcp-session-id"];
|
|
929
2754
|
const transport = transports.get(sessionId);
|
|
@@ -931,18 +2756,244 @@ async function main() {
|
|
|
931
2756
|
await transport.handleRequest(req, res);
|
|
932
2757
|
transports.delete(sessionId);
|
|
933
2758
|
sessionLastActive.delete(sessionId);
|
|
2759
|
+
sessionProduct.delete(sessionId);
|
|
2760
|
+
}
|
|
2761
|
+
else {
|
|
2762
|
+
res.status(400).json({ error: "No active session" });
|
|
2763
|
+
}
|
|
2764
|
+
});
|
|
2765
|
+
// Phase F9: virtual servers — every Product gets its own MCP
|
|
2766
|
+
// endpoint at /mcp/v/<slug> that exposes only the tools bound to
|
|
2767
|
+
// that Product, with the caller's existing tenant + RBAC scoping
|
|
2768
|
+
// preserved. The narrow ctx flows into createMcpServer's
|
|
2769
|
+
// registerTool gate, so the surface a /mcp/v/<slug> client sees is
|
|
2770
|
+
// strictly product.tools (intersected with any pre-existing
|
|
2771
|
+
// allowedTools the credential already carries).
|
|
2772
|
+
/**
 * Intersect two optional tool allow-lists.
 *
 * A missing (falsy) list means "no restriction", so the other list wins
 * unchanged; when both are present the result keeps the entries of `a`
 * that also occur in `b`, in `a`'s order.
 *
 * The result is always a fresh array (never `a` or `b` themselves) so the
 * caller can store it on a request context without aliasing the list it
 * was derived from.
 *
 * @param {string[] | undefined} a - first allow-list, or undefined for "unrestricted".
 * @param {string[] | undefined} b - second allow-list, or undefined for "unrestricted".
 * @returns {string[] | undefined} the combined allow-list; undefined only
 *   when neither side restricts anything.
 */
function intersectAllowed(a, b) {
    if (!a)
        return b ? [...b] : b;
    if (!b)
        return [...a];
    const permitted = new Set(b);
    return a.filter((tool) => permitted.has(tool));
}
|
|
2780
|
+
/**
 * Resolve the Product addressed by a /mcp/v/:slug request and narrow the
 * caller's context to that Product's tools.
 *
 * On failure the 404 response is written here and `null` is returned, so
 * callers only need to bail out.
 *
 * @param req - request; `req.params.slug` names the Product.
 * @param res - response; used only to send the 404.
 * @param baseCtx - context produced by the auth gate; `tenant` and
 *   `allowedTools` are read from it, the object itself is not mutated.
 * @returns {Promise<{ product: object, ctx: object } | null>} the Product and
 *   a copy of `baseCtx` whose `allowedTools` is narrowed to it, or null
 *   after a 404 has been sent.
 */
async function resolveVirtualProduct(req, res, baseCtx) {
    const slug = req.params.slug;
    if (!slug || typeof slug !== "string") {
        res.status(404).json({ error: "virtual server not found" });
        return null;
    }
    // Hot-reload aware so newly-published products are visible
    // without restart. Best-effort: a failed reload is ignored.
    await products.maybeReload().catch(() => undefined);
    const tenant = baseCtx.tenant || "default";
    const product = products.get(slug, tenant);
    if (!product || product.status === "staging") {
        // 404 (not 403) for missing, staging, or not visible to this
        // tenant, so the response does not reveal which case applied.
        res.status(404).json({ error: "virtual server not found" });
        return null;
    }
    // An absent or empty tools list means "no restriction", the same rule
    // the product preview endpoint and the credential gates apply. Passing
    // an empty array straight into the intersection would instead strip
    // every tool from the context.
    const productTools = product.tools && product.tools.length > 0 ? product.tools : undefined;
    const allowedTools = intersectAllowed(baseCtx.allowedTools, productTools);
    const ctx = { ...baseCtx, allowedTools };
    return { product, ctx };
}
|
|
2801
|
+
// POST /mcp/v/:slug: request entry point of a Product's virtual MCP server.
// Authenticates via gateCtx, resolves the Product, then either reuses the
// session named by the mcp-session-id header or creates a new transport +
// MCP server pair bound to the narrowed context.
app.post("/mcp/v/:slug", async (req, res) => {
    const baseCtx = await gateCtx(req, res);
    if (!baseCtx)
        return;
    const resolved = await resolveVirtualProduct(req, res, baseCtx);
    if (!resolved)
        return;
    const { ctx, product } = resolved;
    const sessionId = req.headers["mcp-session-id"];
    let transport;
    if (sessionId && transports.has(sessionId)) {
        // Cross-product session probe is rejected: the session is
        // bound to whichever virtual server issued it.
        if (sessionProduct.get(sessionId) !== product.id) {
            res.status(404).json({ error: "virtual server not found" });
            return;
        }
        transport = transports.get(sessionId);
    }
    else {
        transport = new StreamableHTTPServerTransport({
            sessionIdGenerator: () => randomUUID(),
        });
        transport.onclose = () => {
            for (const [sid, t] of transports) {
                if (t === transport) {
                    // Drop every per-session record, mirroring the DELETE
                    // handler, so a closed session leaves no last-active
                    // entry behind for the idle sweep to report later.
                    transports.delete(sid);
                    sessionLastActive.delete(sid);
                    sessionProduct.delete(sid);
                    break;
                }
            }
            mcpActiveSessions.set(transports.size);
        };
        const sessionMcpServer = createMcpServer(ctx);
        await sessionMcpServer.connect(transport);
    }
    await transport.handleRequest(req, res, req.body);
    // The session id is read back from the response header; a transport
    // seen for the first time is registered and tagged with its Product.
    const sid = res.getHeader("mcp-session-id");
    if (sid) {
        if (!transports.has(sid)) {
            transports.set(sid, transport);
            sessionProduct.set(sid, product.id);
        }
        sessionLastActive.set(sid, Date.now());
    }
    mcpActiveSessions.set(transports.size);
});
|
|
2848
|
+
// GET /mcp/v/:slug: hands the request to the transport of an existing
// session, but only when that session was issued by this same Product.
app.get("/mcp/v/:slug", async (req, res) => {
    const baseCtx = await gateCtx(req, res);
    if (!baseCtx) {
        return;
    }
    const resolved = await resolveVirtualProduct(req, res, baseCtx);
    if (!resolved) {
        return;
    }
    const sid = req.headers["mcp-session-id"];
    const boundTransport = transports.get(sid);
    // Unknown session and session-of-another-product get the same answer.
    if (boundTransport && sessionProduct.get(sid) === resolved.product.id) {
        await boundTransport.handleRequest(req, res);
        return;
    }
    res.status(400).json({ error: "No active session" });
});
|
|
2863
|
+
// DELETE /mcp/v/:slug: lets the transport process the request, then
// forgets the session. Only sessions issued by this Product qualify.
app.delete("/mcp/v/:slug", async (req, res) => {
    const baseCtx = await gateCtx(req, res);
    if (!baseCtx) {
        return;
    }
    const resolved = await resolveVirtualProduct(req, res, baseCtx);
    if (!resolved) {
        return;
    }
    const sid = req.headers["mcp-session-id"];
    const boundTransport = transports.get(sid);
    if (!boundTransport || sessionProduct.get(sid) !== resolved.product.id) {
        res.status(400).json({ error: "No active session" });
        return;
    }
    await boundTransport.handleRequest(req, res);
    // Clear all three per-session tables once the transport has answered.
    for (const table of [transports, sessionLastActive, sessionProduct]) {
        table.delete(sid);
    }
});
|
|
2882
|
+
// Bearer-token resolver for WebSocket upgrade requests. Browsers
|
|
2883
|
+
// can't set Authorization on a WS handshake, so we accept the token
|
|
2884
|
+
// from any of: Authorization: Bearer X, ?token=X, or the
|
|
2885
|
+
// Sec-WebSocket-Protocol subprotocol "bearer.X" (echoed back by the
|
|
2886
|
+
// server when accepted so clients see which subprotocol won).
|
|
2887
|
+
/**
 * Pull a bearer token out of a WebSocket upgrade request.
 *
 * Sources are tried in order and the first hit wins:
 *   1. `Authorization: Bearer <token>` header,
 *   2. `?token=<token>` query parameter,
 *   3. a `bearer.<token>` entry in `Sec-WebSocket-Protocol`.
 *
 * @param req - upgrade request; only `headers` and `url` are read.
 * @returns {{ token?: string, selectedSubprotocol?: string }} the token
 *   (plus the matching subprotocol entry when it came from source 3), or
 *   an empty object when no source carried one.
 */
function extractWsToken(req) {
    const authHeader = req.headers["authorization"];
    const bearerMatch = typeof authHeader === "string"
        ? authHeader.match(/^Bearer\s+(.+)$/i)
        : null;
    if (bearerMatch) {
        return { token: bearerMatch[1] };
    }
    let queryToken = null;
    try {
        queryToken = new URL(req.url ?? "/", "http://localhost").searchParams.get("token");
    }
    catch {
        /* malformed URL */
    }
    if (queryToken) {
        return { token: queryToken };
    }
    const protocolHeader = req.headers["sec-websocket-protocol"];
    if (typeof protocolHeader !== "string") {
        return {};
    }
    for (const rawEntry of protocolHeader.split(",")) {
        const candidate = rawEntry.trim();
        if (candidate.startsWith("bearer.")) {
            return { token: candidate.slice("bearer.".length), selectedSubprotocol: candidate };
        }
    }
    return {};
}
|
|
2912
|
+
/**
 * Auth gate for WebSocket upgrade requests.
 *
 * With no credentials configured every caller gets the default context.
 * Otherwise the token must resolve to a credential and that credential's
 * rate-limit bucket must still have room.
 *
 * @param req - upgrade request, passed through to `extractWsToken`.
 * @returns {Promise<object>} either `{ ctx, selectedSubprotocol }` on
 *   success, or `{ reject, reason }` where `reject` is 4401 (missing or
 *   invalid token) or 4429 (rate limit exceeded).
 */
async function gateWsCtx(req) {
    const extracted = extractWsToken(req);
    const selectedSubprotocol = extracted.selectedSubprotocol;
    if (!credentialsConfigured()) {
        return { ctx: defaultContext(), selectedSubprotocol };
    }
    const deny = (reject, reason) => ({ reject, reason });
    if (!extracted.token) {
        return deny(4401, "unauthorized: token required");
    }
    const cred = resolveToken(extracted.token, loadCredentials());
    if (!cred) {
        return deny(4401, "unauthorized: invalid token");
    }
    // Bucket per tenant + credential name, same composite the HTTP gate uses.
    const credTenant = cred.tenant || "default";
    const { allowed } = toolRateLimiter.check(`${credTenant} ${cred.name}`);
    if (!allowed) {
        return deny(4429, "rate limit exceeded for identity");
    }
    // A bound Product with a non-empty tools list restricts the context;
    // anything else leaves allowedTools undefined.
    let allowedTools;
    if (cred.productId) {
        await products.maybeReload().catch(() => undefined);
        const boundProduct = products.get(cred.productId, credTenant);
        const toolList = boundProduct ? boundProduct.tools : undefined;
        if (toolList && toolList.length > 0) {
            allowedTools = toolList.slice();
        }
    }
    const ctx = principalContext(cred.name, cred.allowedSources, {
        allowBypassRedaction: cred.bypassRedaction,
        tenant: cred.tenant,
        allowedTools,
    });
    return { ctx, selectedSubprotocol };
}
|
|
939
2945
|
const PORT = parseInt(process.env.PORT || "3000");
|
|
940
|
-
app.listen(PORT, () => {
|
|
2946
|
+
const httpServer = app.listen(PORT, () => {
|
|
941
2947
|
ready = true;
|
|
942
2948
|
console.log(`observability-mcp server running on port ${PORT}`);
|
|
943
2949
|
console.log(` MCP endpoint: http://localhost:${PORT}/mcp`);
|
|
2950
|
+
console.log(` MCP (WS): ws://localhost:${PORT}/mcp/ws`);
|
|
944
2951
|
console.log(` Web UI: http://localhost:${PORT}`);
|
|
945
2952
|
console.log(` Connectors: ${registry.getAll().map((c) => c.name).join(", ")}`);
|
|
946
2953
|
});
|
|
2954
|
+
// Mount the WebSocket MCP transport. One McpServer instance per
|
|
2955
|
+
// accepted socket; per-connection state is carried in
|
|
2956
|
+
// WebSocketServerTransport.sessionId so concurrent clients stay
|
|
2957
|
+
// isolated. Dynamic import so the `ws` package only loads on
|
|
2958
|
+
// platforms that actually use this transport.
|
|
2959
|
+
const { WebSocketServer } = await import("ws");
|
|
2960
|
+
const wss = new WebSocketServer({ noServer: true });
|
|
2961
|
+
// HTTP upgrade handler for the WebSocket MCP transport. Only /mcp/ws is
// served; any other upgrade target has its socket destroyed.
httpServer.on("upgrade", async (req, socket, head) => {
    if (!req.url) {
        socket.destroy();
        return;
    }
    const path = req.url.split("?")[0];
    if (path !== "/mcp/ws") {
        socket.destroy();
        return;
    }
    // This listener is async, so a throw from the auth gate would surface
    // as an unhandled promise rejection and leave the socket dangling.
    // Catch it here and drop the connection instead.
    let auth;
    try {
        auth = await gateWsCtx(req);
    }
    catch (err) {
        console.warn("WS /mcp/ws auth gate failed:", err);
        socket.destroy();
        return;
    }
    if ("reject" in auth) {
        // A rejection reason can't be conveyed through the HTTP upgrade
        // response, so the upgrade is accepted just long enough to close
        // with a WS-level close code carrying the reason: 1013 for the
        // rate-limit rejection (4429), 1008 for everything else.
        wss.handleUpgrade(req, socket, head, (ws) => {
            ws.close(auth.reject === 4429 ? 1013 : 1008, auth.reason);
        });
        return;
    }
    wss.handleUpgrade(req, socket, head, async (ws) => {
        try {
            // One transport + MCP server per accepted socket.
            const transport = new WebSocketServerTransport(ws);
            const sessionMcpServer = createMcpServer(auth.ctx);
            await sessionMcpServer.connect(transport);
        }
        catch (err) {
            console.warn("WS /mcp/ws session setup failed:", err);
            try {
                ws.close(1011, "server error");
            }
            catch {
                /* socket already gone */
            }
        }
    });
});
|
|
947
2998
|
}
|
|
948
2999
|
main().catch(console.error);
|