@thotischner/observability-mcp 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audit/sinks/s3.d.ts +61 -0
- package/dist/audit/sinks/s3.js +179 -0
- package/dist/audit/sinks/s3.test.d.ts +1 -0
- package/dist/audit/sinks/s3.test.js +175 -0
- package/dist/auth/policy/batch-dry-run.js +15 -0
- package/dist/connectors/loader.d.ts +8 -0
- package/dist/connectors/loader.js +49 -0
- package/dist/connectors/manifest-hooks.test.d.ts +1 -0
- package/dist/connectors/manifest-hooks.test.js +206 -0
- package/dist/federation/registry.d.ts +27 -5
- package/dist/federation/registry.js +49 -4
- package/dist/federation/registry.test.js +79 -3
- package/dist/federation/upstream.d.ts +32 -6
- package/dist/federation/upstream.js +60 -12
- package/dist/federation/upstream.test.d.ts +1 -0
- package/dist/federation/upstream.test.js +118 -0
- package/dist/index.js +306 -65
- package/dist/metrics/self.d.ts +1 -0
- package/dist/metrics/self.js +8 -0
- package/dist/policy/redact.js +1 -1
- package/dist/postmortem/store.d.ts +34 -0
- package/dist/postmortem/store.js +113 -0
- package/dist/postmortem/store.test.d.ts +1 -0
- package/dist/postmortem/store.test.js +118 -0
- package/dist/scim/compliance.test.d.ts +1 -0
- package/dist/scim/compliance.test.js +169 -0
- package/dist/scim/factory.test.d.ts +1 -0
- package/dist/scim/factory.test.js +54 -0
- package/dist/scim/patch-ops.test.d.ts +1 -0
- package/dist/scim/patch-ops.test.js +100 -0
- package/dist/scim/redis-store.d.ts +38 -0
- package/dist/scim/redis-store.js +178 -0
- package/dist/scim/redis-store.test.d.ts +1 -0
- package/dist/scim/redis-store.test.js +138 -0
- package/dist/scim/routes.d.ts +27 -2
- package/dist/scim/routes.js +161 -15
- package/dist/scim/store.d.ts +40 -1
- package/dist/scim/store.js +23 -5
- package/dist/sdk/hook-wrappers.d.ts +39 -0
- package/dist/sdk/hook-wrappers.js +113 -0
- package/dist/sdk/hook-wrappers.test.d.ts +1 -0
- package/dist/sdk/hook-wrappers.test.js +204 -0
- package/dist/sdk/index.d.ts +13 -0
- package/dist/tools/detect-anomalies.d.ts +12 -1
- package/dist/tools/detect-anomalies.js +22 -2
- package/dist/tools/topology.js +23 -5
- package/dist/tools/topology.test.js +45 -0
- package/dist/transport/transportSessionMap.d.ts +70 -0
- package/dist/transport/transportSessionMap.js +128 -0
- package/dist/transport/transportSessionMap.test.d.ts +1 -0
- package/dist/transport/transportSessionMap.test.js +111 -0
- package/dist/ui/index.html +856 -101
- package/package.json +1 -1
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { test } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { UpstreamClient } from "./upstream.js";
|
|
4
|
+
test("UpstreamClient: HTTP config — transportKind='http', url surfaced", () => {
|
|
5
|
+
const cfg = {
|
|
6
|
+
name: "remote",
|
|
7
|
+
url: "https://gw.example.com/mcp",
|
|
8
|
+
bearerToken: "t0k",
|
|
9
|
+
};
|
|
10
|
+
const c = new UpstreamClient(cfg);
|
|
11
|
+
assert.equal(c.transportKind, "http");
|
|
12
|
+
assert.equal(c.url, "https://gw.example.com/mcp");
|
|
13
|
+
assert.equal(c.namespacePrefix, "remote");
|
|
14
|
+
assert.deepEqual(c.getTools(), []);
|
|
15
|
+
});
|
|
16
|
+
test("UpstreamClient: stdio config — transportKind='stdio', url shows command", () => {
|
|
17
|
+
const cfg = {
|
|
18
|
+
transport: "stdio",
|
|
19
|
+
name: "local-mcp",
|
|
20
|
+
command: "/usr/local/bin/mcp",
|
|
21
|
+
args: ["--config", "/etc/mcp.yaml"],
|
|
22
|
+
};
|
|
23
|
+
const c = new UpstreamClient(cfg);
|
|
24
|
+
assert.equal(c.transportKind, "stdio");
|
|
25
|
+
assert.equal(c.url, "stdio:/usr/local/bin/mcp");
|
|
26
|
+
assert.equal(c.namespacePrefix, "local-mcp");
|
|
27
|
+
});
|
|
28
|
+
test("UpstreamClient: stdio config respects custom namespacePrefix", () => {
|
|
29
|
+
const cfg = {
|
|
30
|
+
transport: "stdio",
|
|
31
|
+
name: "weather",
|
|
32
|
+
command: "weather-mcp",
|
|
33
|
+
namespacePrefix: "weather.local",
|
|
34
|
+
};
|
|
35
|
+
const c = new UpstreamClient(cfg);
|
|
36
|
+
assert.equal(c.namespacePrefix, "weather.local");
|
|
37
|
+
});
|
|
38
|
+
test("UpstreamClient: explicit transport='http' is also accepted", () => {
|
|
39
|
+
const cfg = {
|
|
40
|
+
transport: "http",
|
|
41
|
+
name: "gw",
|
|
42
|
+
url: "https://gw.example.com/mcp",
|
|
43
|
+
};
|
|
44
|
+
const c = new UpstreamClient(cfg);
|
|
45
|
+
assert.equal(c.transportKind, "http");
|
|
46
|
+
});
|
|
47
|
+
test("UpstreamClient: ws transport surfaces the ws:// URL", () => {
|
|
48
|
+
const cfg = {
|
|
49
|
+
transport: "ws",
|
|
50
|
+
name: "gw",
|
|
51
|
+
url: "wss://gw.example.com/mcp/ws",
|
|
52
|
+
};
|
|
53
|
+
const c = new UpstreamClient(cfg);
|
|
54
|
+
assert.equal(c.transportKind, "ws");
|
|
55
|
+
assert.equal(c.url, "wss://gw.example.com/mcp/ws");
|
|
56
|
+
});
|
|
57
|
+
test("UpstreamClient: empty args defaults to [] on stdio", () => {
|
|
58
|
+
const cfg = {
|
|
59
|
+
transport: "stdio",
|
|
60
|
+
name: "x",
|
|
61
|
+
command: "x",
|
|
62
|
+
};
|
|
63
|
+
const c = new UpstreamClient(cfg);
|
|
64
|
+
// Just verifies construction doesn't throw on a minimal stdio config.
|
|
65
|
+
assert.equal(c.transportKind, "stdio");
|
|
66
|
+
});
|
|
67
|
+
test("UpstreamClient: getStatus initial state", () => {
|
|
68
|
+
const c = new UpstreamClient({ name: "x", url: "https://x/mcp" });
|
|
69
|
+
const s = c.getStatus();
|
|
70
|
+
assert.equal(s.status, "disconnected");
|
|
71
|
+
assert.equal(s.toolCount, 0);
|
|
72
|
+
assert.equal(s.lastError, undefined);
|
|
73
|
+
});
|
|
74
|
+
test("UpstreamClient: connect uses injected _transport instead of spawning / fetching", async () => {
|
|
75
|
+
// Build a minimal MCP Transport stub that also COMPLETES the
|
|
76
|
+
// initialize handshake — when the SDK Client sends a JSON-RPC
|
|
77
|
+
// request, we synthesise a matching response on onmessage so the
|
|
78
|
+
// initialize promise resolves quickly (no 60s SDK timeout).
|
|
79
|
+
let started = false;
|
|
80
|
+
let sentMessages = 0;
|
|
81
|
+
const fakeTransport = {
|
|
82
|
+
start: async () => { started = true; },
|
|
83
|
+
send: async (msg) => {
|
|
84
|
+
sentMessages += 1;
|
|
85
|
+
if (msg?.method === "initialize" && msg?.id !== undefined) {
|
|
86
|
+
queueMicrotask(() => {
|
|
87
|
+
fakeTransport.onmessage?.({
|
|
88
|
+
jsonrpc: "2.0",
|
|
89
|
+
id: msg.id,
|
|
90
|
+
result: { protocolVersion: "2024-11-05", capabilities: {}, serverInfo: { name: "fake", version: "1" } },
|
|
91
|
+
});
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
else if (msg?.method === "tools/list" && msg?.id !== undefined) {
|
|
95
|
+
queueMicrotask(() => {
|
|
96
|
+
fakeTransport.onmessage?.({ jsonrpc: "2.0", id: msg.id, result: { tools: [] } });
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
},
|
|
100
|
+
close: async () => { },
|
|
101
|
+
onclose: undefined,
|
|
102
|
+
onerror: undefined,
|
|
103
|
+
onmessage: undefined,
|
|
104
|
+
};
|
|
105
|
+
const c = new UpstreamClient({
|
|
106
|
+
name: "injected",
|
|
107
|
+
url: "https://ignored.example/mcp",
|
|
108
|
+
refreshIntervalMs: 0,
|
|
109
|
+
_transport: fakeTransport,
|
|
110
|
+
});
|
|
111
|
+
await c.connect();
|
|
112
|
+
await c.close();
|
|
113
|
+
assert.equal(started, true, "fake transport.start() should have been called");
|
|
114
|
+
assert.ok(sentMessages >= 1, "fake transport.send() should have received initialize");
|
|
115
|
+
// Status reaches "ready" only when initialize + tools/list both succeed
|
|
116
|
+
// — confirms our injected transport drove the whole handshake.
|
|
117
|
+
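// (connect-time errors leave it in "degraded".) NOTE(review): no assertion on c.getStatus() follows, so this test does not actually check the status.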
|
|
118
|
+
});
|
package/dist/index.js
CHANGED
|
@@ -19,7 +19,7 @@ import { buildSessionAttacher, buildRequireSession, } from "./auth/middleware.js
|
|
|
19
19
|
import { buildRequirePermissionFromEngine, hasPermission, listGrantedPermissions, DEFAULT_POLICY, } from "./auth/rbac.js";
|
|
20
20
|
import { resolveOidcConfig, buildOidcRuntime } from "./auth/oidc/runtime.js";
|
|
21
21
|
import { registerOidcRoutes } from "./auth/oidc/endpoints.js";
|
|
22
|
-
import {
|
|
22
|
+
import { createScimStore } from "./scim/store.js";
|
|
23
23
|
import { registerScimRoutes } from "./scim/routes.js";
|
|
24
24
|
import { BuiltinPolicyEngine } from "./auth/policy/engine.js";
|
|
25
25
|
import { loadPolicyFromFile, writePolicyFile, PolicyLoadError, VALID_RESOURCES, VALID_ACTIONS } from "./auth/policy/loader.js";
|
|
@@ -40,10 +40,11 @@ import { getPluginLoader } from "./connectors/loader.js";
|
|
|
40
40
|
import { resolveHubCatalogUrl, describeInstalled, mergeCatalog, fetchHubCatalog, } from "./connectors/hub.js";
|
|
41
41
|
import { isValidConnectorName, installTarball } from "./connectors/install.js";
|
|
42
42
|
import { PluginVerificationError } from "./connectors/verify.js";
|
|
43
|
-
import { selfRegistry, withToolMetrics, apiRequests, mcpActiveSessions } from "./metrics/self.js";
|
|
43
|
+
import { selfRegistry, withToolMetrics, apiRequests, mcpActiveSessions, auditDlqDepth } from "./metrics/self.js";
|
|
44
44
|
import { initOtel } from "./observability/otel.js";
|
|
45
45
|
import { WebSocketServerTransport } from "./transport/websocket.js";
|
|
46
46
|
import { HookRegistry } from "./sdk/hooks.js";
|
|
47
|
+
import { wrapToolHandler, wrapResourceHandler, wrapPromptHandler } from "./sdk/hook-wrappers.js";
|
|
47
48
|
import { UpstreamClient } from "./federation/upstream.js";
|
|
48
49
|
import { FederationRegistry, parseFederationEnv } from "./federation/registry.js";
|
|
49
50
|
import { buildCsrfIssuer, buildCsrfEnforcer, csrfBypassFromEnv } from "./auth/csrf.js";
|
|
@@ -56,6 +57,7 @@ import { queryLogsHandler } from "./tools/query-logs.js";
|
|
|
56
57
|
import { queryTracesHandler } from "./tools/query-traces.js";
|
|
57
58
|
import { getAnomalyHistoryHandler } from "./tools/get-anomaly-history.js";
|
|
58
59
|
import { generatePostmortemHandler } from "./tools/generate-postmortem.js";
|
|
60
|
+
import { PostmortemStore } from "./postmortem/store.js";
|
|
59
61
|
import { AnomalyHistory, fromEnv as anomalyHistoryFromEnv } from "./analysis/history.js";
|
|
60
62
|
import { getServiceHealthHandler, setHealthThresholds } from "./tools/get-service-health.js";
|
|
61
63
|
import { detectAnomaliesHandler } from "./tools/detect-anomalies.js";
|
|
@@ -295,11 +297,20 @@ async function main() {
|
|
|
295
297
|
return result;
|
|
296
298
|
}
|
|
297
299
|
}
|
|
300
|
+
/**
|
|
301
|
+
* Returns `{ mcpServer, toolHandlers }` for the given context. The companion
|
|
302
|
+
* `toolHandlers` map carries every tool registered for this ctx
|
|
303
|
+
* (post-hook-wrapping) so the in-product Playground UI (Q13) can
|
|
304
|
+
* invoke a tool without going through the full Streamable HTTP
|
|
305
|
+
* transport stack. The map is keyed by tool name; values run the
|
|
306
|
+
* same wrapped handler the McpServer would dispatch over MCP.
|
|
307
|
+
*/
|
|
298
308
|
function createMcpServer(ctx) {
|
|
299
309
|
const mcpServer = new McpServer({
|
|
300
310
|
name: "observability-mcp",
|
|
301
311
|
version: SERVER_VERSION,
|
|
302
312
|
});
|
|
313
|
+
const toolHandlers = new Map();
|
|
303
314
|
// --- Register tools with Zod schemas ---
|
|
304
315
|
// Product-aware registration: when the active credential is bound
|
|
305
316
|
// to a Product (OMCP_KEY_PRODUCTS), `ctx.allowedTools` carries that
|
|
@@ -319,34 +330,39 @@ async function main() {
|
|
|
319
330
|
return undefined;
|
|
320
331
|
if (rest.length > 0 && typeof rest[rest.length - 1] === "function") {
|
|
321
332
|
const originalHandler = rest[rest.length - 1];
|
|
322
|
-
const wrappedHandler =
|
|
323
|
-
const hookCtxBase = {
|
|
324
|
-
principal: ctx.principalId,
|
|
325
|
-
tenant: ctx.tenant || "default",
|
|
326
|
-
target: name,
|
|
327
|
-
};
|
|
328
|
-
const pre = await hookRegistry.fire("tool_pre_invoke", { ...hookCtxBase, kind: "tool_pre_invoke" }, { args });
|
|
329
|
-
if (!pre.allow) {
|
|
330
|
-
return {
|
|
331
|
-
content: [{ type: "text", text: pre.reason ?? "denied by plugin hook" }],
|
|
332
|
-
isError: true,
|
|
333
|
-
};
|
|
334
|
-
}
|
|
335
|
-
const effectiveArgs = pre.payload?.args ?? args;
|
|
336
|
-
const result = await originalHandler(effectiveArgs, extra);
|
|
337
|
-
const post = await hookRegistry.fire("tool_post_invoke", { ...hookCtxBase, kind: "tool_post_invoke" }, { args: effectiveArgs, result });
|
|
338
|
-
if (!post.allow) {
|
|
339
|
-
return {
|
|
340
|
-
content: [{ type: "text", text: post.reason ?? "denied by plugin hook" }],
|
|
341
|
-
isError: true,
|
|
342
|
-
};
|
|
343
|
-
}
|
|
344
|
-
return post.payload?.result ?? result;
|
|
345
|
-
};
|
|
333
|
+
const wrappedHandler = wrapToolHandler(hookRegistry, { principal: ctx.principalId, tenant: ctx.tenant || "default", target: name }, originalHandler);
|
|
346
334
|
rest[rest.length - 1] = wrappedHandler;
|
|
335
|
+
// Stash for the Playground endpoint — keyed by tool name. The
|
|
336
|
+
// wrapped handler honours pre/post hooks + the same RBAC the
|
|
337
|
+
// McpServer dispatch path runs. Per-ctx Map so a different
|
|
338
|
+
// user's allowedTools never leak.
|
|
339
|
+
toolHandlers.set(name, wrappedHandler);
|
|
347
340
|
}
|
|
348
341
|
return mcpServer.tool(name, ...rest);
|
|
349
342
|
});
|
|
343
|
+
// Q12: resource + prompt registrations get the same hook-fan-out
|
|
344
|
+
// treatment so a plugin's resource_pre_fetch / resource_post_fetch /
|
|
345
|
+
// prompt_pre_fetch / prompt_post_fetch handlers actually fire when
|
|
346
|
+
// a future resource/prompt registration lands. The wrappers stay
|
|
347
|
+
// thin pass-throughs when no hooks are registered (the OSS default).
|
|
348
|
+
// Wrappers are tested in mcp-server/src/sdk/hook-wrappers.test.ts.
|
|
349
|
+
const registerResource = ((name, ...rest) => {
|
|
350
|
+
if (rest.length > 0 && typeof rest[rest.length - 1] === "function") {
|
|
351
|
+
const originalHandler = rest[rest.length - 1];
|
|
352
|
+
rest[rest.length - 1] = wrapResourceHandler(hookRegistry, { principal: ctx.principalId, tenant: ctx.tenant || "default", target: name }, originalHandler);
|
|
353
|
+
}
|
|
354
|
+
return mcpServer.resource(name, ...rest);
|
|
355
|
+
});
|
|
356
|
+
const registerPrompt = ((name, ...rest) => {
|
|
357
|
+
if (rest.length > 0 && typeof rest[rest.length - 1] === "function") {
|
|
358
|
+
const originalHandler = rest[rest.length - 1];
|
|
359
|
+
rest[rest.length - 1] = wrapPromptHandler(hookRegistry, { principal: ctx.principalId, tenant: ctx.tenant || "default", target: name }, originalHandler);
|
|
360
|
+
}
|
|
361
|
+
return mcpServer.prompt(name, ...rest);
|
|
362
|
+
});
|
|
363
|
+
// Suppress unused-variable warnings — kept until resource/prompt registrations land.
|
|
364
|
+
void registerResource;
|
|
365
|
+
void registerPrompt;
|
|
350
366
|
registerTool("list_sources", [
|
|
351
367
|
"List the configured observability backends (Prometheus, Loki, and any connector) and whether each is currently reachable.",
|
|
352
368
|
"When to use: call this first to learn which source names exist and are healthy before passing `source` to other tools, or to debug why a query returns no data.",
|
|
@@ -547,7 +563,9 @@ async function main() {
|
|
|
547
563
|
.describe("Optional. Detection threshold: 'low' flags only strong deviations (>3σ), 'medium' is balanced (>2σ), 'high' is most sensitive and noisier (>1.5σ). Default: 'medium'."),
|
|
548
564
|
}, async (args) => {
|
|
549
565
|
await enforceEntitledAccess(ctx, { tool: "detect_anomalies", source: args?.source, service: args?.service });
|
|
550
|
-
|
|
566
|
+
// P1: pass the anomaly-history sink so detected scores flow
|
|
567
|
+
// into the TSDB and `get_anomaly_history` returns real data.
|
|
568
|
+
return withToolMetrics("detect_anomalies", () => detectAnomaliesHandler(registry, args, ctx, anomalyHistory));
|
|
551
569
|
});
|
|
552
570
|
registerTool("get_topology", [
|
|
553
571
|
"Return the infrastructure topology graph (Resources and Edges) from every topology-capable connector.",
|
|
@@ -599,16 +617,33 @@ async function main() {
|
|
|
599
617
|
// Product-allow-list gate, so federated tools obey the same policy
|
|
600
618
|
// surface as native ones.
|
|
601
619
|
for (const info of federationRegistry.getNamespacedTools()) {
|
|
602
|
-
//
|
|
603
|
-
//
|
|
604
|
-
//
|
|
605
|
-
//
|
|
606
|
-
|
|
620
|
+
// The MCP SDK's tool() signature wants a ZodRawShape (a map of
|
|
621
|
+
// field-name → Zod type), not a raw JSON Schema. Federated
|
|
622
|
+
// upstreams expose JSON Schema (the wire-format MCP uses on
|
|
623
|
+
// tools/list); we transcode to a permissive Zod shape so the
|
|
624
|
+
// SDK accepts the registration. Per-field types are `z.unknown()`
|
|
625
|
+
// because the upstream will validate the call args anyway; the
|
|
626
|
+
// local Zod check is only a "this is the field name set" gate.
|
|
627
|
+
// P7: this transcoding fixes the registration crash that broke
|
|
628
|
+
// every federation deploy before the E2E test caught it.
|
|
629
|
+
const upstreamProps = info.inputSchema?.properties ?? {};
|
|
630
|
+
// Every field is z.unknown().optional() — the SDK only uses this
|
|
631
|
+
// shape to know the field-name set; the upstream re-validates
|
|
632
|
+
// against its full JSON Schema (incl. its own `required` list)
|
|
633
|
+
// when the call arrives. Marking all fields optional here keeps
|
|
634
|
+
// calls that rely on upstream defaults flowing through; without it
|
|
635
|
+
// the SDK rejects any call that omits a field upstream considers
|
|
636
|
+
// required even if the upstream would accept the omission.
|
|
637
|
+
const localShape = {};
|
|
638
|
+
for (const k of Object.keys(upstreamProps)) {
|
|
639
|
+
localShape[k] = z.unknown().optional();
|
|
640
|
+
}
|
|
641
|
+
registerTool(info.namespacedName, info.description || `Federated from upstream ${info.sourceName}.`, localShape, async (args) => {
|
|
607
642
|
await enforceEntitledAccess(ctx, { tool: info.namespacedName });
|
|
608
643
|
return withToolMetrics(info.namespacedName, () => federationRegistry.callNamespacedTool(info.namespacedName, args));
|
|
609
644
|
});
|
|
610
645
|
}
|
|
611
|
-
return mcpServer;
|
|
646
|
+
return { mcpServer, toolHandlers };
|
|
612
647
|
}
|
|
613
648
|
// --- Management-plane auth (basic mode) -----------------------------------
|
|
614
649
|
// Off by default. Enable with `OMCP_AUTH=basic` + `OMCP_USERS_FILE` and
|
|
@@ -717,7 +752,12 @@ async function main() {
|
|
|
717
752
|
app.set("trust proxy", trustProxy);
|
|
718
753
|
}
|
|
719
754
|
}
|
|
720
|
-
|
|
755
|
+
// Parse application/json AND any *+json media type. SCIM clients
|
|
756
|
+
// (Entra, Okta) send `application/scim+json` per RFC 7644 §3.1 —
|
|
757
|
+
// without the wildcard the body silently arrives empty and every
|
|
758
|
+
// SCIM POST/PATCH 400s. The wildcard also future-proofs other
|
|
759
|
+
// structured-suffix JSON content types.
|
|
760
|
+
app.use(express.json({ limit: "1mb", type: ["application/json", "application/*+json"] }));
|
|
721
761
|
// Security headers
|
|
722
762
|
app.use((req, res, next) => {
|
|
723
763
|
res.setHeader("X-Content-Type-Options", "nosniff");
|
|
@@ -938,11 +978,11 @@ async function main() {
|
|
|
938
978
|
// (no tools) so the gateway boots regardless of upstream health.
|
|
939
979
|
const federationRegistry = new FederationRegistry();
|
|
940
980
|
for (const cfg of parseFederationEnv()) {
|
|
941
|
-
const client = new UpstreamClient(
|
|
942
|
-
name: cfg.name,
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
981
|
+
const client = new UpstreamClient(cfg.kind === "stdio"
|
|
982
|
+
? { transport: "stdio", name: cfg.name, command: cfg.command, args: cfg.args }
|
|
983
|
+
: cfg.kind === "ws"
|
|
984
|
+
? { transport: "ws", name: cfg.name, url: cfg.url }
|
|
985
|
+
: { name: cfg.name, url: cfg.url, bearerToken: cfg.bearerToken });
|
|
946
986
|
federationRegistry.add(client);
|
|
947
987
|
client.connect().catch((err) => {
|
|
948
988
|
console.warn("federation upstream %s initial connect failed: %s", cfg.name, err instanceof Error ? err.message : String(err));
|
|
@@ -1044,6 +1084,24 @@ async function main() {
|
|
|
1044
1084
|
// this endpoint when enabled.
|
|
1045
1085
|
if (process.env.METRICS_ENABLED !== "false") {
|
|
1046
1086
|
app.get("/metrics", async (_req, res) => {
|
|
1087
|
+
// P9: refresh the audit-webhook DLQ depth before the scrape so
|
|
1088
|
+
// Prometheus sees the current file state rather than whatever
|
|
1089
|
+
// /api/audit/dlq last set. Best-effort; any read failure or missing env
|
|
1090
|
+
// resets to 0 (the dlqPath being unset is the normal state).
|
|
1091
|
+
try {
|
|
1092
|
+
const dlqPath = process.env.OMCP_AUDIT_WEBHOOK_DLQ;
|
|
1093
|
+
if (dlqPath) {
|
|
1094
|
+
const fs = await import("node:fs/promises");
|
|
1095
|
+
const raw = await fs.readFile(dlqPath, "utf8").catch(() => "");
|
|
1096
|
+
auditDlqDepth.set(raw.split("\n").filter((l) => l.trim()).length);
|
|
1097
|
+
}
|
|
1098
|
+
else {
|
|
1099
|
+
auditDlqDepth.set(0);
|
|
1100
|
+
}
|
|
1101
|
+
}
|
|
1102
|
+
catch {
|
|
1103
|
+
auditDlqDepth.set(0);
|
|
1104
|
+
}
|
|
1047
1105
|
res.set("Content-Type", selfRegistry.contentType);
|
|
1048
1106
|
res.end(await selfRegistry.metrics());
|
|
1049
1107
|
});
|
|
@@ -1108,6 +1166,37 @@ async function main() {
|
|
|
1108
1166
|
app.get("/api/tools/registry", (_req, res) => {
|
|
1109
1167
|
res.json({ tools: REGISTERED_TOOLS });
|
|
1110
1168
|
});
|
|
1169
|
+
// Q13: in-product Playground endpoint. Lets the operator invoke a
|
|
1170
|
+
// registered tool against the live gateway without spinning up a
|
|
1171
|
+
// separate MCP client. Re-uses the per-session ctx and the same
|
|
1172
|
+
// wrapped handler the McpServer dispatch path would run (so RBAC,
|
|
1173
|
+
// entitlements, rate-limit, audit, hook fan-out all apply
|
|
1174
|
+
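// identically). NOTE(review): the handler is fetched from the Map and called directly with body.args, so the McpServer's own input-schema validation does not appear to run on this path — confirm.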
|
|
1175
|
+
app.post("/api/playground/invoke", async (req, res) => {
|
|
1176
|
+
const ctx = await gateCtx(req, res);
|
|
1177
|
+
if (!ctx)
|
|
1178
|
+
return;
|
|
1179
|
+
const body = (req.body ?? {});
|
|
1180
|
+
const tool = typeof body.tool === "string" ? body.tool : "";
|
|
1181
|
+
if (!tool) {
|
|
1182
|
+
res.status(400).json({ error: "tool (string) is required" });
|
|
1183
|
+
return;
|
|
1184
|
+
}
|
|
1185
|
+
const { toolHandlers } = createMcpServer(ctx);
|
|
1186
|
+
const handler = toolHandlers.get(tool);
|
|
1187
|
+
if (!handler) {
|
|
1188
|
+
res.status(404).json({ error: `tool '${tool}' is not registered (or not allowed for this credential)` });
|
|
1189
|
+
return;
|
|
1190
|
+
}
|
|
1191
|
+
try {
|
|
1192
|
+
const result = await handler(body.args ?? {}, undefined);
|
|
1193
|
+
res.json({ tool, result });
|
|
1194
|
+
}
|
|
1195
|
+
catch (err) {
|
|
1196
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
1197
|
+
res.status(500).json({ error: message, tool });
|
|
1198
|
+
}
|
|
1199
|
+
});
|
|
1111
1200
|
// Server info — version, loaded plugins, MCP protocol version, build metadata.
|
|
1112
1201
|
// Used by the Web UI footer and by operators to confirm what's deployed.
|
|
1113
1202
|
app.get("/api/info", async (_req, res) => {
|
|
@@ -1142,6 +1231,16 @@ async function main() {
|
|
|
1142
1231
|
redaction: REDACTION_ENABLED,
|
|
1143
1232
|
trustProxy: !!(process.env.OMCP_TRUST_PROXY && process.env.OMCP_TRUST_PROXY !== "false"),
|
|
1144
1233
|
toolRatePerMin: resolveToolRatePerMin(process.env.OMCP_TOOL_RATE_PER_MIN),
|
|
1234
|
+
// P1: posture flags so dashboards can alert when a shipped
|
|
1235
|
+
// capability is configured but doing nothing useful.
|
|
1236
|
+
anomalyHistoryActive: anomalyHistory.isEnabled(),
|
|
1237
|
+
tracesCapabilityCount: registry
|
|
1238
|
+
.getAll()
|
|
1239
|
+
.filter((c) => typeof c.queryTraces === "function").length,
|
|
1240
|
+
pluginsVerified: !/^(0|false|no|off)$/i.test(process.env.VERIFY_PLUGINS ?? "true"),
|
|
1241
|
+
scimEnabled: !!process.env.OMCP_SCIM_TOKEN,
|
|
1242
|
+
federationUpstreams: (process.env.OMCP_FEDERATION_UPSTREAMS ?? "")
|
|
1243
|
+
.split(",").map((s) => s.trim()).filter(Boolean).length,
|
|
1145
1244
|
},
|
|
1146
1245
|
plugins: loader.list().map((p) => ({
|
|
1147
1246
|
name: p.name,
|
|
@@ -1566,6 +1665,46 @@ async function main() {
|
|
|
1566
1665
|
scopedTo: tenantFilter || (isAdmin ? null : callerTenant),
|
|
1567
1666
|
});
|
|
1568
1667
|
});
|
|
1668
|
+
// --- /api/audit/dlq — webhook-sink dead-letter queue surface (P9) ---
|
|
1669
|
+
// When the audit webhook is configured AND the receiver exhausted
|
|
1670
|
+
// its retry budget, entries land in the DLQ file. This endpoint
|
|
1671
|
+
// surfaces the count + the last N entries so operators can decide
|
|
1672
|
+
// whether to replay manually. Also refreshes the
|
|
1673
|
+
// `obsmcp_audit_webhook_dlq_depth` gauge so the /metrics scrape
|
|
1674
|
+
// alongside it stays accurate.
|
|
1675
|
+
app.get("/api/audit/dlq", need("audit", "read"), async (_req, res) => {
|
|
1676
|
+
const dlqPath = process.env.OMCP_AUDIT_WEBHOOK_DLQ;
|
|
1677
|
+
if (!dlqPath) {
|
|
1678
|
+
auditDlqDepth.set(0);
|
|
1679
|
+
res.json({ enabled: false, path: null, depth: 0, entries: [] });
|
|
1680
|
+
return;
|
|
1681
|
+
}
|
|
1682
|
+
try {
|
|
1683
|
+
const fs = await import("node:fs/promises");
|
|
1684
|
+
const raw = await fs.readFile(dlqPath, "utf8");
|
|
1685
|
+
const lines = raw.split("\n").filter((l) => l.trim());
|
|
1686
|
+
auditDlqDepth.set(lines.length);
|
|
1687
|
+
const tail = lines.slice(-50).map((l) => {
|
|
1688
|
+
try {
|
|
1689
|
+
return JSON.parse(l);
|
|
1690
|
+
}
|
|
1691
|
+
catch {
|
|
1692
|
+
return { _raw: l, _parseError: true };
|
|
1693
|
+
}
|
|
1694
|
+
});
|
|
1695
|
+
res.json({ enabled: true, path: dlqPath, depth: lines.length, entries: tail });
|
|
1696
|
+
}
|
|
1697
|
+
catch (err) {
|
|
1698
|
+
const code = err.code;
|
|
1699
|
+
if (code === "ENOENT") {
|
|
1700
|
+
auditDlqDepth.set(0);
|
|
1701
|
+
res.json({ enabled: true, path: dlqPath, depth: 0, entries: [] });
|
|
1702
|
+
return;
|
|
1703
|
+
}
|
|
1704
|
+
console.warn("[/api/audit/dlq] read failed:", err);
|
|
1705
|
+
res.status(500).json({ error: err?.message || "DLQ read failed" });
|
|
1706
|
+
}
|
|
1707
|
+
});
|
|
1569
1708
|
// --- /api/usage — per-identity MCP rate-limit snapshot -----------------
|
|
1570
1709
|
// Read-only view of the IdentityRateLimiter's bucket state. Gated by
|
|
1571
1710
|
// need("audit","read") — the same role set that already sees the
|
|
@@ -1716,31 +1855,133 @@ async function main() {
|
|
|
1716
1855
|
registerOidcRoutes(app, { sessionCfg, oidc: oidcRuntime });
|
|
1717
1856
|
console.log("[auth] OIDC endpoints registered: /api/auth/oidc/{login,callback,logout}");
|
|
1718
1857
|
}
|
|
1719
|
-
// Phase F21: SCIM 2.0 — opt-in. OMCP_SCIM_TOKEN gates access
|
|
1720
|
-
//
|
|
1721
|
-
//
|
|
1722
|
-
//
|
|
1858
|
+
// Phase F21 / Q6: SCIM 2.0 — opt-in. OMCP_SCIM_TOKEN gates access.
|
|
1859
|
+
// The store backend is chosen by createScimStore from
|
|
1860
|
+
// OMCP_SCIM_BACKEND (file | redis). file (default) → OMCP_SCIM_STORE
|
|
1861
|
+
// on-disk JSON (mode 0600, atomic). redis → a shared snapshot so
|
|
1862
|
+
// multi-replica deployments stay coherent (Q6); the redis client is
|
|
1863
|
+
// built from OMCP_SCIM_REDIS_URL here, mirroring the session store.
|
|
1723
1864
|
const scimToken = process.env.OMCP_SCIM_TOKEN?.trim();
|
|
1724
1865
|
if (scimToken) {
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1866
|
+
try {
|
|
1867
|
+
const scimBackend = (process.env.OMCP_SCIM_BACKEND?.trim() || "file");
|
|
1868
|
+
let scimRedis;
|
|
1869
|
+
if (scimBackend === "redis") {
|
|
1870
|
+
const redisUrl = process.env.OMCP_SCIM_REDIS_URL?.trim();
|
|
1871
|
+
if (!redisUrl)
|
|
1872
|
+
throw new Error("OMCP_SCIM_BACKEND=redis requires OMCP_SCIM_REDIS_URL");
|
|
1873
|
+
const { createClient } = await import("redis");
|
|
1874
|
+
const client = createClient({ url: redisUrl });
|
|
1875
|
+
client.on("error", (err) => console.warn("[scim] redis client error: %s", err instanceof Error ? err.message : String(err)));
|
|
1876
|
+
await client.connect();
|
|
1877
|
+
scimRedis = client;
|
|
1878
|
+
}
|
|
1879
|
+
const scimStore = await createScimStore({
|
|
1880
|
+
backend: scimBackend,
|
|
1881
|
+
path: process.env.OMCP_SCIM_STORE?.trim() || "/tmp/scim.json",
|
|
1882
|
+
redis: scimRedis,
|
|
1883
|
+
redisKey: process.env.OMCP_SCIM_REDIS_KEY?.trim(),
|
|
1884
|
+
});
|
|
1885
|
+
registerScimRoutes(app, {
|
|
1886
|
+
store: scimStore,
|
|
1887
|
+
bearerToken: scimToken,
|
|
1888
|
+
audit: (ev) => void mgmtAudit.record({
|
|
1889
|
+
actor: { sub: `scim:${ev.actor}` },
|
|
1890
|
+
tenant: "default",
|
|
1891
|
+
resource: "users",
|
|
1892
|
+
action: ev.action.includes("delete") ? "delete" : "write",
|
|
1893
|
+
method: "SCIM",
|
|
1894
|
+
path: `/scim/v2/${ev.action}`,
|
|
1895
|
+
status: ev.status,
|
|
1896
|
+
target: ev.target,
|
|
1897
|
+
}).catch(() => undefined),
|
|
1898
|
+
});
|
|
1899
|
+
console.log("[scim] /scim/v2/* registered (backend: %s)", scimBackend);
|
|
1900
|
+
}
|
|
1901
|
+
catch (err) {
|
|
1902
|
+
console.warn("[scim] enable failed (routes not mounted): %s", err instanceof Error ? err.message : String(err));
|
|
1903
|
+
}
|
|
1743
1904
|
}
|
|
1905
|
+
// Phase P6: Postmortems persistence. /api/postmortems lets the
|
|
1906
|
+
// UI list / open / regenerate / delete previously-generated
|
|
1907
|
+
// reports. Always enabled; the file path comes from OMCP_POSTMORTEMS_FILE (default
|
|
1908
|
+
// /tmp/postmortems.jsonl). When the env is left at its default
|
|
1909
|
+
// the demo still works — operators who want survival across
|
|
1910
|
+
// restarts mount a PVC at the same path and set the env to it.
|
|
1911
|
+
const postmortemStore = new PostmortemStore(process.env.OMCP_POSTMORTEMS_FILE?.trim() || "/tmp/postmortems.jsonl");
|
|
1912
|
+
await postmortemStore.load();
|
|
1913
|
+
// GET /api/postmortems — list (newest-first), tenant-scoped.
|
|
1914
|
+
app.get("/api/postmortems", need("services", "read"), async (req, res) => {
|
|
1915
|
+
const sess = req.session;
|
|
1916
|
+
const tenant = sess?.tenant || "default";
|
|
1917
|
+
const entries = postmortemStore.list(tenant);
|
|
1918
|
+
res.json({
|
|
1919
|
+
total: entries.length,
|
|
1920
|
+
entries: entries.map((e) => ({
|
|
1921
|
+
id: e.id,
|
|
1922
|
+
ts: e.ts,
|
|
1923
|
+
createdBy: e.createdBy,
|
|
1924
|
+
service: e.report.service,
|
|
1925
|
+
window: e.report.window,
|
|
1926
|
+
synopsis: e.report.synopsis,
|
|
1927
|
+
})),
|
|
1928
|
+
});
|
|
1929
|
+
});
|
|
1930
|
+
// GET /api/postmortems/:id — full report (markdown + sections).
|
|
1931
|
+
app.get("/api/postmortems/:id", need("services", "read"), async (req, res) => {
|
|
1932
|
+
const sess = req.session;
|
|
1933
|
+
const tenant = sess?.tenant || "default";
|
|
1934
|
+
const id = String(req.params.id ?? "");
|
|
1935
|
+
const entry = postmortemStore.get(id, tenant);
|
|
1936
|
+
if (!entry) {
|
|
1937
|
+
res.status(404).json({ error: `Postmortem ${id} not found` });
|
|
1938
|
+
return;
|
|
1939
|
+
}
|
|
1940
|
+
res.json(entry);
|
|
1941
|
+
});
|
|
1942
|
+
// POST /api/postmortems — regenerate via the tool handler +
|
|
1943
|
+
// persist. Body: { service, duration? } (format is always forced to "json"). Returns the
|
|
1944
|
+
// stored entry with its id.
|
|
1945
|
+
app.post("/api/postmortems", need("services", "write"), async (req, res) => {
|
|
1946
|
+
const body = (req.body ?? {});
|
|
1947
|
+
if (!body.service || typeof body.service !== "string") {
|
|
1948
|
+
res.status(400).json({ error: "service is required" });
|
|
1949
|
+
return;
|
|
1950
|
+
}
|
|
1951
|
+
const sess = req.session;
|
|
1952
|
+
const tenant = sess?.tenant || "default";
|
|
1953
|
+
const createdBy = sess?.sub || sess?.name || "unknown";
|
|
1954
|
+
try {
|
|
1955
|
+
// Force JSON so we get the structured report shape back from
|
|
1956
|
+
// the tool, not just the markdown body. We persist the full
|
|
1957
|
+
// structured report; the markdown lives inside `report.markdown`.
|
|
1958
|
+
const ctx = { ...defaultContext(), tenant, principalId: createdBy };
|
|
1959
|
+
const result = await generatePostmortemHandler(registry, { service: body.service, duration: body.duration, format: "json" }, ctx);
|
|
1960
|
+
const text = result?.content?.[0]?.text;
|
|
1961
|
+
if (!text) {
|
|
1962
|
+
res.status(500).json({ error: "generate_postmortem returned no content" });
|
|
1963
|
+
return;
|
|
1964
|
+
}
|
|
1965
|
+
const report = JSON.parse(text);
|
|
1966
|
+
const stored = await postmortemStore.append({ report, createdBy, tenant });
|
|
1967
|
+
res.status(201).json(stored);
|
|
1968
|
+
}
|
|
1969
|
+
catch (e) {
|
|
1970
|
+
console.warn(`[postmortems] regen failed:`, e);
|
|
1971
|
+
res.status(500).json({ error: e?.message || "internal error" });
|
|
1972
|
+
}
|
|
1973
|
+
});
|
|
1974
|
+
// DELETE /api/postmortems/:id — tenant-scoped; gated by need("services", "delete").
|
|
1975
|
+
app.delete("/api/postmortems/:id", need("services", "delete"), async (req, res) => {
|
|
1976
|
+
const sess = req.session;
|
|
1977
|
+
const tenant = sess?.tenant || "default";
|
|
1978
|
+
const ok = await postmortemStore.delete(String(req.params.id ?? ""), tenant);
|
|
1979
|
+
if (!ok) {
|
|
1980
|
+
res.status(404).json({ error: `Postmortem ${req.params.id} not found` });
|
|
1981
|
+
return;
|
|
1982
|
+
}
|
|
1983
|
+
res.status(204).end();
|
|
1984
|
+
});
|
|
1744
1985
|
// Connectors currently loaded into this server (builtin + filesystem
|
|
1745
1986
|
// plugins), with manifest metadata — drives the UI "Connectors" page.
|
|
1746
1987
|
app.get("/api/connectors", (_req, res) => {
|
|
@@ -2550,7 +2791,7 @@ async function main() {
|
|
|
2550
2791
|
});
|
|
2551
2792
|
// Stdio transport: one server over stdin/stdout, no HTTP listener.
|
|
2552
2793
|
if (STDIO) {
|
|
2553
|
-
const server = createMcpServer(defaultContext());
|
|
2794
|
+
const { mcpServer: server } = createMcpServer(defaultContext());
|
|
2554
2795
|
await server.connect(new StdioServerTransport());
|
|
2555
2796
|
console.error(`observability-mcp running on stdio transport · connectors: ${registry
|
|
2556
2797
|
.getAll()
|
|
@@ -2723,7 +2964,7 @@ async function main() {
|
|
|
2723
2964
|
}
|
|
2724
2965
|
mcpActiveSessions.set(transports.size);
|
|
2725
2966
|
};
|
|
2726
|
-
const sessionMcpServer = createMcpServer(ctx);
|
|
2967
|
+
const { mcpServer: sessionMcpServer } = createMcpServer(ctx);
|
|
2727
2968
|
await sessionMcpServer.connect(transport);
|
|
2728
2969
|
}
|
|
2729
2970
|
await transport.handleRequest(req, res, req.body);
|
|
@@ -2831,7 +3072,7 @@ async function main() {
|
|
|
2831
3072
|
}
|
|
2832
3073
|
mcpActiveSessions.set(transports.size);
|
|
2833
3074
|
};
|
|
2834
|
-
const sessionMcpServer = createMcpServer(ctx);
|
|
3075
|
+
const { mcpServer: sessionMcpServer } = createMcpServer(ctx);
|
|
2835
3076
|
await sessionMcpServer.connect(transport);
|
|
2836
3077
|
}
|
|
2837
3078
|
await transport.handleRequest(req, res, req.body);
|
|
@@ -2981,7 +3222,7 @@ async function main() {
|
|
|
2981
3222
|
wss.handleUpgrade(req, socket, head, async (ws) => {
|
|
2982
3223
|
try {
|
|
2983
3224
|
const transport = new WebSocketServerTransport(ws);
|
|
2984
|
-
const sessionMcpServer = createMcpServer(auth.ctx);
|
|
3225
|
+
const { mcpServer: sessionMcpServer } = createMcpServer(auth.ctx);
|
|
2985
3226
|
await sessionMcpServer.connect(transport);
|
|
2986
3227
|
}
|
|
2987
3228
|
catch (err) {
|
package/dist/metrics/self.d.ts
CHANGED
|
@@ -5,6 +5,7 @@ export declare const mcpToolLatency: Histogram<"tool">;
|
|
|
5
5
|
export declare const connectorCalls: Counter<"type" | "source" | "outcome" | "operation">;
|
|
6
6
|
export declare const apiRequests: Counter<"status" | "route" | "method">;
|
|
7
7
|
export declare const mcpActiveSessions: Gauge<string>;
|
|
8
|
+
export declare const auditDlqDepth: Gauge<string>;
|
|
8
9
|
/**
|
|
9
10
|
* Wrap a (potentially async) tool handler to record call count + latency.
|
|
10
11
|
* Outcome is "ok" or "error" — never throws on its own.
|
package/dist/metrics/self.js
CHANGED
|
@@ -40,6 +40,14 @@ export const mcpActiveSessions = new Gauge({
|
|
|
40
40
|
help: "Active MCP Streamable HTTP sessions.",
|
|
41
41
|
registers: [selfRegistry],
|
|
42
42
|
});
|
|
43
|
+
// P9: Audit webhook dead-letter queue depth. Refreshed on each
|
|
44
|
+
// `/metrics` scrape and when the operator hits `/api/audit/dlq`.
|
|
45
|
+
// Stays at 0 when no DLQ file is configured or the file is missing.
|
|
46
|
+
export const auditDlqDepth = new Gauge({
|
|
47
|
+
name: "obsmcp_audit_webhook_dlq_depth",
|
|
48
|
+
help: "Number of audit entries waiting in the webhook-sink dead-letter queue.",
|
|
49
|
+
registers: [selfRegistry],
|
|
50
|
+
});
|
|
43
51
|
/**
|
|
44
52
|
* Wrap a (potentially async) tool handler to record call count + latency.
|
|
45
53
|
* Outcome is "ok" or "error" — never throws on its own.
|