@thotischner/observability-mcp 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,185 @@
1
+ import { TopologyStore, podResource, podEdges, nodeResource, deploymentResource, deploymentEdges, replicaSetResource, replicaSetEdges, namespaceResource, namespacedId, clusterScopedId, } from "./kubernetes-graph.js";
2
+ // Default provider is loaded lazily so tests don't pay the
3
+ // @kubernetes/client-node import cost (and so the module is usable in
4
+ // environments where the SDK isn't installed yet — e.g. unit tests in CI
5
+ // before the dep lands).
6
+ let defaultProvider;
7
+ export function setDefaultInformerFactoryProvider(p) {
8
+ defaultProvider = p;
9
+ }
10
+ async function loadDefaultProvider() {
11
+ if (defaultProvider)
12
+ return defaultProvider;
13
+ const mod = await import("./kubernetes-client.js");
14
+ defaultProvider = mod.createInformerFactory;
15
+ return defaultProvider;
16
+ }
17
+ export class KubernetesConnector {
18
+ type = "kubernetes";
19
+ signalType = "topology";
20
+ name = "";
21
+ store;
22
+ factory;
23
+ informers = [];
24
+ providerOverride;
25
+ /** Constructor injection used by tests. */
26
+ constructor(provider) {
27
+ this.providerOverride = provider;
28
+ }
29
+ async connect(config) {
30
+ this.name = config.name;
31
+ this.store = new TopologyStore(config.name);
32
+ const provider = this.providerOverride ?? (await loadDefaultProvider());
33
+ this.factory = await provider(config);
34
+ // Wire each informer to the store. Pure builders translate Kube
35
+ // objects → Resource/Edge; the store dedupes and emits diffs.
36
+ const pods = this.factory.pods();
37
+ pods.on("add", (p) => this.applyPod(p));
38
+ pods.on("update", (p) => this.applyPod(p));
39
+ pods.on("delete", (p) => {
40
+ const id = idOfPod(p);
41
+ if (id)
42
+ this.store.removeResource(id);
43
+ });
44
+ pods.on("error", (err) => logWatchError(this.name, "pods", err));
45
+ const nodes = this.factory.nodes();
46
+ nodes.on("add", (n) => this.applyNode(n));
47
+ nodes.on("update", (n) => this.applyNode(n));
48
+ nodes.on("delete", (n) => {
49
+ const id = idOfNode(n);
50
+ if (id)
51
+ this.store.removeResource(id);
52
+ });
53
+ nodes.on("error", (err) => logWatchError(this.name, "nodes", err));
54
+ const deps = this.factory.deployments();
55
+ deps.on("add", (d) => this.applyDeployment(d));
56
+ deps.on("update", (d) => this.applyDeployment(d));
57
+ deps.on("delete", (d) => {
58
+ const id = idOfNamespaced("deployment", d);
59
+ if (id)
60
+ this.store.removeResource(id);
61
+ });
62
+ deps.on("error", (err) => logWatchError(this.name, "deployments", err));
63
+ const rs = this.factory.replicaSets();
64
+ rs.on("add", (r) => this.applyReplicaSet(r));
65
+ rs.on("update", (r) => this.applyReplicaSet(r));
66
+ rs.on("delete", (r) => {
67
+ const id = idOfNamespaced("replicaset", r);
68
+ if (id)
69
+ this.store.removeResource(id);
70
+ });
71
+ rs.on("error", (err) => logWatchError(this.name, "replicasets", err));
72
+ const ns = this.factory.namespaces();
73
+ ns.on("add", (n) => this.applyNamespace(n));
74
+ ns.on("update", (n) => this.applyNamespace(n));
75
+ ns.on("delete", (n) => {
76
+ const name = n.metadata?.name;
77
+ if (name)
78
+ this.store.removeResource(clusterScopedId("namespace", name));
79
+ });
80
+ ns.on("error", (err) => logWatchError(this.name, "namespaces", err));
81
+ this.informers = [pods, nodes, deps, rs, ns];
82
+ await Promise.all(this.informers.map((i) => i.start()));
83
+ }
84
+ applyPod(p) {
85
+ const r = podResource(this.name, p);
86
+ if (!r)
87
+ return;
88
+ this.store.upsertResource(r, podEdges(this.name, p));
89
+ }
90
+ applyNode(n) {
91
+ const r = nodeResource(this.name, n);
92
+ if (!r)
93
+ return;
94
+ this.store.upsertResource(r, []);
95
+ }
96
+ applyDeployment(d) {
97
+ const r = deploymentResource(this.name, d);
98
+ if (!r)
99
+ return;
100
+ this.store.upsertResource(r, deploymentEdges(this.name, d));
101
+ }
102
+ applyReplicaSet(rs) {
103
+ const r = replicaSetResource(this.name, rs);
104
+ if (!r)
105
+ return;
106
+ this.store.upsertResource(r, replicaSetEdges(this.name, rs));
107
+ }
108
+ applyNamespace(n) {
109
+ const r = namespaceResource(this.name, n);
110
+ if (!r)
111
+ return;
112
+ this.store.upsertResource(r, []);
113
+ }
114
+ async healthCheck() {
115
+ if (!this.factory)
116
+ return { status: "down", latencyMs: 0, message: "not connected" };
117
+ const r = await this.factory.healthCheck();
118
+ return { status: r.ok ? "up" : "down", latencyMs: r.latencyMs, message: r.message };
119
+ }
120
+ async disconnect() {
121
+ await Promise.all(this.informers.map((i) => i.stop().catch(() => { })));
122
+ this.informers = [];
123
+ await this.factory?.close().catch(() => { });
124
+ this.factory = undefined;
125
+ }
126
+ // Topology has no metric/service surface — these stay empty/inert.
127
+ getDefaultMetrics() {
128
+ return [];
129
+ }
130
+ getMetrics() {
131
+ return [];
132
+ }
133
+ async listServices() {
134
+ return [];
135
+ }
136
+ // --- Topology capability ---
137
+ async listResources() {
138
+ return this.store?.listResources() ?? [];
139
+ }
140
+ async listEdges() {
141
+ return this.store?.listEdges() ?? [];
142
+ }
143
+ async getTopologySnapshot() {
144
+ return (this.store?.snapshot() ?? {
145
+ source: this.name,
146
+ resources: [],
147
+ edges: [],
148
+ revision: 0,
149
+ });
150
+ }
151
+ watchTopology(listener) {
152
+ if (!this.store)
153
+ return () => { };
154
+ // Initial resync so subscribers see the current state without
155
+ // racing the next watch event.
156
+ queueMicrotask(() => listener({ type: "resync", snapshot: this.store.snapshot() }));
157
+ return this.store.subscribe(listener);
158
+ }
159
+ }
160
+ // --- helpers ---
161
+ function idOfPod(p) {
162
+ const n = p.metadata?.name;
163
+ const ns = p.metadata?.namespace;
164
+ if (!n || !ns)
165
+ return undefined;
166
+ return namespacedId("pod", ns, n);
167
+ }
168
+ function idOfNode(n) {
169
+ return n.metadata?.name ? clusterScopedId("node", n.metadata.name) : undefined;
170
+ }
171
+ function idOfNamespaced(kind, obj) {
172
+ const n = obj.metadata?.name;
173
+ const ns = obj.metadata?.namespace;
174
+ if (!n || !ns)
175
+ return undefined;
176
+ return namespacedId(kind, ns, n);
177
+ }
178
+ function logWatchError(source, kind, err) {
179
+ // AbortError is what makeInformer emits when we cleanly stop the watch
180
+ // (disconnect, process shutdown) — not actually an error to surface.
181
+ const msg = String(err);
182
+ if (msg.includes("AbortError") || /aborted a request/i.test(msg))
183
+ return;
184
+ console.warn("k8s watch error: source=%s kind=%s err=%s", source, kind, msg);
185
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,136 @@
1
+ import { describe, it } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { KubernetesConnector } from "./kubernetes.js";
4
+ import { isTopologyProvider } from "./interface.js";
5
+ class FakeInformer {
6
+ handlers = {};
7
+ started = false;
8
+ stopped = false;
9
+ on(event, handler) {
10
+ (this.handlers[event] ??= []).push(handler);
11
+ }
12
+ async start() {
13
+ this.started = true;
14
+ }
15
+ async stop() {
16
+ this.stopped = true;
17
+ }
18
+ emit(event, obj) {
19
+ for (const h of this.handlers[event] ?? [])
20
+ h(obj);
21
+ }
22
+ }
23
+ function makeFakeFactory() {
24
+ const pods = new FakeInformer();
25
+ const nodes = new FakeInformer();
26
+ const deps = new FakeInformer();
27
+ const rs = new FakeInformer();
28
+ const ns = new FakeInformer();
29
+ const factory = {
30
+ pods: () => pods,
31
+ nodes: () => nodes,
32
+ deployments: () => deps,
33
+ replicaSets: () => rs,
34
+ namespaces: () => ns,
35
+ async healthCheck() {
36
+ return { ok: true, latencyMs: 1 };
37
+ },
38
+ async close() { },
39
+ };
40
+ return { factory, pods, nodes, deps, rs, ns };
41
+ }
42
+ const CFG = {
43
+ name: "test-cluster",
44
+ type: "kubernetes",
45
+ url: "",
46
+ enabled: true,
47
+ };
48
+ describe("KubernetesConnector", () => {
49
+ it("implements the TopologyProvider capability", async () => {
50
+ const { factory } = makeFakeFactory();
51
+ const conn = new KubernetesConnector(async () => factory);
52
+ await conn.connect(CFG);
53
+ assert.equal(isTopologyProvider(conn), true);
54
+ assert.equal(conn.signalType, "topology");
55
+ await conn.disconnect();
56
+ });
57
+ it("starts every informer on connect and stops them on disconnect", async () => {
58
+ const fake = makeFakeFactory();
59
+ const conn = new KubernetesConnector(async () => fake.factory);
60
+ await conn.connect(CFG);
61
+ for (const inf of [fake.pods, fake.nodes, fake.deps, fake.rs, fake.ns]) {
62
+ assert.equal(inf.started, true);
63
+ }
64
+ await conn.disconnect();
65
+ for (const inf of [fake.pods, fake.nodes, fake.deps, fake.rs, fake.ns]) {
66
+ assert.equal(inf.stopped, true);
67
+ }
68
+ });
69
+ it("builds the graph from watch events", async () => {
70
+ const fake = makeFakeFactory();
71
+ const conn = new KubernetesConnector(async () => fake.factory);
72
+ await conn.connect(CFG);
73
+ fake.nodes.emit("add", { metadata: { name: "worker-1" } });
74
+ fake.ns.emit("add", { metadata: { name: "default" } });
75
+ fake.deps.emit("add", { metadata: { name: "checkout", namespace: "default" } });
76
+ fake.rs.emit("add", {
77
+ metadata: {
78
+ name: "checkout-7f89",
79
+ namespace: "default",
80
+ ownerReferences: [{ kind: "Deployment", name: "checkout" }],
81
+ },
82
+ });
83
+ fake.pods.emit("add", {
84
+ metadata: {
85
+ name: "checkout-7f89d",
86
+ namespace: "default",
87
+ ownerReferences: [{ kind: "ReplicaSet", name: "checkout-7f89" }],
88
+ },
89
+ spec: { nodeName: "worker-1" },
90
+ });
91
+ const snap = await conn.getTopologySnapshot();
92
+ const ids = snap.resources.map((r) => r.id).sort();
93
+ assert.deepEqual(ids, [
94
+ "k8s:deployment:default/checkout",
95
+ "k8s:namespace:default",
96
+ "k8s:node:worker-1",
97
+ "k8s:pod:default/checkout-7f89d",
98
+ "k8s:replicaset:default/checkout-7f89",
99
+ ]);
100
+ // Full RCA chain present: pod → rs → deployment, pod → node, * → namespace.
101
+ const e = snap.edges;
102
+ assert.ok(e.some((x) => x.from === "k8s:pod:default/checkout-7f89d" && x.relation === "RUNS_ON"));
103
+ assert.ok(e.some((x) => x.from === "k8s:pod:default/checkout-7f89d" && x.relation === "OWNED_BY"));
104
+ assert.ok(e.some((x) => x.from === "k8s:replicaset:default/checkout-7f89" && x.relation === "OWNED_BY"));
105
+ await conn.disconnect();
106
+ });
107
+ it("removes a pod's edges when the pod is deleted", async () => {
108
+ const fake = makeFakeFactory();
109
+ const conn = new KubernetesConnector(async () => fake.factory);
110
+ await conn.connect(CFG);
111
+ const pod = {
112
+ metadata: { name: "p1", namespace: "default" },
113
+ spec: { nodeName: "n1" },
114
+ };
115
+ fake.pods.emit("add", pod);
116
+ assert.equal((await conn.listEdges()).length > 0, true);
117
+ fake.pods.emit("delete", pod);
118
+ assert.equal((await conn.listResources()).length, 0);
119
+ assert.equal((await conn.listEdges()).length, 0);
120
+ await conn.disconnect();
121
+ });
122
+ it("watchTopology delivers a resync then live diffs", async () => {
123
+ const fake = makeFakeFactory();
124
+ const conn = new KubernetesConnector(async () => fake.factory);
125
+ await conn.connect(CFG);
126
+ fake.nodes.emit("add", { metadata: { name: "n0" } });
127
+ const events = [];
128
+ const unsub = conn.watchTopology((e) => events.push(e));
129
+ await new Promise((r) => setImmediate(r));
130
+ assert.equal(events[0]?.type, "resync");
131
+ fake.nodes.emit("add", { metadata: { name: "n1" } });
132
+ assert.ok(events.some((e) => e.type === "resource_added"));
133
+ unsub();
134
+ await conn.disconnect();
135
+ });
136
+ });
@@ -4,6 +4,7 @@ import { pathToFileURL } from "node:url";
4
4
  import { manifestSchema } from "../sdk/manifest-schema.js";
5
5
  import { PrometheusConnector } from "./prometheus.js";
6
6
  import { LokiConnector } from "./loki.js";
7
+ import { KubernetesConnector } from "./kubernetes.js";
7
8
  import { sanitizeForLog } from "../util/sanitize.js";
8
9
  import { instrumentConnector } from "../metrics/instrument-connector.js";
9
10
  import { loadTrustRoot, verifyIntegrity, verifyManifestSignature, PluginVerificationError, } from "./verify.js";
@@ -96,6 +97,11 @@ export class PluginLoader {
96
97
  source: "builtin",
97
98
  factory: () => new LokiConnector(),
98
99
  });
100
+ this.register({
101
+ name: "kubernetes",
102
+ source: "builtin",
103
+ factory: () => new KubernetesConnector(),
104
+ });
99
105
  }
100
106
  async loadFilesystem() {
101
107
  const dir = this.pluginsDir;
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,165 @@
1
+ import { describe, it } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { isTopologyProvider } from "./interface.js";
4
+ // A minimal connector that exposes no topology methods.
5
+ function makeMetricsOnlyConnector() {
6
+ return {
7
+ name: "m1",
8
+ type: "prometheus",
9
+ signalType: "metrics",
10
+ async connect() { },
11
+ async healthCheck() {
12
+ return { status: "up", latencyMs: 0 };
13
+ },
14
+ async disconnect() { },
15
+ getDefaultMetrics() {
16
+ return [];
17
+ },
18
+ getMetrics() {
19
+ return [];
20
+ },
21
+ async listServices() {
22
+ return [];
23
+ },
24
+ };
25
+ }
26
+ // A fake topology connector that returns a tiny, well-formed graph.
27
+ function makeTopologyConnector() {
28
+ const resources = [
29
+ {
30
+ id: "k8s:pod:default/checkout-7f89d",
31
+ kind: "pod",
32
+ name: "checkout-7f89d",
33
+ source: "kind-cluster",
34
+ labels: { app: "checkout" },
35
+ attributes: { uid: "11111111-1111-1111-1111-111111111111" },
36
+ },
37
+ {
38
+ id: "k8s:node:worker-1",
39
+ kind: "node",
40
+ name: "worker-1",
41
+ source: "kind-cluster",
42
+ labels: {},
43
+ },
44
+ ];
45
+ const edges = [
46
+ {
47
+ from: "k8s:pod:default/checkout-7f89d",
48
+ to: "k8s:node:worker-1",
49
+ relation: "RUNS_ON",
50
+ source: "kind-cluster",
51
+ confidence: 1.0,
52
+ },
53
+ ];
54
+ return {
55
+ name: "kind-cluster",
56
+ type: "kubernetes",
57
+ signalType: "topology",
58
+ async connect() { },
59
+ async healthCheck() {
60
+ return { status: "up", latencyMs: 0 };
61
+ },
62
+ async disconnect() { },
63
+ getDefaultMetrics() {
64
+ return [];
65
+ },
66
+ getMetrics() {
67
+ return [];
68
+ },
69
+ async listServices() {
70
+ return [];
71
+ },
72
+ async listResources() {
73
+ return resources;
74
+ },
75
+ async listEdges() {
76
+ return edges;
77
+ },
78
+ async getTopologySnapshot() {
79
+ return { source: "kind-cluster", resources, edges, revision: 1 };
80
+ },
81
+ watchTopology(listener) {
82
+ // emit an initial resync, then a no-op unsubscribe
83
+ queueMicrotask(() => listener({
84
+ type: "resync",
85
+ snapshot: { source: "kind-cluster", resources, edges, revision: 1 },
86
+ }));
87
+ return () => { };
88
+ },
89
+ };
90
+ }
91
+ describe("isTopologyProvider", () => {
92
+ it("returns false for metrics-only connectors", () => {
93
+ assert.equal(isTopologyProvider(makeMetricsOnlyConnector()), false);
94
+ });
95
+ it("returns true when all four topology methods are present", () => {
96
+ assert.equal(isTopologyProvider(makeTopologyConnector()), true);
97
+ });
98
+ it("returns false if any topology method is missing", () => {
99
+ const conn = makeTopologyConnector();
100
+ // Strip one method — partial topology support is not a TopologyProvider.
101
+ delete conn.watchTopology;
102
+ assert.equal(isTopologyProvider(conn), false);
103
+ });
104
+ });
105
+ describe("topology data model", () => {
106
+ it("Resource.id follows the k8s:<kind>:<namespace>/<name> shape for namespaced kinds", async () => {
107
+ const conn = makeTopologyConnector();
108
+ assert.ok(isTopologyProvider(conn));
109
+ const resources = await conn.listResources();
110
+ const pod = resources.find((r) => r.kind === "pod");
111
+ assert.ok(pod, "expected a pod resource");
112
+ assert.match(pod.id, /^k8s:pod:[^/]+\/.+$/);
113
+ });
114
+ it("Resource.id for cluster-scoped kinds has no namespace segment", async () => {
115
+ const conn = makeTopologyConnector();
116
+ assert.ok(isTopologyProvider(conn));
117
+ const resources = await conn.listResources();
118
+ const node = resources.find((r) => r.kind === "node");
119
+ assert.ok(node, "expected a node resource");
120
+ assert.match(node.id, /^k8s:node:[^/]+$/);
121
+ });
122
+ it("every Resource and Edge carries a non-empty source", async () => {
123
+ const conn = makeTopologyConnector();
124
+ assert.ok(isTopologyProvider(conn));
125
+ const snap = await conn.getTopologySnapshot();
126
+ for (const r of snap.resources)
127
+ assert.ok(r.source.length > 0);
128
+ for (const e of snap.edges)
129
+ assert.ok(e.source.length > 0);
130
+ });
131
+ it("Edge endpoints reference existing Resource ids", async () => {
132
+ const conn = makeTopologyConnector();
133
+ assert.ok(isTopologyProvider(conn));
134
+ const snap = await conn.getTopologySnapshot();
135
+ const ids = new Set(snap.resources.map((r) => r.id));
136
+ for (const e of snap.edges) {
137
+ assert.ok(ids.has(e.from), `dangling edge.from: ${e.from}`);
138
+ assert.ok(ids.has(e.to), `dangling edge.to: ${e.to}`);
139
+ }
140
+ });
141
+ it("confidence is bounded to [0,1]", async () => {
142
+ const conn = makeTopologyConnector();
143
+ assert.ok(isTopologyProvider(conn));
144
+ const edges = await conn.listEdges();
145
+ for (const e of edges) {
146
+ assert.ok(e.confidence >= 0 && e.confidence <= 1);
147
+ }
148
+ });
149
+ });
150
+ describe("watchTopology", () => {
151
+ it("delivers a resync event with the current snapshot", async () => {
152
+ const conn = makeTopologyConnector();
153
+ assert.ok(isTopologyProvider(conn));
154
+ const events = [];
155
+ const unsubscribe = conn.watchTopology((e) => events.push(e));
156
+ // Allow the queued microtask to fire.
157
+ await new Promise((resolve) => setImmediate(resolve));
158
+ unsubscribe();
159
+ assert.equal(events.length, 1);
160
+ assert.equal(events[0].type, "resync");
161
+ if (events[0].type === "resync") {
162
+ assert.ok(events[0].snapshot.revision >= 1);
163
+ }
164
+ });
165
+ });
package/dist/index.js CHANGED
@@ -8,6 +8,7 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
8
8
  import { z } from "zod";
9
9
  import { loadConfig, saveConfig, DEFAULT_HEALTH_THRESHOLDS, DEFAULT_SETTINGS } from "./config/loader.js";
10
10
  import { ConnectorRegistry, getSupportedTypes } from "./connectors/registry.js";
11
+ import { isTopologyProvider } from "./connectors/interface.js";
11
12
  import { defaultContext, principalContext } from "./context.js";
12
13
  import { enforceEntitledAccess, enterpriseGateStatus, enterpriseGateInfo, enterprisePolicyView, enterpriseCatalogView, enterpriseAuditTail, authorizeAdmin, updateRbacPolicy, updateCatalog, } from "./enterprise-gate.js";
13
14
  import { loadCredentials, credentialsConfigured, extractToken, resolveToken, } from "./auth/credentials.js";
@@ -23,6 +24,7 @@ import { queryMetricsHandler } from "./tools/query-metrics.js";
23
24
  import { queryLogsHandler } from "./tools/query-logs.js";
24
25
  import { getServiceHealthHandler, setHealthThresholds } from "./tools/get-service-health.js";
25
26
  import { detectAnomaliesHandler } from "./tools/detect-anomalies.js";
27
+ import { getTopologyHandler, getBlastRadiusHandler } from "./tools/topology.js";
26
28
  import { fileURLToPath } from "node:url";
27
29
  import { dirname, join } from "node:path";
28
30
  import { readFileSync, writeFileSync, mkdtempSync, rmSync } from "node:fs";
@@ -238,6 +240,48 @@ async function main() {
238
240
  await enforceEntitledAccess(ctx, { tool: "detect_anomalies", source: args?.source, service: args?.service });
239
241
  return withToolMetrics("detect_anomalies", () => detectAnomaliesHandler(registry, args, ctx));
240
242
  });
243
+ mcpServer.tool("get_topology", [
244
+ "Return the infrastructure topology graph (Resources and Edges) from every topology-capable connector.",
245
+ "When to use: when an agent needs to reason about which workload runs on which host, who owns whom, or which scope (namespace/project/folder) a resource belongs to. Pair with `get_blast_radius` for shared-host RCA.",
246
+ "Behavior: read-only, no side effects. Returns `{ sources, resources, edges, total, truncated }`. Filters compose: `source` to one connector, `kind` to one resource type (e.g. 'pod', 'node', 'deployment'), `scope` to members of a namespace/folder/project. Output is capped by `limit` (default 500, max 5000) and edges referencing dropped resources are removed.",
247
+ "Related: `get_blast_radius` to evaluate the impact of a host failure; `list_sources` to discover topology-capable connectors.",
248
+ ].join(" "), {
249
+ source: z
250
+ .string()
251
+ .optional()
252
+ .describe("Optional. Restrict the graph to one topology connector by source name (see `list_sources`). Default: merge across all connectors."),
253
+ kind: z
254
+ .string()
255
+ .optional()
256
+ .describe("Optional. Restrict to resources of one kind. Common values for Kubernetes: 'pod', 'node', 'deployment', 'replicaset', 'namespace'. Other connectors may emit different kinds (e.g. 'vm', 'hypervisor', 'volume'). Default: all kinds."),
257
+ scope: z
258
+ .string()
259
+ .optional()
260
+ .describe("Optional. Restrict to resources contained in a scope (anything pointed to by `IN_NAMESPACE` edges). Pass the scope's resource id (e.g. 'k8s:namespace:default') or its name (e.g. 'default'). Default: no scope filter."),
261
+ limit: z
262
+ .number()
263
+ .int()
264
+ .min(1)
265
+ .max(5000)
266
+ .optional()
267
+ .describe("Optional. Maximum resources to return; edges are trimmed to the kept set. Default 500, max 5000."),
268
+ }, async (args) => {
269
+ await enforceEntitledAccess(ctx, { tool: "get_topology", source: args?.source });
270
+ return withToolMetrics("get_topology", () => getTopologyHandler(registry, args, ctx));
271
+ });
272
+ mcpServer.tool("get_blast_radius", [
273
+ "Given a resource, return who else fails if its underlying host(s) fail.",
274
+ "When to use: cross-cutting RCA — when several services degrade together and you suspect a shared host. Works for any RUNS_ON relationship: pod→node, vm→hypervisor, container→host.",
275
+ "Behavior: read-only, no side effects. Resolves `resource` to a Resource (accepts canonical id, exact name, or unique substring), determines its host(s) via RUNS_ON, then lists every other resource that runs on those hosts, bucketed by ownership root (the terminal `OWNED_BY` target — e.g. the Deployment, not the ReplicaSet). If the target is itself a host, its tenants are reported. Returns a structured error if the resource is ambiguous or unknown.",
276
+ "Related: `get_topology` for the full graph; `get_service_health` for the per-service verdict on each co-tenant.",
277
+ ].join(" "), {
278
+ resource: z
279
+ .string()
280
+ .describe("Required. Resource to evaluate. Accepts the canonical id (e.g. 'k8s:pod:default/checkout-7f89d'), the exact resource name (e.g. 'checkout-7f89d'), or a unique substring of either."),
281
+ }, async (args) => {
282
+ await enforceEntitledAccess(ctx, { tool: "get_blast_radius" });
283
+ return withToolMetrics("get_blast_radius", () => getBlastRadiusHandler(registry, args, ctx));
284
+ });
241
285
  return mcpServer;
242
286
  }
243
287
  // --- HTTP server ---
@@ -691,6 +735,36 @@ async function main() {
691
735
  res.status(500).json({ error: "Failed to get health data" });
692
736
  }
693
737
  });
738
+ // --- Topology API ---
739
+ // Returns the union of topology snapshots across all topology-capable
740
+ // connectors (today only "kubernetes"). One JSON document so the UI can
741
+ // render summary + grouped views without N round-trips.
742
+ app.get("/api/topology", async (_req, res) => {
743
+ try {
744
+ const sources = [];
745
+ const allResources = [];
746
+ const allEdges = [];
747
+ for (const c of registry.getAll()) {
748
+ if (!isTopologyProvider(c))
749
+ continue;
750
+ const snap = await c.getTopologySnapshot();
751
+ sources.push({
752
+ source: snap.source,
753
+ type: c.type,
754
+ revision: snap.revision,
755
+ resources: snap.resources.length,
756
+ edges: snap.edges.length,
757
+ });
758
+ allResources.push(...snap.resources);
759
+ allEdges.push(...snap.edges);
760
+ }
761
+ res.json({ sources, resources: allResources, edges: allEdges });
762
+ }
763
+ catch (err) {
764
+ console.error("topology endpoint failed:", err);
765
+ res.status(500).json({ error: "Failed to read topology" });
766
+ }
767
+ });
694
768
  // --- Settings API ---
695
769
  // Get general settings
696
770
  app.get("/api/settings", (_req, res) => {
@@ -1,7 +1,7 @@
1
1
  export type { ObservabilityConnector } from "../connectors/interface.js";
2
2
  export { manifestSchema } from "./manifest-schema.js";
3
3
  export type { ValidatedConnectorManifest } from "./manifest-schema.js";
4
- export type { SignalType, SourceConfig, SourceAuth, SourceTls, ConnectorHealth, ServiceInfo, MetricInfo, MetricQuery, MetricResult, MetricSummary, DataPoint, LogQuery, LogResult, LogEntry, LogSummary, MetricDefinition, } from "../types.js";
4
+ export type { SignalType, SourceConfig, SourceAuth, SourceTls, ConnectorHealth, ServiceInfo, MetricInfo, MetricQuery, MetricResult, MetricSummary, DataPoint, LogQuery, LogResult, LogEntry, LogSummary, MetricDefinition, Resource, Edge, TopologySnapshot, TopologyChangeEvent, TopologyChangeListener, } from "../types.js";
5
5
  /**
6
6
  * Manifest shape declared in a plugin's `manifest.json`. The server
7
7
  * validates plugin manifests against this at load time.
@@ -18,7 +18,7 @@ export interface ConnectorManifest {
18
18
  /** Semver of this connector build. */
19
19
  version: string;
20
20
  description: string;
21
- signalTypes: Array<"metrics" | "logs" | "traces">;
21
+ signalTypes: Array<"metrics" | "logs" | "traces" | "topology">;
22
22
  homepage?: string;
23
23
  license?: string;
24
24
  logo?: string;
@@ -9,6 +9,7 @@ export declare const manifestSchema: z.ZodObject<{
9
9
  metrics: "metrics";
10
10
  logs: "logs";
11
11
  traces: "traces";
12
+ topology: "topology";
12
13
  }>>;
13
14
  homepage: z.ZodOptional<z.ZodString>;
14
15
  license: z.ZodOptional<z.ZodString>;
@@ -15,7 +15,7 @@ export const manifestSchema = z.object({
15
15
  message: "version must be semver",
16
16
  }),
17
17
  description: z.string().min(1),
18
- signalTypes: z.array(z.enum(["metrics", "logs", "traces"])).min(1),
18
+ signalTypes: z.array(z.enum(["metrics", "logs", "traces", "topology"])).min(1),
19
19
  homepage: z.string().url().optional(),
20
20
  license: z.string().optional(),
21
21
  logo: z.string().optional(),