@thotischner/observability-mcp 1.7.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. package/config/products.yaml.example +48 -0
  2. package/dist/analysis/history.d.ts +70 -0
  3. package/dist/analysis/history.js +170 -0
  4. package/dist/analysis/history.test.d.ts +1 -0
  5. package/dist/analysis/history.test.js +141 -0
  6. package/dist/audit/log.d.ts +108 -0
  7. package/dist/audit/log.js +200 -0
  8. package/dist/audit/log.test.d.ts +1 -0
  9. package/dist/audit/log.test.js +147 -0
  10. package/dist/audit/middleware.d.ts +20 -0
  11. package/dist/audit/middleware.js +50 -0
  12. package/dist/audit/redaction-bypass.d.ts +67 -0
  13. package/dist/audit/redaction-bypass.js +64 -0
  14. package/dist/audit/redaction-bypass.test.d.ts +1 -0
  15. package/dist/audit/redaction-bypass.test.js +72 -0
  16. package/dist/audit/sinks/types.d.ts +18 -0
  17. package/dist/audit/sinks/types.js +1 -0
  18. package/dist/audit/sinks/webhook.d.ts +45 -0
  19. package/dist/audit/sinks/webhook.js +111 -0
  20. package/dist/audit/sinks/webhook.test.d.ts +1 -0
  21. package/dist/audit/sinks/webhook.test.js +162 -0
  22. package/dist/auth/credentials.d.ts +29 -0
  23. package/dist/auth/credentials.js +53 -1
  24. package/dist/auth/credentials.test.js +46 -1
  25. package/dist/auth/csrf.d.ts +26 -0
  26. package/dist/auth/csrf.js +128 -0
  27. package/dist/auth/csrf.test.d.ts +1 -0
  28. package/dist/auth/csrf.test.js +143 -0
  29. package/dist/auth/local-users.d.ts +68 -0
  30. package/dist/auth/local-users.js +154 -0
  31. package/dist/auth/local-users.test.d.ts +1 -0
  32. package/dist/auth/local-users.test.js +121 -0
  33. package/dist/auth/middleware.d.ts +49 -0
  34. package/dist/auth/middleware.js +65 -0
  35. package/dist/auth/middleware.test.d.ts +1 -0
  36. package/dist/auth/middleware.test.js +90 -0
  37. package/dist/auth/oidc/client.d.ts +73 -0
  38. package/dist/auth/oidc/client.js +104 -0
  39. package/dist/auth/oidc/client.test.d.ts +1 -0
  40. package/dist/auth/oidc/client.test.js +121 -0
  41. package/dist/auth/oidc/dcr.d.ts +70 -0
  42. package/dist/auth/oidc/dcr.js +160 -0
  43. package/dist/auth/oidc/dcr.test.d.ts +1 -0
  44. package/dist/auth/oidc/dcr.test.js +109 -0
  45. package/dist/auth/oidc/discovery.d.ts +38 -0
  46. package/dist/auth/oidc/discovery.js +48 -0
  47. package/dist/auth/oidc/discovery.test.d.ts +1 -0
  48. package/dist/auth/oidc/discovery.test.js +68 -0
  49. package/dist/auth/oidc/endpoints.d.ts +20 -0
  50. package/dist/auth/oidc/endpoints.js +168 -0
  51. package/dist/auth/oidc/endpoints.test.d.ts +7 -0
  52. package/dist/auth/oidc/endpoints.test.js +304 -0
  53. package/dist/auth/oidc/flow-cookie.d.ts +57 -0
  54. package/dist/auth/oidc/flow-cookie.js +142 -0
  55. package/dist/auth/oidc/flow-cookie.test.d.ts +1 -0
  56. package/dist/auth/oidc/flow-cookie.test.js +0 -0
  57. package/dist/auth/oidc/index.d.ts +7 -0
  58. package/dist/auth/oidc/index.js +6 -0
  59. package/dist/auth/oidc/jwks.d.ts +36 -0
  60. package/dist/auth/oidc/jwks.js +69 -0
  61. package/dist/auth/oidc/jwks.test.d.ts +1 -0
  62. package/dist/auth/oidc/jwks.test.js +65 -0
  63. package/dist/auth/oidc/jwt.d.ts +62 -0
  64. package/dist/auth/oidc/jwt.js +113 -0
  65. package/dist/auth/oidc/jwt.test.d.ts +1 -0
  66. package/dist/auth/oidc/jwt.test.js +141 -0
  67. package/dist/auth/oidc/pkce.d.ts +19 -0
  68. package/dist/auth/oidc/pkce.js +43 -0
  69. package/dist/auth/oidc/pkce.test.d.ts +1 -0
  70. package/dist/auth/oidc/pkce.test.js +55 -0
  71. package/dist/auth/oidc/profiles.d.ts +22 -0
  72. package/dist/auth/oidc/profiles.js +95 -0
  73. package/dist/auth/oidc/profiles.test.d.ts +1 -0
  74. package/dist/auth/oidc/profiles.test.js +51 -0
  75. package/dist/auth/oidc/runtime.d.ts +66 -0
  76. package/dist/auth/oidc/runtime.js +142 -0
  77. package/dist/auth/oidc/runtime.test.d.ts +1 -0
  78. package/dist/auth/oidc/runtime.test.js +181 -0
  79. package/dist/auth/policy/batch-dry-run.d.ts +56 -0
  80. package/dist/auth/policy/batch-dry-run.js +129 -0
  81. package/dist/auth/policy/batch-dry-run.test.d.ts +1 -0
  82. package/dist/auth/policy/batch-dry-run.test.js +140 -0
  83. package/dist/auth/policy/engine.d.ts +64 -0
  84. package/dist/auth/policy/engine.js +87 -0
  85. package/dist/auth/policy/engine.test.d.ts +1 -0
  86. package/dist/auth/policy/engine.test.js +98 -0
  87. package/dist/auth/policy/loader.d.ts +45 -0
  88. package/dist/auth/policy/loader.js +137 -0
  89. package/dist/auth/policy/loader.test.d.ts +1 -0
  90. package/dist/auth/policy/loader.test.js +86 -0
  91. package/dist/auth/policy/opa.d.ts +69 -0
  92. package/dist/auth/policy/opa.js +173 -0
  93. package/dist/auth/policy/opa.test.d.ts +1 -0
  94. package/dist/auth/policy/opa.test.js +206 -0
  95. package/dist/auth/rbac.d.ts +62 -0
  96. package/dist/auth/rbac.js +162 -0
  97. package/dist/auth/rbac.test.d.ts +1 -0
  98. package/dist/auth/rbac.test.js +183 -0
  99. package/dist/auth/session.d.ts +66 -0
  100. package/dist/auth/session.js +146 -0
  101. package/dist/auth/session.test.d.ts +1 -0
  102. package/dist/auth/session.test.js +90 -0
  103. package/dist/catalog/loader.d.ts +67 -0
  104. package/dist/catalog/loader.js +122 -0
  105. package/dist/catalog/loader.test.d.ts +1 -0
  106. package/dist/catalog/loader.test.js +108 -0
  107. package/dist/cli/index.js +3 -0
  108. package/dist/cli/inspector-config.d.ts +9 -0
  109. package/dist/cli/inspector-config.js +28 -0
  110. package/dist/cli/inspector-config.test.d.ts +1 -0
  111. package/dist/cli/inspector-config.test.js +33 -0
  112. package/dist/cli/lib.d.ts +1 -1
  113. package/dist/cli/lib.js +1 -0
  114. package/dist/conformance/mcp-2025-11-25.test.d.ts +1 -0
  115. package/dist/conformance/mcp-2025-11-25.test.js +206 -0
  116. package/dist/connectors/interface.d.ts +5 -1
  117. package/dist/connectors/loader.js +6 -4
  118. package/dist/connectors/loader.test.d.ts +1 -0
  119. package/dist/connectors/loader.test.js +78 -0
  120. package/dist/connectors/prometheus.test.js +31 -13
  121. package/dist/connectors/registry.d.ts +13 -0
  122. package/dist/connectors/registry.js +30 -0
  123. package/dist/connectors/registry.test.js +56 -2
  124. package/dist/context.d.ts +45 -1
  125. package/dist/context.js +40 -1
  126. package/dist/context.test.d.ts +1 -0
  127. package/dist/context.test.js +58 -0
  128. package/dist/federation/registry.d.ts +32 -0
  129. package/dist/federation/registry.js +77 -0
  130. package/dist/federation/registry.test.d.ts +1 -0
  131. package/dist/federation/registry.test.js +130 -0
  132. package/dist/federation/upstream.d.ts +60 -0
  133. package/dist/federation/upstream.js +114 -0
  134. package/dist/index.js +2124 -73
  135. package/dist/middleware/ssrfGuard.d.ts +15 -0
  136. package/dist/middleware/ssrfGuard.js +103 -0
  137. package/dist/middleware/ssrfGuard.test.d.ts +1 -0
  138. package/dist/middleware/ssrfGuard.test.js +81 -0
  139. package/dist/net/egress-policy.js +2 -0
  140. package/dist/observability/otel.d.ts +20 -0
  141. package/dist/observability/otel.js +118 -0
  142. package/dist/observability/otel.test.d.ts +1 -0
  143. package/dist/observability/otel.test.js +56 -0
  144. package/dist/openapi.js +654 -6
  145. package/dist/openapi.test.d.ts +1 -0
  146. package/dist/openapi.test.js +98 -0
  147. package/dist/policy/redact.d.ts +44 -0
  148. package/dist/policy/redact.js +144 -0
  149. package/dist/policy/redact.test.d.ts +1 -0
  150. package/dist/policy/redact.test.js +172 -0
  151. package/dist/postmortem/synthesizer.d.ts +83 -0
  152. package/dist/postmortem/synthesizer.js +205 -0
  153. package/dist/postmortem/synthesizer.test.d.ts +1 -0
  154. package/dist/postmortem/synthesizer.test.js +141 -0
  155. package/dist/products/loader.d.ts +112 -0
  156. package/dist/products/loader.js +289 -0
  157. package/dist/products/loader.test.d.ts +1 -0
  158. package/dist/products/loader.test.js +257 -0
  159. package/dist/quota/charge.d.ts +28 -0
  160. package/dist/quota/charge.js +30 -0
  161. package/dist/quota/charge.test.d.ts +1 -0
  162. package/dist/quota/charge.test.js +83 -0
  163. package/dist/quota/limiter.d.ts +97 -0
  164. package/dist/quota/limiter.js +161 -0
  165. package/dist/quota/limiter.test.d.ts +1 -0
  166. package/dist/quota/limiter.test.js +205 -0
  167. package/dist/quota/token-budget.d.ts +119 -0
  168. package/dist/quota/token-budget.js +297 -0
  169. package/dist/quota/token-budget.test.d.ts +1 -0
  170. package/dist/quota/token-budget.test.js +215 -0
  171. package/dist/scim/group-role-map.d.ts +4 -0
  172. package/dist/scim/group-role-map.js +33 -0
  173. package/dist/scim/group-role-map.test.d.ts +1 -0
  174. package/dist/scim/group-role-map.test.js +33 -0
  175. package/dist/scim/routes.d.ts +15 -0
  176. package/dist/scim/routes.js +249 -0
  177. package/dist/scim/store.d.ts +37 -0
  178. package/dist/scim/store.js +178 -0
  179. package/dist/scim/store.test.d.ts +1 -0
  180. package/dist/scim/store.test.js +121 -0
  181. package/dist/scim/types.d.ts +73 -0
  182. package/dist/scim/types.js +29 -0
  183. package/dist/sdk/hooks.d.ts +77 -0
  184. package/dist/sdk/hooks.js +72 -0
  185. package/dist/sdk/hooks.test.d.ts +1 -0
  186. package/dist/sdk/hooks.test.js +159 -0
  187. package/dist/sdk/index.d.ts +2 -0
  188. package/dist/sdk/index.js +1 -0
  189. package/dist/sdk/manifest-schema.d.ts +17 -0
  190. package/dist/sdk/manifest-schema.js +21 -0
  191. package/dist/tenancy/context.d.ts +45 -0
  192. package/dist/tenancy/context.js +97 -0
  193. package/dist/tenancy/context.test.d.ts +1 -0
  194. package/dist/tenancy/context.test.js +72 -0
  195. package/dist/tenancy/migration.test.d.ts +7 -0
  196. package/dist/tenancy/migration.test.js +75 -0
  197. package/dist/tools/context-seam.test.js +6 -1
  198. package/dist/tools/detect-anomalies.d.ts +1 -1
  199. package/dist/tools/detect-anomalies.js +5 -4
  200. package/dist/tools/generate-postmortem.d.ts +35 -0
  201. package/dist/tools/generate-postmortem.js +191 -0
  202. package/dist/tools/get-anomaly-history.d.ts +35 -0
  203. package/dist/tools/get-anomaly-history.js +126 -0
  204. package/dist/tools/get-service-health.d.ts +1 -1
  205. package/dist/tools/get-service-health.js +4 -3
  206. package/dist/tools/list-services.d.ts +1 -1
  207. package/dist/tools/list-services.js +3 -2
  208. package/dist/tools/list-sources.d.ts +1 -1
  209. package/dist/tools/list-sources.js +6 -2
  210. package/dist/tools/query-logs.d.ts +1 -1
  211. package/dist/tools/query-logs.js +2 -2
  212. package/dist/tools/query-metrics.d.ts +1 -1
  213. package/dist/tools/query-metrics.js +19 -6
  214. package/dist/tools/query-traces.d.ts +47 -0
  215. package/dist/tools/query-traces.js +145 -0
  216. package/dist/tools/query-traces.test.d.ts +1 -0
  217. package/dist/tools/query-traces.test.js +110 -0
  218. package/dist/tools/registry-names.d.ts +35 -0
  219. package/dist/tools/registry-names.js +54 -0
  220. package/dist/tools/registry-names.test.d.ts +1 -0
  221. package/dist/tools/registry-names.test.js +61 -0
  222. package/dist/tools/topology.d.ts +3 -3
  223. package/dist/tools/topology.js +10 -6
  224. package/dist/topology/merge.d.ts +22 -0
  225. package/dist/topology/merge.js +178 -0
  226. package/dist/topology/merge.test.d.ts +1 -0
  227. package/dist/topology/merge.test.js +110 -0
  228. package/dist/transport/sessionStore.d.ts +66 -0
  229. package/dist/transport/sessionStore.js +138 -0
  230. package/dist/transport/sessionStore.test.d.ts +1 -0
  231. package/dist/transport/sessionStore.test.js +118 -0
  232. package/dist/transport/websocket.d.ts +35 -0
  233. package/dist/transport/websocket.js +133 -0
  234. package/dist/transport/websocket.test.d.ts +1 -0
  235. package/dist/transport/websocket.test.js +124 -0
  236. package/dist/types.d.ts +51 -0
  237. package/dist/ui/index.html +3083 -88
  238. package/package.json +32 -5
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Migration regression suite — pre-E7 single-tenant deployments must
3
+ * continue to work without any config change. These tests pin the
4
+ * "everything defaults to `default`" contract by simulating the
5
+ * exact data shapes a pre-E7 server / file / token would carry.
6
+ */
7
+ import { test } from "node:test";
8
+ import assert from "node:assert/strict";
9
+ import { defaultContext, principalContext } from "../context.js";
10
+ import { issueSession, verifySession } from "../auth/session.js";
11
+ import { loadCredentials } from "../auth/credentials.js";
12
+ import { CatalogStore } from "../catalog/loader.js";
13
+ import { AuditLog } from "../audit/log.js";
14
+ import { DEFAULT_TENANT } from "./context.js";
15
// Signing secret handed to issueSession / verifySession below (32 characters).
const SECRET = "x".repeat(32);

// A context built with no arguments must carry the default tenant.
test("migration — anonymous context lands in DEFAULT_TENANT", () => {
    assert.equal(defaultContext().tenant, DEFAULT_TENANT);
});

// A principal context created without a tenant option must also carry it.
test("migration — principalContext without tenant opt → DEFAULT_TENANT", () => {
    const { tenant } = principalContext("agent", ["prom-prod"]);
    assert.equal(tenant, DEFAULT_TENANT);
});

// Claims without a tenant field must round-trip through issue + verify.
test("migration — pre-E7 session cookie (no tenant field) verifies + reads back fine", () => {
    const legacyClaims = { sub: "alice", name: "Alice", roles: ["operator"] };
    const issued = issueSession(legacyClaims, { secret: SECRET });
    const verified = verifySession(issued.cookie, { secret: SECRET });
    assert.ok(verified, "verifySession should accept a pre-E7 cookie");
    assert.equal(verified.tenant, undefined, "tenant stays undefined; consumers default to 'default'");
});

// Only OMCP_API_KEYS is supplied; no credential may receive a tenant.
test("migration — pre-E7 OMCP_API_KEYS (no OMCP_KEY_TENANTS) leaves credentials in DEFAULT_TENANT", () => {
    const legacyEnv = { OMCP_API_KEYS: "agent:tok_abc,ci:tok_def" };
    const credentials = loadCredentials(legacyEnv);
    assert.equal(credentials.length, 2);
    for (const credential of credentials) {
        assert.equal(credential.tenant, undefined, "no env → no tenant assignment → consumers default to 'default'");
    }
});

// Catalog entries that carry no tenant field stay visible to default-tenant lookups.
test("migration — pre-E7 catalog (entries without tenant field) still enriches DEFAULT_TENANT callers", () => {
    const legacyServices = {
        "payments": { owner: "team-payments" },
        "shipping": { owner: "team-shipping" },
    };
    const catalog = new CatalogStore({ services: legacyServices });
    // Tenant-aware lookup, as a default-tenant caller would perform it.
    const viaTenant = (name) => catalog.get(name, DEFAULT_TENANT)?.owner;
    assert.equal(viaTenant("payments"), "team-payments");
    assert.equal(viaTenant("shipping"), "team-shipping");
    // Lookup without a tenant argument resolves the same entry.
    assert.equal(catalog.get("payments")?.owner, "team-payments");
});

// An audit record written without a tenant must be listed under tenant "default".
test("migration — pre-E7 audit entries (no tenant field) surface under ?tenant=default", async () => {
    const audit = new AuditLog();
    const legacyRecord = {
        actor: { sub: "alice" },
        resource: "sources",
        action: "write",
        method: "POST",
        path: "/api/sources",
        status: 200,
    };
    await audit.record(legacyRecord);
    const listed = audit.list({ tenant: "default" });
    assert.equal(listed.length, 1);
    assert.equal(listed[0].actor.sub, "alice");
});

// Mixing one tenant-tagged entry with one untagged entry keeps each tenant's view separate.
test("migration — opt-in is per-entry: an admin defining `tenant: acme` doesn't break the rest", () => {
    const catalog = new CatalogStore({
        services: {
            "acme-app": { owner: "acme-team", tenant: "acme" },
            "shared-cdn": { owner: "infra" },
        },
    });
    // View of the "acme" tenant: exactly one entry, and not the untagged one.
    assert.equal(catalog.count("acme"), 1);
    assert.equal(catalog.get("shared-cdn", "acme"), undefined);
    // View of the "default" tenant: exactly one entry, and not the acme one.
    assert.equal(catalog.count("default"), 1);
    assert.equal(catalog.get("acme-app", "default"), undefined);
    assert.equal(catalog.get("shared-cdn", "default")?.owner, "infra");
});
@@ -15,7 +15,12 @@ describe("RequestContext seam", () => {
15
15
  if (!hasHandler)
16
16
  continue;
17
17
  it(`${file}: handler accepts a RequestContext`, () => {
18
- assert.match(src, /_ctx:\s*RequestContext/, `${file} exports a *Handler but does not thread RequestContext` +
18
+ // Accept both the read-and-use form (`ctx: RequestContext`) and
19
+ // the historic placeholder form (`_ctx: RequestContext`) — the
20
+ // seam is the same; the underscore was only there to silence
21
+ // unused-param lints. Handlers that actually consume the ctx
22
+ // (tenant-aware tools, post-E7) drop it.
23
+ assert.match(src, /\b_?ctx:\s*RequestContext/, `${file} exports a *Handler but does not thread RequestContext — ` +
19
24
  `add the ctx seam (see context.ts)`);
20
25
  assert.match(src, /from "\.\.\/context\.js"/, `${file} must import from ../context.js`);
21
26
  });
@@ -26,7 +26,7 @@ export declare function detectAnomaliesHandler(registry: ConnectorRegistry, args
26
26
  service?: string;
27
27
  duration?: string;
28
28
  sensitivity?: string;
29
- }, _ctx?: RequestContext): Promise<{
29
+ }, ctx?: RequestContext): Promise<{
30
30
  content: {
31
31
  type: "text";
32
32
  text: string;
@@ -33,12 +33,13 @@ const KEY_METRICS = ["cpu", "memory", "error_rate", "latency_p99", "request_rate
33
33
  // the overall error ratio is low (e.g. a memory leak emits a handful of
34
34
  // "OutOfMemoryWarning" lines long before it turns into 5xx errors).
35
35
  const CRITICAL_LOG_PATTERN = /\b(out\s?of\s?memory|oom|outofmemory|heap (usage|exhaust)|memory leak|panic|fatal|deadlock|segfault|stack overflow|cannot allocate)\b/i;
36
- export async function detectAnomaliesHandler(registry, args, _ctx = defaultContext()) {
36
+ export async function detectAnomaliesHandler(registry, args, ctx = defaultContext()) {
37
37
  const duration = args.duration || "10m";
38
38
  const threshold = SENSITIVITY_THRESHOLDS[args.sensitivity || "medium"] || 2.0;
39
- // Discover services to scan
40
- const metricsConnectors = registry.getBySignal("metrics");
41
- const logConnectors = registry.getBySignal("logs");
39
+ // Discover services to scan — tenant-scoped.
40
+ const tenantConnectors = registry.getByTenant(ctx.tenant);
41
+ const metricsConnectors = tenantConnectors.filter((c) => c.signalType === "metrics");
42
+ const logConnectors = tenantConnectors.filter((c) => c.signalType === "logs");
42
43
  let serviceNames = [];
43
44
  if (args.service) {
44
45
  serviceNames = [args.service];
@@ -0,0 +1,35 @@
1
+ import type { ConnectorRegistry } from "../connectors/registry.js";
2
+ import { type RequestContext } from "../context.js";
3
+ export declare const generatePostmortemDefinition: {
4
+ name: "generate_postmortem";
5
+ description: string;
6
+ inputSchema: {
7
+ type: "object";
8
+ properties: {
9
+ service: {
10
+ type: string;
11
+ description: string;
12
+ };
13
+ duration: {
14
+ type: string;
15
+ description: string;
16
+ };
17
+ format: {
18
+ type: string;
19
+ description: string;
20
+ };
21
+ };
22
+ required: string[];
23
+ };
24
+ };
25
+ export declare function generatePostmortemHandler(registry: ConnectorRegistry, args: {
26
+ service: string;
27
+ duration?: string;
28
+ format?: string;
29
+ }, ctx?: RequestContext): Promise<{
30
+ content: {
31
+ type: "text";
32
+ text: string;
33
+ }[];
34
+ isError: boolean;
35
+ }>;
@@ -0,0 +1,191 @@
1
+ // generate_postmortem — Phase F19a.
2
+ //
3
+ // Stitches together anomaly history (F15), trace summaries (F13),
4
+ // and the topology blast-radius (existing get_blast_radius
5
+ // machinery) into a single markdown post-mortem report.
6
+ //
7
+ // The synthesizer is pure compute (see ./../postmortem/synthesizer);
8
+ // this handler is just the orchestration: pull each upstream
9
+ // primitive in parallel, hand the result to the synthesizer.
10
+ import { defaultContext } from "../context.js";
11
+ import { validateDuration, validateServiceName, errorResponse } from "./validation.js";
12
+ import { synthesizePostmortem, } from "../postmortem/synthesizer.js";
13
// Sentences of the tool description, joined with single spaces below.
const POSTMORTEM_DESCRIPTION_PARTS = [
    "Stitch the gateway's primitives (anomaly history, blast-radius, traces, log highlights) into a single markdown post-mortem report for one service over a given window.",
    "When to use: after an incident, when the operator or LLM wants 'one document the on-call can read in 60 seconds' instead of poking the individual tools.",
    "Prerequisites: anomaly history requires OMCP_ANOMALY_HISTORY_REMOTE_WRITE configured AND a Prometheus source pointed at the same TSDB (see docs/anomaly-history.md). Traces require a Tempo / Jaeger source. Blast-radius requires a topology provider.",
    "Behavior: read-only. Returns BOTH a structured JSON shape AND a markdown body suitable to paste straight into a ticket. Output is capped (timeline truncated to 20 rows in the markdown, 30 nodes in the blast radius table, 10 traces) — the structured shape carries the full data.",
    "Related: `get_anomaly_history` for the raw scores; `query_traces` for individual traces; `get_blast_radius` for the topology.",
];
/**
 * Tool definition for `generate_postmortem`: name, description and the JSON
 * schema of its arguments. Only `service` is required.
 */
export const generatePostmortemDefinition = {
    name: "generate_postmortem",
    description: POSTMORTEM_DESCRIPTION_PARTS.join(" "),
    inputSchema: {
        type: "object",
        properties: {
            service: {
                type: "string",
                description: "Suspected root-cause service (the operator's first guess).",
            },
            duration: {
                type: "string",
                description: "Rolling window the incident took place in, e.g. '1h', '6h'. Default '1h'.",
            },
            format: {
                type: "string",
                description: "Output format: 'markdown' (default) or 'json'.",
            },
        },
        required: ["service"],
    },
};
32
/**
 * Handler for the `generate_postmortem` tool.
 *
 * Validates `args.service` and the window (`args.duration`, default "1h"),
 * gathers anomalies, traces, blast radius and log highlights concurrently,
 * and passes them to `synthesizePostmortem`.
 *
 * @param registry Connector registry, forwarded to each fetch helper.
 * @param args     `{ service, duration?, format? }`.
 * @param ctx      Request context; `ctx.tenant` falls back to "default" in the report.
 * @returns A single text content item: the JSON-serialised report when
 *          `format` is "json" (case-insensitive), otherwise `report.markdown`.
 *          Validation failures return `errorResponse(...)` instead.
 */
export async function generatePostmortemHandler(registry, args, ctx = defaultContext()) {
    const { service } = args;
    const serviceProblem = validateServiceName(service);
    if (serviceProblem) {
        return errorResponse(serviceProblem);
    }
    const duration = args.duration || "1h";
    const durationProblem = validateDuration(duration);
    if (durationProblem) {
        return errorResponse(durationProblem);
    }
    // Fix the reporting window before any upstream call is made.
    const windowEnd = new Date();
    const windowStart = new Date(windowEnd.getTime() - parseDurationMs(duration));
    const fromIso = windowStart.toISOString();
    const toIso = windowEnd.toISOString();
    // The four lookups are independent, so run them side by side.
    const [anomalies, traces, blastRadius, logHighlights] = await Promise.all([
        fetchAnomalies(registry, service, duration, ctx),
        fetchTraces(registry, service, duration, ctx),
        fetchBlastRadius(registry, service, ctx),
        fetchLogHighlights(registry, service, duration, ctx),
    ]);
    const report = synthesizePostmortem({
        service,
        window: duration,
        tenant: ctx.tenant || "default",
        fromIso,
        toIso,
        anomalies,
        blastRadius,
        traces,
        logHighlights,
    });
    // Anything other than "json" yields the markdown body.
    const wantsJson = (args.format || "markdown").toLowerCase() === "json";
    const text = wantsJson ? JSON.stringify(report) : report.markdown;
    return {
        content: [{ type: "text", text }],
        isError: false,
    };
}
76
/**
 * Convert a duration such as "30s", "5m", "2h" or "1d" to milliseconds.
 * Input that does not match `<digits><s|m|h|d>` yields one hour.
 */
function parseDurationMs(d) {
    const ONE_HOUR_MS = 60 * 60 * 1000;
    const parts = d.match(/^(\d+)([smhd])$/);
    if (!parts) {
        return ONE_HOUR_MS;
    }
    const amount = parseInt(parts[1], 10);
    switch (parts[2]) {
        case "s":
            return amount * 1000;
        case "m":
            return amount * 60_000;
        case "h":
            return amount * 3_600_000;
        default:
            // The pattern only admits s/m/h/d, so this is the "d" case.
            return amount * 86_400_000;
    }
}
87
+ async function fetchAnomalies(registry, service, duration, ctx) {
88
+ const metric = `omcp_anomaly_score{service="${escLabel(service)}"}`;
89
+ for (const c of registry.getByTenant(ctx.tenant).filter((x) => typeof x.queryMetrics === "function")) {
90
+ try {
91
+ const r = await c.queryMetrics({ service, metric, duration });
92
+ if (r && r.values && r.values.length > 0) {
93
+ return r.values.map((v) => ({
94
+ ts: typeof v.timestamp === "number" ? new Date(v.timestamp).toISOString() : String(v.timestamp),
95
+ service,
96
+ score: typeof v.value === "number" ? v.value : Number(v.value) || 0,
97
+ method: "mad",
98
+ severity: "warn",
99
+ }));
100
+ }
101
+ }
102
+ catch {
103
+ /* fall through to next source */
104
+ }
105
+ }
106
+ return [];
107
+ }
108
+ async function fetchTraces(registry, service, duration, ctx) {
109
+ for (const c of registry.getByTenant(ctx.tenant).filter((x) => typeof x.queryTraces === "function")) {
110
+ try {
111
+ const r = await c.queryTraces({ service, duration, limit: 10 });
112
+ if (r && r.traces && r.traces.length > 0) {
113
+ return r.traces.map((t) => ({
114
+ traceId: t.traceId,
115
+ rootName: t.rootName,
116
+ rootService: t.rootService,
117
+ durationMs: t.durationMs,
118
+ hasError: t.hasError,
119
+ }));
120
+ }
121
+ }
122
+ catch {
123
+ /* fall through */
124
+ }
125
+ }
126
+ return [];
127
+ }
128
+ async function fetchBlastRadius(registry, service, ctx) {
129
+ // We don't have a direct "give me blast radius for service X" helper at
130
+ // this layer — the existing get_blast_radius is a tool that takes a
131
+ // resource id. For the post-mortem we settle for the full topology
132
+ // snapshot of the caller's tenant and let the synthesizer mark the
133
+ // suspect-named node as root. Future F19b can plumb the real walker.
134
+ for (const c of registry.getByTenant(ctx.tenant)) {
135
+ if (typeof c.getTopologySnapshot !== "function")
136
+ continue;
137
+ try {
138
+ const snap = await c.getTopologySnapshot();
139
+ if (!snap?.resources?.length)
140
+ continue;
141
+ // Pick nodes whose name matches the suspected service (case-
142
+ // insensitive substring is conservative-enough for the
143
+ // synopsis; the real walker can be precise later).
144
+ const needle = service.toLowerCase();
145
+ const matching = snap.resources.filter((r) => r.name?.toLowerCase().includes(needle) ||
146
+ (r.labels && Object.values(r.labels).some((v) => String(v).toLowerCase() === needle)));
147
+ if (matching.length === 0)
148
+ continue;
149
+ const matchedIds = new Set(matching.map((r) => r.id));
150
+ const connected = snap.edges.filter((e) => matchedIds.has(e.from) || matchedIds.has(e.to));
151
+ const neighborIds = new Set([
152
+ ...matching.map((r) => r.id),
153
+ ...connected.map((e) => e.from),
154
+ ...connected.map((e) => e.to),
155
+ ]);
156
+ const nodes = snap.resources
157
+ .filter((r) => neighborIds.has(r.id))
158
+ .map((r) => ({
159
+ id: r.id,
160
+ kind: r.kind,
161
+ name: r.name,
162
+ root: matchedIds.has(r.id),
163
+ }));
164
+ return {
165
+ nodes,
166
+ edges: connected.map((e) => ({ from: e.from, to: e.to, relation: e.relation })),
167
+ };
168
+ }
169
+ catch {
170
+ /* fall through */
171
+ }
172
+ }
173
+ return { nodes: [], edges: [] };
174
+ }
175
+ async function fetchLogHighlights(registry, service, duration, ctx) {
176
+ for (const c of registry.getByTenant(ctx.tenant).filter((x) => typeof x.queryLogs === "function")) {
177
+ try {
178
+ const r = await c.queryLogs({ service, duration, limit: 5 });
179
+ if (r?.summary?.errorCount && r.summary.errorCount > 0) {
180
+ return [`${service}: ${r.summary.errorCount} error log line(s) in window (source: ${r.source}).`];
181
+ }
182
+ }
183
+ catch {
184
+ /* skip */
185
+ }
186
+ }
187
+ return [];
188
+ }
189
+ function escLabel(v) {
190
+ return v.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
191
+ }
@@ -0,0 +1,35 @@
1
+ import type { ConnectorRegistry } from "../connectors/registry.js";
2
+ import { type RequestContext } from "../context.js";
3
+ export declare const getAnomalyHistoryDefinition: {
4
+ name: "get_anomaly_history";
5
+ description: string;
6
+ inputSchema: {
7
+ type: "object";
8
+ properties: {
9
+ service: {
10
+ type: string;
11
+ description: string;
12
+ };
13
+ duration: {
14
+ type: string;
15
+ description: string;
16
+ };
17
+ method: {
18
+ type: string;
19
+ description: string;
20
+ };
21
+ };
22
+ required: string[];
23
+ };
24
+ };
25
+ export declare function getAnomalyHistoryHandler(registry: ConnectorRegistry, args: {
26
+ service: string;
27
+ duration?: string;
28
+ method?: string;
29
+ }, ctx?: RequestContext): Promise<{
30
+ content: {
31
+ type: "text";
32
+ text: string;
33
+ }[];
34
+ isError: boolean;
35
+ }>;
@@ -0,0 +1,126 @@
1
+ // get_anomaly_history — Phase F15.
2
+ //
3
+ // Reads anomaly scores previously written to the TSDB by the
4
+ // AnomalyHistory writer. The tool is a thin convenience wrapper: it
5
+ // builds the PromQL query `omcp_anomaly_score{service="..."}` and
6
+ // dispatches via any Prometheus-shaped connector in the caller's
7
+ // tenant.
8
+ //
9
+ // Operators wire the round-trip themselves (Prometheus scrapes the
10
+ // same remote-write endpoint the writer pushes to) — the gateway
11
+ // doesn't need a direct TSDB query path because it already speaks
12
+ // PromQL via the Prometheus connector.
13
+ import { defaultContext } from "../context.js";
14
+ import { validateDuration, validateServiceName, errorResponse } from "./validation.js";
15
// Sentences of the tool description, joined with single spaces below.
const ANOMALY_HISTORY_DESCRIPTION_PARTS = [
    "Replay historical anomaly scores for a service from the TSDB the gateway writes to (omcp_anomaly_score series).",
    "When to use: post-mortem reconstruction (what did the gateway see at 03:42?), trend analysis on detector noise, or pulling context for the LLM when an incident is reviewed after the fact.",
    "Prerequisites: the operator must have OMCP_ANOMALY_HISTORY_REMOTE_WRITE configured AND a Prometheus connector pointed at the same TSDB so the round-trip closes.",
    "Behavior: read-only. Returns the time-series of scores with per-method/severity labels. Empty result means either no anomalies in the window or history is disabled.",
    "Related: `detect_anomalies` for the live scores; `query_metrics` if you want to write the PromQL by hand.",
];
/**
 * Tool definition for `get_anomaly_history`: name, description and the JSON
 * schema of its arguments. Only `service` is required.
 */
export const getAnomalyHistoryDefinition = {
    name: "get_anomaly_history",
    description: ANOMALY_HISTORY_DESCRIPTION_PARTS.join(" "),
    inputSchema: {
        type: "object",
        properties: {
            service: {
                type: "string",
                description: "Service name to filter on.",
            },
            duration: {
                type: "string",
                description: "Rolling window (e.g. '1h', '24h'). Default '1h'.",
            },
            method: {
                type: "string",
                description: "Filter by detector method ('mad', 'seasonality', 'correlator'). Optional.",
            },
        },
        required: ["service"],
    },
};
34
+ export async function getAnomalyHistoryHandler(registry, args, ctx = defaultContext()) {
35
+ const svcErr = validateServiceName(args.service);
36
+ if (svcErr)
37
+ return errorResponse(svcErr);
38
+ const duration = args.duration || "1h";
39
+ const durationErr = validateDuration(duration);
40
+ if (durationErr)
41
+ return errorResponse(durationErr);
42
+ // Pick any metrics connector. The operator is expected to have
43
+ // their TSDB scraped by Prometheus, so any metric source can serve
44
+ // the query. We don't try to auto-detect "the right source" — the
45
+ // query is global by metric name.
46
+ const candidates = registry
47
+ .getByTenant(ctx.tenant)
48
+ .filter((c) => typeof c.queryMetrics === "function");
49
+ if (candidates.length === 0) {
50
+ return {
51
+ content: [
52
+ {
53
+ type: "text",
54
+ text: JSON.stringify({
55
+ error: "No metrics backend configured to query the TSDB. Configure a Prometheus source pointed at the same TSDB OMCP_ANOMALY_HISTORY_REMOTE_WRITE writes to.",
56
+ }),
57
+ },
58
+ ],
59
+ isError: true,
60
+ };
61
+ }
62
+ // Build the PromQL. The recording metric `omcp_anomaly_score` is
63
+ // expected to exist; if the writer is disabled or never fired, the
64
+ // query just returns an empty series — that's a valid result.
65
+ const labelFilters = [`service="${escLabel(args.service)}"`];
66
+ if (args.method)
67
+ labelFilters.push(`method="${escLabel(args.method)}"`);
68
+ const metric = `omcp_anomaly_score{${labelFilters.join(",")}}`;
69
+ // Fan out across every metrics connector; first non-empty answer wins.
70
+ for (const c of candidates) {
71
+ if (!c.queryMetrics)
72
+ continue;
73
+ try {
74
+ const r = await c.queryMetrics({
75
+ service: args.service,
76
+ metric,
77
+ duration,
78
+ });
79
+ if (r && Array.isArray(r.values) && r.values.length > 0) {
80
+ return {
81
+ content: [
82
+ {
83
+ type: "text",
84
+ text: JSON.stringify({
85
+ service: args.service,
86
+ duration,
87
+ method: args.method,
88
+ source: r.source,
89
+ values: r.values,
90
+ summary: r.summary,
91
+ metric,
92
+ }),
93
+ },
94
+ ],
95
+ isError: false,
96
+ };
97
+ }
98
+ }
99
+ catch (err) {
100
+ console.warn("get_anomaly_history: %s threw: %s", c.name, err instanceof Error ? err.message : String(err));
101
+ }
102
+ }
103
+ // No connector returned data — either the metric doesn't exist or
104
+ // there were no anomalies in the window. Both are useful answers.
105
+ return {
106
+ content: [
107
+ {
108
+ type: "text",
109
+ text: JSON.stringify({
110
+ service: args.service,
111
+ duration,
112
+ method: args.method,
113
+ values: [],
114
+ summary: { count: 0 },
115
+ metric,
116
+ hint: "No anomaly history found. Either the window is clean, or OMCP_ANOMALY_HISTORY_REMOTE_WRITE was unset when the anomalies fired, or the configured Prometheus source isn't scraping the TSDB this writer pushes to.",
117
+ }),
118
+ },
119
+ ],
120
+ isError: false,
121
+ };
122
+ }
123
+ /** Escape a PromQL label value (backslash + double-quote). */
124
+ function escLabel(v) {
125
+ return v.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
126
+ }
@@ -18,7 +18,7 @@ export declare const getServiceHealthDefinition: {
18
18
  };
19
19
  export declare function getServiceHealthHandler(registry: ConnectorRegistry, args: {
20
20
  service: string;
21
- }, _ctx?: RequestContext): Promise<{
21
+ }, ctx?: RequestContext): Promise<{
22
22
  content: {
23
23
  type: "text";
24
24
  text: string;
@@ -20,9 +20,10 @@ export const getServiceHealthDefinition = {
20
20
  required: ["service"],
21
21
  },
22
22
  };
23
- export async function getServiceHealthHandler(registry, args, _ctx = defaultContext()) {
24
- const metricsConnectors = registry.getBySignal("metrics");
25
- const logConnectors = registry.getBySignal("logs");
23
+ export async function getServiceHealthHandler(registry, args, ctx = defaultContext()) {
24
+ const tenantConnectors = registry.getByTenant(ctx.tenant);
25
+ const metricsConnectors = tenantConnectors.filter((c) => c.signalType === "metrics");
26
+ const logConnectors = tenantConnectors.filter((c) => c.signalType === "logs");
26
27
  // Gather metrics
27
28
  let cpu = 0, memory = 0, errorRate = 0, latencyP99 = 0;
28
29
  const anomalies = [];
@@ -15,7 +15,7 @@ export declare const listServicesDefinition: {
15
15
  };
16
16
  export declare function listServicesHandler(registry: ConnectorRegistry, args: {
17
17
  filter?: string;
18
- }, _ctx?: RequestContext): Promise<{
18
+ }, ctx?: RequestContext): Promise<{
19
19
  content: {
20
20
  type: "text";
21
21
  text: string;
@@ -12,8 +12,9 @@ export const listServicesDefinition = {
12
12
  },
13
13
  },
14
14
  };
15
- export async function listServicesHandler(registry, args, _ctx = defaultContext()) {
16
- const connectors = registry.getAll();
15
+ export async function listServicesHandler(registry, args, ctx = defaultContext()) {
16
+ // Tenant-scoped: only consult sources the caller can see.
17
+ const connectors = registry.getByTenant(ctx.tenant);
17
18
  const allServices = [];
18
19
  for (const connector of connectors) {
19
20
  try {
@@ -8,7 +8,7 @@ export declare const listSourcesDefinition: {
8
8
  properties: {};
9
9
  };
10
10
  };
11
- export declare function listSourcesHandler(registry: ConnectorRegistry, _ctx?: RequestContext): Promise<{
11
+ export declare function listSourcesHandler(registry: ConnectorRegistry, ctx?: RequestContext): Promise<{
12
12
  content: {
13
13
  type: "text";
14
14
  text: string;
@@ -7,9 +7,13 @@ export const listSourcesDefinition = {
7
7
  properties: {},
8
8
  },
9
9
  };
10
- export async function listSourcesHandler(registry, _ctx = defaultContext()) {
10
+ export async function listSourcesHandler(registry, ctx = defaultContext()) {
11
11
  const healthResults = await registry.healthCheckAll();
12
- const connectors = registry.getAll();
12
+ // Tenant-scoped: caller only sees sources tagged with their tenant
13
+ // plus untagged (global) sources. Pre-E7 deployments (no tenant
14
+ // labels on any source) behave identically — every source is
15
+ // global and visible to every tenant.
16
+ const connectors = registry.getByTenant(ctx.tenant);
13
17
  const sources = connectors.map((c) => ({
14
18
  name: c.name,
15
19
  type: c.type,
@@ -36,7 +36,7 @@ export declare function queryLogsHandler(registry: ConnectorRegistry, args: {
36
36
  duration?: string;
37
37
  level?: string;
38
38
  limit?: number;
39
- }, _ctx?: RequestContext): Promise<{
39
+ }, ctx?: RequestContext): Promise<{
40
40
  content: {
41
41
  type: "text";
42
42
  text: string;
@@ -30,7 +30,7 @@ export const queryLogsDefinition = {
30
30
  required: ["service"],
31
31
  },
32
32
  };
33
- export async function queryLogsHandler(registry, args, _ctx = defaultContext()) {
33
+ export async function queryLogsHandler(registry, args, ctx = defaultContext()) {
34
34
  const svcErr = validateServiceName(args.service);
35
35
  if (svcErr)
36
36
  return errorResponse(svcErr);
@@ -38,7 +38,7 @@ export async function queryLogsHandler(registry, args, _ctx = defaultContext())
38
38
  const durationErr = validateDuration(duration);
39
39
  if (durationErr)
40
40
  return errorResponse(durationErr);
41
- const connectors = registry.getBySignal("logs");
41
+ const connectors = registry.getByTenant(ctx.tenant).filter((c) => c.signalType === "logs");
42
42
  if (connectors.length === 0) {
43
43
  return {
44
44
  content: [
@@ -36,7 +36,7 @@ export declare function queryMetricsHandler(registry: ConnectorRegistry, args: {
36
36
  duration?: string;
37
37
  source?: string;
38
38
  groupBy?: string;
39
- }, _ctx?: RequestContext): Promise<{
39
+ }, ctx?: RequestContext): Promise<{
40
40
  content: {
41
41
  type: "text";
42
42
  text: string;