@thotischner/observability-mcp 1.8.1 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. package/dist/analysis/history.d.ts +70 -0
  2. package/dist/analysis/history.js +170 -0
  3. package/dist/analysis/history.test.d.ts +1 -0
  4. package/dist/analysis/history.test.js +141 -0
  5. package/dist/audit/log.d.ts +9 -0
  6. package/dist/audit/log.js +20 -0
  7. package/dist/audit/redaction-bypass.d.ts +67 -0
  8. package/dist/audit/redaction-bypass.js +64 -0
  9. package/dist/audit/redaction-bypass.test.d.ts +1 -0
  10. package/dist/audit/redaction-bypass.test.js +72 -0
  11. package/dist/audit/sinks/s3.d.ts +61 -0
  12. package/dist/audit/sinks/s3.js +179 -0
  13. package/dist/audit/sinks/s3.test.d.ts +1 -0
  14. package/dist/audit/sinks/s3.test.js +175 -0
  15. package/dist/audit/sinks/types.d.ts +18 -0
  16. package/dist/audit/sinks/types.js +1 -0
  17. package/dist/audit/sinks/webhook.d.ts +45 -0
  18. package/dist/audit/sinks/webhook.js +111 -0
  19. package/dist/audit/sinks/webhook.test.d.ts +1 -0
  20. package/dist/audit/sinks/webhook.test.js +162 -0
  21. package/dist/auth/credentials.d.ts +11 -0
  22. package/dist/auth/credentials.js +27 -0
  23. package/dist/auth/credentials.test.js +21 -1
  24. package/dist/auth/csrf.d.ts +26 -0
  25. package/dist/auth/csrf.js +128 -0
  26. package/dist/auth/csrf.test.d.ts +1 -0
  27. package/dist/auth/csrf.test.js +143 -0
  28. package/dist/auth/local-users.d.ts +6 -0
  29. package/dist/auth/local-users.js +11 -0
  30. package/dist/auth/local-users.test.js +41 -0
  31. package/dist/auth/middleware.d.ts +7 -6
  32. package/dist/auth/oidc/dcr.d.ts +70 -0
  33. package/dist/auth/oidc/dcr.js +160 -0
  34. package/dist/auth/oidc/dcr.test.d.ts +1 -0
  35. package/dist/auth/oidc/dcr.test.js +109 -0
  36. package/dist/auth/oidc/endpoints.js +44 -0
  37. package/dist/auth/oidc/profiles.d.ts +22 -0
  38. package/dist/auth/oidc/profiles.js +95 -0
  39. package/dist/auth/oidc/profiles.test.d.ts +1 -0
  40. package/dist/auth/oidc/profiles.test.js +51 -0
  41. package/dist/auth/oidc/runtime.d.ts +3 -0
  42. package/dist/auth/oidc/runtime.js +16 -3
  43. package/dist/auth/oidc/runtime.test.js +1 -0
  44. package/dist/auth/policy/batch-dry-run.d.ts +56 -0
  45. package/dist/auth/policy/batch-dry-run.js +144 -0
  46. package/dist/auth/policy/batch-dry-run.test.d.ts +1 -0
  47. package/dist/auth/policy/batch-dry-run.test.js +140 -0
  48. package/dist/auth/policy/engine.d.ts +20 -4
  49. package/dist/auth/policy/engine.js +16 -2
  50. package/dist/auth/policy/loader.d.ts +11 -1
  51. package/dist/auth/policy/loader.js +37 -0
  52. package/dist/auth/policy/loader.test.d.ts +1 -0
  53. package/dist/auth/policy/loader.test.js +86 -0
  54. package/dist/auth/policy/opa.d.ts +5 -5
  55. package/dist/auth/policy/opa.js +25 -14
  56. package/dist/auth/policy/opa.test.js +48 -0
  57. package/dist/auth/rbac.d.ts +23 -1
  58. package/dist/auth/rbac.js +43 -1
  59. package/dist/auth/rbac.test.js +62 -0
  60. package/dist/cli/index.js +3 -0
  61. package/dist/cli/inspector-config.d.ts +9 -0
  62. package/dist/cli/inspector-config.js +28 -0
  63. package/dist/cli/inspector-config.test.d.ts +1 -0
  64. package/dist/cli/inspector-config.test.js +33 -0
  65. package/dist/cli/lib.d.ts +1 -1
  66. package/dist/cli/lib.js +1 -0
  67. package/dist/conformance/mcp-2025-11-25.test.d.ts +1 -0
  68. package/dist/conformance/mcp-2025-11-25.test.js +206 -0
  69. package/dist/connectors/interface.d.ts +5 -1
  70. package/dist/connectors/loader.d.ts +8 -0
  71. package/dist/connectors/loader.js +55 -4
  72. package/dist/connectors/loader.test.d.ts +1 -0
  73. package/dist/connectors/loader.test.js +78 -0
  74. package/dist/connectors/manifest-hooks.test.d.ts +1 -0
  75. package/dist/connectors/manifest-hooks.test.js +206 -0
  76. package/dist/connectors/prometheus.test.js +31 -13
  77. package/dist/connectors/registry.d.ts +13 -0
  78. package/dist/connectors/registry.js +30 -0
  79. package/dist/connectors/registry.test.js +56 -2
  80. package/dist/context.d.ts +32 -0
  81. package/dist/context.js +35 -0
  82. package/dist/context.test.d.ts +1 -0
  83. package/dist/context.test.js +58 -0
  84. package/dist/federation/registry.d.ts +54 -0
  85. package/dist/federation/registry.js +122 -0
  86. package/dist/federation/registry.test.d.ts +1 -0
  87. package/dist/federation/registry.test.js +206 -0
  88. package/dist/federation/upstream.d.ts +86 -0
  89. package/dist/federation/upstream.js +162 -0
  90. package/dist/federation/upstream.test.d.ts +1 -0
  91. package/dist/federation/upstream.test.js +118 -0
  92. package/dist/index.js +1435 -126
  93. package/dist/metrics/self.d.ts +1 -0
  94. package/dist/metrics/self.js +8 -0
  95. package/dist/middleware/ssrfGuard.d.ts +15 -0
  96. package/dist/middleware/ssrfGuard.js +103 -0
  97. package/dist/middleware/ssrfGuard.test.d.ts +1 -0
  98. package/dist/middleware/ssrfGuard.test.js +81 -0
  99. package/dist/observability/otel.d.ts +20 -0
  100. package/dist/observability/otel.js +118 -0
  101. package/dist/observability/otel.test.d.ts +1 -0
  102. package/dist/observability/otel.test.js +56 -0
  103. package/dist/openapi.js +215 -7
  104. package/dist/openapi.test.js +34 -0
  105. package/dist/policy/redact.js +1 -1
  106. package/dist/postmortem/store.d.ts +34 -0
  107. package/dist/postmortem/store.js +113 -0
  108. package/dist/postmortem/store.test.d.ts +1 -0
  109. package/dist/postmortem/store.test.js +118 -0
  110. package/dist/postmortem/synthesizer.d.ts +83 -0
  111. package/dist/postmortem/synthesizer.js +205 -0
  112. package/dist/postmortem/synthesizer.test.d.ts +1 -0
  113. package/dist/postmortem/synthesizer.test.js +141 -0
  114. package/dist/products/loader.d.ts +31 -3
  115. package/dist/products/loader.js +77 -4
  116. package/dist/products/loader.test.js +90 -1
  117. package/dist/quota/charge.d.ts +28 -0
  118. package/dist/quota/charge.js +30 -0
  119. package/dist/quota/charge.test.d.ts +1 -0
  120. package/dist/quota/charge.test.js +83 -0
  121. package/dist/quota/limiter.d.ts +29 -4
  122. package/dist/quota/limiter.js +64 -8
  123. package/dist/quota/limiter.test.js +86 -0
  124. package/dist/scim/compliance.test.d.ts +1 -0
  125. package/dist/scim/compliance.test.js +169 -0
  126. package/dist/scim/factory.test.d.ts +1 -0
  127. package/dist/scim/factory.test.js +54 -0
  128. package/dist/scim/group-role-map.d.ts +4 -0
  129. package/dist/scim/group-role-map.js +33 -0
  130. package/dist/scim/group-role-map.test.d.ts +1 -0
  131. package/dist/scim/group-role-map.test.js +33 -0
  132. package/dist/scim/patch-ops.test.d.ts +1 -0
  133. package/dist/scim/patch-ops.test.js +100 -0
  134. package/dist/scim/redis-store.d.ts +38 -0
  135. package/dist/scim/redis-store.js +178 -0
  136. package/dist/scim/redis-store.test.d.ts +1 -0
  137. package/dist/scim/redis-store.test.js +138 -0
  138. package/dist/scim/routes.d.ts +40 -0
  139. package/dist/scim/routes.js +395 -0
  140. package/dist/scim/store.d.ts +76 -0
  141. package/dist/scim/store.js +196 -0
  142. package/dist/scim/store.test.d.ts +1 -0
  143. package/dist/scim/store.test.js +121 -0
  144. package/dist/scim/types.d.ts +73 -0
  145. package/dist/scim/types.js +29 -0
  146. package/dist/sdk/hook-wrappers.d.ts +39 -0
  147. package/dist/sdk/hook-wrappers.js +113 -0
  148. package/dist/sdk/hook-wrappers.test.d.ts +1 -0
  149. package/dist/sdk/hook-wrappers.test.js +204 -0
  150. package/dist/sdk/hooks.d.ts +77 -0
  151. package/dist/sdk/hooks.js +72 -0
  152. package/dist/sdk/hooks.test.d.ts +1 -0
  153. package/dist/sdk/hooks.test.js +159 -0
  154. package/dist/sdk/index.d.ts +15 -0
  155. package/dist/sdk/index.js +1 -0
  156. package/dist/sdk/manifest-schema.d.ts +17 -0
  157. package/dist/sdk/manifest-schema.js +21 -0
  158. package/dist/tools/context-seam.test.js +6 -1
  159. package/dist/tools/detect-anomalies.d.ts +12 -1
  160. package/dist/tools/detect-anomalies.js +26 -5
  161. package/dist/tools/generate-postmortem.d.ts +35 -0
  162. package/dist/tools/generate-postmortem.js +191 -0
  163. package/dist/tools/get-anomaly-history.d.ts +35 -0
  164. package/dist/tools/get-anomaly-history.js +126 -0
  165. package/dist/tools/get-service-health.d.ts +1 -1
  166. package/dist/tools/get-service-health.js +4 -3
  167. package/dist/tools/list-services.d.ts +1 -1
  168. package/dist/tools/list-services.js +3 -2
  169. package/dist/tools/list-sources.d.ts +1 -1
  170. package/dist/tools/list-sources.js +6 -2
  171. package/dist/tools/query-logs.d.ts +1 -1
  172. package/dist/tools/query-logs.js +2 -2
  173. package/dist/tools/query-metrics.d.ts +1 -1
  174. package/dist/tools/query-metrics.js +19 -6
  175. package/dist/tools/query-traces.d.ts +47 -0
  176. package/dist/tools/query-traces.js +145 -0
  177. package/dist/tools/query-traces.test.d.ts +1 -0
  178. package/dist/tools/query-traces.test.js +110 -0
  179. package/dist/tools/registry-names.d.ts +35 -0
  180. package/dist/tools/registry-names.js +54 -0
  181. package/dist/tools/registry-names.test.d.ts +1 -0
  182. package/dist/tools/registry-names.test.js +61 -0
  183. package/dist/tools/topology.d.ts +3 -3
  184. package/dist/tools/topology.js +33 -11
  185. package/dist/tools/topology.test.js +45 -0
  186. package/dist/topology/merge.d.ts +22 -0
  187. package/dist/topology/merge.js +178 -0
  188. package/dist/topology/merge.test.d.ts +1 -0
  189. package/dist/topology/merge.test.js +110 -0
  190. package/dist/transport/sessionStore.d.ts +66 -0
  191. package/dist/transport/sessionStore.js +138 -0
  192. package/dist/transport/sessionStore.test.d.ts +1 -0
  193. package/dist/transport/sessionStore.test.js +118 -0
  194. package/dist/transport/transportSessionMap.d.ts +70 -0
  195. package/dist/transport/transportSessionMap.js +128 -0
  196. package/dist/transport/transportSessionMap.test.d.ts +1 -0
  197. package/dist/transport/transportSessionMap.test.js +111 -0
  198. package/dist/transport/websocket.d.ts +35 -0
  199. package/dist/transport/websocket.js +133 -0
  200. package/dist/transport/websocket.test.d.ts +1 -0
  201. package/dist/transport/websocket.test.js +124 -0
  202. package/dist/types.d.ts +51 -0
  203. package/dist/ui/index.html +2529 -145
  204. package/package.json +13 -3
@@ -0,0 +1,35 @@
1
+ import type { ConnectorRegistry } from "../connectors/registry.js";
2
+ import { type RequestContext } from "../context.js";
3
+ export declare const getAnomalyHistoryDefinition: {
4
+ name: "get_anomaly_history";
5
+ description: string;
6
+ inputSchema: {
7
+ type: "object";
8
+ properties: {
9
+ service: {
10
+ type: string;
11
+ description: string;
12
+ };
13
+ duration: {
14
+ type: string;
15
+ description: string;
16
+ };
17
+ method: {
18
+ type: string;
19
+ description: string;
20
+ };
21
+ };
22
+ required: string[];
23
+ };
24
+ };
25
+ export declare function getAnomalyHistoryHandler(registry: ConnectorRegistry, args: {
26
+ service: string;
27
+ duration?: string;
28
+ method?: string;
29
+ }, ctx?: RequestContext): Promise<{
30
+ content: {
31
+ type: "text";
32
+ text: string;
33
+ }[];
34
+ isError: boolean;
35
+ }>;
@@ -0,0 +1,126 @@
1
+ // get_anomaly_history — Phase F15.
2
+ //
3
+ // Reads anomaly scores previously written to the TSDB by the
4
+ // AnomalyHistory writer. The tool is a thin convenience wrapper: it
5
+ // builds the PromQL query `omcp_anomaly_score{service="..."}` and
6
+ // dispatches via any Prometheus-shaped connector in the caller's
7
+ // tenant.
8
+ //
9
+ // Operators wire the round-trip themselves (Prometheus scrapes the
10
+ // same remote-write endpoint the writer pushes to) — the gateway
11
+ // doesn't need a direct TSDB query path because it already speaks
12
+ // PromQL via the Prometheus connector.
13
+ import { defaultContext } from "../context.js";
14
+ import { validateDuration, validateServiceName, errorResponse } from "./validation.js";
15
// Tool descriptor for `get_anomaly_history`: the tool name, a description
// assembled by joining the sentences below with single spaces, and the
// input schema. Only `service` is required; `duration` and `method` are
// optional string filters.
+ export const getAnomalyHistoryDefinition = {
16
+ name: "get_anomaly_history",
17
+ description: [
18
+ "Replay historical anomaly scores for a service from the TSDB the gateway writes to (omcp_anomaly_score series).",
19
+ "When to use: post-mortem reconstruction (what did the gateway see at 03:42?), trend analysis on detector noise, or pulling context for the LLM when an incident is reviewed after the fact.",
20
+ "Prerequisites: the operator must have OMCP_ANOMALY_HISTORY_REMOTE_WRITE configured AND a Prometheus connector pointed at the same TSDB so the round-trip closes.",
21
+ "Behavior: read-only. Returns the time-series of scores with per-method/severity labels. Empty result means either no anomalies in the window or history is disabled.",
22
+ "Related: `detect_anomalies` for the live scores; `query_metrics` if you want to write the PromQL by hand.",
23
+ ].join(" "),
24
+ inputSchema: {
25
+ type: "object",
26
+ properties: {
27
+ service: { type: "string", description: "Service name to filter on." },
28
+ duration: { type: "string", description: "Rolling window (e.g. '1h', '24h'). Default '1h'." },
29
+ method: { type: "string", description: "Filter by detector method ('mad', 'seasonality', 'correlator'). Optional." },
30
+ },
31
+ required: ["service"],
32
+ },
33
+ };
/**
 * Handle a `get_anomaly_history` tool call.
 *
 * Builds the selector `omcp_anomaly_score{service="...",method="..."}` and
 * asks each metrics-capable connector in the caller's tenant for it, in
 * registry order. The first connector that returns a non-empty `values`
 * array wins.
 *
 * Connector failures are logged AND collected. If no connector answered at
 * all, the response is an error instead of a misleading "no history found".
 * If some connectors failed but at least one answered, the failures are
 * attached as `errors` next to the result.
 *
 * @param registry Connector registry; only `getByTenant` is used.
 * @param args Tool arguments: `service` (required), `duration` (default
 *   "1h"), and an optional detector `method` filter.
 * @param ctx Request context; `ctx.tenant` scopes the connector lookup.
 * @returns A text response whose payload is a JSON string.
 */
export async function getAnomalyHistoryHandler(registry, args, ctx = defaultContext()) {
    const svcErr = validateServiceName(args.service);
    if (svcErr)
        return errorResponse(svcErr);
    const duration = args.duration || "1h";
    const durationErr = validateDuration(duration);
    if (durationErr)
        return errorResponse(durationErr);
    // Any connector exposing queryMetrics qualifies: the query is global by
    // metric name, so no attempt is made to pick "the right" source.
    const candidates = registry
        .getByTenant(ctx.tenant)
        .filter((c) => typeof c.queryMetrics === "function");
    if (candidates.length === 0) {
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        error: "No metrics backend configured to query the TSDB. Configure a Prometheus source pointed at the same TSDB OMCP_ANOMALY_HISTORY_REMOTE_WRITE writes to.",
                    }),
                },
            ],
            isError: true,
        };
    }
    // Build the PromQL selector. If the writer is disabled or never fired,
    // the query simply yields an empty series, which is a valid result.
    const labelFilters = [`service="${escLabel(args.service)}"`];
    if (args.method)
        labelFilters.push(`method="${escLabel(args.method)}"`);
    const metric = `omcp_anomaly_score{${labelFilters.join(",")}}`;
    const errors = [];
    let answered = 0;
    // Try each metrics connector in registry order; first non-empty answer wins.
    for (const c of candidates) {
        try {
            const r = await c.queryMetrics({
                service: args.service,
                metric,
                duration,
            });
            answered += 1;
            if (r && Array.isArray(r.values) && r.values.length > 0) {
                return {
                    content: [
                        {
                            type: "text",
                            text: JSON.stringify({
                                service: args.service,
                                duration,
                                method: args.method,
                                source: r.source,
                                values: r.values,
                                summary: r.summary,
                                metric,
                                // Omitted from the JSON when no earlier connector failed.
                                errors: errors.length > 0 ? errors : undefined,
                            }),
                        },
                    ],
                    isError: false,
                };
            }
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            console.warn("get_anomaly_history: %s threw: %s", c.name, msg);
            errors.push(`${c.name}: ${msg}`);
        }
    }
    // Every connector threw: nothing is known about the window, so do not
    // report it as clean.
    if (answered === 0) {
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        error: `Query failed: ${errors.join("; ")}`,
                        service: args.service,
                        duration,
                        method: args.method,
                        metric,
                    }),
                },
            ],
            isError: true,
        };
    }
    // At least one connector answered with no data: either the metric does
    // not exist or the window had no anomalies. Both are useful answers.
    return {
        content: [
            {
                type: "text",
                text: JSON.stringify({
                    service: args.service,
                    duration,
                    method: args.method,
                    values: [],
                    summary: { count: 0 },
                    metric,
                    hint: "No anomaly history found. Either the window is clean, or OMCP_ANOMALY_HISTORY_REMOTE_WRITE was unset when the anomalies fired, or the configured Prometheus source isn't scraping the TSDB this writer pushes to.",
                    errors: errors.length > 0 ? errors : undefined,
                }),
            },
        ],
        isError: false,
    };
}
/**
 * Escape a value for use inside a double-quoted PromQL label matcher.
 *
 * Backslashes and double quotes are backslash-escaped so the value cannot
 * end the quoted string early. Line breaks are written as `\n` / `\r`
 * escape sequences because a raw newline is not allowed inside a quoted
 * PromQL string. Non-string input is coerced with `String()` so a stray
 * number or boolean from an untyped caller does not throw.
 *
 * @param v Raw label value.
 * @returns The escaped value, without surrounding quotes.
 */
function escLabel(v) {
    // Backslashes must be handled first so the escapes added afterwards
    // are not themselves doubled.
    return String(v)
        .replace(/\\/g, "\\\\")
        .replace(/"/g, '\\"')
        .replace(/\n/g, "\\n")
        .replace(/\r/g, "\\r");
}
@@ -18,7 +18,7 @@ export declare const getServiceHealthDefinition: {
18
18
  };
19
19
  export declare function getServiceHealthHandler(registry: ConnectorRegistry, args: {
20
20
  service: string;
21
- }, _ctx?: RequestContext): Promise<{
21
+ }, ctx?: RequestContext): Promise<{
22
22
  content: {
23
23
  type: "text";
24
24
  text: string;
@@ -20,9 +20,10 @@ export const getServiceHealthDefinition = {
20
20
  required: ["service"],
21
21
  },
22
22
  };
23
- export async function getServiceHealthHandler(registry, args, _ctx = defaultContext()) {
24
- const metricsConnectors = registry.getBySignal("metrics");
25
- const logConnectors = registry.getBySignal("logs");
23
+ export async function getServiceHealthHandler(registry, args, ctx = defaultContext()) {
24
+ const tenantConnectors = registry.getByTenant(ctx.tenant);
25
+ const metricsConnectors = tenantConnectors.filter((c) => c.signalType === "metrics");
26
+ const logConnectors = tenantConnectors.filter((c) => c.signalType === "logs");
26
27
  // Gather metrics
27
28
  let cpu = 0, memory = 0, errorRate = 0, latencyP99 = 0;
28
29
  const anomalies = [];
@@ -15,7 +15,7 @@ export declare const listServicesDefinition: {
15
15
  };
16
16
  export declare function listServicesHandler(registry: ConnectorRegistry, args: {
17
17
  filter?: string;
18
- }, _ctx?: RequestContext): Promise<{
18
+ }, ctx?: RequestContext): Promise<{
19
19
  content: {
20
20
  type: "text";
21
21
  text: string;
@@ -12,8 +12,9 @@ export const listServicesDefinition = {
12
12
  },
13
13
  },
14
14
  };
15
- export async function listServicesHandler(registry, args, _ctx = defaultContext()) {
16
- const connectors = registry.getAll();
15
+ export async function listServicesHandler(registry, args, ctx = defaultContext()) {
16
+ // Tenant-scoped: only consult sources the caller can see.
17
+ const connectors = registry.getByTenant(ctx.tenant);
17
18
  const allServices = [];
18
19
  for (const connector of connectors) {
19
20
  try {
@@ -8,7 +8,7 @@ export declare const listSourcesDefinition: {
8
8
  properties: {};
9
9
  };
10
10
  };
11
- export declare function listSourcesHandler(registry: ConnectorRegistry, _ctx?: RequestContext): Promise<{
11
+ export declare function listSourcesHandler(registry: ConnectorRegistry, ctx?: RequestContext): Promise<{
12
12
  content: {
13
13
  type: "text";
14
14
  text: string;
@@ -7,9 +7,13 @@ export const listSourcesDefinition = {
7
7
  properties: {},
8
8
  },
9
9
  };
10
- export async function listSourcesHandler(registry, _ctx = defaultContext()) {
10
+ export async function listSourcesHandler(registry, ctx = defaultContext()) {
11
11
  const healthResults = await registry.healthCheckAll();
12
- const connectors = registry.getAll();
12
+ // Tenant-scoped: caller only sees sources tagged with their tenant
13
+ // plus untagged (global) sources. Pre-E7 deployments (no tenant
14
+ // labels on any source) behave identically — every source is
15
+ // global and visible to every tenant.
16
+ const connectors = registry.getByTenant(ctx.tenant);
13
17
  const sources = connectors.map((c) => ({
14
18
  name: c.name,
15
19
  type: c.type,
@@ -36,7 +36,7 @@ export declare function queryLogsHandler(registry: ConnectorRegistry, args: {
36
36
  duration?: string;
37
37
  level?: string;
38
38
  limit?: number;
39
- }, _ctx?: RequestContext): Promise<{
39
+ }, ctx?: RequestContext): Promise<{
40
40
  content: {
41
41
  type: "text";
42
42
  text: string;
@@ -30,7 +30,7 @@ export const queryLogsDefinition = {
30
30
  required: ["service"],
31
31
  },
32
32
  };
33
- export async function queryLogsHandler(registry, args, _ctx = defaultContext()) {
33
+ export async function queryLogsHandler(registry, args, ctx = defaultContext()) {
34
34
  const svcErr = validateServiceName(args.service);
35
35
  if (svcErr)
36
36
  return errorResponse(svcErr);
@@ -38,7 +38,7 @@ export async function queryLogsHandler(registry, args, _ctx = defaultContext())
38
38
  const durationErr = validateDuration(duration);
39
39
  if (durationErr)
40
40
  return errorResponse(durationErr);
41
- const connectors = registry.getBySignal("logs");
41
+ const connectors = registry.getByTenant(ctx.tenant).filter((c) => c.signalType === "logs");
42
42
  if (connectors.length === 0) {
43
43
  return {
44
44
  content: [
@@ -36,7 +36,7 @@ export declare function queryMetricsHandler(registry: ConnectorRegistry, args: {
36
36
  duration?: string;
37
37
  source?: string;
38
38
  groupBy?: string;
39
- }, _ctx?: RequestContext): Promise<{
39
+ }, ctx?: RequestContext): Promise<{
40
40
  content: {
41
41
  type: "text";
42
42
  text: string;
@@ -30,12 +30,12 @@ export const queryMetricsDefinition = {
30
30
  required: ["service", "metric"],
31
31
  },
32
32
  };
33
- export async function queryMetricsHandler(registry, args, _ctx = defaultContext()) {
33
+ export async function queryMetricsHandler(registry, args, ctx = defaultContext()) {
34
34
  // Coarse single-tenant source scoping: if the principal is restricted to a
35
35
  // source allow-list, deny an explicit out-of-scope source.
36
- if (_ctx.allowedSources &&
36
+ if (ctx.allowedSources &&
37
37
  args.source &&
38
- !_ctx.allowedSources.includes(args.source)) {
38
+ !ctx.allowedSources.includes(args.source)) {
39
39
  return errorResponse(`forbidden: source "${args.source}" is not in your allowed sources`);
40
40
  }
41
41
  const svcErr = validateServiceName(args.service);
@@ -51,12 +51,25 @@ export async function queryMetricsHandler(registry, args, _ctx = defaultContext(
51
51
  if (args.groupBy && !/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(args.groupBy)) {
52
52
  return errorResponse(`Invalid groupBy "${args.groupBy}". Must be a valid Prometheus label name (alphanumeric + underscore, starting with letter/underscore).`);
53
53
  }
54
+ // Tenant-scoped resolution: an explicit `source` from the agent
55
+ // must belong to the caller's tenant (or be a global / untagged
56
+ // source) — cross-tenant sources resolve to undefined exactly like
57
+ // a missing source, preserving the no-existence-leak posture used
58
+ // elsewhere in the tenancy layer.
54
59
  const connectors = args.source
55
- ? [registry.getByName(args.source)].filter(Boolean)
56
- : registry.getBySignal("metrics");
60
+ ? [registry.getByNameForTenant(args.source, ctx.tenant)].filter(Boolean)
61
+ : registry.getByTenant(ctx.tenant).filter((c) => c.signalType === "metrics");
57
62
  if (connectors.length === 0) {
63
+ // Distinct messages but identical posture: the source-named branch
64
+ // could land here either because the source doesn't exist OR
65
+ // belongs to another tenant — both surface as "not found", same
66
+ // shape, no existence leak. The fan-out branch lands here only on
67
+ // an empty registry.
68
+ const msg = args.source
69
+ ? `Source "${args.source}" not found`
70
+ : "No metrics backends configured";
58
71
  return {
59
- content: [{ type: "text", text: JSON.stringify({ error: "No metrics backends configured" }) }],
72
+ content: [{ type: "text", text: JSON.stringify({ error: msg }) }],
60
73
  isError: true,
61
74
  };
62
75
  }
@@ -0,0 +1,47 @@
1
+ import type { ConnectorRegistry } from "../connectors/registry.js";
2
+ import { type RequestContext } from "../context.js";
3
+ export declare const queryTracesDefinition: {
4
+ name: "query_traces";
5
+ description: string;
6
+ inputSchema: {
7
+ type: "object";
8
+ properties: {
9
+ service: {
10
+ type: string;
11
+ description: string;
12
+ };
13
+ duration: {
14
+ type: string;
15
+ description: string;
16
+ };
17
+ filter: {
18
+ type: string;
19
+ description: string;
20
+ };
21
+ limit: {
22
+ type: string;
23
+ description: string;
24
+ };
25
+ errorsOnly: {
26
+ type: string;
27
+ description: string;
28
+ };
29
+ };
30
+ required: string[];
31
+ };
32
+ };
33
+ export declare function queryTracesHandler(registry: ConnectorRegistry, args: {
34
+ service: string;
35
+ duration?: string;
36
+ filter?: string;
37
+ limit?: number;
38
+ errorsOnly?: boolean;
39
+ }, ctx?: RequestContext): Promise<{
40
+ content: {
41
+ type: "text";
42
+ text: string;
43
+ }[];
44
+ isError: boolean;
45
+ }>;
46
+ /** Pure percentile over a numeric array. Returns 0 for empty input. */
47
+ export declare function percentile(values: number[], p: number): number;
@@ -0,0 +1,145 @@
1
+ // query_traces — Phase F13.
2
+ //
3
+ // Surfaces distributed traces from any connector that implements the
4
+ // queryTraces capability. Fans out across every traces-signal
5
+ // connector in the caller's tenant, merges the returned trace
6
+ // summaries, and recomputes a global p50/p95 over the merged, limit-capped set
7
+ // (rather than blindly averaging per-source summaries).
8
+ //
9
+ // Backend support today: a Tempo connector + a Jaeger shim ship as
10
+ // filesystem plugins. Any connector that implements queryTraces
11
+ // participates automatically — no changes needed in the tool layer
12
+ // when a new backend lands.
13
+ import { defaultContext } from "../context.js";
14
+ import { validateDuration, validateServiceName, errorResponse } from "./validation.js";
15
// Tool descriptor for `query_traces`: the tool name, a description
// assembled by joining the sentences below with single spaces, and the
// input schema. Only `service` is required; `duration`, `filter`, `limit`
// and `errorsOnly` are optional.
+ export const queryTracesDefinition = {
16
+ name: "query_traces",
17
+ description: [
18
+ "Query distributed traces for a service over a given timeframe.",
19
+ "Returns ranked trace summaries with duration, error status, and span count, plus a p50/p95 duration aggregate across the returned set.",
20
+ "When to use: investigating tail-latency outliers, walking call chains across services for a known time window, or pulling related traces for an anomaly the metric/log tools surfaced first.",
21
+ "Behavior: read-only; results may be capped via `limit` (default 50). `filter` accepts the backend's native query language (TraceQL on Tempo, tag query on Jaeger). When `errorsOnly=true`, only traces with at least one error span are returned.",
22
+ "Related: `query_metrics` for the per-service latency series; `get_blast_radius` for the topology a trace traverses.",
23
+ ].join(" "),
24
+ inputSchema: {
25
+ type: "object",
26
+ properties: {
27
+ service: { type: "string", description: "Service name (e.g. 'payment-service')" },
28
+ duration: { type: "string", description: "Rolling time window (e.g. '5m', '1h'). Default '15m'." },
29
+ filter: { type: "string", description: "Backend-native filter (TraceQL on Tempo, tag query on Jaeger). Optional." },
30
+ limit: { type: "number", description: "Soft cap on returned trace summaries. Default 50." },
31
+ errorsOnly: { type: "boolean", description: "If true, only traces with at least one error span." },
32
+ },
33
+ required: ["service"],
34
+ },
35
+ };
/**
 * Handle a `query_traces` tool call.
 *
 * Validates the arguments, queries every connector in the caller's tenant
 * that exposes `queryTraces`, merges the returned trace summaries, ranks
 * them slowest-first, caps them to `limit`, and recomputes the error count
 * and p50/p95 duration over exactly the traces being returned.
 *
 * @param registry Connector registry; only `getByTenant` is used.
 * @param args Tool arguments: `service` (required), `duration` (default
 *   "15m"), `filter`, `limit` (default 50, must be a non-negative finite
 *   number), `errorsOnly`.
 * @param ctx Request context; `ctx.tenant` scopes the connector lookup.
 * @returns A text response whose payload is a JSON string. The payload
 *   carries `isError: true` when no trace-capable connector exists, or
 *   when no connector produced a result and at least one failed. Invalid
 *   arguments are reported through `errorResponse`.
 */
export async function queryTracesHandler(registry, args, ctx = defaultContext()) {
    const svcErr = validateServiceName(args.service);
    if (svcErr)
        return errorResponse(svcErr);
    const duration = args.duration || "15m";
    const durationErr = validateDuration(duration);
    if (durationErr)
        return errorResponse(durationErr);
    // A negative limit would make `slice(0, limit)` count from the END of
    // the merged list and silently drop traces; NaN would return none.
    // Reject those up front instead of returning a misleading result.
    if (args.limit !== undefined && args.limit !== null &&
        (typeof args.limit !== "number" || !Number.isFinite(args.limit) || args.limit < 0)) {
        return errorResponse(`Invalid limit "${args.limit}". Must be a non-negative number.`);
    }
    // Any connector that declares queryTraces participates, whether or not
    // it already reports the "traces" signal type.
    const candidates = registry
        .getByTenant(ctx.tenant)
        .filter((c) => typeof c.queryTraces === "function");
    if (candidates.length === 0) {
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({ error: "No trace backends configured" }),
                },
            ],
            isError: true,
        };
    }
    const results = [];
    const errors = [];
    for (const connector of candidates) {
        try {
            const r = await connector.queryTraces({
                service: args.service,
                duration,
                filter: args.filter,
                limit: args.limit,
                errorsOnly: args.errorsOnly,
            });
            // A connector that resolves without a `traces` array would crash
            // the merge below; record it as that connector's failure instead.
            if (!r || !Array.isArray(r.traces)) {
                throw new Error("connector returned no traces array");
            }
            results.push(r);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            console.error(`Trace query failed on ${connector.name}:`, msg);
            errors.push(`${connector.name}: ${msg}`);
        }
    }
    if (results.length === 0) {
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        error: errors.length > 0 ? `Query failed: ${errors.join("; ")}` : "No traces returned",
                        service: args.service,
                        duration,
                    }),
                },
            ],
            isError: errors.length > 0,
        };
    }
    // Merge the per-source sets. flatMap avoids spreading a potentially
    // very large array into push() arguments.
    const merged = results.flatMap((r) => r.traces);
    // Sort hottest-first by duration, then truncate to the requested limit.
    merged.sort((a, b) => b.durationMs - a.durationMs);
    const limit = args.limit ?? 50;
    const capped = merged.slice(0, limit);
    // The summary is computed over the capped set so it describes what the
    // caller actually receives.
    const durations = capped.map((t) => t.durationMs);
    const summary = {
        total: capped.length,
        errorCount: capped.filter((t) => t.hasError).length,
        p50DurationMs: percentile(durations, 0.5),
        p95DurationMs: percentile(durations, 0.95),
    };
    return {
        content: [
            {
                type: "text",
                text: JSON.stringify({
                    service: args.service,
                    duration,
                    sources: results.map((r) => r.source),
                    summary,
                    traces: capped,
                    errors: errors.length > 0 ? errors : undefined,
                }),
            },
        ],
        isError: false,
    };
}
/**
 * Pure percentile over a numeric array, using linear interpolation between
 * the two samples that surround the requested rank.
 *
 * @param values Samples in any order; the array is not mutated.
 * @param p Requested quantile as a fraction (0.5 = median). Values outside
 *   [0, 1] are clamped, so they resolve to the smallest / largest sample
 *   instead of indexing past the array and yielding 0.
 * @returns 0 for empty input; the exact sample when the rank lands on one;
 *   otherwise the interpolated value rounded to the nearest integer.
 */
export function percentile(values, p) {
    if (values.length === 0)
        return 0;
    const sorted = [...values].sort((a, b) => a - b);
    const fraction = Math.min(1, Math.max(0, p));
    // Fractional index into the sorted samples.
    const rank = fraction * (sorted.length - 1);
    const lo = Math.floor(rank);
    const hi = Math.ceil(rank);
    if (lo === hi)
        return sorted[lo] ?? 0;
    const frac = rank - lo;
    return Math.round((sorted[lo] ?? 0) * (1 - frac) + (sorted[hi] ?? 0) * frac);
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,110 @@
1
+ import { test } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { queryTracesHandler, percentile } from "./query-traces.js";
4
/**
 * Test fixture: builds one trace-summary object.
 *
 * `opts.service` overrides the root service (default "payment-service") and
 * `opts.hasError` overrides the error flag (default false). Every other field
 * is a fixed placeholder.
 */
function span(traceId, durationMs, opts = {}) {
    const rootService = opts.service ?? "payment-service";
    const hasError = opts.hasError ?? false;
    return {
        traceId,
        rootName: "GET /pay",
        rootService,
        durationMs,
        spanCount: 4,
        hasError,
        startTs: "2026-06-06T00:00:00.000Z",
    };
}
15
/**
 * Minimal registry stub for the handler tests. It exposes only
 * `getByTenant`, which ignores the tenant argument and always returns the
 * connectors supplied here.
 */
function fakeRegistry(connectors) {
    const getByTenant = (_tenant) => connectors;
    return { getByTenant };
}
20
/**
 * Decodes a tool response: takes the `text` of the first entry in
 * `r.content` and parses it as JSON.
 */
function parseResponse(r) {
    const firstPart = r.content[0];
    const payload = firstPart.text;
    return JSON.parse(payload);
}
23
test("percentile: empty → 0; single value → that value; linear interpolation otherwise", () => {
    // Medians whose expected value is exact.
    const medianCases = [
        { samples: [], expected: 0 },
        { samples: [10], expected: 10 },
        { samples: [1, 2, 3, 4, 5], expected: 3 },
    ];
    for (const { samples, expected } of medianCases) {
        assert.equal(percentile(samples, 0.5), expected);
    }
    // The 95th percentile of 1..20 falls between index 18 (value 19) and
    // index 19 (value 20), so only the bracket is asserted.
    const oneToTwenty = Array.from({ length: 20 }, (_unused, idx) => idx + 1);
    const p95 = percentile(oneToTwenty, 0.95);
    assert.ok(p95 >= 19 && p95 <= 20);
});
31
test("query_traces: rejects invalid service name", async () => {
    // A service name containing spaces must come back flagged as an error.
    const emptyRegistry = fakeRegistry([]);
    const args = { service: "bad name with space" };
    const response = await queryTracesHandler(emptyRegistry, args);
    assert.equal(response.isError, true);
});
35
test("query_traces: rejects invalid duration", async () => {
    // A valid service name paired with an unparseable duration string.
    const emptyRegistry = fakeRegistry([]);
    const args = { service: "ok", duration: "bogus" };
    const response = await queryTracesHandler(emptyRegistry, args);
    assert.equal(response.isError, true);
});
39
test("query_traces: no trace backends configured → isError + clear message", async () => {
    // The only connector has no queryTraces method, so it is skipped and
    // nothing is left that can serve traces.
    const metricsOnly = { name: "prom", signalType: "metrics" };
    const registry = fakeRegistry([metricsOnly]);
    const response = await queryTracesHandler(registry, { service: "ok" });
    assert.equal(response.isError, true);
    const { error } = parseResponse(response);
    assert.match(error, /No trace backends/);
});
46
test("query_traces: merges spans from every connector that returned, caps to limit, ranks by duration", async () => {
    // Two backends return five traces in total; the request asks for four.
    const tempoBackend = {
        name: "tempo",
        signalType: "metrics",
        async queryTraces() {
            const traces = [span("aaa", 100), span("bbb", 800), span("ccc", 300)];
            const summary = { total: 3, errorCount: 0, p50DurationMs: 300, p95DurationMs: 800 };
            return { source: "tempo", service: "payment", traces, summary };
        },
    };
    const jaegerBackend = {
        name: "jaeger",
        signalType: "metrics",
        async queryTraces() {
            const traces = [span("ddd", 500, { hasError: true }), span("eee", 200)];
            const summary = { total: 2, errorCount: 1, p50DurationMs: 350, p95DurationMs: 500 };
            return { source: "jaeger", service: "payment", traces, summary };
        },
    };
    const registry = fakeRegistry([tempoBackend, jaegerBackend]);
    const response = await queryTracesHandler(registry, { service: "payment", limit: 4 });
    const { sources, traces, summary } = parseResponse(response);
    // Source order is not asserted, so compare a sorted copy.
    assert.deepEqual([...sources].sort(), ["jaeger", "tempo"]);
    assert.equal(traces.length, 4, "limit honoured");
    // Sorted hottest-first
    const [hottest, runnerUp] = traces;
    assert.equal(hottest.durationMs, 800);
    assert.equal(runnerUp.durationMs, 500);
    assert.equal(summary.errorCount, 1);
});
76
test("query_traces: surfaces per-connector errors but still returns successful results", async () => {
    // One backend answers with a single trace; the other always throws.
    const healthyBackend = {
        name: "tempo",
        signalType: "metrics",
        async queryTraces() {
            return {
                source: "tempo",
                service: "payment",
                traces: [span("aaa", 50)],
                summary: { total: 1, errorCount: 0, p50DurationMs: 50, p95DurationMs: 50 },
            };
        },
    };
    const failingBackend = {
        name: "jaeger",
        signalType: "metrics",
        async queryTraces() {
            throw new Error("upstream 503");
        },
    };
    const registry = fakeRegistry([healthyBackend, failingBackend]);
    const response = await queryTracesHandler(registry, { service: "payment" });
    const { errors, traces } = parseResponse(response);
    assert.equal(errors.length, 1);
    assert.equal(traces.length, 1);
});
99
test("query_traces: all backends fail → isError true + errors surfaced", async () => {
    // The only trace backend throws, so the thrown message must appear in
    // the error text of the response.
    const failingBackend = {
        name: "tempo",
        signalType: "metrics",
        async queryTraces() {
            throw new Error("upstream gone");
        },
    };
    const registry = fakeRegistry([failingBackend]);
    const response = await queryTracesHandler(registry, { service: "payment" });
    assert.equal(response.isError, true);
    const { error } = parseResponse(response);
    assert.match(error, /upstream gone/);
});
@@ -0,0 +1,35 @@
1
/**
 * Canonical list of the MCP tool names that createMcpServer() exposes.
 *
 * Consumers:
 *   - The Product validator uses it as a typo guard. Every name in a
 *     Product's `tools` allow-list has to be a tool that actually
 *     registers; otherwise a bound credential opens an /mcp session with
 *     an empty tool set and the agent silently fails.
 *   - The keystone integration test in registry-names.test.ts reads
 *     index.ts and asserts that the registerTool() call sites match this
 *     list 1:1, so a missing or an extra entry trips the test.
 *
 * This list and the registerTool("name", ...) calls in createMcpServer
 * must stay in sync. The test enforces it.
 */
export declare const REGISTERED_TOOL_NAMES: readonly [
    "list_sources",
    "list_services",
    "query_metrics",
    "query_logs",
    "query_traces",
    "get_service_health",
    "detect_anomalies",
    "get_anomaly_history",
    "generate_postmortem",
    "get_topology",
    "get_blast_radius"
];
17
/** Union of the string literals listed in REGISTERED_TOOL_NAMES. */
export type RegisteredToolName = (typeof REGISTERED_TOOL_NAMES)[number];
18
/**
 * Functional category of a tool. It is surfaced in /api/tools/registry and
 * the Products UI uses it to group the multi-select picker, which keeps the
 * operator-facing taxonomy stable even when tool descriptions evolve.
 */
export type ToolCategory =
    | "discovery"
    | "query"
    | "diagnose"
    | "topology";
22
/** One catalogue row describing a registered tool. */
export interface ToolRegistryEntry {
    /** Tool name; always one of the REGISTERED_TOOL_NAMES literals. */
    name: RegisteredToolName;
    /** Functional grouping of the tool. */
    category: ToolCategory;
    /**
     * One-line statement of what the tool does, with no fluff. The full
     * multi-paragraph description lives in the registerTool() call inside
     * createMcpServer; this is the catalogue summary the picker shows
     * alongside the name.
     */
    summary: string;
}
30
/**
 * Read-only catalogue of tool registry entries (name, category, summary).
 * NOTE(review): presumably holds one entry per REGISTERED_TOOL_NAMES
 * literal; confirm against the implementation.
 */
export declare const REGISTERED_TOOLS: ReadonlyArray<ToolRegistryEntry>;
31
/**
 * Validates a candidate Product `tools[]` array and returns the names that
 * are unknown; an empty array means every name is OK.
 *
 * This is a pure helper. The caller decides how to surface a rejection: the
 * API handler emits a 422 with a hint of valid names, and the YAML loader
 * could decide to warn.
 */
export declare function unknownToolNames(tools: ReadonlyArray<string>): string[];