@thotischner/observability-mcp 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -245,3 +245,133 @@ test("MCP 2025-11-25: server advertises protocolVersion equal to or newer than 2
245
245
  // recognised date-style version string.
246
246
  assert.match(r.protocolVersion, /^\d{4}-\d{2}-\d{2}$/, "protocolVersion must be a YYYY-MM-DD date");
247
247
  });
248
+ // ---------------------------------------------------------------------------
249
+ // Behavioural tools/call E2E (post-#415 hardening).
250
+ //
251
+ // These run over the REAL /mcp Streamable-HTTP transport against the booted
252
+ // demo stack (integration.yml sets OMCP_CONFORMANCE_URL). They close the gap
253
+ // that let #415 ship: a param can be ADVERTISED in tools/list yet silently
254
+ // stripped by the SDK before it reaches the handler — an advertise-only
255
+ // assertion passes anyway. Here we call the tool and assert the param TAKES
256
+ // EFFECT over the wire. The demo mcp-server runs with OMCP_RAW_QUERY unset and
257
+ // OMCP_IP_ENRICH_FILE unset, so the gate/not-configured assertions are
258
+ // deterministic regardless of backend data.
259
+ // ---------------------------------------------------------------------------
260
+ async function callTool(session, name, args, id = 50) { // Send one tools/call JSON-RPC request; resolves to { error } on a JSON-RPC error, otherwise { isError, parsed, text }.
261
+ const { response } = await jsonRpc("tools/call", { name, arguments: args }, { id, session });
262
+ if (response.error)
263
+ return { error: response.error };
264
+ const r = response.result;
265
+ const text = r?.content?.[0]?.text; // only the first content item is inspected
266
+ let parsed;
267
+ try {
268
+ parsed = text ? JSON.parse(text) : undefined; // empty or missing text is never parsed
269
+ }
270
+ catch { // non-JSON text is tolerated: parsed stays undefined and the raw text is still returned
271
+ parsed = undefined;
272
+ }
273
+ return { isError: r?.isError, parsed, text };
274
+ }
275
+ async function discoverService(session) { // Resolve a service name via the list_services tool (request id 40), falling back to a fixed demo name.
276
+ const r = await callTool(session, "list_services", {}, 40);
277
+ const list = Array.isArray(r.parsed) ? r.parsed : r.parsed?.services; // accept a bare array or an object with a `services` array
278
+ const name = Array.isArray(list) && list[0] && (list[0].name || list[0].service); // first entry's `name`, else its `service`
279
+ return name || "payment-service"; // demo k3s service as fallback
280
+ }
281
+ test("E2E tools/call: query_logs raw_query is refused over the wire when capability off (#415 #3)", opts, async () => {
282
+ const session = await newSession();
283
+ const r = await callTool(session, "query_logs", { raw_query: '{job="x"}' });
284
+ // Proves raw_query SURVIVES transport (not stripped) AND the gate fires E2E.
285
+ const msg = JSON.stringify(r.parsed ?? r.text ?? ""); // callTool's { error } shape has no parsed/text, so a JSON-RPC error stringifies to '""' and fails the match below
286
+ assert.match(msg, /raw_query is disabled/i, `expected gate refusal, got ${msg}`);
287
+ });
288
+ test("E2E tools/call: query_metrics raw_query is refused over the wire when capability off (#415 #3)", opts, async () => {
289
+ const session = await newSession();
290
+ const r = await callTool(session, "query_metrics", { raw_query: "up" });
291
+ const msg = JSON.stringify(r.parsed ?? r.text ?? ""); // prefer the parsed payload, then the raw text, then ""
292
+ assert.match(msg, /raw_query is disabled/i, `expected gate refusal, got ${msg}`); // case-insensitive match on the refusal wording
293
+ });
294
+ test("E2E tools/call: enrich_ips dispatches and reports not-configured over the wire (Gap B)", opts, async () => {
295
+ const session = await newSession();
296
+ const r = await callTool(session, "enrich_ips", { ips: ["203.0.113.5"] });
297
+ const msg = JSON.stringify(r.parsed ?? r.text ?? ""); // prefer the parsed payload, then the raw text, then ""
298
+ // Proves the ips param survives transport and the tool dispatches; demo has
299
+ // no OMCP_IP_ENRICH_FILE so the deterministic "not configured" path fires.
300
+ assert.match(msg, /not configured/i, `expected not-configured notice, got ${msg}`);
301
+ });
302
+ test("E2E tools/call: query_logs aggregate takes effect over the wire — grouped result, not raw rows (#415 #2)", opts, async () => {
303
+ const session = await newSession();
304
+ const service = await discoverService(session); // discovered name, or the "payment-service" fallback
305
+ const r = await callTool(session, "query_logs", {
306
+ service,
307
+ aggregate: { op: "count_over_time", step: "15m" },
308
+ duration: "1h",
309
+ });
310
+ // The aggregate result shape (op/mode/series) is structurally distinct from
311
+ // the raw-rows shape (entries/summary). Asserting the aggregate shape proves
312
+ // the `aggregate` param survived the SDK input parsing and reached the
313
+ // connector — even if the series is empty on a sparse demo window.
314
+ const p = Array.isArray(r.parsed) ? r.parsed[0] : r.parsed; // when the payload is an array, only its first element is checked
315
+ assert.ok(p, `expected an aggregate result, got ${JSON.stringify(r)}`);
316
+ assert.equal(p.op, "count_over_time", "result must carry the aggregate op");
317
+ assert.ok("mode" in p && Array.isArray(p.series), "result must be the aggregate shape (mode + series)"); // series may be empty; only its type is checked
318
+ assert.ok(!("entries" in p), "aggregate path must NOT return the raw-rows shape");
319
+ });
320
+ test("E2E tools/call: query_metrics labels param is accepted over the wire (#415 #4)", opts, async () => {
321
+ const session = await newSession();
322
+ const service = await discoverService(session);
323
+ const r = await callTool(session, "query_metrics", {
324
+ service,
325
+ metric: "cpu",
326
+ labels: { job: service }, // the label value reuses the discovered service name
327
+ duration: "5m",
328
+ });
329
+ // Must not be a transport/dispatch error; the labels param must be accepted
330
+ // (a structured "no data" result is fine — proves it reached the handler).
331
+ assert.ok(!r.error, `unexpected JSON-RPC error: ${JSON.stringify(r.error)}`);
332
+ assert.ok(r.parsed !== undefined || r.text !== undefined, "expected a CallToolResult payload"); // parsed JSON or plain text both count; r.isError is not inspected
333
+ });
334
+ test("E2E tools/call: get_anomaly_history dispatches without a PromQL 400 crash (H1 over the wire)", opts, async () => {
335
+ const session = await newSession();
336
+ const service = await discoverService(session);
337
+ const r = await callTool(session, "get_anomaly_history", { service, duration: "1h", method: "mad" });
338
+ // After the rawQuery fix the emitted PromQL is valid; empty data is a clean
339
+ // non-error result. The bug produced an invalid-query path that still
340
+ // returned non-error empty, so we assert the dispatch shape is well-formed.
341
+ assert.ok(!r.error, `unexpected JSON-RPC error: ${JSON.stringify(r.error)}`);
342
+ assert.ok(r.parsed !== undefined || r.text !== undefined, "expected a CallToolResult payload"); // parsed JSON or plain text both count; r.isError is not inspected
343
+ });
344
+ test("E2E tools/call: every registered tool dispatches over MCP and returns a CallToolResult", opts, async () => {
345
+ const session = await newSession();
346
+ const service = await discoverService(session);
347
+ // Minimal valid args per tool; tools with required args get discovered/dummy
348
+ // values. A clean isError result (e.g. query_traces 'no trace backends') is
349
+ // acceptable — we only require a shape-conformant dispatch, never a -32xxx.
350
+ const calls = {
351
+ list_sources: {},
352
+ list_services: {},
353
+ query_metrics: { service, metric: "cpu" },
354
+ query_logs: { service },
355
+ get_anomaly_history: { service },
356
+ generate_postmortem: { service },
357
+ query_traces: { service },
358
+ get_service_health: { service },
359
+ detect_anomalies: {},
360
+ get_topology: {},
361
+ get_blast_radius: { resource: service },
362
+ enrich_ips: { ips: ["203.0.113.5"] },
363
+ };
364
+ const { response: list } = await jsonRpc("tools/list", {}, { id: 41, session });
365
+ const names = (list.result?.tools ?? []).map((t) => t.name); // a missing result or tools array yields an empty list, which fails the count check below
366
+ assert.ok(names.length >= 12, `expected >=12 tools, got ${names.length}`);
367
+ let id = 60; // request ids start above the ones used by discoverService (40) and tools/list (41)
368
+ for (const name of names) {
369
+ const args = calls[name] ?? {}; // tools absent from the table above are called with empty arguments
370
+ const { response } = await jsonRpc("tools/call", { name, arguments: args }, { id: id++, session });
371
+ if (response.error) {
372
+ assert.fail(`tool ${name} returned a JSON-RPC dispatch error: ${JSON.stringify(response.error)}`);
373
+ }
374
+ const r = response.result;
375
+ assert.ok(Array.isArray(r.content), `tool ${name} must return content[]`); // NOTE(review): if response.result is absent this throws a TypeError instead of the labelled assertion — consider r?.content
376
+ }
377
+ });
package/dist/index.js CHANGED
@@ -399,7 +399,7 @@ async function main() {
399
399
  registerTool("list_sources", [
400
400
  "List the configured observability backends (Prometheus, Loki, and any connector) and whether each is currently reachable.",
401
401
  "When to use: call this first to learn which source names exist and are healthy before passing `source` to other tools, or to debug why a query returns no data.",
402
- "Behavior: read-only, no side effects. Returns one entry per source with its name, type, configured URL, signal types (metrics/logs), and a live up/down status. Never throws for an unreachable backend — the backend is reported as down instead.",
402
+ "Behavior: read-only, no side effects. Returns one entry per source with its name, type, signal types (metrics/logs), and a live up/down status (the backend URL is intentionally not exposed — it may carry embedded credentials). Never throws for an unreachable backend — the backend is reported as down instead.",
403
403
  "Related: use `list_services` to see what is monitored within these sources.",
404
404
  ].join(" "), {}, async () => {
405
405
  await enforceEntitledAccess(ctx, { tool: "list_sources" });
@@ -589,8 +589,8 @@ async function main() {
589
589
  "Query distributed traces for a service over a given timeframe.",
590
590
  "Returns ranked trace summaries (duration, span count, error status) with a p50/p95 aggregate across the returned set.",
591
591
  "When to use: investigate tail-latency outliers, walk call chains across services for a specific time window, or pull traces related to an anomaly that the metric/log tools surfaced first.",
592
- "Prerequisites: get the exact service name from `list_services`. A Tempo / Jaeger / OTLP connector must be configured.",
593
- "Behavior: read-only. `filter` accepts the backend's native query language (TraceQL on Tempo, tag query on Jaeger). When `errorsOnly=true`, only traces with at least one error span are returned. Default limit is 50.",
592
+ "Prerequisites: get the exact service name from `list_services`. A traces connector (e.g. Tempo, installable from the connector hub) must be configured — none is bundled by default, so without one this returns a clean 'No trace backends configured' result.",
593
+ "Behavior: read-only. `filter` accepts the backend's native query language (e.g. TraceQL on Tempo). When `errorsOnly=true`, only traces with at least one error span are returned. Default limit is 50.",
594
594
  ].join(" "), {
595
595
  service: z.string().describe("Service name (e.g. 'payment-service')."),
596
596
  duration: z.string().optional().describe("Rolling time window, e.g. '5m', '1h'. Default '15m'."),
@@ -1117,11 +1117,11 @@ async function main() {
1117
1117
  // get_anomaly_history queries them back via any Prometheus source
1118
1118
  // pointed at the same TSDB.
1119
1119
  //
1120
- // The detector-side hook that actually records per-anomaly scores
1121
- // is plumbed in F15b (it requires passing this instance into the
1122
- // detectAnomaliesHandler — minor surgery, deferred). The
1123
- // infrastructure ships now so externally-written omcp_anomaly_score
1124
- // metrics are already queryable end-to-end.
1120
+ // The detector-side hook that records per-anomaly scores is wired:
1121
+ // this instance is passed into detectAnomaliesHandler at the
1122
+ // detect_anomalies tool registration below, so every scan records its
1123
+ // scores. Externally-written omcp_anomaly_score metrics are queryable
1124
+ // end-to-end too.
1125
1125
  const anomalyHistory = new AnomalyHistory(anomalyHistoryFromEnv());
1126
1126
  anomalyHistory.start();
1127
1127
  if (anomalyHistory.isEnabled()) {
@@ -18,8 +18,14 @@ export declare const manifestSchema: z.ZodObject<{
18
18
  capabilities: z.ZodOptional<z.ZodObject<{
19
19
  queryMetrics: z.ZodOptional<z.ZodBoolean>;
20
20
  queryLogs: z.ZodOptional<z.ZodBoolean>;
21
+ queryLogAggregate: z.ZodOptional<z.ZodBoolean>;
22
+ queryTraces: z.ZodOptional<z.ZodBoolean>;
21
23
  listServices: z.ZodOptional<z.ZodBoolean>;
22
24
  listAvailableMetrics: z.ZodOptional<z.ZodBoolean>;
25
+ listResources: z.ZodOptional<z.ZodBoolean>;
26
+ listEdges: z.ZodOptional<z.ZodBoolean>;
27
+ getTopologySnapshot: z.ZodOptional<z.ZodBoolean>;
28
+ watchTopology: z.ZodOptional<z.ZodBoolean>;
23
29
  }, z.core.$strip>>;
24
30
  compat: z.ZodOptional<z.ZodObject<{
25
31
  serverVersion: z.ZodOptional<z.ZodString>;
@@ -24,8 +24,15 @@ export const manifestSchema = z.object({
24
24
  .object({
25
25
  queryMetrics: z.boolean().optional(),
26
26
  queryLogs: z.boolean().optional(),
27
+ queryLogAggregate: z.boolean().optional(),
28
+ queryTraces: z.boolean().optional(),
27
29
  listServices: z.boolean().optional(),
28
30
  listAvailableMetrics: z.boolean().optional(),
31
+ // Topology-provider capabilities (e.g. the Kubernetes connector).
32
+ listResources: z.boolean().optional(),
33
+ listEdges: z.boolean().optional(),
34
+ getTopologySnapshot: z.boolean().optional(),
35
+ watchTopology: z.boolean().optional(),
29
36
  })
30
37
  .optional(),
31
38
  compat: z
@@ -67,13 +67,20 @@ export async function getAnomalyHistoryHandler(registry, args, ctx = defaultCont
67
67
  labelFilters.push(`method="${escLabel(args.method)}"`);
68
68
  const metric = `omcp_anomaly_score{${labelFilters.join(",")}}`;
69
69
  // Fan out across every metrics connector; first non-empty answer wins.
70
+ // CRITICAL: pass the hand-built selector via `rawQuery`, NOT `metric`.
71
+ // The connector's curated path wraps a bare `metric` in `{ {{selector}} }`,
72
+ // which for our already-complete selector produces invalid double-brace
73
+ // PromQL (`omcp_anomaly_score{service="x"}{ job="x" }`) → 400 → the catch
74
+ // below swallowed it and the tool always reported "no history". rawQuery is
75
+ // sent verbatim to /api/v1/query_range (the R4 passthrough).
70
76
  for (const c of candidates) {
71
77
  if (!c.queryMetrics)
72
78
  continue;
73
79
  try {
74
80
  const r = await c.queryMetrics({
75
81
  service: args.service,
76
- metric,
82
+ metric: "omcp_anomaly_score",
83
+ rawQuery: metric,
77
84
  duration,
78
85
  });
79
86
  if (r && Array.isArray(r.values) && r.values.length > 0) {
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,62 @@
1
+ import { describe, it } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { getAnomalyHistoryHandler } from "./get-anomaly-history.js";
4
+ // Regression guard for the wired-but-dead bug found in the v3.2 audit:
5
+ // get_anomaly_history hand-builds a complete PromQL selector
6
+ // (`omcp_anomaly_score{service="x",method="mad"}`) and must pass it via
7
+ // `rawQuery` (verbatim passthrough), NOT via `metric`. The curated `metric`
8
+ // path wraps the value in `{ {{selector}} }`, which for an already-complete
9
+ // selector yields invalid double-brace PromQL → Prometheus 400 → the handler
10
+ // swallowed it and always returned "no history". This test pins that the
11
+ // connector receives a verbatim rawQuery and never the manglable metric path.
12
+ function fakeRegistry(capture, result) { // Stub registry with a single metrics connector: `capture` receives every query object, `result` is the canned reply.
13
+ const conn = {
14
+ name: "prom",
15
+ type: "prometheus",
16
+ signalType: "metrics",
17
+ async queryMetrics(q) {
18
+ capture(q); // record the query before deciding how to answer
19
+ if (!result) // no canned reply supplied: reject, simulating a failing backend
20
+ throw new Error("no data");
21
+ return result;
22
+ },
23
+ };
24
+ return { getByTenant: () => [conn] }; // getByTenant ignores its arguments and always yields the one stub connector
25
+ }
26
+ function parse(r) { // Decode the JSON text of a tool result's first content item; throws if that text is missing or not valid JSON.
27
+ return JSON.parse(r.content[0].text);
28
+ }
29
+ describe("get_anomaly_history — rawQuery wiring (audit regression)", () => {
30
+ it("routes the omcp_anomaly_score selector via rawQuery, not metric", async () => {
31
+ let captured;
32
+ const reg = fakeRegistry((q) => (captured = q), { // the capture callback stores the query object handed to the connector
33
+ source: "prom",
34
+ service: "payment",
35
+ metric: "omcp_anomaly_score",
36
+ unit: "",
37
+ values: [{ timestamp: "2026-06-09T00:00:00.000Z", value: 0.7 }],
38
+ summary: { current: 0.7, average: 0.7, min: 0.7, max: 0.7, trend: "stable" },
39
+ });
40
+ const out = parse(await getAnomalyHistoryHandler(reg, { service: "payment", method: "mad", duration: "1h" }));
41
+ assert.ok(captured, "connector.queryMetrics must be called");
42
+ // The fix: rawQuery carries the verbatim selector.
43
+ assert.equal(captured.rawQuery, 'omcp_anomaly_score{service="payment",method="mad"}');
44
+ // And it must NOT be smuggled through the curated `metric` path (which would
45
+ // double-brace it). metric may be a bare name placeholder, but never the selector.
46
+ assert.doesNotMatch(String(captured.metric ?? ""), /\{/, "metric must not carry the brace selector");
47
+ // Sanity: the verbatim query has exactly one brace block (no double-brace).
48
+ assert.equal((captured.rawQuery.match(/\{/g) || []).length, 1);
49
+ assert.equal(out.isError, undefined); // the decoded payload must not carry an isError field
50
+ assert.equal(out.values.length, 1); // the single canned sample must come back in the output
51
+ });
52
+ it("omits the method filter when not given", async () => {
53
+ let captured;
54
+ const reg = fakeRegistry((q) => (captured = q), {
55
+ source: "prom", service: "api", metric: "omcp_anomaly_score", unit: "",
56
+ values: [{ timestamp: "2026-06-09T00:00:00.000Z", value: 1 }],
57
+ summary: { current: 1, average: 1, min: 1, max: 1, trend: "stable" },
58
+ });
59
+ await getAnomalyHistoryHandler(reg, { service: "api" }); // neither method nor duration is supplied here
60
+ assert.equal(captured.rawQuery, 'omcp_anomaly_score{service="api"}'); // selector carries only the service label
61
+ });
62
+ });
@@ -90,6 +90,21 @@ describe("listServicesHandler", () => {
90
90
  assert.deepEqual(apiGw.sources.sort(), ["loki1", "prom1"]);
91
91
  assert.deepEqual(apiGw.signalTypes.sort(), ["logs", "metrics"]);
92
92
  });
93
+ it("carries per-service labels (e.g. discoveredVia) through the merge (audit: docs/loki.md)", async () => {
94
+ const reg = createRegistryWithMocks([
95
+ createMockConnector({
96
+ name: "loki1", type: "loki", signalType: "logs",
97
+ listServices: async () => [
98
+ { name: "payment-service", source: "loki1", signalType: "logs", labels: { discoveredVia: "service_name" } }, // one service carrying a single label
99
+ ],
100
+ }),
101
+ ]);
102
+ const result = await listServicesHandler(reg, {});
103
+ const data = JSON.parse(result.content[0].text); // the handler's output is read as JSON text from the first content item
104
+ const svc = data.services.find((s) => s.name === "payment-service");
105
+ assert.ok(svc, "service must be present");
106
+ assert.equal(svc.labels?.discoveredVia, "service_name", "discoveredVia must surface in the tool output");
107
+ });
93
108
  it("filters services case-insensitively", async () => {
94
109
  const reg = createRegistryWithMocks([
95
110
  createMockConnector({
@@ -25,7 +25,10 @@ export async function listServicesHandler(registry, args, ctx = defaultContext()
25
25
  console.error(`Failed to list services from ${connector.name}:`, err);
26
26
  }
27
27
  }
28
- // Deduplicate by name, merge signal types
28
+ // Deduplicate by name, merge signal types. Carry per-service `labels`
29
+ // (e.g. the Loki connector's `discoveredVia`, documented in docs/loki.md)
30
+ // through the merge so discovery metadata actually surfaces in the tool
31
+ // output; first source to set a given label key wins.
29
32
  const merged = new Map();
30
33
  for (const svc of allServices) {
31
34
  const existing = merged.get(svc.name);
@@ -34,12 +37,15 @@ export async function listServicesHandler(registry, args, ctx = defaultContext()
34
37
  existing.sources.push(svc.source);
35
38
  if (!existing.signalTypes.includes(svc.signalType))
36
39
  existing.signalTypes.push(svc.signalType);
40
+ if (svc.labels)
41
+ existing.labels = { ...svc.labels, ...(existing.labels ?? {}) };
37
42
  }
38
43
  else {
39
44
  merged.set(svc.name, {
40
45
  name: svc.name,
41
46
  sources: [svc.source],
42
47
  signalTypes: [svc.signalType],
48
+ labels: svc.labels ? { ...svc.labels } : undefined,
43
49
  });
44
50
  }
45
51
  }
@@ -6,10 +6,12 @@
6
6
  // summaries, and recomputes a global p50/p95 over the merged set
7
7
  // (rather than blindly averaging per-source summaries).
8
8
  //
9
- // Backend support today: a Tempo connector + a Jaeger shim ship as
10
- // filesystem plugins. Any connector that implements queryTraces
11
- // participates automatically — no changes needed in the tool layer
12
- // when a new backend lands.
9
+ // Backend support: no traces backend is bundled by default. The Tempo
10
+ // connector ships in the connector hub (install it to enable traces);
11
+ // there is no Jaeger connector today. Any connector that implements the
12
+ // optional queryTraces capability participates automatically — so on a
13
+ // stack without one the tool returns a clean "No trace backends
14
+ // configured" result rather than failing.
13
15
  import { defaultContext } from "../context.js";
14
16
  import { validateDuration, validateServiceName, errorResponse } from "./validation.js";
15
17
  export const queryTracesDefinition = {
@@ -18,7 +20,7 @@ export const queryTracesDefinition = {
18
20
  "Query distributed traces for a service over a given timeframe.",
19
21
  "Returns ranked trace summaries with duration, error status, and span count, plus a p50/p95 duration aggregate across the returned set.",
20
22
  "When to use: investigating tail-latency outliers, walking call chains across services for a known time window, or pulling related traces for an anomaly the metric/log tools surfaced first.",
21
- "Behavior: read-only; results may be capped via `limit` (default 50). `filter` accepts the backend's native query language (TraceQL on Tempo, tag query on Jaeger). When `errorsOnly=true`, only traces with at least one error span are returned.",
23
+ "Behavior: read-only; results may be capped via `limit` (default 50). `filter` accepts the backend's native query language (e.g. TraceQL on Tempo). When `errorsOnly=true`, only traces with at least one error span are returned.",
22
24
  "Related: `query_metrics` for the per-service latency series; `get_blast_radius` for the topology a trace traverses.",
23
25
  ].join(" "),
24
26
  inputSchema: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thotischner/observability-mcp",
3
- "version": "3.2.0",
3
+ "version": "3.2.1",
4
4
  "description": "Unified observability gateway for AI agents — one MCP server for Prometheus, Loki, and any backend",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",