@thotischner/observability-mcp 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conformance/mcp-2025-11-25.test.js +183 -0
- package/dist/index.js +138 -23
- package/dist/sdk/manifest-schema.d.ts +6 -0
- package/dist/sdk/manifest-schema.js +7 -0
- package/dist/tools/get-anomaly-history.js +8 -1
- package/dist/tools/get-anomaly-history.test.d.ts +1 -0
- package/dist/tools/get-anomaly-history.test.js +62 -0
- package/dist/tools/handlers.test.js +15 -0
- package/dist/tools/list-services.js +7 -1
- package/dist/tools/query-traces.js +7 -5
- package/package.json +1 -1
|
@@ -245,3 +245,186 @@ test("MCP 2025-11-25: server advertises protocolVersion equal to or newer than 2
|
|
|
245
245
|
// recognised date-style version string.
|
|
246
246
|
assert.match(r.protocolVersion, /^\d{4}-\d{2}-\d{2}$/, "protocolVersion must be a YYYY-MM-DD date");
|
|
247
247
|
});
|
|
248
|
+
// ---------------------------------------------------------------------------
|
|
249
|
+
// Behavioural tools/call E2E (post-#415 hardening).
|
|
250
|
+
//
|
|
251
|
+
// These run over the REAL /mcp Streamable-HTTP transport against the booted
|
|
252
|
+
// demo stack (integration.yml sets OMCP_CONFORMANCE_URL). They close the gap
|
|
253
|
+
// that let #415 ship: a param can be ADVERTISED in tools/list yet silently
|
|
254
|
+
// stripped by the SDK before it reaches the handler — an advertise-only
|
|
255
|
+
// assertion passes anyway. Here we call the tool and assert the param TAKES
|
|
256
|
+
// EFFECT over the wire. The demo mcp-server runs with OMCP_RAW_QUERY unset and
|
|
257
|
+
// OMCP_IP_ENRICH_FILE unset, so the gate/not-configured assertions are
|
|
258
|
+
// deterministic regardless of backend data.
|
|
259
|
+
// ---------------------------------------------------------------------------
|
|
260
|
+
/**
 * Invoke an MCP tool through `tools/call` and normalise the reply for assertions.
 *
 * @param {unknown} session - Session handle, forwarded untouched to `jsonRpc`.
 * @param {string} name - Name of the tool to call.
 * @param {object} args - Tool arguments, sent as the `arguments` member.
 * @param {number} [id=50] - JSON-RPC request id.
 * @returns {Promise<object>} `{ error }` when the reply carries a JSON-RPC
 *   error; otherwise `{ isError, parsed, text }`, where `text` is the result's
 *   text payload and `parsed` is that text JSON-decoded (`undefined` when there
 *   is no text or it is not valid JSON).
 */
async function callTool(session, name, args, id = 50) {
    const { response } = await jsonRpc("tools/call", { name, arguments: args }, { id, session });
    if (response.error) {
        return { error: response.error };
    }
    const result = response.result;
    const content = result?.content;
    // A result may lead with a non-text block (image, resource link, ...), so
    // take the first block that actually carries text instead of assuming
    // index 0. The index-0 lookup stays as the fallback for older callers.
    const textBlock = Array.isArray(content)
        ? content.find((block) => typeof block?.text === "string")
        : undefined;
    const text = textBlock ? textBlock.text : content?.[0]?.text;
    let parsed;
    try {
        parsed = text ? JSON.parse(text) : undefined;
    }
    catch {
        // Plain-text tool messages are expected; callers fall back to `text`.
        parsed = undefined;
    }
    return { isError: result?.isError, parsed, text };
}
|
|
275
|
+
/**
 * Pick a service name to run the behavioural tests against.
 *
 * Calls `list_services` and accepts either a bare array or an object with a
 * `services` array. The first entry's `name` (or `service`) is used; anything
 * else falls back to the demo k3s service.
 *
 * @param {unknown} session - Session handle passed through to `callTool`.
 * @returns {Promise<string>} A service name, never empty.
 */
async function discoverService(session) {
    const { parsed } = await callTool(session, "list_services", {}, 40);
    const services = Array.isArray(parsed) ? parsed : parsed?.services;
    if (!Array.isArray(services)) {
        return "payment-service";
    }
    const first = services[0];
    if (!first) {
        return "payment-service";
    }
    return first.name || first.service || "payment-service";
}
|
|
281
|
+
// Gate check for query_logs: with the raw-query capability off, a call that
// carries `raw_query` must come back as a refusal naming the disabled feature.
test("E2E tools/call: query_logs raw_query is refused over the wire when capability off (#415 #3)", opts, async () => {
    const session = await newSession();
    const r = await callTool(session, "query_logs", { raw_query: '{job="x"}' });
    // callTool returns only { error } on a JSON-RPC failure; without this check
    // the message below would render as an empty string and hide the cause.
    assert.ok(!r.error, `unexpected JSON-RPC error: ${JSON.stringify(r.error)}`);
    // Proves raw_query SURVIVES transport (not stripped) AND the gate fires E2E.
    const msg = JSON.stringify(r.parsed ?? r.text ?? "");
    assert.match(msg, /raw_query is disabled/i, `expected gate refusal, got ${msg}`);
});
|
|
288
|
+
// Gate check for query_metrics: with the raw-query capability off, a call that
// carries `raw_query` must come back as a refusal naming the disabled feature.
test("E2E tools/call: query_metrics raw_query is refused over the wire when capability off (#415 #3)", opts, async () => {
    const session = await newSession();
    const r = await callTool(session, "query_metrics", { raw_query: "up" });
    // callTool returns only { error } on a JSON-RPC failure; without this check
    // the message below would render as an empty string and hide the cause.
    assert.ok(!r.error, `unexpected JSON-RPC error: ${JSON.stringify(r.error)}`);
    const msg = JSON.stringify(r.parsed ?? r.text ?? "");
    assert.match(msg, /raw_query is disabled/i, `expected gate refusal, got ${msg}`);
});
|
|
294
|
+
// Dispatch check for enrich_ips: the call must reach the tool and return its
// "not configured" notice.
test("E2E tools/call: enrich_ips dispatches and reports not-configured over the wire (Gap B)", opts, async () => {
    const session = await newSession();
    const r = await callTool(session, "enrich_ips", { ips: ["203.0.113.5"] });
    // callTool returns only { error } on a JSON-RPC failure; without this check
    // the message below would render as an empty string and hide the cause.
    assert.ok(!r.error, `unexpected JSON-RPC error: ${JSON.stringify(r.error)}`);
    const msg = JSON.stringify(r.parsed ?? r.text ?? "");
    // Proves the ips param survives transport and the tool dispatches; demo has
    // no OMCP_IP_ENRICH_FILE so the deterministic "not configured" path fires.
    assert.match(msg, /not configured/i, `expected not-configured notice, got ${msg}`);
});
|
|
302
|
+
// Regression guard for #415 #2: calls query_logs with an `aggregate` argument
// and checks that the reply has the aggregate shape rather than raw rows.
test("E2E tools/call: query_logs aggregate takes effect over the wire — grouped result, not raw rows (#415 #2)", opts, async () => {
    const session = await newSession();
    const toolArgs = {
        service: await discoverService(session),
        aggregate: { op: "count_over_time", step: "15m" },
        duration: "1h",
    };
    const reply = await callTool(session, "query_logs", toolArgs);
    // The aggregate shape (op/mode/series) differs structurally from the
    // raw-rows shape (entries/summary), so seeing it shows the param reached
    // the connector even when the series is empty on a sparse demo window.
    const payload = Array.isArray(reply.parsed) ? reply.parsed[0] : reply.parsed;
    assert.ok(payload, `expected an aggregate result, got ${JSON.stringify(reply)}`);
    assert.equal(payload.op, "count_over_time", "result must carry the aggregate op");
    const hasAggregateShape = "mode" in payload && Array.isArray(payload.series);
    assert.ok(hasAggregateShape, "result must be the aggregate shape (mode + series)");
    const hasRawRows = "entries" in payload;
    assert.ok(!hasRawRows, "aggregate path must NOT return the raw-rows shape");
});
|
|
320
|
+
// Regression guard for #415 #4: a query_metrics call carrying `labels` must be
// dispatched without a JSON-RPC error and must yield some payload.
test("E2E tools/call: query_metrics labels param is accepted over the wire (#415 #4)", opts, async () => {
    const session = await newSession();
    const service = await discoverService(session);
    const toolArgs = { service, metric: "cpu", labels: { job: service }, duration: "5m" };
    const reply = await callTool(session, "query_metrics", toolArgs);
    // A structured "no data" result is acceptable here; only a transport or
    // dispatch error, or a reply with no payload at all, fails the test.
    assert.ok(!reply.error, `unexpected JSON-RPC error: ${JSON.stringify(reply.error)}`);
    const payloadMissing = reply.parsed === undefined && reply.text === undefined;
    assert.ok(!payloadMissing, "expected a CallToolResult payload");
});
|
|
334
|
+
// H1 over the wire: get_anomaly_history must dispatch without a JSON-RPC error
// and return some payload.
test("E2E tools/call: get_anomaly_history dispatches without a PromQL 400 crash (H1 over the wire)", opts, async () => {
    const session = await newSession();
    const service = await discoverService(session);
    const toolArgs = { service, duration: "1h", method: "mad" };
    const reply = await callTool(session, "get_anomaly_history", toolArgs);
    // Empty data is a clean non-error result, so only the dispatch shape is
    // asserted here, not the content of the history.
    assert.ok(!reply.error, `unexpected JSON-RPC error: ${JSON.stringify(reply.error)}`);
    const payloadMissing = reply.parsed === undefined && reply.text === undefined;
    assert.ok(!payloadMissing, "expected a CallToolResult payload");
});
|
|
344
|
+
// Sweep: call every tool advertised by tools/list with minimal arguments and
// require a shape-conformant reply from each one.
test("E2E tools/call: every registered tool dispatches over MCP and returns a CallToolResult", opts, async () => {
    const session = await newSession();
    const service = await discoverService(session);
    // Minimal valid args per tool; tools with required args get discovered/dummy
    // values. A clean isError result (e.g. query_traces 'no trace backends') is
    // acceptable — we only require a shape-conformant dispatch, never a -32xxx.
    const calls = {
        list_sources: {},
        list_services: {},
        query_metrics: { service, metric: "cpu" },
        query_logs: { service },
        get_anomaly_history: { service },
        generate_postmortem: { service },
        query_traces: { service },
        get_service_health: { service },
        detect_anomalies: {},
        get_topology: {},
        get_blast_radius: { resource: service },
        enrich_ips: { ips: ["203.0.113.5"] },
    };
    const { response: list } = await jsonRpc("tools/list", {}, { id: 41, session });
    const names = (list.result?.tools ?? []).map((t) => t.name);
    assert.ok(names.length >= 12, `expected >=12 tools, got ${names.length}`);
    let id = 60;
    for (const name of names) {
        // Tools without an entry above are called with empty arguments.
        const args = calls[name] ?? {};
        const { response } = await jsonRpc("tools/call", { name, arguments: args }, { id: id++, session });
        if (response.error) {
            assert.fail(`tool ${name} returned a JSON-RPC dispatch error: ${JSON.stringify(response.error)}`);
        }
        // Optional chaining: a reply with neither `error` nor `result` must fail
        // this assertion (which names the tool) rather than raise a TypeError.
        assert.ok(Array.isArray(response.result?.content), `tool ${name} must return content[]`);
    }
});
|
|
378
|
+
// Checks over the live transport that each builtin tool advertises
// `annotations.readOnlyHint === true` and a non-empty `annotations.title`.
test("E2E tools/list: every builtin tool advertises ToolAnnotations (readOnlyHint)", opts, async () => {
    // AX hardening: all 12 builtin tools are read-only; clients (e.g. Claude)
    // use these hints for auto-approve decisions, so they must be advertised
    // over the live transport — not just present in the registration source.
    const session = await newSession();
    const { response } = await jsonRpc("tools/list", {}, { id: 2, session });
    // Surface a JSON-RPC error explicitly, and read `result` defensively so a
    // reply without one fails the count assertion instead of a TypeError.
    assert.ok(!response.error, `unexpected JSON-RPC error: ${JSON.stringify(response.error)}`);
    const tools = response.result?.tools ?? [];
    assert.ok(tools.length >= 12, `expected >=12 tools, got ${tools.length}`);
    // Federated tools (namespaced `<prefix>.<tool>`) proxy upstream metadata and
    // may legitimately lack annotations — only the builtin set is asserted.
    const builtin = tools.filter((t) => t.name && !t.name.includes("."));
    for (const t of builtin) {
        assert.equal(t.annotations?.readOnlyHint, true, `tool ${t.name} must advertise annotations.readOnlyHint=true`);
        assert.ok(t.annotations?.title, `tool ${t.name} must advertise annotations.title`);
    }
});
|
|
395
|
+
// The agent usage guide ships as an MCP resource; this asserts that it shows
// up in resources/list and that resources/read returns its text.
test("E2E: builtin resource agent-usage-guide is listed and readable", opts, async () => {
    const session = await newSession();
    const guideUri = "omcp://guide/agent-usage";
    const { response: listed } = await jsonRpc("resources/list", {}, { id: 10, session });
    const uris = (listed.result?.resources ?? []).map((entry) => entry.uri);
    assert.ok(uris.includes(guideUri), `agent-usage-guide resource must be listed, got ${JSON.stringify(uris)}`);
    const { response: fetched } = await jsonRpc("resources/read", { uri: guideUri }, { id: 11, session });
    const contents = fetched.result?.contents ?? [];
    const body = contents[0]?.text ?? "";
    // "Triage recipe" is a heading inside the guide, so finding it shows the
    // document body made the round trip.
    assert.ok(body.includes("Triage recipe"), "guide text must round-trip");
});
|
|
406
|
+
// Both builtin prompts must be listed by prompts/list AND resolvable through
// prompts/get, with the `service` argument interpolated into the first message.
test("E2E: builtin prompts triage-incident + write-postmortem are listed and resolvable", opts, async () => {
    const session = await newSession();
    const list = await jsonRpc("prompts/list", {}, { id: 12, session });
    const prompts = list.response.result?.prompts ?? [];
    const expected = ["triage-incident", "write-postmortem"];
    for (const name of expected) {
        assert.ok(prompts.some((p) => p.name === name), `prompt ${name} must be listed`);
    }
    // Resolve every expected prompt, not just the first, so the test covers
    // what its title claims. `duration` is optional on write-postmortem, so
    // passing only `service` is enough for both.
    let id = 13;
    for (const name of expected) {
        const got = await jsonRpc("prompts/get", { name, arguments: { service: "ci-probe" } }, { id: id++, session });
        const msgs = got.response.result?.messages ?? [];
        assert.ok((msgs[0]?.content?.text ?? "").includes('"ci-probe"'), `prompt ${name} must interpolate the service arg`);
    }
});
|
|
417
|
+
// Fetches /llms.txt from the server root (URL_ENV with a trailing /mcp removed)
// and checks status, content type, the H1, a sample of tool entries and the
// for-agents link.
test("E2E: /llms.txt is served and reflects the canonical tool registry", opts, async () => {
    const origin = URL_ENV.replace(/\/mcp\/?$/, "");
    const reply = await fetch(`${origin}/llms.txt`);
    assert.equal(reply.status, 200);
    const contentType = reply.headers.get("content-type") ?? "";
    assert.match(contentType, /text\/plain/);
    const body = await reply.text();
    assert.match(body, /^# observability-mcp/, "must start with the llms.txt H1");
    // Spot-check a handful of tool names; each is expected as a "- name (" item.
    const expectedTools = ["query_logs", "query_metrics", "enrich_ips", "get_blast_radius"];
    for (const toolName of expectedTools) {
        assert.ok(body.includes(`- ${toolName} (`), `tool ${toolName} must be listed`);
    }
    assert.ok(body.includes("for-agents"), "must link the for-agents guide");
});
|
package/dist/index.js
CHANGED
|
@@ -393,15 +393,95 @@ async function main() {
|
|
|
393
393
|
}
|
|
394
394
|
return mcpServer.prompt(name, ...rest);
|
|
395
395
|
});
|
|
396
|
-
//
|
|
397
|
-
|
|
398
|
-
|
|
396
|
+
// --- Builtin resources + prompts (agent experience) -------------------
|
|
397
|
+
// The usage guide is the distilled, agent-validated workflow from issue
|
|
398
|
+
// #415 — served as an MCP resource so a client can pull it into context
|
|
399
|
+
// without a web fetch. Prompts compose the existing read-only tools into
|
|
400
|
+
// the two flows agents run most.
|
|
401
|
+
registerResource("agent-usage-guide", "omcp://guide/agent-usage", {
|
|
402
|
+
description: "How to use this gateway effectively as an agent: the proven filter→aggregate→enrich triage recipe, signal-vs-silence behaviours, and the operator flags that unlock optional tools.",
|
|
403
|
+
mimeType: "text/markdown",
|
|
404
|
+
}, async (uri) => ({
|
|
405
|
+
contents: [
|
|
406
|
+
{
|
|
407
|
+
uri: uri.toString(),
|
|
408
|
+
mimeType: "text/markdown",
|
|
409
|
+
text: [
|
|
410
|
+
"# Agent usage guide (observability-mcp)",
|
|
411
|
+
"",
|
|
412
|
+
"All tools are read-only (`readOnlyHint: true`). The golden rule:",
|
|
413
|
+
"**filter and aggregate server-side — ask for numbers, not haystacks.**",
|
|
414
|
+
"",
|
|
415
|
+
"## Triage recipe (agent-validated, issue #415)",
|
|
416
|
+
'1. `query_logs` with `labels` (exact-match field filters, e.g. {"environment":"prod"})',
|
|
417
|
+
' and `aggregate` ({"op":"topk","by":["ip"],"k":10} or {"op":"count_over_time","step":"15m"})',
|
|
418
|
+
" — pushed down to LogQL, returns a handful of numbers instead of thousands of rows.",
|
|
419
|
+
"2. `enrich_ips` with the IPs from step 1 — offline geo/ASN/hosting-flag lookup",
|
|
420
|
+
" (bot-vs-human signal). Requires OMCP_IP_ENRICH_FILE on the operator side.",
|
|
421
|
+
'3. `query_metrics` with `labels` ({"route":"/checkout"}) and `groupBy` to scope a',
|
|
422
|
+
" curated metric to the slice you care about.",
|
|
423
|
+
"",
|
|
424
|
+
"## Incident flow",
|
|
425
|
+
"`detect_anomalies` (fleet scan) → `get_service_health` (one-service verdict) →",
|
|
426
|
+
"`get_blast_radius` (shared-host impact) → `generate_postmortem` (markdown report).",
|
|
427
|
+
"",
|
|
428
|
+
"## When something is empty or refused",
|
|
429
|
+
"The gateway explains itself: no topology connector → explicit note; no trace",
|
|
430
|
+
"backend → explicit error; `raw_query` disabled → message naming OMCP_RAW_QUERY=on;",
|
|
431
|
+
"redacted values → a `_redacted` count in the result. Relay flag names to your",
|
|
432
|
+
"operator verbatim — the messages are written to be forwarded.",
|
|
433
|
+
"",
|
|
434
|
+
"## Report findings",
|
|
435
|
+
"Structured agent reports drive releases here (see issue #415). File one:",
|
|
436
|
+
"https://github.com/ThoTischner/observability-mcp/issues/new?template=agent-report.yml",
|
|
437
|
+
"Full guide: https://thotischner.github.io/observability-mcp/for-agents/",
|
|
438
|
+
].join("\n"),
|
|
439
|
+
},
|
|
440
|
+
],
|
|
441
|
+
}));
|
|
442
|
+
registerPrompt("triage-incident", "Guided incident triage for one service: health verdict, anomaly scan, blast radius, and the log slice that matters.", { service: z.string().describe("Service name as returned by list_services") }, ({ service }) => ({
|
|
443
|
+
messages: [
|
|
444
|
+
{
|
|
445
|
+
role: "user",
|
|
446
|
+
content: {
|
|
447
|
+
type: "text",
|
|
448
|
+
text: [
|
|
449
|
+
`Triage the service "${service}" using the observability-mcp tools, in this order:`,
|
|
450
|
+
`1. get_service_health {"service":"${service}"} — the current verdict and why.`,
|
|
451
|
+
`2. detect_anomalies {"service":"${service}","duration":"1h"} — what is statistically off.`,
|
|
452
|
+
`3. get_blast_radius {"resource":"${service}"} — who else fails if its host fails.`,
|
|
453
|
+
`4. query_logs {"service":"${service}","level":"error","aggregate":{"op":"count_over_time","step":"5m"},"duration":"1h"} — error-volume shape over time; drill into raw rows only for the spike window.`,
|
|
454
|
+
"Then summarise: current state, most likely cause, blast radius, and the next diagnostic step. Prefer aggregated queries over raw log dumps.",
|
|
455
|
+
].join("\n"),
|
|
456
|
+
},
|
|
457
|
+
},
|
|
458
|
+
],
|
|
459
|
+
}));
|
|
460
|
+
registerPrompt("write-postmortem", "Generate and refine a post-incident report for one service over a window.", {
|
|
461
|
+
service: z.string().describe("Service name as returned by list_services"),
|
|
462
|
+
duration: z.string().optional().describe("Look-back window, e.g. '1h', '6h'. Default '1h'."),
|
|
463
|
+
}, ({ service, duration }) => ({
|
|
464
|
+
messages: [
|
|
465
|
+
{
|
|
466
|
+
role: "user",
|
|
467
|
+
content: {
|
|
468
|
+
type: "text",
|
|
469
|
+
text: [
|
|
470
|
+
`Produce a post-mortem for "${service}" over the last ${duration || "1h"}:`,
|
|
471
|
+
`1. generate_postmortem {"service":"${service}","duration":"${duration || "1h"}"} — the stitched report (anomaly timeline, blast radius, traces, log highlights).`,
|
|
472
|
+
`2. Verify its claims: get_anomaly_history {"service":"${service}","duration":"${duration || "1h"}"} for the score timeline, and query_logs with an aggregate for the error shape.`,
|
|
473
|
+
"3. Rewrite the result as a blameless post-mortem: summary, impact, timeline, root-cause hypothesis (with confidence), follow-ups. Mark any section the gateway reported as missing data instead of inventing content.",
|
|
474
|
+
].join("\n"),
|
|
475
|
+
},
|
|
476
|
+
},
|
|
477
|
+
],
|
|
478
|
+
}));
|
|
399
479
|
registerTool("list_sources", [
|
|
400
480
|
"List the configured observability backends (Prometheus, Loki, and any connector) and whether each is currently reachable.",
|
|
401
481
|
"When to use: call this first to learn which source names exist and are healthy before passing `source` to other tools, or to debug why a query returns no data.",
|
|
402
|
-
"Behavior: read-only, no side effects. Returns one entry per source with its name, type,
|
|
482
|
+
"Behavior: read-only, no side effects. Returns one entry per source with its name, type, signal types (metrics/logs), and a live up/down status (the backend URL is intentionally not exposed — it may carry embedded credentials). Never throws for an unreachable backend — the backend is reported as down instead.",
|
|
403
483
|
"Related: use `list_services` to see what is monitored within these sources.",
|
|
404
|
-
].join(" "), {}, async () => {
|
|
484
|
+
].join(" "), {}, { title: "List Sources", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async () => {
|
|
405
485
|
await enforceEntitledAccess(ctx, { tool: "list_sources" });
|
|
406
486
|
return withToolMetrics("list_sources", () => listSourcesHandler(registry, ctx));
|
|
407
487
|
});
|
|
@@ -415,7 +495,7 @@ async function main() {
|
|
|
415
495
|
.string()
|
|
416
496
|
.optional()
|
|
417
497
|
.describe("Optional case-insensitive substring to narrow the result to matching service names (e.g. 'payment'). Omit to list every discovered service."),
|
|
418
|
-
}, async (args) => {
|
|
498
|
+
}, { title: "List Services", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
419
499
|
await enforceEntitledAccess(ctx, { tool: "list_services" });
|
|
420
500
|
const result = await withToolMetrics("list_services", () => listServicesHandler(registry, args, ctx));
|
|
421
501
|
return enrichToolServicesText(result, ctx);
|
|
@@ -458,7 +538,7 @@ async function main() {
|
|
|
458
538
|
.string()
|
|
459
539
|
.optional()
|
|
460
540
|
.describe("Optional escape hatch: a verbatim PromQL expression, run as-is over the range — for ad-hoc queries the curated `metric` catalog can't express (any series, any function, broken down by any label). When set, `metric`/`service`/`groupBy`/`labels` are ignored. DISABLED by default; the operator must enable the raw-query capability (OMCP_RAW_QUERY=on) or the call is refused. Still tenant-scoped and source-allow-listed."),
|
|
461
|
-
}, async (args) => {
|
|
541
|
+
}, { title: "Query Metrics", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
462
542
|
await enforceEntitledAccess(ctx, { tool: "query_metrics", source: args?.source, service: args?.service });
|
|
463
543
|
const result = await withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args, ctx, { allowRawQuery: RAW_QUERY_ENABLED }));
|
|
464
544
|
return chargeTokenBudget(result, ctx, "query_metrics");
|
|
@@ -525,7 +605,7 @@ async function main() {
|
|
|
525
605
|
.string()
|
|
526
606
|
.optional()
|
|
527
607
|
.describe("Optional escape hatch: a verbatim LogQL log query, run as-is — for selectors/pipelines the curated params can't express. When set, `service`/`labels`/`level`/`query` are ignored and it is mutually exclusive with `aggregate` (express aggregation in the LogQL itself). DISABLED by default; the operator must enable the raw-query capability (OMCP_RAW_QUERY=on) or the call is refused. Redaction still applies to the returned log lines."),
|
|
528
|
-
}, async (args) => {
|
|
608
|
+
}, { title: "Query Logs", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
529
609
|
await enforceEntitledAccess(ctx, { tool: "query_logs", source: args?.source, service: args?.service });
|
|
530
610
|
const result = await withToolMetrics("query_logs", () => queryLogsHandler(registry, args, ctx, { allowRawQuery: RAW_QUERY_ENABLED }));
|
|
531
611
|
// Redact PII / secrets from the log payload before it crosses the
|
|
@@ -565,7 +645,7 @@ async function main() {
|
|
|
565
645
|
service: z.string().describe("Service name to filter on."),
|
|
566
646
|
duration: z.string().optional().describe("Rolling window, e.g. '1h', '24h'. Default '1h'."),
|
|
567
647
|
method: z.string().optional().describe("Filter by detector method ('mad' / 'seasonality' / 'correlator'). Optional."),
|
|
568
|
-
}, async (args) => {
|
|
648
|
+
}, { title: "Anomaly History", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
569
649
|
await enforceEntitledAccess(ctx, { tool: "get_anomaly_history", service: args?.service });
|
|
570
650
|
const result = await withToolMetrics("get_anomaly_history", () => getAnomalyHistoryHandler(registry, args, ctx));
|
|
571
651
|
return chargeTokenBudget(result, ctx, "get_anomaly_history");
|
|
@@ -580,7 +660,7 @@ async function main() {
|
|
|
580
660
|
service: z.string().describe("Suspected root-cause service."),
|
|
581
661
|
duration: z.string().optional().describe("Window length, e.g. '1h', '6h'. Default '1h'."),
|
|
582
662
|
format: z.enum(["markdown", "json"]).optional().describe("'markdown' (default) or 'json'."),
|
|
583
|
-
}, async (args) => {
|
|
663
|
+
}, { title: "Generate Postmortem", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
584
664
|
await enforceEntitledAccess(ctx, { tool: "generate_postmortem", service: args?.service });
|
|
585
665
|
const result = await withToolMetrics("generate_postmortem", () => generatePostmortemHandler(registry, args, ctx));
|
|
586
666
|
return chargeTokenBudget(result, ctx, "generate_postmortem");
|
|
@@ -589,15 +669,15 @@ async function main() {
|
|
|
589
669
|
"Query distributed traces for a service over a given timeframe.",
|
|
590
670
|
"Returns ranked trace summaries (duration, span count, error status) with a p50/p95 aggregate across the returned set.",
|
|
591
671
|
"When to use: investigate tail-latency outliers, walk call chains across services for a specific time window, or pull traces related to an anomaly that the metric/log tools surfaced first.",
|
|
592
|
-
"Prerequisites: get the exact service name from `list_services`. A Tempo
|
|
593
|
-
"Behavior: read-only. `filter` accepts the backend's native query language (TraceQL on Tempo
|
|
672
|
+
"Prerequisites: get the exact service name from `list_services`. A traces connector (e.g. Tempo, installable from the connector hub) must be configured — none is bundled by default, so without one this returns a clean 'No trace backends configured' result.",
|
|
673
|
+
"Behavior: read-only. `filter` accepts the backend's native query language (e.g. TraceQL on Tempo). When `errorsOnly=true`, only traces with at least one error span are returned. Default limit is 50.",
|
|
594
674
|
].join(" "), {
|
|
595
675
|
service: z.string().describe("Service name (e.g. 'payment-service')."),
|
|
596
676
|
duration: z.string().optional().describe("Rolling time window, e.g. '5m', '1h'. Default '15m'."),
|
|
597
677
|
filter: z.string().optional().describe("Backend-native filter (TraceQL on Tempo, tag query on Jaeger). Optional."),
|
|
598
678
|
limit: z.number().int().positive().optional().describe("Soft cap on returned trace summaries. Default 50."),
|
|
599
679
|
errorsOnly: z.boolean().optional().describe("If true, only traces with at least one error span."),
|
|
600
|
-
}, async (args) => {
|
|
680
|
+
}, { title: "Query Traces", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
601
681
|
await enforceEntitledAccess(ctx, { tool: "query_traces", service: args?.service });
|
|
602
682
|
const result = await withToolMetrics("query_traces", () => queryTracesHandler(registry, args, ctx));
|
|
603
683
|
return chargeTokenBudget(result, ctx, "query_traces");
|
|
@@ -611,7 +691,7 @@ async function main() {
|
|
|
611
691
|
service: z
|
|
612
692
|
.string()
|
|
613
693
|
.describe("Required. Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'payment-service')."),
|
|
614
|
-
}, async (args) => {
|
|
694
|
+
}, { title: "Service Health", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
615
695
|
await enforceEntitledAccess(ctx, { tool: "get_service_health", service: args?.service });
|
|
616
696
|
const result = await withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args, ctx));
|
|
617
697
|
const enriched = enrichToolHealthText(result, String(args?.service ?? ""), ctx);
|
|
@@ -635,7 +715,7 @@ async function main() {
|
|
|
635
715
|
.enum(["low", "medium", "high"])
|
|
636
716
|
.optional()
|
|
637
717
|
.describe("Optional. Detection threshold: 'low' flags only strong deviations (>3σ), 'medium' is balanced (>2σ), 'high' is most sensitive and noisier (>1.5σ). Default: 'medium'."),
|
|
638
|
-
}, async (args) => {
|
|
718
|
+
}, { title: "Detect Anomalies", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
639
719
|
await enforceEntitledAccess(ctx, { tool: "detect_anomalies", source: args?.source, service: args?.service });
|
|
640
720
|
// P1: pass the anomaly-history sink so detected scores flow
|
|
641
721
|
// into the TSDB and `get_anomaly_history` returns real data.
|
|
@@ -666,7 +746,7 @@ async function main() {
|
|
|
666
746
|
.max(5000)
|
|
667
747
|
.optional()
|
|
668
748
|
.describe("Optional. Maximum resources to return; edges are trimmed to the kept set. Default 500, max 5000."),
|
|
669
|
-
}, async (args) => {
|
|
749
|
+
}, { title: "Topology Graph", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
670
750
|
await enforceEntitledAccess(ctx, { tool: "get_topology", source: args?.source });
|
|
671
751
|
return withToolMetrics("get_topology", () => getTopologyHandler(registry, args, ctx));
|
|
672
752
|
});
|
|
@@ -679,7 +759,7 @@ async function main() {
|
|
|
679
759
|
resource: z
|
|
680
760
|
.string()
|
|
681
761
|
.describe("Required. Resource to evaluate. Accepts the canonical id (e.g. 'k8s:pod:default/checkout-7f89d'), the exact resource name (e.g. 'checkout-7f89d'), or a unique substring of either."),
|
|
682
|
-
}, async (args) => {
|
|
762
|
+
}, { title: "Blast Radius", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
683
763
|
await enforceEntitledAccess(ctx, { tool: "get_blast_radius" });
|
|
684
764
|
return withToolMetrics("get_blast_radius", () => getBlastRadiusHandler(registry, args, ctx));
|
|
685
765
|
});
|
|
@@ -692,7 +772,7 @@ async function main() {
|
|
|
692
772
|
ips: z
|
|
693
773
|
.array(z.string())
|
|
694
774
|
.describe("Required. IPv4 address strings to enrich (e.g. ['203.0.113.5','198.51.100.9']). Max 1000 per call; invalid entries are returned with found=false rather than failing the batch."),
|
|
695
|
-
}, async (args) => {
|
|
775
|
+
}, { title: "Enrich IPs", readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false }, async (args) => {
|
|
696
776
|
await enforceEntitledAccess(ctx, { tool: "enrich_ips" });
|
|
697
777
|
return withToolMetrics("enrich_ips", async () => enrichIpsHandler(ipEnrichment, args, ctx));
|
|
698
778
|
});
|
|
@@ -1117,11 +1197,11 @@ async function main() {
|
|
|
1117
1197
|
// get_anomaly_history queries them back via any Prometheus source
|
|
1118
1198
|
// pointed at the same TSDB.
|
|
1119
1199
|
//
|
|
1120
|
-
// The detector-side hook that
|
|
1121
|
-
//
|
|
1122
|
-
//
|
|
1123
|
-
//
|
|
1124
|
-
//
|
|
1200
|
+
// The detector-side hook that records per-anomaly scores is wired:
|
|
1201
|
+
// this instance is passed into detectAnomaliesHandler at the
|
|
1202
|
+
// detect_anomalies tool registration below, so every scan records its
|
|
1203
|
+
// scores. Externally-written omcp_anomaly_score metrics are queryable
|
|
1204
|
+
// end-to-end too.
|
|
1125
1205
|
const anomalyHistory = new AnomalyHistory(anomalyHistoryFromEnv());
|
|
1126
1206
|
anomalyHistory.start();
|
|
1127
1207
|
if (anomalyHistory.isEnabled()) {
|
|
@@ -1195,6 +1275,41 @@ async function main() {
|
|
|
1195
1275
|
// enough to skip the request-counter middleware.
|
|
1196
1276
|
let ready = false;
|
|
1197
1277
|
app.get("/healthz", (_req, res) => res.type("text").send("ok"));
|
|
1278
|
+
// /llms.txt — the llms.txt convention (llmstxt.org): a plain-text,
|
|
1279
|
+
// LLM-friendly summary of what this server is and how to use it. The
|
|
1280
|
+
// primary audience of this gateway IS an LLM agent, so the gateway
|
|
1281
|
+
// serves its own. Tool list is generated from the canonical registry
|
|
1282
|
+
// (registry-names.ts) so it can't drift from the real surface.
|
|
1283
|
+
const LLMS_TXT = [
|
|
1284
|
+
"# observability-mcp",
|
|
1285
|
+
"",
|
|
1286
|
+
`> Unified observability gateway for AI agents (v${SERVER_VERSION}). One MCP server`,
|
|
1287
|
+
"> for Prometheus, Loki, and any backend via pluggable connectors — with",
|
|
1288
|
+
"> server-side filtering/aggregation so agents get numbers, not haystacks.",
|
|
1289
|
+
"",
|
|
1290
|
+
"MCP endpoint: POST /mcp (Streamable HTTP) · also stdio (--stdio) and WebSocket (/mcp/ws).",
|
|
1291
|
+
"All tools are read-only and advertise MCP ToolAnnotations (readOnlyHint: true).",
|
|
1292
|
+
"MCP resource omcp://guide/agent-usage carries the agent usage guide;",
|
|
1293
|
+
"prompts triage-incident and write-postmortem compose the tools into workflows.",
|
|
1294
|
+
"",
|
|
1295
|
+
"## Tools",
|
|
1296
|
+
"",
|
|
1297
|
+
...REGISTERED_TOOLS.map((t) => `- ${t.name} (${t.category}): ${t.summary}`),
|
|
1298
|
+
"",
|
|
1299
|
+
"## Connect",
|
|
1300
|
+
"",
|
|
1301
|
+
" claude mcp add observability --transport http http://localhost:3000/mcp",
|
|
1302
|
+
"",
|
|
1303
|
+
"## Docs",
|
|
1304
|
+
"",
|
|
1305
|
+
"- For agents (start here): https://thotischner.github.io/observability-mcp/for-agents/",
|
|
1306
|
+
"- Documentation site: https://thotischner.github.io/observability-mcp/",
|
|
1307
|
+
"- Report a finding (agent-report template): https://github.com/ThoTischner/observability-mcp/issues/new?template=agent-report.yml",
|
|
1308
|
+
"- Discussions (agent collaboration welcome): https://github.com/ThoTischner/observability-mcp/discussions",
|
|
1309
|
+
"- Source: https://github.com/ThoTischner/observability-mcp",
|
|
1310
|
+
"",
|
|
1311
|
+
].join("\n");
|
|
1312
|
+
app.get("/llms.txt", (_req, res) => res.type("text/plain; charset=utf-8").send(LLMS_TXT));
|
|
1198
1313
|
// Procurement-time probe: the MCP spec revisions and transports the
|
|
1199
1314
|
// gateway supports. Static today — kept as a separate endpoint so a
|
|
1200
1315
|
// discovery tool / RFP probe / catalog scanner can resolve our
|
|
@@ -18,8 +18,14 @@ export declare const manifestSchema: z.ZodObject<{
|
|
|
18
18
|
capabilities: z.ZodOptional<z.ZodObject<{
|
|
19
19
|
queryMetrics: z.ZodOptional<z.ZodBoolean>;
|
|
20
20
|
queryLogs: z.ZodOptional<z.ZodBoolean>;
|
|
21
|
+
queryLogAggregate: z.ZodOptional<z.ZodBoolean>;
|
|
22
|
+
queryTraces: z.ZodOptional<z.ZodBoolean>;
|
|
21
23
|
listServices: z.ZodOptional<z.ZodBoolean>;
|
|
22
24
|
listAvailableMetrics: z.ZodOptional<z.ZodBoolean>;
|
|
25
|
+
listResources: z.ZodOptional<z.ZodBoolean>;
|
|
26
|
+
listEdges: z.ZodOptional<z.ZodBoolean>;
|
|
27
|
+
getTopologySnapshot: z.ZodOptional<z.ZodBoolean>;
|
|
28
|
+
watchTopology: z.ZodOptional<z.ZodBoolean>;
|
|
23
29
|
}, z.core.$strip>>;
|
|
24
30
|
compat: z.ZodOptional<z.ZodObject<{
|
|
25
31
|
serverVersion: z.ZodOptional<z.ZodString>;
|
|
@@ -24,8 +24,15 @@ export const manifestSchema = z.object({
|
|
|
24
24
|
.object({
|
|
25
25
|
queryMetrics: z.boolean().optional(),
|
|
26
26
|
queryLogs: z.boolean().optional(),
|
|
27
|
+
queryLogAggregate: z.boolean().optional(),
|
|
28
|
+
queryTraces: z.boolean().optional(),
|
|
27
29
|
listServices: z.boolean().optional(),
|
|
28
30
|
listAvailableMetrics: z.boolean().optional(),
|
|
31
|
+
// Topology-provider capabilities (e.g. the Kubernetes connector).
|
|
32
|
+
listResources: z.boolean().optional(),
|
|
33
|
+
listEdges: z.boolean().optional(),
|
|
34
|
+
getTopologySnapshot: z.boolean().optional(),
|
|
35
|
+
watchTopology: z.boolean().optional(),
|
|
29
36
|
})
|
|
30
37
|
.optional(),
|
|
31
38
|
compat: z
|
|
@@ -67,13 +67,20 @@ export async function getAnomalyHistoryHandler(registry, args, ctx = defaultCont
|
|
|
67
67
|
labelFilters.push(`method="${escLabel(args.method)}"`);
|
|
68
68
|
const metric = `omcp_anomaly_score{${labelFilters.join(",")}}`;
|
|
69
69
|
// Fan out across every metrics connector; first non-empty answer wins.
|
|
70
|
+
// CRITICAL: pass the hand-built selector via `rawQuery`, NOT `metric`.
|
|
71
|
+
// The connector's curated path wraps a bare `metric` in `{ {{selector}} }`,
|
|
72
|
+
// which for our already-complete selector produces invalid double-brace
|
|
73
|
+
// PromQL (`omcp_anomaly_score{service="x"}{ job="x" }`) → 400 → the catch
|
|
74
|
+
// below would swallow it and the tool would always report "no history". rawQuery is
|
|
75
|
+
// sent verbatim to /api/v1/query_range (the R4 passthrough).
|
|
70
76
|
for (const c of candidates) {
|
|
71
77
|
if (!c.queryMetrics)
|
|
72
78
|
continue;
|
|
73
79
|
try {
|
|
74
80
|
const r = await c.queryMetrics({
|
|
75
81
|
service: args.service,
|
|
76
|
-
metric,
|
|
82
|
+
metric: "omcp_anomaly_score",
|
|
83
|
+
rawQuery: metric,
|
|
77
84
|
duration,
|
|
78
85
|
});
|
|
79
86
|
if (r && Array.isArray(r.values) && r.values.length > 0) {
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { describe, it } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { getAnomalyHistoryHandler } from "./get-anomaly-history.js";
|
|
4
|
+
// Regression guard for the wired-but-dead bug found in the v3.2 audit:
|
|
5
|
+
// get_anomaly_history hand-builds a complete PromQL selector
|
|
6
|
+
// (`omcp_anomaly_score{service="x",method="mad"}`) and must pass it via
|
|
7
|
+
// `rawQuery` (verbatim passthrough), NOT via `metric`. The curated `metric`
|
|
8
|
+
// path wraps the value in `{ {{selector}} }`, which for an already-complete
|
|
9
|
+
// selector yields invalid double-brace PromQL → Prometheus 400 → the handler
|
|
10
|
+
// swallowed it and always returned "no history". This test pins that the
|
|
11
|
+
// connector receives a verbatim rawQuery and that the selector never goes through the mangling `metric` path.
|
|
12
|
+
function fakeRegistry(capture, result) {
|
|
13
|
+
const conn = {
|
|
14
|
+
name: "prom",
|
|
15
|
+
type: "prometheus",
|
|
16
|
+
signalType: "metrics",
|
|
17
|
+
async queryMetrics(q) {
|
|
18
|
+
capture(q);
|
|
19
|
+
if (!result)
|
|
20
|
+
throw new Error("no data");
|
|
21
|
+
return result;
|
|
22
|
+
},
|
|
23
|
+
};
|
|
24
|
+
return { getByTenant: () => [conn] };
|
|
25
|
+
}
|
|
26
|
+
function parse(r) {
|
|
27
|
+
return JSON.parse(r.content[0].text);
|
|
28
|
+
}
|
|
29
|
+
describe("get_anomaly_history — rawQuery wiring (audit regression)", () => {
|
|
30
|
+
it("routes the omcp_anomaly_score selector via rawQuery, not metric", async () => {
|
|
31
|
+
let captured;
|
|
32
|
+
const reg = fakeRegistry((q) => (captured = q), {
|
|
33
|
+
source: "prom",
|
|
34
|
+
service: "payment",
|
|
35
|
+
metric: "omcp_anomaly_score",
|
|
36
|
+
unit: "",
|
|
37
|
+
values: [{ timestamp: "2026-06-09T00:00:00.000Z", value: 0.7 }],
|
|
38
|
+
summary: { current: 0.7, average: 0.7, min: 0.7, max: 0.7, trend: "stable" },
|
|
39
|
+
});
|
|
40
|
+
const out = parse(await getAnomalyHistoryHandler(reg, { service: "payment", method: "mad", duration: "1h" }));
|
|
41
|
+
assert.ok(captured, "connector.queryMetrics must be called");
|
|
42
|
+
// The fix: rawQuery carries the verbatim selector.
|
|
43
|
+
assert.equal(captured.rawQuery, 'omcp_anomaly_score{service="payment",method="mad"}');
|
|
44
|
+
// And it must NOT be smuggled through the curated `metric` path (which would
|
|
45
|
+
// double-brace it). metric may be a bare name placeholder, but never the selector.
|
|
46
|
+
assert.doesNotMatch(String(captured.metric ?? ""), /\{/, "metric must not carry the brace selector");
|
|
47
|
+
// Sanity: the verbatim query has exactly one brace block (no double-brace).
|
|
48
|
+
assert.equal((captured.rawQuery.match(/\{/g) || []).length, 1);
|
|
49
|
+
assert.equal(out.isError, undefined);
|
|
50
|
+
assert.equal(out.values.length, 1);
|
|
51
|
+
});
|
|
52
|
+
it("omits the method filter when not given", async () => {
|
|
53
|
+
let captured;
|
|
54
|
+
const reg = fakeRegistry((q) => (captured = q), {
|
|
55
|
+
source: "prom", service: "api", metric: "omcp_anomaly_score", unit: "",
|
|
56
|
+
values: [{ timestamp: "2026-06-09T00:00:00.000Z", value: 1 }],
|
|
57
|
+
summary: { current: 1, average: 1, min: 1, max: 1, trend: "stable" },
|
|
58
|
+
});
|
|
59
|
+
await getAnomalyHistoryHandler(reg, { service: "api" });
|
|
60
|
+
assert.equal(captured.rawQuery, 'omcp_anomaly_score{service="api"}');
|
|
61
|
+
});
|
|
62
|
+
});
|
|
@@ -90,6 +90,21 @@ describe("listServicesHandler", () => {
|
|
|
90
90
|
assert.deepEqual(apiGw.sources.sort(), ["loki1", "prom1"]);
|
|
91
91
|
assert.deepEqual(apiGw.signalTypes.sort(), ["logs", "metrics"]);
|
|
92
92
|
});
|
|
93
|
+
it("carries per-service labels (e.g. discoveredVia) through the merge (audit: docs/loki.md)", async () => {
|
|
94
|
+
const reg = createRegistryWithMocks([
|
|
95
|
+
createMockConnector({
|
|
96
|
+
name: "loki1", type: "loki", signalType: "logs",
|
|
97
|
+
listServices: async () => [
|
|
98
|
+
{ name: "payment-service", source: "loki1", signalType: "logs", labels: { discoveredVia: "service_name" } },
|
|
99
|
+
],
|
|
100
|
+
}),
|
|
101
|
+
]);
|
|
102
|
+
const result = await listServicesHandler(reg, {});
|
|
103
|
+
const data = JSON.parse(result.content[0].text);
|
|
104
|
+
const svc = data.services.find((s) => s.name === "payment-service");
|
|
105
|
+
assert.ok(svc, "service must be present");
|
|
106
|
+
assert.equal(svc.labels?.discoveredVia, "service_name", "discoveredVia must surface in the tool output");
|
|
107
|
+
});
|
|
93
108
|
it("filters services case-insensitively", async () => {
|
|
94
109
|
const reg = createRegistryWithMocks([
|
|
95
110
|
createMockConnector({
|
|
@@ -25,7 +25,10 @@ export async function listServicesHandler(registry, args, ctx = defaultContext()
|
|
|
25
25
|
console.error(`Failed to list services from ${connector.name}:`, err);
|
|
26
26
|
}
|
|
27
27
|
}
|
|
28
|
-
// Deduplicate by name, merge signal types
|
|
28
|
+
// Deduplicate by name, merge signal types. Carry per-service `labels`
|
|
29
|
+
// (e.g. the Loki connector's `discoveredVia`, documented in docs/loki.md)
|
|
30
|
+
// through the merge so discovery metadata actually surfaces in the tool
|
|
31
|
+
// output; first source to set a given label key wins.
|
|
29
32
|
const merged = new Map();
|
|
30
33
|
for (const svc of allServices) {
|
|
31
34
|
const existing = merged.get(svc.name);
|
|
@@ -34,12 +37,15 @@ export async function listServicesHandler(registry, args, ctx = defaultContext()
|
|
|
34
37
|
existing.sources.push(svc.source);
|
|
35
38
|
if (!existing.signalTypes.includes(svc.signalType))
|
|
36
39
|
existing.signalTypes.push(svc.signalType);
|
|
40
|
+
if (svc.labels)
|
|
41
|
+
existing.labels = { ...svc.labels, ...(existing.labels ?? {}) };
|
|
37
42
|
}
|
|
38
43
|
else {
|
|
39
44
|
merged.set(svc.name, {
|
|
40
45
|
name: svc.name,
|
|
41
46
|
sources: [svc.source],
|
|
42
47
|
signalTypes: [svc.signalType],
|
|
48
|
+
labels: svc.labels ? { ...svc.labels } : undefined,
|
|
43
49
|
});
|
|
44
50
|
}
|
|
45
51
|
}
|
|
@@ -6,10 +6,12 @@
|
|
|
6
6
|
// summaries, and recomputes a global p50/p95 over the merged set
|
|
7
7
|
// (rather than blindly averaging per-source summaries).
|
|
8
8
|
//
|
|
9
|
-
// Backend support
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
9
|
+
// Backend support: no traces backend is bundled by default. The Tempo
|
|
10
|
+
// connector ships in the connector hub (install it to enable traces);
|
|
11
|
+
// there is no Jaeger connector today. Any connector that implements the
|
|
12
|
+
// optional queryTraces capability participates automatically — so on a
|
|
13
|
+
// stack without one the tool returns a clean "No trace backends
|
|
14
|
+
// configured" result rather than failing.
|
|
13
15
|
import { defaultContext } from "../context.js";
|
|
14
16
|
import { validateDuration, validateServiceName, errorResponse } from "./validation.js";
|
|
15
17
|
export const queryTracesDefinition = {
|
|
@@ -18,7 +20,7 @@ export const queryTracesDefinition = {
|
|
|
18
20
|
"Query distributed traces for a service over a given timeframe.",
|
|
19
21
|
"Returns ranked trace summaries with duration, error status, and span count, plus a p50/p95 duration aggregate across the returned set.",
|
|
20
22
|
"When to use: investigating tail-latency outliers, walking call chains across services for a known time window, or pulling related traces for an anomaly the metric/log tools surfaced first.",
|
|
21
|
-
"Behavior: read-only; results may be capped via `limit` (default 50). `filter` accepts the backend's native query language (TraceQL on Tempo
|
|
23
|
+
"Behavior: read-only; results may be capped via `limit` (default 50). `filter` accepts the backend's native query language (e.g. TraceQL on Tempo). When `errorsOnly=true`, only traces with at least one error span are returned.",
|
|
22
24
|
"Related: `query_metrics` for the per-service latency series; `get_blast_radius` for the topology a trace traverses.",
|
|
23
25
|
].join(" "),
|
|
24
26
|
inputSchema: {
|
package/package.json
CHANGED