@thotischner/observability-mcp 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -58,6 +58,8 @@ import { listSourcesHandler } from "./tools/list-sources.js";
58
58
  import { listServicesHandler } from "./tools/list-services.js";
59
59
  import { queryMetricsHandler } from "./tools/query-metrics.js";
60
60
  import { queryLogsHandler } from "./tools/query-logs.js";
61
+ import { enrichIpsHandler } from "./tools/enrich-ips.js";
62
+ import { IpEnrichmentDataset } from "./enrich/ip-dataset.js";
61
63
  import { queryTracesHandler } from "./tools/query-traces.js";
62
64
  import { getAnomalyHistoryHandler } from "./tools/get-anomaly-history.js";
63
65
  import { generatePostmortemHandler } from "./tools/generate-postmortem.js";
@@ -269,6 +271,33 @@ async function main() {
269
271
  return applyBudgetDecision(result, decision, tokens, toolName);
270
272
  }
271
273
  const REDACTION_ENABLED = String(process.env.OMCP_REDACTION ?? "on").toLowerCase() !== "off";
274
+ // Raw PromQL/LogQL passthrough capability — default OFF. A raw query bypasses
275
+ // the curated metric/log surface (catalog, selector scoping), so it is an
276
+ // explicit operator opt-in. Enable with OMCP_RAW_QUERY=on (or true/1).
277
+ const RAW_QUERY_ENABLED = ["on", "true", "1"].includes(String(process.env.OMCP_RAW_QUERY ?? "off").toLowerCase());
278
+ // Opt the anonymous/default identity into per-call redaction bypass. In an
279
+ // anonymous deployment (no OMCP_API_KEYS) there is no named credential to
280
+ // add to OMCP_KEY_BYPASS_REDACTION, so a per-call bypass_redaction can never
281
+ // succeed — the only lever was the blunt global OMCP_REDACTION=off. This
282
+ // flag lets a single-user self-hosted agent see raw values on its own logs
283
+ // via the per-call arg, while redaction stays the default. Default OFF.
284
+ const BYPASS_REDACTION_ANON = ["on", "true", "1"].includes(String(process.env.OMCP_BYPASS_REDACTION_ANON ?? "false").toLowerCase());
285
+ // Offline IP-enrichment dataset (issue #415 Gap B) — loaded once at boot from
286
+ // a local CSV (OMCP_IP_ENRICH_FILE). No external geo/ASN API is ever called,
287
+ // so it stays air-gapped. Unset / unreadable → enrich_ips returns a clear
288
+ // "not configured" notice rather than failing.
289
+ let ipEnrichment = null;
290
+ const ipEnrichFile = process.env.OMCP_IP_ENRICH_FILE;
291
+ if (ipEnrichFile) {
292
+ try {
293
+ ipEnrichment = IpEnrichmentDataset.fromCsv(readFileSync(ipEnrichFile, "utf8"));
294
+ console.log(`[enrich] IP enrichment dataset loaded from ${ipEnrichFile}: ${ipEnrichment.size} ranges` +
295
+ (ipEnrichment.skipped ? ` (${ipEnrichment.skipped} rows skipped)` : ""));
296
+ }
297
+ catch (err) {
298
+ console.error(`[enrich] failed to load OMCP_IP_ENRICH_FILE (${ipEnrichFile}): ${err instanceof Error ? err.message : String(err)} — enrich_ips will report 'not configured'`);
299
+ }
300
+ }
272
301
  function redactToolText(result, opts = {}) {
273
302
  if (!REDACTION_ENABLED)
274
303
  return result;
@@ -403,10 +432,12 @@ async function main() {
403
432
  ].join(" "), {
404
433
  service: z
405
434
  .string()
406
- .describe("Required. Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'api-gateway', 'payment-service')."),
435
+ .optional()
436
+ .describe("Required (unless `raw_query` is set). Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'api-gateway', 'payment-service')."),
407
437
  metric: z
408
438
  .string()
409
- .describe(`Required. Exact metric name to query. One of: ${uniqueNames.join(", ")}.`),
439
+ .optional()
440
+ .describe(`Required (unless ` + "`raw_query`" + ` is set). Exact metric name to query. One of: ${uniqueNames.join(", ")}.`),
410
441
  duration: z
411
442
  .string()
412
443
  .optional()
@@ -419,9 +450,17 @@ async function main() {
419
450
  .string()
420
451
  .optional()
421
452
  .describe("Optional. Metric label to break the result down by, e.g. 'instance', 'pod', 'node'. When set, the response contains one series per distinct label value under `groups`. Default: a single aggregated series."),
453
+ labels: z
454
+ .record(z.string(), z.string())
455
+ .optional()
456
+ .describe("Optional. Exact-match label filters (e.g. {\"status\":\"500\",\"route\":\"/checkout\"}) AND'd into the metric's series selector — the PromQL equivalent of the query_logs `labels` param. Use this to scope a curated metric to a subset of series (e.g. error_rate for one route/status) instead of the all-series aggregate. Combine with `groupBy` to filter then break down. Label names must be valid Prometheus identifiers."),
457
+ raw_query: z
458
+ .string()
459
+ .optional()
460
+ .describe("Optional escape hatch: a verbatim PromQL expression, run as-is over the range — for ad-hoc queries the curated `metric` catalog can't express (any series, any function, broken down by any label). When set, `metric`/`service`/`groupBy`/`labels` are ignored. DISABLED by default; the operator must enable the raw-query capability (OMCP_RAW_QUERY=on) or the call is refused. Still tenant-scoped and source-allow-listed."),
422
461
  }, async (args) => {
423
462
  await enforceEntitledAccess(ctx, { tool: "query_metrics", source: args?.source, service: args?.service });
424
- const result = await withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args, ctx));
463
+ const result = await withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args, ctx, { allowRawQuery: RAW_QUERY_ENABLED }));
425
464
  return chargeTokenBudget(result, ctx, "query_metrics");
426
465
  });
427
466
  registerTool("query_logs", [
@@ -432,7 +471,8 @@ async function main() {
432
471
  ].join(" "), {
433
472
  service: z
434
473
  .string()
435
- .describe("Required. Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'payment-service')."),
474
+ .optional()
475
+ .describe("Required (unless `raw_query` is set). Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'payment-service')."),
436
476
  query: z
437
477
  .string()
438
478
  .optional()
@@ -480,10 +520,14 @@ async function main() {
480
520
  bypass_redaction: z
481
521
  .boolean()
482
522
  .optional()
483
- .describe("Optional. When true, request that PII/secret redaction be skipped for this single call. The server only honours this when the calling credential was explicitly authorised via OMCP_KEY_BYPASS_REDACTION; otherwise the request still gets redacted output. Default: false."),
523
+ .describe("Optional. When true, request that PII/secret redaction be skipped for this single call. The server only honours this when the calling identity is authorised to bypass — a credential listed in OMCP_KEY_BYPASS_REDACTION, or the anonymous identity when the operator set OMCP_BYPASS_REDACTION_ANON=true; otherwise the request still gets redacted output. Default: false."),
524
+ raw_query: z
525
+ .string()
526
+ .optional()
527
+ .describe("Optional escape hatch: a verbatim LogQL log query, run as-is — for selectors/pipelines the curated params can't express. When set, `service`/`labels`/`level`/`query` are ignored and it is mutually exclusive with `aggregate` (express aggregation in the LogQL itself). DISABLED by default; the operator must enable the raw-query capability (OMCP_RAW_QUERY=on) or the call is refused. Redaction still applies to the returned log lines."),
484
528
  }, async (args) => {
485
529
  await enforceEntitledAccess(ctx, { tool: "query_logs", source: args?.source, service: args?.service });
486
- const result = await withToolMetrics("query_logs", () => queryLogsHandler(registry, args, ctx));
530
+ const result = await withToolMetrics("query_logs", () => queryLogsHandler(registry, args, ctx, { allowRawQuery: RAW_QUERY_ENABLED }));
487
531
  // Redact PII / secrets from the log payload before it crosses the
488
532
  // MCP boundary into the agent's context. Per-call bypass kicks in
489
533
  // only when BOTH (a) the credential is OMCP_KEY_BYPASS_REDACTION
@@ -639,6 +683,19 @@ async function main() {
639
683
  await enforceEntitledAccess(ctx, { tool: "get_blast_radius" });
640
684
  return withToolMetrics("get_blast_radius", () => getBlastRadiusHandler(registry, args, ctx));
641
685
  });
686
+ registerTool("enrich_ips", [
687
+ "Resolve a batch of IPv4 addresses to geo (country/city), ASN/org, and a hosting/proxy flag.",
688
+ "When to use: answering 'where are these visitors from?' or 'which of these IPs are bots / datacenter / VPN exit nodes?' over access logs, without an out-of-band geo-API call per IP.",
689
+ "Behavior: read-only. Looks each IP up in a LOCAL offline dataset the operator configured (OMCP_IP_ENRICH_FILE) — there is no external network call, so it is safe in air-gapped deployments. Returns one row per input IP with found=true/false plus any known fields. If no dataset is configured it returns a clear notice explaining how to enable it.",
690
+ "Related: pull the IPs from `query_logs` (use `labels`/`aggregate` to find the IPs of interest first).",
691
+ ].join(" "), {
692
+ ips: z
693
+ .array(z.string())
694
+ .describe("Required. IPv4 address strings to enrich (e.g. ['203.0.113.5','198.51.100.9']). Max 1000 per call; invalid entries are returned with found=false rather than failing the batch."),
695
+ }, async (args) => {
696
+ await enforceEntitledAccess(ctx, { tool: "enrich_ips" });
697
+ return withToolMetrics("enrich_ips", async () => enrichIpsHandler(ipEnrichment, args, ctx));
698
+ });
642
699
  // Phase F10: federated tools — every upstream MCP server's tools
643
700
  // show up here under `<prefix>.<upstream-tool>`. The handler is a
644
701
  // pure proxy: it forwards args verbatim and returns the upstream's
@@ -3030,8 +3087,10 @@ async function main() {
3030
3087
  res.json({ ok: true });
3031
3088
  });
3032
3089
  // Stdio transport: one server over stdin/stdout, no HTTP listener.
3090
+ // Stdio is inherently a local single-user channel, so the anonymous
3091
+ // redaction-bypass opt-in applies here too.
3033
3092
  if (STDIO) {
3034
- const { mcpServer: server } = createMcpServer(defaultContext());
3093
+ const { mcpServer: server } = createMcpServer(defaultContext({ allowBypassRedaction: BYPASS_REDACTION_ANON }));
3035
3094
  await server.connect(new StdioServerTransport());
3036
3095
  console.error(`observability-mcp running on stdio transport · connectors: ${registry
3037
3096
  .getAll()
@@ -3127,7 +3186,7 @@ async function main() {
3127
3186
  // coarse source allow-list into the RequestContext.
3128
3187
  async function gateCtx(req, res) {
3129
3188
  if (!credentialsConfigured())
3130
- return defaultContext();
3189
+ return defaultContext({ allowBypassRedaction: BYPASS_REDACTION_ANON });
3131
3190
  const cred = resolveToken(extractToken(req.headers), loadCredentials());
3132
3191
  if (!cred) {
3133
3192
  res
@@ -0,0 +1,30 @@
1
+ import { IpEnrichmentDataset } from "../enrich/ip-dataset.js";
2
+ import { type RequestContext } from "../context.js";
3
+ export declare const enrichIpsDefinition: {
4
+ name: "enrich_ips";
5
+ description: string;
6
+ };
7
+ export interface EnrichIpsArgs {
8
+ ips?: string[];
9
+ }
10
+ export interface IpEnrichmentResult {
11
+ ip: string;
12
+ found: boolean;
13
+ country?: string;
14
+ city?: string;
15
+ asn?: string;
16
+ org?: string;
17
+ hosting?: boolean;
18
+ }
19
+ export declare function enrichIpsHandler(dataset: IpEnrichmentDataset | null, args: EnrichIpsArgs, _ctx?: RequestContext): {
20
+ content: {
21
+ type: "text";
22
+ text: string;
23
+ }[];
24
+ isError: boolean;
25
+ } | {
26
+ content: {
27
+ type: "text";
28
+ text: string;
29
+ }[];
30
+ };
@@ -0,0 +1,60 @@
1
+ import { ipv4ToInt } from "../enrich/ip-dataset.js";
2
+ import { defaultContext } from "../context.js";
3
+ import { errorResponse } from "./validation.js";
4
+ // enrich_ips (issue #415 Gap B): resolve a batch of IPs to geo / ASN / org /
5
+ // hosting-flag from the operator's LOCAL offline dataset. No external lookups,
6
+ // so it is safe in air-gapped deployments. Disabled (returns a clear message)
7
+ // when no dataset is configured.
8
+ export const enrichIpsDefinition = {
9
+ name: "enrich_ips",
10
+ description: "Resolve a batch of IPv4 addresses to geo (country/city), ASN/org, and a hosting/proxy flag from a local offline dataset. Use this to answer 'where are these visitors from / which are bots or datacenter IPs' without an out-of-band geo API call. Requires the operator to have configured an offline dataset (OMCP_IP_ENRICH_FILE); returns a clear notice otherwise.",
11
+ };
12
+ const MAX_IPS = 1000;
13
+ export function enrichIpsHandler(dataset, args,
14
+ // The RequestContext seam — enrich_ips doesn't scope by tenant today (the
15
+ // dataset is a single process-wide table), but every tool handler threads
16
+ // ctx so access-control / audit can attach without a signature change later.
17
+ _ctx = defaultContext()) {
18
+ if (!dataset) {
19
+ return errorResponse("IP enrichment is not configured. Set OMCP_IP_ENRICH_FILE to a local CSV " +
20
+ "(network,country,city,asn,org,hosting) to enable offline geo/ASN/hosting " +
21
+ "lookups — there is no external API call, so it stays air-gapped.");
22
+ }
23
+ const ips = args.ips;
24
+ if (!Array.isArray(ips) || ips.length === 0) {
25
+ return errorResponse("`ips` must be a non-empty array of IPv4 address strings.");
26
+ }
27
+ if (ips.length > MAX_IPS) {
28
+ return errorResponse(`Too many IPs (${ips.length}); max ${MAX_IPS} per call.`);
29
+ }
30
+ const results = [];
31
+ let invalid = 0;
32
+ let matched = 0;
33
+ for (const ip of ips) {
34
+ if (typeof ip !== "string" || ipv4ToInt(ip) === null) {
35
+ invalid++;
36
+ results.push({ ip: String(ip), found: false });
37
+ continue;
38
+ }
39
+ const hit = dataset.lookup(ip);
40
+ if (hit) {
41
+ matched++;
42
+ results.push({ ip, found: true, ...hit });
43
+ }
44
+ else {
45
+ results.push({ ip, found: false });
46
+ }
47
+ }
48
+ return {
49
+ content: [
50
+ {
51
+ type: "text",
52
+ text: JSON.stringify({
53
+ results,
54
+ summary: { total: ips.length, matched, unmatched: ips.length - matched - invalid, invalid },
55
+ datasetSize: dataset.size,
56
+ }, null, 2),
57
+ },
58
+ ],
59
+ };
60
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,38 @@
1
+ import { describe, it } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { IpEnrichmentDataset } from "../enrich/ip-dataset.js";
4
+ import { enrichIpsHandler } from "./enrich-ips.js";
5
+ function parse(result) {
6
+ return JSON.parse(result.content[0].text);
7
+ }
8
+ const ds = IpEnrichmentDataset.fromCsv(["1.2.3.0/24,US,Ashburn,AS14618,Example Cloud,true", "203.0.113.5,DE,Berlin,AS3320,Example ISP,false"].join("\n"));
9
+ describe("enrichIpsHandler (R6, issue #415 Gap B)", () => {
10
+ it("returns a clear 'not configured' notice when no dataset is loaded", () => {
11
+ const out = parse(enrichIpsHandler(null, { ips: ["1.2.3.4"] }));
12
+ assert.match(out.error, /not configured/i);
13
+ assert.match(out.error, /OMCP_IP_ENRICH_FILE/);
14
+ });
15
+ it("rejects empty / missing ips", () => {
16
+ assert.match(parse(enrichIpsHandler(ds, { ips: [] })).error, /non-empty array/i);
17
+ assert.match(parse(enrichIpsHandler(ds, {})).error, /non-empty array/i);
18
+ });
19
+ it("rejects an over-large batch", () => {
20
+ const many = Array.from({ length: 1001 }, (_, i) => `1.2.3.${i % 255}`);
21
+ assert.match(parse(enrichIpsHandler(ds, { ips: many })).error, /Too many IPs/i);
22
+ });
23
+ it("enriches known IPs and reports found=false for misses + invalid", () => {
24
+ const out = parse(enrichIpsHandler(ds, { ips: ["1.2.3.99", "8.8.8.8", "not-an-ip"] }));
25
+ assert.equal(out.results.length, 3);
26
+ const matched = out.results.find((r) => r.ip === "1.2.3.99");
27
+ assert.equal(matched.found, true);
28
+ assert.equal(matched.city, "Ashburn");
29
+ assert.equal(matched.hosting, true);
30
+ const miss = out.results.find((r) => r.ip === "8.8.8.8");
31
+ assert.equal(miss.found, false);
32
+ assert.equal(miss.city, undefined);
33
+ const invalid = out.results.find((r) => r.ip === "not-an-ip");
34
+ assert.equal(invalid.found, false);
35
+ assert.deepEqual(out.summary, { total: 3, matched: 1, unmatched: 1, invalid: 1 });
36
+ assert.equal(out.datasetSize, 2);
37
+ });
38
+ });
@@ -64,7 +64,7 @@ export declare const queryLogsDefinition: {
64
64
  };
65
65
  };
66
66
  export declare function queryLogsHandler(registry: ConnectorRegistry, args: {
67
- service: string;
67
+ service?: string;
68
68
  query?: string;
69
69
  duration?: string;
70
70
  level?: string;
@@ -76,7 +76,10 @@ export declare function queryLogsHandler(registry: ConnectorRegistry, args: {
76
76
  k?: number;
77
77
  step?: string;
78
78
  };
79
- }, ctx?: RequestContext): Promise<{
79
+ raw_query?: string;
80
+ }, ctx?: RequestContext, opts?: {
81
+ allowRawQuery?: boolean;
82
+ }): Promise<{
80
83
  content: {
81
84
  type: "text";
82
85
  text: string;
@@ -1,5 +1,5 @@
1
1
  import { defaultContext } from "../context.js";
2
- import { validateDuration, validateServiceName, validateLogLabels, validateLogAggregate, errorResponse } from "./validation.js";
2
+ import { validateDuration, validateServiceName, validateLogLabels, validateLogAggregate, validateRawQuery, errorResponse } from "./validation.js";
3
3
  export const queryLogsDefinition = {
4
4
  name: "query_logs",
5
5
  description: "Query logs for a service over a given timeframe. Returns log entries with a summary including error/warning counts and top error patterns. Filter by log level, a free-text/regex search, OR structured `labels` (exact-match on backend-extracted fields like method/status/url/environment — far more reliable than regex on structured JSON logs).",
@@ -46,20 +46,37 @@ export const queryLogsDefinition = {
46
46
  required: ["service"],
47
47
  },
48
48
  };
49
- export async function queryLogsHandler(registry, args, ctx = defaultContext()) {
50
- const svcErr = validateServiceName(args.service);
51
- if (svcErr)
52
- return errorResponse(svcErr);
49
+ export async function queryLogsHandler(registry, args, ctx = defaultContext(), opts = {}) {
53
50
  const duration = args.duration || "5m";
54
51
  const durationErr = validateDuration(duration);
55
52
  if (durationErr)
56
53
  return errorResponse(durationErr);
57
- const labelsErr = validateLogLabels(args.labels);
58
- if (labelsErr)
59
- return errorResponse(labelsErr);
60
- const aggErr = validateLogAggregate(args.aggregate);
61
- if (aggErr)
62
- return errorResponse(aggErr);
54
+ // Raw LogQL passthrough — capability-gated, default off. Bypasses the curated
55
+ // stream-selector construction, so `service` is not required and is ignored.
56
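+ // Mutually exclusive with `aggregate` (for metric LogQL use `aggregate`). NOTE(review): the rejection message below and the tool description instead say a raw LogQL query expresses its own aggregation — confirm which guidance is intended and align the three.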
57
+ const rawErr = validateRawQuery(args.raw_query);
58
+ if (rawErr)
59
+ return errorResponse(rawErr);
60
+ const isRaw = !!args.raw_query;
61
+ if (isRaw && !opts.allowRawQuery) {
62
+ return errorResponse("raw_query is disabled. The operator must enable the raw-query capability (OMCP_RAW_QUERY=on) to run verbatim LogQL — it bypasses the curated log surface, so it is off by default.");
63
+ }
64
+ if (isRaw && args.aggregate) {
65
+ return errorResponse("raw_query and aggregate are mutually exclusive — a raw LogQL query expresses its own aggregation.");
66
+ }
67
+ if (!isRaw) {
68
+ if (!args.service)
69
+ return errorResponse("service is required (or set raw_query).");
70
+ const svcErr = validateServiceName(args.service);
71
+ if (svcErr)
72
+ return errorResponse(svcErr);
73
+ const labelsErr = validateLogLabels(args.labels);
74
+ if (labelsErr)
75
+ return errorResponse(labelsErr);
76
+ const aggErr = validateLogAggregate(args.aggregate);
77
+ if (aggErr)
78
+ return errorResponse(aggErr);
79
+ }
63
80
  const connectors = registry.getByTenant(ctx.tenant).filter((c) => c.signalType === "logs");
64
81
  if (connectors.length === 0) {
65
82
  return {
@@ -80,7 +97,7 @@ export async function queryLogsHandler(registry, args, ctx = defaultContext()) {
80
97
  capable++;
81
98
  try {
82
99
  const q = {
83
- service: args.service,
100
+ service: args.service ?? "",
84
101
  duration,
85
102
  labels: args.labels,
86
103
  query: args.query,
@@ -119,12 +136,13 @@ export async function queryLogsHandler(registry, args, ctx = defaultContext()) {
119
136
  continue;
120
137
  try {
121
138
  const result = await connector.queryLogs({
122
- service: args.service,
139
+ service: args.service ?? "",
123
140
  query: args.query,
124
141
  duration,
125
142
  level: args.level,
126
143
  limit: args.limit,
127
144
  labels: args.labels,
145
+ rawQuery: args.raw_query,
128
146
  });
129
147
  results.push(result);
130
148
  }
@@ -31,12 +31,16 @@ export declare const queryMetricsDefinition: {
31
31
  };
32
32
  };
33
33
  export declare function queryMetricsHandler(registry: ConnectorRegistry, args: {
34
- service: string;
35
- metric: string;
34
+ service?: string;
35
+ metric?: string;
36
36
  duration?: string;
37
37
  source?: string;
38
38
  groupBy?: string;
39
- }, ctx?: RequestContext): Promise<{
39
+ labels?: Record<string, string>;
40
+ raw_query?: string;
41
+ }, ctx?: RequestContext, opts?: {
42
+ allowRawQuery?: boolean;
43
+ }): Promise<{
40
44
  content: {
41
45
  type: "text";
42
46
  text: string;
@@ -1,5 +1,5 @@
1
1
  import { defaultContext } from "../context.js";
2
- import { validateDuration, validateMetricName, validateServiceName, errorResponse } from "./validation.js";
2
+ import { validateDuration, validateMetricName, validateServiceName, validateMetricLabels, validateRawQuery, errorResponse } from "./validation.js";
3
3
  export const queryMetricsDefinition = {
4
4
  name: "query_metrics",
5
5
  description: "Query a specific metric for a service over a given timeframe. Returns time-series data with pre-computed summary statistics (current, average, min, max, trend). Available metrics: cpu, memory, error_rate, request_rate, latency_p99, latency_p50, latency_avg.",
@@ -30,7 +30,7 @@ export const queryMetricsDefinition = {
30
30
  required: ["service", "metric"],
31
31
  },
32
32
  };
33
- export async function queryMetricsHandler(registry, args, ctx = defaultContext()) {
33
+ export async function queryMetricsHandler(registry, args, ctx = defaultContext(), opts = {}) {
34
34
  // Coarse single-tenant source scoping: if the principal is restricted to a
35
35
  // source allow-list, deny an explicit out-of-scope source.
36
36
  if (ctx.allowedSources &&
@@ -38,18 +38,37 @@ export async function queryMetricsHandler(registry, args, ctx = defaultContext()
38
38
  !ctx.allowedSources.includes(args.source)) {
39
39
  return errorResponse(`forbidden: source "${args.source}" is not in your allowed sources`);
40
40
  }
41
- const svcErr = validateServiceName(args.service);
42
- if (svcErr)
43
- return errorResponse(svcErr);
44
41
  const duration = args.duration || "5m";
45
42
  const durationErr = validateDuration(duration);
46
43
  if (durationErr)
47
44
  return errorResponse(durationErr);
48
- const metricErr = validateMetricName(args.metric, registry);
49
- if (metricErr)
50
- return errorResponse(metricErr);
51
- if (args.groupBy && !/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(args.groupBy)) {
52
- return errorResponse(`Invalid groupBy "${args.groupBy}". Must be a valid Prometheus label name (alphanumeric + underscore, starting with letter/underscore).`);
45
+ // Raw PromQL passthrough — capability-gated, default off. Bypasses the
46
+ // curated metric catalog/selector, so service/metric/groupBy/labels are not
47
+ // required and are ignored. Still tenant-scoped + source-allow-listed below.
48
+ const rawErr = validateRawQuery(args.raw_query);
49
+ if (rawErr)
50
+ return errorResponse(rawErr);
51
+ const isRaw = !!args.raw_query;
52
+ if (isRaw && !opts.allowRawQuery) {
53
+ return errorResponse("raw_query is disabled. The operator must enable the raw-query capability (OMCP_RAW_QUERY=on) to run verbatim PromQL — it bypasses the curated metric surface, so it is off by default.");
54
+ }
55
+ if (!isRaw) {
56
+ if (!args.service)
57
+ return errorResponse("service is required (or set raw_query).");
58
+ const svcErr = validateServiceName(args.service);
59
+ if (svcErr)
60
+ return errorResponse(svcErr);
61
+ if (!args.metric)
62
+ return errorResponse("metric is required (or set raw_query).");
63
+ const metricErr = validateMetricName(args.metric, registry);
64
+ if (metricErr)
65
+ return errorResponse(metricErr);
66
+ if (args.groupBy && !/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(args.groupBy)) {
67
+ return errorResponse(`Invalid groupBy "${args.groupBy}". Must be a valid Prometheus label name (alphanumeric + underscore, starting with letter/underscore).`);
68
+ }
69
+ const labelsErr = validateMetricLabels(args.labels);
70
+ if (labelsErr)
71
+ return errorResponse(labelsErr);
53
72
  }
54
73
  // Tenant-scoped resolution: an explicit `source` from the agent
55
74
  // must belong to the caller's tenant (or be a global / untagged
@@ -80,10 +99,12 @@ export async function queryMetricsHandler(registry, args, ctx = defaultContext()
80
99
  continue;
81
100
  try {
82
101
  const result = await connector.queryMetrics({
83
- service: args.service,
84
- metric: args.metric,
102
+ service: args.service ?? "",
103
+ metric: args.metric ?? "",
85
104
  duration,
86
105
  groupBy: args.groupBy,
106
+ labels: args.labels,
107
+ rawQuery: args.raw_query,
87
108
  });
88
109
  results.push(result);
89
110
  }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,52 @@
1
+ import { describe, it } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { ConnectorRegistry } from "../connectors/registry.js";
4
+ import { PluginLoader } from "../connectors/loader.js";
5
+ import { queryMetricsHandler } from "./query-metrics.js";
6
+ import { queryLogsHandler } from "./query-logs.js";
7
+ // R4 (issue #415 #3): raw_query is an escape hatch that bypasses the curated
8
+ // metric/log surface, so it MUST be refused unless the operator enabled the
9
+ // capability (opts.allowRawQuery, driven by OMCP_RAW_QUERY). These tests pin
10
+ // the gate: the denial fires before any backend is touched, so an empty
11
+ // registry is enough — and with the capability ON the call proceeds past the
12
+ // gate to the normal "no backend configured" path.
13
+ function parse(result) {
14
+ return JSON.parse(result.content[0].text);
15
+ }
16
+ describe("raw_query capability gate", () => {
17
+ const emptyRegistry = () => new ConnectorRegistry(new PluginLoader());
18
+ it("query_metrics refuses raw_query when capability is off (default)", async () => {
19
+ const out = parse(await queryMetricsHandler(emptyRegistry(), { raw_query: "up" }, undefined, { allowRawQuery: false }));
20
+ assert.match(out.error, /raw_query is disabled/i);
21
+ assert.match(out.error, /OMCP_RAW_QUERY/);
22
+ });
23
+ it("query_metrics defaults to refusing raw_query when no opts passed", async () => {
24
+ const out = parse(await queryMetricsHandler(emptyRegistry(), { raw_query: "up" }));
25
+ assert.match(out.error, /raw_query is disabled/i);
26
+ });
27
+ it("query_logs refuses raw_query when capability is off (default)", async () => {
28
+ const out = parse(await queryLogsHandler(emptyRegistry(), { raw_query: '{job="x"}' }, undefined, { allowRawQuery: false }));
29
+ assert.match(out.error, /raw_query is disabled/i);
30
+ });
31
+ it("query_metrics passes the gate when capability is on (reaches backend resolution)", async () => {
32
+ const out = parse(await queryMetricsHandler(emptyRegistry(), { raw_query: "up" }, undefined, { allowRawQuery: true }));
33
+ // Past the gate → normal no-backend path, NOT the capability denial.
34
+ assert.doesNotMatch(out.error ?? "", /raw_query is disabled/i);
35
+ assert.match(out.error, /No metrics backends configured/i);
36
+ });
37
+ it("query_logs passes the gate when capability is on (reaches backend resolution)", async () => {
38
+ const out = parse(await queryLogsHandler(emptyRegistry(), { raw_query: '{job="x"}' }, undefined, { allowRawQuery: true }));
39
+ assert.doesNotMatch(out.error ?? "", /raw_query is disabled/i);
40
+ assert.match(out.error, /No log backends configured/i);
41
+ });
42
+ it("query_logs rejects raw_query + aggregate as mutually exclusive", async () => {
43
+ const out = parse(await queryLogsHandler(emptyRegistry(), { raw_query: '{job="x"}', aggregate: { op: "count_over_time" } }, undefined, { allowRawQuery: true }));
44
+ assert.match(out.error, /mutually exclusive/i);
45
+ });
46
+ it("normal (non-raw) calls are unaffected by the capability flag", async () => {
47
+ // No raw_query → gate is a no-op even with capability off; falls through
48
+ // to normal validation/backend path.
49
+ const out = parse(await queryMetricsHandler(emptyRegistry(), { service: "api", metric: "cpu" }, undefined, { allowRawQuery: false }));
50
+ assert.doesNotMatch(out.error ?? "", /raw_query/i);
51
+ });
52
+ });
@@ -13,7 +13,7 @@
13
13
  * Keep this list and the registerTool("name", ...) calls in
14
14
  * createMcpServer in sync. The test enforces it.
15
15
  */
16
- export declare const REGISTERED_TOOL_NAMES: readonly ["list_sources", "list_services", "query_metrics", "query_logs", "query_traces", "get_service_health", "detect_anomalies", "get_anomaly_history", "generate_postmortem", "get_topology", "get_blast_radius"];
16
+ export declare const REGISTERED_TOOL_NAMES: readonly ["list_sources", "list_services", "query_metrics", "query_logs", "query_traces", "get_service_health", "detect_anomalies", "get_anomaly_history", "generate_postmortem", "get_topology", "get_blast_radius", "enrich_ips"];
17
17
  export type RegisteredToolName = typeof REGISTERED_TOOL_NAMES[number];
18
18
  /** Functional category of a tool, surfaced in /api/tools/registry and
19
19
  * used by the Products UI to group the multi-select picker. Keeps
@@ -25,6 +25,7 @@ export const REGISTERED_TOOL_NAMES = [
25
25
  "generate_postmortem",
26
26
  "get_topology",
27
27
  "get_blast_radius",
28
+ "enrich_ips",
28
29
  ];
29
30
  export const REGISTERED_TOOLS = [
30
31
  { name: "list_sources", category: "discovery", summary: "List configured observability backends + reachability." },
@@ -38,6 +39,7 @@ export const REGISTERED_TOOLS = [
38
39
  { name: "generate_postmortem", category: "diagnose", summary: "One-shot markdown post-mortem stitching anomaly history + traces + blast-radius + logs for a service." },
39
40
  { name: "get_topology", category: "topology", summary: "Return the infrastructure topology graph (resources + edges)." },
40
41
  { name: "get_blast_radius", category: "topology", summary: "Given a resource, return the impact set if its host(s) fail." },
42
+ { name: "enrich_ips", category: "query", summary: "Resolve IPv4 addresses to geo/ASN/org/hosting-flag from a local offline dataset." },
41
43
  ];
42
44
  /** Validate a candidate Product tools[] array. Returns the unknown
43
45
  * names (empty array = all OK). Pure helper — the caller decides
@@ -146,6 +146,20 @@ export async function getTopologyHandler(registry, args = {}, ctx = defaultConte
146
146
  total: { resources: agg.resources.length, edges: agg.edges.length },
147
147
  truncated,
148
148
  };
149
+ // Signal vs. silence: when NO topology-capable connector contributed a
150
+ // snapshot, an empty {resources:[],edges:[]} is ambiguous to an agent —
151
+ // it can't tell "graph is genuinely empty" from "no topology backend is
152
+ // wired up". Mirror query_traces' explicit "no backend" message so the
153
+ // agent gets a clear signal instead of silence (issue #415).
154
+ if (agg.sources.length === 0) {
155
+ payload.note =
156
+ "No topology-capable connector is configured, so the graph is empty. " +
157
+ "Topology comes from connectors like the built-in `kubernetes` source " +
158
+ "or the aws/gcp/istio/linkerd/consul providers — add one (see the " +
159
+ "Sources tab or docs/plugin-architecture) to populate this graph. " +
160
+ "A deployment with only metrics/logs backends (e.g. Prometheus/Loki) " +
161
+ "has no topology to report here.";
162
+ }
149
163
  return {
150
164
  content: [{ type: "text", text: JSON.stringify(payload, null, 2) }],
151
165
  };
@@ -153,6 +153,21 @@ describe("get_topology tool", () => {
153
153
  assert.equal(out.truncated, true);
154
154
  assert.equal(out.total.resources, fixture().resources.length);
155
155
  });
156
+ it("does not attach the no-connector note when topology is present", async () => {
157
+ const reg = await makeRegistry();
158
+ const out = parseTool(await getTopologyHandler(reg, {}));
159
+ assert.equal(out.note, undefined);
160
+ });
161
+ it("returns an explicit note when no topology connector is configured (issue #415)", async () => {
162
+ // Empty registry — no topology-capable connector. The agent must get a
163
+ // clear signal, not a silent empty graph.
164
+ const reg = new ConnectorRegistry(new PluginLoader());
165
+ const out = parseTool(await getTopologyHandler(reg, {}));
166
+ assert.deepEqual(out.resources, []);
167
+ assert.deepEqual(out.edges, []);
168
+ assert.equal(out.sources.length, 0);
169
+ assert.match(out.note, /no topology-capable connector/i);
170
+ });
156
171
  });
157
172
  describe("get_blast_radius tool", () => {
158
173
  it("reports shared-host blast radius for a co-located pod", async () => {
@@ -16,6 +16,23 @@ export declare function validateServiceName(service: string): string | null;
16
16
  * can't build a pathological query.
17
17
  */
18
18
  export declare function validateLogLabels(labels: unknown): string | null;
19
+ /**
20
+ * Validate a structured `labels` filter map for query_metrics. The rules are
21
+ * identical to the log-label validator (valid Prometheus label names, bounded
22
+ * map size + value length, fail-closed) — metric labels compile to PromQL
23
+ * label-equality matchers and the values are escaped for PromQL at injection
24
+ * time, exactly as the log path escapes for LogQL.
25
+ */
26
+ export declare const validateMetricLabels: typeof validateLogLabels;
27
+ /**
28
+ * Validate a raw PromQL/LogQL passthrough string. The capability gate (is raw
29
+ * query allowed at all) lives at the handler; this only bounds the shape:
30
+ * non-empty string, length-capped so a crafted query can't build a
31
+ * pathological request. The query is sent verbatim to the backend (that is the
32
+ * point of a passthrough), so there is no syntax check — an invalid query just
33
+ * yields the backend's own parse error.
34
+ */
35
+ export declare function validateRawQuery(raw: unknown): string | null;
19
36
  /**
20
37
  * Validate the query_logs `aggregate` spec. Fail-closed, like the labels
21
38
  * validator. Returns an error string or null.