@thotischner/observability-mcp 3.3.1 → 3.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,7 +21,7 @@ export function correlateSignals(anomalies, logResults, metricResults) {
21
21
  for (const metric of serviceMetrics) {
22
22
  if (metric.metric === anomaly.metric)
23
23
  continue;
24
- if (metric.summary.trend === "rising") {
24
+ if (metric.summary && metric.summary.trend === "rising") {
25
25
  correlations.push(`${anomaly.service}: ${anomaly.metric} anomaly coincides with rising ${metric.metric} ` +
26
26
  `(current: ${metric.summary.current.toFixed(2)})`);
27
27
  }
@@ -146,6 +146,23 @@ describe("Q-LOG2: buildAggregateLogQL", () => {
146
146
  assert.equal(r.step, "900s");
147
147
  assert.equal(r.logql, `sum by (url) (count_over_time(${PIPE} [900s]))`);
148
148
  });
149
+ it("count_over_time with a label-filter pipeline + no by → valid sum-wrapped LogQL (#452 leftover #2)", () => {
150
+ // The reporter saw an intermittent 400 on a label-filtered count_over_time
151
+ // and wondered if the collapse path emits different LogQL when a filter is
152
+ // present. It does not: the label filter lives in the streamPipeline
153
+ // (identical to the sum/topk path, which works), and the count_over_time
154
+ // branch wraps it verbatim. Assert the emitted LogQL is well-formed —
155
+ // `sum (count_over_time({sel} | json | environment="prod" [step]))` — so
156
+ // any future regression in the generated query is caught here.
157
+ const filtered = '{service_name="app"} | json | environment="prod"';
158
+ const r = buildAggregateLogQL(filtered, { op: "count_over_time", step: "1h" }, "6h");
159
+ assert.equal(r.mode, "range");
160
+ assert.equal(r.step, "3600s");
161
+ assert.equal(r.logql, `sum (count_over_time(${filtered} [3600s]))`);
162
+ // Structural sanity: balanced parens, sum-wrapped, single range selector.
163
+ assert.equal((r.logql.match(/\(/g) || []).length, (r.logql.match(/\)/g) || []).length);
164
+ assert.match(r.logql, /^sum \(count_over_time\(.*\[\d+s\]\)\)$/);
165
+ });
149
166
  it("count_over_time without by → sum-wrapped (single series), default step (#452)", () => {
150
167
  // Regression for issue #452: a bare count_over_time over a `| json` stream
151
168
  // keeps every extracted label as its own series. With no `by` we must
@@ -240,6 +240,10 @@ export class PrometheusConnector {
240
240
  resolvedSeries: promql,
241
241
  resolvedLabel: label,
242
242
  };
243
+ if (!result.summary) {
244
+ result.note = `No data: no '${params.metric}' series matched "${params.service}" in this window. ` +
245
+ "The service may expose logs only, or the metric name/label didn't match. Absent ≠ zero — summary is null rather than all-zeros.";
246
+ }
243
247
  if (params.groupBy && groups.length > 1) {
244
248
  result.groupBy = params.groupBy;
245
249
  result.groups = groups;
@@ -295,6 +299,9 @@ export class PrometheusConnector {
295
299
  resolvedSeries: rawQuery,
296
300
  resolvedLabel: "",
297
301
  };
302
+ if (!result.summary) {
303
+ result.note = "No data: the query returned no series in this window. Absent ≠ zero — summary is null rather than all-zeros.";
304
+ }
298
305
  if (groups.length > 1)
299
306
  result.groups = groups;
300
307
  return result;
@@ -475,7 +482,10 @@ export class PrometheusConnector {
475
482
  }
476
483
  computeSummary(values) {
477
484
  if (values.length === 0) {
478
- return { current: 0, average: 0, min: 0, max: 0, trend: "stable" };
485
+ // No data points → no-data, NOT a confident all-zeros reading. Coercing
486
+ // an empty series to {current:0,trend:"stable"} is indistinguishable
487
+ // from a service genuinely idling at 0 (issue #462).
488
+ return null;
479
489
  }
480
490
  const current = values[values.length - 1];
481
491
  const average = values.reduce((a, b) => a + b, 0) / values.length;
@@ -53,13 +53,9 @@ describe("PrometheusConnector", () => {
53
53
  });
54
54
  });
55
55
  describe("computeSummary", () => {
56
- it("returns zeros for empty array", () => {
56
+ it("returns null for empty array — no-data, not a false all-zeros reading (#462)", () => {
57
57
  const s = proto.computeSummary([]);
58
- assert.equal(s.current, 0);
59
- assert.equal(s.average, 0);
60
- assert.equal(s.min, 0);
61
- assert.equal(s.max, 0);
62
- assert.equal(s.trend, "stable");
58
+ assert.equal(s, null);
63
59
  });
64
60
  it("computes correct summary for values", () => {
65
61
  const s = proto.computeSummary([10, 20, 30, 40]);
@@ -199,4 +195,34 @@ describe("PrometheusConnector", () => {
199
195
  }
200
196
  });
201
197
  });
198
+ describe("queryMetrics no-data → null summary, not zero-fill (#462)", () => {
199
+ const fakeSource = { name: "test", type: "prometheus", url: "http://localhost:9090", enabled: true };
200
+ it("an empty result set yields values:[], summary:null, and a no-data note", async () => {
201
+ const connector = new PrometheusConnector();
202
+ await connector.connect({ ...fakeSource });
203
+ const orig = globalThis.fetch;
204
+ // raw_query bypasses the candidate-probe / label-resolve path and runs
205
+ // query_range directly — here it returns an empty result set (the
206
+ // no-data case: a logs-only service has no such metric series).
207
+ globalThis.fetch = (async () => ({
208
+ ok: true,
209
+ status: 200,
210
+ json: async () => ({ data: { result: [] } }),
211
+ }));
212
+ try {
213
+ const result = await connector.queryMetrics({
214
+ service: "",
215
+ metric: "",
216
+ duration: "1h",
217
+ rawQuery: "rate(process_cpu_seconds_total{job=\"logs-only-svc\"}[1m]) * 100",
218
+ });
219
+ assert.deepEqual(result.values, [], "no data points");
220
+ assert.equal(result.summary, null, "summary must be null, not {current:0,...}");
221
+ assert.match(result.note ?? "", /No data/i, "must carry a no-data note");
222
+ }
223
+ finally {
224
+ globalThis.fetch = orig;
225
+ }
226
+ });
227
+ });
202
228
  });
package/dist/index.js CHANGED
@@ -522,7 +522,7 @@ async function main() {
522
522
  "Fetch the raw time-series for ONE metric of ONE service over a look-back window, returned together with pre-computed summary statistics.",
523
523
  "When to use: when you need the actual numeric values or the trend of a known metric. For a 'is this service OK?' verdict use `get_service_health`; to find which services are misbehaving use `detect_anomalies`.",
524
524
  "Prerequisites: get the exact service name from `list_services` and choose a metric from the list at the end of this description.",
525
- "Behavior: read-only, no side effects. Returns an ordered array of {timestamp, value} points plus a summary {current, average, min, max, trend}. With `groupBy` set, returns one labelled series per distinct label value under `groups` instead of a single aggregated series. Units depend on the metric (e.g. CPU as %, latency as ms, rates as per-second). An unknown service/metric or an unreachable backend yields a structured explanatory error, never an exception.",
525
+ "Behavior: read-only, no side effects. Returns an ordered array of {timestamp, value} points plus a summary {current, average, min, max, trend}. When no series matched (e.g. a logs-only service has no such metric), `values` is empty and `summary` is `null` (not all-zeros) with a `note` — absent data is not a real zero reading. With `groupBy` set, returns one labelled series per distinct label value under `groups` instead of a single aggregated series. Units depend on the metric (e.g. CPU as %, latency as ms, rates as per-second). An unknown service/metric or an unreachable backend yields a structured explanatory error, never an exception.",
526
526
  `Available metrics: ${metricsList}`,
527
527
  ].join(" "), {
528
528
  service: z
@@ -35,24 +35,24 @@ export async function getServiceHealthHandler(registry, args, ctx = defaultConte
35
35
  continue;
36
36
  try {
37
37
  const cpuResult = await connector.queryMetrics({ service: args.service, metric: "cpu", duration: "5m" });
38
- if (cpuResult.values.length > 0) {
38
+ if (cpuResult.summary) {
39
39
  cpu = cpuResult.summary.current;
40
40
  metricsHadData = true;
41
41
  }
42
42
  checkAnomaly(cpuResult.values.map(v => v.value), "cpu", args.service, connector.name, anomalies);
43
43
  const memResult = await connector.queryMetrics({ service: args.service, metric: "memory", duration: "5m" });
44
- if (memResult.values.length > 0) {
44
+ if (memResult.summary) {
45
45
  memory = memResult.summary.current / 1_000_000;
46
46
  metricsHadData = true;
47
47
  } // MB for display
48
48
  const errResult = await connector.queryMetrics({ service: args.service, metric: "error_rate", duration: "5m" });
49
- if (errResult.values.length > 0) {
49
+ if (errResult.summary) {
50
50
  errorRate = errResult.summary.current;
51
51
  metricsHadData = true;
52
52
  }
53
53
  checkAnomaly(errResult.values.map(v => v.value), "error_rate", args.service, connector.name, anomalies);
54
54
  const latResult = await connector.queryMetrics({ service: args.service, metric: "latency_p99", duration: "5m" });
55
- if (latResult.values.length > 0) {
55
+ if (latResult.summary) {
56
56
  latencyP99 = latResult.summary.current;
57
57
  metricsHadData = true;
58
58
  }
@@ -291,7 +291,9 @@ describe("getServiceHealthHandler — honest no-data / not-found (issue #453)",
291
291
  const emptySeries = () => ({
292
292
  source: "prom1", service: "x", metric: "x", unit: "",
293
293
  values: [],
294
- summary: { current: 0, average: 0, min: 0, max: 0, trend: "stable" },
294
+ // No data → null summary (matches the real connector after #462), so the
295
+ // health handler treats it as no-coverage, not a real zero reading.
296
+ summary: null,
295
297
  });
296
298
  function metricsConnector(known) {
297
299
  return {
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,32 @@
1
+ import { describe, it } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { ConnectorRegistry } from "../connectors/registry.js";
4
+ import { queryLogsHandler } from "./query-logs.js";
5
+ // Inject a mock connector into the registry's internal maps.
6
+ function regWith(mock) {
7
+ const reg = new ConnectorRegistry();
8
+ reg.connectors.set(mock.name, mock);
9
+ reg.sourceConfigs.set(mock.name, { name: mock.name, type: mock.type, url: "http://mock", enabled: true });
10
+ return reg;
11
+ }
12
+ describe("queryLogsHandler error response shape (issue #452)", () => {
13
+ it("a failing query reports `window` (the look-back), not `duration` (read as wall-clock)", async () => {
14
+ // Mirrors the raw_query fail-fast case: the connector throws, the handler
15
+ // returns a structured error. The look-back window must be labelled
16
+ // `window`, never `duration` — an agent reading duration:"5m" on a <1s
17
+ // failure thinks it hung (the very symptom the fail-fast fix removed).
18
+ const mock = {
19
+ connect: async () => { }, disconnect: async () => { },
20
+ healthCheck: async () => ({ status: "up", latencyMs: 1 }),
21
+ getDefaultMetrics: () => [], getMetrics: () => [],
22
+ listServices: async () => [],
23
+ name: "loki1", type: "loki", signalType: "logs",
24
+ queryLogs: async () => { throw new Error("query_logs raw_query returned a 'matrix' result, but query_logs handles log lines (streams) only."); },
25
+ };
26
+ const result = await queryLogsHandler(regWith(mock), { raw_query: "sum(count_over_time({service_name=\"x\"} | json [1h]))", duration: "1h" }, undefined, { allowRawQuery: true });
27
+ const data = JSON.parse(result.content[0].text);
28
+ assert.ok(data.error, "must be an error response");
29
+ assert.equal(data.window, "1h", "look-back must be reported as `window`");
30
+ assert.equal("duration" in data, false, "must NOT carry a `duration` field (misread as elapsed time)");
31
+ });
32
+ });
@@ -119,7 +119,8 @@ export async function queryLogsHandler(registry, args, ctx = defaultContext(), o
119
119
  }
120
120
  if (aggResults.length === 0) {
121
121
  return {
122
- content: [{ type: "text", text: JSON.stringify({ error: aggErrors.length ? `Aggregate failed: ${aggErrors.join("; ")}` : "No data returned", service: args.service, duration }) }],
122
+ // `window` = the requested look-back, not elapsed time (issue #452).
123
+ content: [{ type: "text", text: JSON.stringify({ error: aggErrors.length ? `Aggregate failed: ${aggErrors.join("; ")}` : "No data returned", service: args.service, window: duration }) }],
123
124
  isError: aggErrors.length > 0,
124
125
  };
125
126
  }
@@ -160,7 +161,9 @@ export async function queryLogsHandler(registry, args, ctx = defaultContext(), o
160
161
  text: JSON.stringify({
161
162
  error: errors.length > 0 ? `Query failed: ${errors.join("; ")}` : "No logs returned",
162
163
  service: args.service,
163
- duration,
164
+ // The requested look-back window, NOT elapsed wall-clock time. Named
165
+ // `window` so a fast failure isn't misread as a 5-minute hang (#452).
166
+ window: duration,
164
167
  }),
165
168
  },
166
169
  ],
package/dist/types.d.ts CHANGED
@@ -182,7 +182,8 @@ export interface MetricSummary {
182
182
  export interface MetricGroup {
183
183
  key: string;
184
184
  values: DataPoint[];
185
- summary: MetricSummary;
185
+ /** null when this group has no data points — absent ≠ a real zero reading. */
186
+ summary: MetricSummary | null;
186
187
  }
187
188
  export interface MetricResult {
188
189
  source: string;
@@ -190,12 +191,15 @@ export interface MetricResult {
190
191
  metric: string;
191
192
  unit: string;
192
193
  values: DataPoint[];
193
- summary: MetricSummary;
194
+ /** null when `values` is empty (no series matched this service/metric) — a
195
+ * no-data signal, not a confident all-zeros reading (issue #462). */
196
+ summary: MetricSummary | null;
194
197
  resolvedSeries?: string;
195
198
  resolvedLabel?: string;
196
199
  groupBy?: string;
197
200
  groups?: MetricGroup[];
198
201
  hint?: string;
202
+ note?: string;
199
203
  }
200
204
  export interface LogEntry {
201
205
  timestamp: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thotischner/observability-mcp",
3
- "version": "3.3.1",
3
+ "version": "3.3.2",
4
4
  "description": "Unified observability gateway for AI agents — one MCP server for Prometheus, Loki, and any backend",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",