npm - @thotischner/observability-mcp - Versions diffs - 3.3.1 → 3.3.2 - Mend

@thotischner/observability-mcp 3.3.1 → 3.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/analysis/correlator.js +1 -1
package/dist/connectors/loki.test.js +17 -0
package/dist/connectors/prometheus.js +11 -1
package/dist/connectors/prometheus.test.js +32 -6
package/dist/index.js +1 -1
package/dist/tools/get-service-health.js +4 -4
package/dist/tools/handlers.test.js +3 -1
package/dist/tools/query-logs-error-shape.test.d.ts +1 -0
package/dist/tools/query-logs-error-shape.test.js +32 -0
package/dist/tools/query-logs.js +5 -2
package/dist/types.d.ts +6 -2
package/package.json +1 -1

package/dist/analysis/correlator.js CHANGED Viewed

@@ -21,7 +21,7 @@ export function correlateSignals(anomalies, logResults, metricResults) {
         for (const metric of serviceMetrics) {
             if (metric.metric === anomaly.metric)
                 continue;
-            if (metric.summary.trend === "rising") {
+            if (metric.summary && metric.summary.trend === "rising") {
                 correlations.push(`${anomaly.service}: ${anomaly.metric} anomaly coincides with rising ${metric.metric} ` +
                     `(current: ${metric.summary.current.toFixed(2)})`);
             }

package/dist/connectors/loki.test.js CHANGED Viewed

@@ -146,6 +146,23 @@ describe("Q-LOG2: buildAggregateLogQL", () => {
         assert.equal(r.step, "900s");
         assert.equal(r.logql, `sum by (url) (count_over_time(${PIPE} [900s]))`);
     });
+    it("count_over_time with a label-filter pipeline + no by → valid sum-wrapped LogQL (#452 leftover #2)", () => {
+        // The reporter saw an intermittent 400 on a label-filtered count_over_time
+        // and wondered if the collapse path emits different LogQL when a filter is
+        // present. It does not: the label filter lives in the streamPipeline
+        // (identical to the sum/topk path, which works), and the count_over_time
+        // branch wraps it verbatim. Assert the emitted LogQL is well-formed —
+        // `sum (count_over_time({sel} | json | environment="prod" [step]))` — so
+        // any future regression in the generated query is caught here.
+        const filtered = '{service_name="app"} | json | environment="prod"';
+        const r = buildAggregateLogQL(filtered, { op: "count_over_time", step: "1h" }, "6h");
+        assert.equal(r.mode, "range");
+        assert.equal(r.step, "3600s");
+        assert.equal(r.logql, `sum (count_over_time(${filtered} [3600s]))`);
+        // Structural sanity: balanced parens, sum-wrapped, single range selector.
+        assert.equal((r.logql.match(/\(/g) || []).length, (r.logql.match(/\)/g) || []).length);
+        assert.match(r.logql, /^sum \(count_over_time\(.*\[\d+s\]\)\)$/);
+    });
     it("count_over_time without by → sum-wrapped (single series), default step (#452)", () => {
         // Regression for issue #452: a bare count_over_time over a `| json` stream
         // keeps every extracted label as its own series. With no `by` we must

package/dist/connectors/prometheus.js CHANGED Viewed

@@ -240,6 +240,10 @@ export class PrometheusConnector {
             resolvedSeries: promql,
             resolvedLabel: label,
         };
+        if (!result.summary) {
+            result.note = `No data: no '${params.metric}' series matched "${params.service}" in this window. ` +
+                "The service may expose logs only, or the metric name/label didn't match. Absent ≠ zero — summary is null rather than all-zeros.";
+        }
         if (params.groupBy && groups.length > 1) {
             result.groupBy = params.groupBy;
             result.groups = groups;
@@ -295,6 +299,9 @@ export class PrometheusConnector {
             resolvedSeries: rawQuery,
             resolvedLabel: "",
         };
+        if (!result.summary) {
+            result.note = "No data: the query returned no series in this window. Absent ≠ zero — summary is null rather than all-zeros.";
+        }
         if (groups.length > 1)
             result.groups = groups;
         return result;
@@ -475,7 +482,10 @@ export class PrometheusConnector {
     }
     computeSummary(values) {
         if (values.length === 0) {
-            return { current: 0, average: 0, min: 0, max: 0, trend: "stable" };
+            // No data points → no-data, NOT a confident all-zeros reading. Coercing
+            // an empty series to {current:0,trend:"stable"} is indistinguishable
+            // from a service genuinely idling at 0 (issue #462).
+            return null;
         }
         const current = values[values.length - 1];
         const average = values.reduce((a, b) => a + b, 0) / values.length;

package/dist/connectors/prometheus.test.js CHANGED Viewed

@@ -53,13 +53,9 @@ describe("PrometheusConnector", () => {
         });
     });
     describe("computeSummary", () => {
-        it("returns zeros for empty array", () => {
+        it("returns null for empty array — no-data, not a false all-zeros reading (#462)", () => {
             const s = proto.computeSummary([]);
-            assert.equal(s.current, 0);
-            assert.equal(s.average, 0);
-            assert.equal(s.min, 0);
-            assert.equal(s.max, 0);
-            assert.equal(s.trend, "stable");
+            assert.equal(s, null);
         });
         it("computes correct summary for values", () => {
             const s = proto.computeSummary([10, 20, 30, 40]);
@@ -199,4 +195,34 @@ describe("PrometheusConnector", () => {
             }
         });
     });
+    describe("queryMetrics no-data → null summary, not zero-fill (#462)", () => {
+        const fakeSource = { name: "test", type: "prometheus", url: "http://localhost:9090", enabled: true };
+        it("an empty result set yields values:[], summary:null, and a no-data note", async () => {
+            const connector = new PrometheusConnector();
+            await connector.connect({ ...fakeSource });
+            const orig = globalThis.fetch;
+            // raw_query bypasses the candidate-probe / label-resolve path and runs
+            // query_range directly — here it returns an empty result set (the
+            // no-data case: a logs-only service has no such metric series).
+            globalThis.fetch = (async () => ({
+                ok: true,
+                status: 200,
+                json: async () => ({ data: { result: [] } }),
+            }));
+            try {
+                const result = await connector.queryMetrics({
+                    service: "",
+                    metric: "",
+                    duration: "1h",
+                    rawQuery: "rate(process_cpu_seconds_total{job=\"logs-only-svc\"}[1m]) * 100",
+                });
+                assert.deepEqual(result.values, [], "no data points");
+                assert.equal(result.summary, null, "summary must be null, not {current:0,...}");
+                assert.match(result.note ?? "", /No data/i, "must carry a no-data note");
+            }
+            finally {
+                globalThis.fetch = orig;
+            }
+        });
+    });
 });

package/dist/index.js CHANGED Viewed

@@ -522,7 +522,7 @@ async function main() {
             "Fetch the raw time-series for ONE metric of ONE service over a look-back window, returned together with pre-computed summary statistics.",
             "When to use: when you need the actual numeric values or the trend of a known metric. For a 'is this service OK?' verdict use `get_service_health`; to find which services are misbehaving use `detect_anomalies`.",
             "Prerequisites: get the exact service name from `list_services` and choose a metric from the list at the end of this description.",
-            "Behavior: read-only, no side effects. Returns an ordered array of {timestamp, value} points plus a summary {current, average, min, max, trend}. With `groupBy` set, returns one labelled series per distinct label value under `groups` instead of a single aggregated series. Units depend on the metric (e.g. CPU as %, latency as ms, rates as per-second). An unknown service/metric or an unreachable backend yields a structured explanatory error, never an exception.",
+            "Behavior: read-only, no side effects. Returns an ordered array of {timestamp, value} points plus a summary {current, average, min, max, trend}. When no series matched (e.g. a logs-only service has no such metric), `values` is empty and `summary` is `null` (not all-zeros) with a `note` — absent data is not a real zero reading. With `groupBy` set, returns one labelled series per distinct label value under `groups` instead of a single aggregated series. Units depend on the metric (e.g. CPU as %, latency as ms, rates as per-second). An unknown service/metric or an unreachable backend yields a structured explanatory error, never an exception.",
             `Available metrics: ${metricsList}`,
         ].join(" "), {
             service: z

package/dist/tools/get-service-health.js CHANGED Viewed

@@ -35,24 +35,24 @@ export async function getServiceHealthHandler(registry, args, ctx = defaultConte
             continue;
         try {
             const cpuResult = await connector.queryMetrics({ service: args.service, metric: "cpu", duration: "5m" });
-            if (cpuResult.values.length > 0) {
+            if (cpuResult.summary) {
                 cpu = cpuResult.summary.current;
                 metricsHadData = true;
             }
             checkAnomaly(cpuResult.values.map(v => v.value), "cpu", args.service, connector.name, anomalies);
             const memResult = await connector.queryMetrics({ service: args.service, metric: "memory", duration: "5m" });
-            if (memResult.values.length > 0) {
+            if (memResult.summary) {
                 memory = memResult.summary.current / 1_000_000;
                 metricsHadData = true;
             } // MB for display
             const errResult = await connector.queryMetrics({ service: args.service, metric: "error_rate", duration: "5m" });
-            if (errResult.values.length > 0) {
+            if (errResult.summary) {
                 errorRate = errResult.summary.current;
                 metricsHadData = true;
             }
             checkAnomaly(errResult.values.map(v => v.value), "error_rate", args.service, connector.name, anomalies);
             const latResult = await connector.queryMetrics({ service: args.service, metric: "latency_p99", duration: "5m" });
-            if (latResult.values.length > 0) {
+            if (latResult.summary) {
                 latencyP99 = latResult.summary.current;
                 metricsHadData = true;
             }

package/dist/tools/handlers.test.js CHANGED Viewed

@@ -291,7 +291,9 @@ describe("getServiceHealthHandler — honest no-data / not-found (issue #453)",
     const emptySeries = () => ({
         source: "prom1", service: "x", metric: "x", unit: "",
         values: [],
-        summary: { current: 0, average: 0, min: 0, max: 0, trend: "stable" },
+        // No data → null summary (matches the real connector after #462), so the
+        // health handler treats it as no-coverage, not a real zero reading.
+        summary: null,
     });
     function metricsConnector(known) {
         return {

package/dist/tools/query-logs-error-shape.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/tools/query-logs-error-shape.test.js ADDED Viewed

@@ -0,0 +1,32 @@
+import { describe, it } from "node:test";
+import assert from "node:assert/strict";
+import { ConnectorRegistry } from "../connectors/registry.js";
+import { queryLogsHandler } from "./query-logs.js";
+// Inject a mock connector into the registry's internal maps.
+function regWith(mock) {
+    const reg = new ConnectorRegistry();
+    reg.connectors.set(mock.name, mock);
+    reg.sourceConfigs.set(mock.name, { name: mock.name, type: mock.type, url: "http://mock", enabled: true });
+    return reg;
+}
+describe("queryLogsHandler error response shape (issue #452)", () => {
+    it("a failing query reports `window` (the look-back), not `duration` (read as wall-clock)", async () => {
+        // Mirrors the raw_query fail-fast case: the connector throws, the handler
+        // returns a structured error. The look-back window must be labelled
+        // `window`, never `duration` — an agent reading duration:"5m" on a <1s
+        // failure thinks it hung (the very symptom the fail-fast fix removed).
+        const mock = {
+            connect: async () => { }, disconnect: async () => { },
+            healthCheck: async () => ({ status: "up", latencyMs: 1 }),
+            getDefaultMetrics: () => [], getMetrics: () => [],
+            listServices: async () => [],
+            name: "loki1", type: "loki", signalType: "logs",
+            queryLogs: async () => { throw new Error("query_logs raw_query returned a 'matrix' result, but query_logs handles log lines (streams) only."); },
+        };
+        const result = await queryLogsHandler(regWith(mock), { raw_query: "sum(count_over_time({service_name=\"x\"} | json [1h]))", duration: "1h" }, undefined, { allowRawQuery: true });
+        const data = JSON.parse(result.content[0].text);
+        assert.ok(data.error, "must be an error response");
+        assert.equal(data.window, "1h", "look-back must be reported as `window`");
+        assert.equal("duration" in data, false, "must NOT carry a `duration` field (misread as elapsed time)");
+    });
+});

package/dist/tools/query-logs.js CHANGED Viewed

@@ -119,7 +119,8 @@ export async function queryLogsHandler(registry, args, ctx = defaultContext(), o
         }
         if (aggResults.length === 0) {
             return {
-                content: [{ type: "text", text: JSON.stringify({ error: aggErrors.length ? `Aggregate failed: ${aggErrors.join("; ")}` : "No data returned", service: args.service, duration }) }],
+                // `window` = the requested look-back, not elapsed time (issue #452).
+                content: [{ type: "text", text: JSON.stringify({ error: aggErrors.length ? `Aggregate failed: ${aggErrors.join("; ")}` : "No data returned", service: args.service, window: duration }) }],
                 isError: aggErrors.length > 0,
             };
         }
@@ -160,7 +161,9 @@ export async function queryLogsHandler(registry, args, ctx = defaultContext(), o
                     text: JSON.stringify({
                         error: errors.length > 0 ? `Query failed: ${errors.join("; ")}` : "No logs returned",
                         service: args.service,
-                        duration,
+                        // The requested look-back window, NOT elapsed wall-clock time. Named
+                        // `window` so a fast failure isn't misread as a 5-minute hang (#452).
+                        window: duration,
                     }),
                 },
             ],

package/dist/types.d.ts CHANGED Viewed

@@ -182,7 +182,8 @@ export interface MetricSummary {
 export interface MetricGroup {
     key: string;
     values: DataPoint[];
-    summary: MetricSummary;
+    /** null when this group has no data points — absent ≠ a real zero reading. */
+    summary: MetricSummary | null;
 }
 export interface MetricResult {
     source: string;
@@ -190,12 +191,15 @@ export interface MetricResult {
     metric: string;
     unit: string;
     values: DataPoint[];
-    summary: MetricSummary;
+    /** null when `values` is empty (no series matched this service/metric) — a
+     *  no-data signal, not a confident all-zeros reading (issue #462). */
+    summary: MetricSummary | null;
     resolvedSeries?: string;
     resolvedLabel?: string;
     groupBy?: string;
     groups?: MetricGroup[];
     hint?: string;
+    note?: string;
 }
 export interface LogEntry {
     timestamp: string;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@thotischner/observability-mcp",
-  "version": "3.3.1",
+  "version": "3.3.2",
   "description": "Unified observability gateway for AI agents — one MCP server for Prometheus, Loki, and any backend",
   "type": "module",
   "license": "Apache-2.0",