@thotischner/observability-mcp 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@ export declare class PrometheusConnector implements ObservabilityConnector {
10
10
  private metrics;
11
11
  private serviceLabels;
12
12
  private labelValuesCache;
13
- private metricNamesCache;
13
+ private candidateCache;
14
14
  private userOverrides;
15
15
  connect(config: SourceConfig): Promise<void>;
16
16
  getDefaultMetrics(): MetricDefinition[];
@@ -24,7 +24,7 @@ export declare class PrometheusConnector implements ObservabilityConnector {
24
24
  queryMetrics(params: MetricQuery): Promise<MetricResult>;
25
25
  private buildQuery;
26
26
  private pickMetricCandidate;
27
- private getAllMetricNames;
27
+ private seriesExistsForService;
28
28
  private resolveServiceLabel;
29
29
  private getLabelValues;
30
30
  private getUnit;
@@ -78,7 +78,7 @@ export class PrometheusConnector {
78
78
  metrics = [];
79
79
  serviceLabels = DEFAULT_SERVICE_LABELS;
80
80
  labelValuesCache = new Map();
81
- metricNamesCache = null;
81
+ candidateCache = new Map();
82
82
  userOverrides = new Set();
83
83
  async connect(config) {
84
84
  this.name = config.name;
@@ -213,57 +213,73 @@ export class PrometheusConnector {
213
213
  }
214
214
  // --- Private helpers ---
215
215
  async buildQuery(service, metric) {
216
- // Pick the query template. For built-in metrics with no user override,
217
- // probe candidate series in the backend and pick the first that exists
218
- // (e.g. prom-client process_cpu_seconds_total → falls back to
219
- // node_exporter node_cpu_seconds_total). User-overridden metrics use
220
- // their query verbatim.
216
+ // Resolve the service-filter label first. Candidate probing uses this
217
+ // label to scope existence checks per-service rather than per-source.
218
+ const escaped = service.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
219
+ let label = "job";
221
220
  let template;
222
221
  if (!this.userOverrides.has(metric) && PROMETHEUS_METRIC_CANDIDATES[metric]) {
223
- const candidate = await this.pickMetricCandidate(metric);
222
+ label = await this.resolveServiceLabel(service);
223
+ const candidate = await this.pickMetricCandidate(metric, label, escaped);
224
224
  template = candidate?.query || PROMETHEUS_METRIC_CANDIDATES[metric][0].query;
225
225
  }
226
226
  else {
227
227
  const def = this.metrics.find((m) => m.name === metric);
228
228
  template = def?.query || `${metric}{ {{selector}} }`;
229
229
  }
230
- const escaped = service.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
231
230
  let promql = template;
232
- let label = "job";
233
231
  if (template.includes("{{selector}}")) {
234
- label = await this.resolveServiceLabel(service);
232
+ // Resolve label here for non-candidate paths that haven't done it yet.
233
+ if (label === "job" && !PROMETHEUS_METRIC_CANDIDATES[metric]) {
234
+ label = await this.resolveServiceLabel(service);
235
+ }
235
236
  const selector = `${label}="${escaped}"`;
236
237
  promql = promql.replace(/\{\{selector\}\}/g, selector);
237
238
  }
238
239
  promql = promql.replace(/\{\{service\}\}/g, escaped);
239
240
  return { promql, label };
240
241
  }
241
- async pickMetricCandidate(metric) {
242
+ async pickMetricCandidate(metric, label, escapedService) {
242
243
  const candidates = PROMETHEUS_METRIC_CANDIDATES[metric];
243
244
  if (!candidates || candidates.length === 0)
244
245
  return null;
245
246
  if (candidates.length === 1)
246
247
  return candidates[0];
247
- const allNames = await this.getAllMetricNames();
248
+ // Per-service cache: a source can have BOTH process_* and node_* series
249
+ // present (e.g. an apps stack alongside node_exporter), so probing has
250
+ // to check whether THIS service has the series, not whether the source
251
+ // has it anywhere.
252
+ const cacheKey = `${metric}|${label}|${escapedService}`;
253
+ const cached = this.candidateCache.get(cacheKey);
254
+ if (cached && cached.expiresAt > Date.now())
255
+ return cached.candidate;
248
256
  for (const c of candidates) {
249
- if (allNames.has(c.seriesName))
257
+ if (await this.seriesExistsForService(c.seriesName, label, escapedService)) {
258
+ this.candidateCache.set(cacheKey, {
259
+ candidate: c,
260
+ expiresAt: Date.now() + LABEL_CACHE_TTL_MS,
261
+ });
250
262
  return c;
263
+ }
251
264
  }
252
- return candidates[0];
265
+ // Nothing found — return first candidate as best-effort. Cache the
266
+ // negative outcome so we don't probe again for 60s.
267
+ const fallback = candidates[0];
268
+ this.candidateCache.set(cacheKey, {
269
+ candidate: fallback,
270
+ expiresAt: Date.now() + LABEL_CACHE_TTL_MS,
271
+ });
272
+ return fallback;
253
273
  }
254
- async getAllMetricNames() {
255
- if (this.metricNamesCache && this.metricNamesCache.expiresAt > Date.now()) {
256
- return this.metricNamesCache.values;
257
- }
274
+ async seriesExistsForService(seriesName, label, escapedService) {
258
275
  try {
259
- const data = await this.apiGet("/api/v1/label/__name__/values");
260
- const values = new Set(data?.data || []);
261
- this.metricNamesCache = { values, expiresAt: Date.now() + LABEL_CACHE_TTL_MS };
262
- return values;
276
+ const matchExpr = `${seriesName}{${label}="${escapedService}"}`;
277
+ const url = `/api/v1/series?match[]=${encodeURIComponent(matchExpr)}`;
278
+ const data = await this.apiGet(url);
279
+ return Array.isArray(data?.data) && data.data.length > 0;
263
280
  }
264
281
  catch {
265
- this.metricNamesCache = { values: new Set(), expiresAt: Date.now() + LABEL_CACHE_TTL_MS };
266
- return new Set();
282
+ return false;
267
283
  }
268
284
  }
269
285
  async resolveServiceLabel(service) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thotischner/observability-mcp",
3
- "version": "1.2.1",
3
+ "version": "1.2.2",
4
4
  "description": "Unified observability gateway for AI agents — one MCP server for Prometheus, Loki, and any backend",
5
5
  "type": "module",
6
6
  "license": "MIT",