@thotischner/observability-mcp 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,8 @@ export declare class LokiConnector implements ObservabilityConnector {
7
7
  private baseUrl;
8
8
  private auth?;
9
9
  private tlsAgent?;
10
+ private serviceLabels;
11
+ private labelValuesCache;
10
12
  connect(config: SourceConfig): Promise<void>;
11
13
  getDefaultMetrics(): MetricDefinition[];
12
14
  getMetrics(): MetricDefinition[];
@@ -15,6 +17,8 @@ export declare class LokiConnector implements ObservabilityConnector {
15
17
  disconnect(): Promise<void>;
16
18
  listServices(): Promise<ServiceInfo[]>;
17
19
  queryLogs(params: LogQuery): Promise<LogResult>;
20
+ private getLabelValues;
21
+ private resolveServiceLabel;
18
22
  private parseLine;
19
23
  private extractTopPatterns;
20
24
  private parseTimeRange;
@@ -1,4 +1,6 @@
1
1
  import { buildTlsAgent } from "./tls.js";
2
+ const DEFAULT_SERVICE_LABELS = ["service_name", "service", "job", "app", "container"];
3
+ const LABEL_CACHE_TTL_MS = 60_000;
2
4
  export class LokiConnector {
3
5
  type = "loki";
4
6
  signalType = "logs";
@@ -6,11 +8,17 @@ export class LokiConnector {
6
8
  baseUrl = "";
7
9
  auth;
8
10
  tlsAgent;
11
+ serviceLabels = DEFAULT_SERVICE_LABELS;
12
+ labelValuesCache = new Map();
9
13
  async connect(config) {
10
14
  this.name = config.name;
11
15
  this.baseUrl = config.url.replace(/\/$/, "");
12
16
  this.auth = config.auth;
13
17
  this.tlsAgent = buildTlsAgent(config);
18
+ const envLabels = process.env.LOKI_SERVICE_LABELS;
19
+ if (envLabels) {
20
+ this.serviceLabels = envLabels.split(",").map((s) => s.trim()).filter(Boolean);
21
+ }
14
22
  }
15
23
  getDefaultMetrics() {
16
24
  // Loki is a log backend — no metric definitions by default
@@ -30,13 +38,14 @@ export class LokiConnector {
30
38
  async healthCheck() {
31
39
  const start = Date.now();
32
40
  try {
33
- const res = await fetch(`${this.baseUrl}/ready`, this.fetchOptions());
34
- const text = await res.text();
35
- const isReady = res.ok && text.trim() === "ready";
41
+ // Use the labels query API instead of /ready: managed Loki (Grafana
42
+ // Cloud, etc.) does not expose the operational health endpoint.
43
+ // /loki/api/v1/labels returns 200 with auth on any reachable Loki.
44
+ const res = await fetch(`${this.baseUrl}/loki/api/v1/labels`, this.fetchOptions());
36
45
  return {
37
- status: isReady ? "up" : "down",
46
+ status: res.ok ? "up" : "down",
38
47
  latencyMs: Date.now() - start,
39
- message: isReady ? "Loki is ready" : `HTTP ${res.status}: ${text}`,
48
+ message: res.ok ? "Loki is ready" : `HTTP ${res.status}`,
40
49
  };
41
50
  }
42
51
  catch (err) {
@@ -45,23 +54,35 @@ export class LokiConnector {
45
54
  }
46
55
  async disconnect() { }
47
56
  async listServices() {
48
- try {
49
- const data = await this.apiGet("/loki/api/v1/label/service/values");
50
- return (data?.data || []).map((name) => ({
51
- name,
52
- source: this.name,
53
- signalType: "logs",
54
- }));
55
- }
56
- catch {
57
- return [];
57
+ // Probe each candidate label and merge values. Loki streams may identify
58
+ // services via service_name, service, job, app, or container depending on
59
+ // the shipper configuration. Walking all candidates ensures historical
60
+ // streams remain reachable when label conventions change over time.
61
+ const seen = new Map();
62
+ for (const label of this.serviceLabels) {
63
+ const values = await this.getLabelValues(label);
64
+ for (const name of values) {
65
+ if (!seen.has(name)) {
66
+ seen.set(name, {
67
+ name,
68
+ source: this.name,
69
+ signalType: "logs",
70
+ labels: { discoveredVia: label },
71
+ });
72
+ }
73
+ }
58
74
  }
75
+ return Array.from(seen.values());
59
76
  }
60
77
  async queryLogs(params) {
61
78
  const { start, end } = this.parseTimeRange(params.duration);
62
79
  const limit = Math.min(Math.max(params.limit || 100, 1), 1000);
80
+ // Resolve which label this service identifier lives under. Falls back to
81
+ // the first configured label when no exact match is found, preserving
82
+ // legacy behavior for callers passing labels that aren't in the cache yet.
83
+ const matchedLabel = await this.resolveServiceLabel(params.service);
63
84
  const service = this.escapeLogQLValue(params.service);
64
- let logql = `{service="${service}"}`;
85
+ let logql = `{${matchedLabel}="${service}"}`;
65
86
  if (params.level) {
66
87
  const level = this.escapeLogQLValue(params.level);
67
88
  logql += ` | json | level="${level}"`;
@@ -108,6 +129,33 @@ export class LokiConnector {
108
129
  };
109
130
  }
110
131
  // --- Private helpers ---
132
+ async getLabelValues(label) {
133
+ const cached = this.labelValuesCache.get(label);
134
+ if (cached && cached.expiresAt > Date.now()) {
135
+ return cached.values;
136
+ }
137
+ try {
138
+ const data = await this.apiGet(`/loki/api/v1/label/${encodeURIComponent(label)}/values`);
139
+ const values = data?.data || [];
140
+ this.labelValuesCache.set(label, {
141
+ values,
142
+ expiresAt: Date.now() + LABEL_CACHE_TTL_MS,
143
+ });
144
+ return values;
145
+ }
146
+ catch {
147
+ this.labelValuesCache.set(label, { values: [], expiresAt: Date.now() + LABEL_CACHE_TTL_MS });
148
+ return [];
149
+ }
150
+ }
151
+ async resolveServiceLabel(service) {
152
+ for (const label of this.serviceLabels) {
153
+ const values = await this.getLabelValues(label);
154
+ if (values.includes(service))
155
+ return label;
156
+ }
157
+ return this.serviceLabels[0] || "service_name";
158
+ }
111
159
  parseLine(line) {
112
160
  try {
113
161
  return JSON.parse(line);
@@ -15,6 +15,7 @@ export declare class PrometheusConnector implements ObservabilityConnector {
15
15
  healthCheck(): Promise<ConnectorHealth>;
16
16
  disconnect(): Promise<void>;
17
17
  listServices(): Promise<ServiceInfo[]>;
18
+ private listServicesFromJobLabel;
18
19
  listAvailableMetrics(_service: string): Promise<MetricInfo[]>;
19
20
  queryMetrics(params: MetricQuery): Promise<MetricResult>;
20
21
  private buildQuery;
@@ -38,7 +38,11 @@ export class PrometheusConnector {
38
38
  async healthCheck() {
39
39
  const start = Date.now();
40
40
  try {
41
- const res = await fetch(`${this.baseUrl}/-/ready`, this.fetchOptions());
41
+ // Use the query API instead of /-/ready: works on both OSS Prometheus
42
+ // and managed offerings (Grafana Cloud / Mimir, AWS Managed Prometheus,
43
+ // Chronosphere) which do not expose the operational health endpoint.
44
+ // 'up' is a synthetic metric guaranteed to exist on any Prometheus.
45
+ const res = await fetch(`${this.baseUrl}/api/v1/query?query=up`, this.fetchOptions());
42
46
  return {
43
47
  status: res.ok ? "up" : "down",
44
48
  latencyMs: Date.now() - start,
@@ -51,21 +55,47 @@ export class PrometheusConnector {
51
55
  }
52
56
  async disconnect() { }
53
57
  async listServices() {
54
- const data = await this.apiGet("/api/v1/targets");
55
- const targets = data?.data?.activeTargets || [];
56
- const services = new Map();
57
- for (const t of targets) {
58
- const name = t.labels?.service || t.labels?.job || t.discoveredLabels?.__address__ || "unknown";
59
- if (!services.has(name)) {
60
- services.set(name, {
61
- name,
62
- source: this.name,
63
- signalType: "metrics",
64
- labels: t.labels,
65
- });
58
+ // Prefer /api/v1/targets — gives full label detail incl. service/job/address.
59
+ // Managed Prometheus (Mimir, AMP, Chronosphere) returns 404 on this path
60
+ // because targets are an operational concept of the OSS scraper. Fall back
61
+ // to /api/v1/label/job/values, which is the canonical query-time source
62
+ // for service names and is supported everywhere.
63
+ try {
64
+ const data = await this.apiGet("/api/v1/targets");
65
+ const targets = data?.data?.activeTargets || [];
66
+ if (targets.length === 0) {
67
+ return await this.listServicesFromJobLabel();
68
+ }
69
+ const services = new Map();
70
+ for (const t of targets) {
71
+ const name = t.labels?.service || t.labels?.job || t.discoveredLabels?.__address__ || "unknown";
72
+ if (!services.has(name)) {
73
+ services.set(name, {
74
+ name,
75
+ source: this.name,
76
+ signalType: "metrics",
77
+ labels: t.labels,
78
+ });
79
+ }
80
+ }
81
+ return Array.from(services.values());
82
+ }
83
+ catch (err) {
84
+ const msg = String(err);
85
+ if (msg.includes("404")) {
86
+ return await this.listServicesFromJobLabel();
66
87
  }
88
+ throw err;
67
89
  }
68
- return Array.from(services.values());
90
+ }
91
+ async listServicesFromJobLabel() {
92
+ const data = await this.apiGet("/api/v1/label/job/values");
93
+ const jobs = data?.data || [];
94
+ return jobs.map((name) => ({
95
+ name,
96
+ source: this.name,
97
+ signalType: "metrics",
98
+ }));
69
99
  }
70
100
  async listAvailableMetrics(_service) {
71
101
  const data = await this.apiGet("/api/v1/metadata");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thotischner/observability-mcp",
3
- "version": "1.1.0",
3
+ "version": "1.1.2",
4
4
  "description": "Unified observability gateway for AI agents — one MCP server for Prometheus, Loki, and any backend",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -9,11 +9,21 @@
9
9
  "type": "git",
10
10
  "url": "https://github.com/ThoTischner/observability-mcp"
11
11
  },
12
- "keywords": ["mcp", "observability", "prometheus", "loki", "model-context-protocol", "anomaly-detection"],
12
+ "keywords": [
13
+ "mcp",
14
+ "observability",
15
+ "prometheus",
16
+ "loki",
17
+ "model-context-protocol",
18
+ "anomaly-detection"
19
+ ],
13
20
  "bin": {
14
21
  "observability-mcp": "./dist/index.js"
15
22
  },
16
- "files": ["dist", "config"],
23
+ "files": [
24
+ "dist",
25
+ "config"
26
+ ],
17
27
  "scripts": {
18
28
  "dev": "tsx watch src/index.ts",
19
29
  "build": "tsc && cp -r src/ui dist/ui",