@thotischner/observability-mcp 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  import type { Config, GeneralSettings, HealthThresholds } from "../types.js";
2
2
  export declare const DEFAULT_SETTINGS: GeneralSettings;
3
3
  export declare const DEFAULT_HEALTH_THRESHOLDS: HealthThresholds;
4
+ export declare function substituteEnv(raw: string): string;
4
5
  export declare function loadConfig(): Config;
5
6
  export declare function saveConfig(config: Config): void;
@@ -23,10 +23,21 @@ export const DEFAULT_HEALTH_THRESHOLDS = {
23
23
  logErrors: { good: 1, warn: 5, crit: 20 },
24
24
  statusBoundaries: { healthy: 80, degraded: 50 },
25
25
  };
26
/**
 * Expands shell-style environment placeholders in a raw config string.
 *
 * Supported forms:
 *   - `${NAME}`           -> value of `process.env.NAME`; when the variable is
 *                            undefined a warning is logged and "" is used.
 *   - `${NAME:-fallback}` -> value of `process.env.NAME`, or `fallback` when the
 *                            variable is unset OR empty (the POSIX / Docker
 *                            Compose meaning of `:-`).
 *
 * Names are matched case-insensitively and must start with a letter or an
 * underscore. Text without placeholders is returned unchanged.
 *
 * @param raw Raw text (e.g. the YAML config file contents).
 * @returns The text with every recognised placeholder replaced.
 */
export function substituteEnv(raw) {
    return raw.replace(/\$\{([A-Z_][A-Z0-9_]*)(?::-([^}]*))?\}/gi, (_match, name, fallback) => {
        const val = process.env[name];
        if (fallback !== undefined) {
            // `:-` applies the default for unset *and* empty variables, so an
            // empty-but-defined variable (common in CI) still gets the fallback.
            return val === undefined || val === "" ? fallback : val;
        }
        if (val !== undefined) {
            return val;
        }
        // No value and no default: substitute "" but make the gap visible.
        console.warn(`[config] env var \${${name}} is undefined`);
        return "";
    });
}
26
37
  export function loadConfig() {
27
38
  try {
28
39
  const raw = readFileSync(CONFIG_PATH, "utf-8");
29
- const parsed = yaml.load(raw);
40
+ const parsed = yaml.load(substituteEnv(raw));
30
41
  return {
31
42
  sources: parsed?.sources || [],
32
43
  settings: { ...DEFAULT_SETTINGS, ...parsed?.settings },
@@ -3,6 +3,7 @@ import assert from "node:assert/strict";
3
3
  import { writeFileSync, mkdirSync, rmSync, existsSync } from "node:fs";
4
4
  import { join } from "node:path";
5
5
  import { tmpdir } from "node:os";
6
+ import { substituteEnv } from "./loader.js";
6
7
  // We test the helper functions by importing the module fresh with different env vars.
7
8
  // Since the config path is resolved at import time, we use dynamic imports.
8
9
  const TMP_DIR = join(tmpdir(), "observability-mcp-test-" + Date.now());
@@ -129,6 +130,67 @@ sources:
129
130
  assert.equal(config.healthThresholds.statusBoundaries.healthy, 80);
130
131
  });
131
132
  });
133
+ describe("substituteEnv", () => {
134
+ it("replaces ${VAR} with process.env value", () => {
135
+ process.env.TEST_FOO = "bar";
136
+ assert.equal(substituteEnv('value: "${TEST_FOO}"'), 'value: "bar"');
137
+ delete process.env.TEST_FOO;
138
+ });
139
+ it("uses default with ${VAR:-default} when unset", () => {
140
+ delete process.env.TEST_UNSET;
141
+ assert.equal(substituteEnv('value: "${TEST_UNSET:-fallback}"'), 'value: "fallback"');
142
+ });
143
+ it("prefers env value over default", () => {
144
+ process.env.TEST_SET = "real";
145
+ assert.equal(substituteEnv('value: "${TEST_SET:-fallback}"'), 'value: "real"');
146
+ delete process.env.TEST_SET;
147
+ });
148
+ it("returns empty string for undefined var without default", () => {
149
+ delete process.env.TEST_MISSING;
150
+ const origWarn = console.warn;
151
+ console.warn = () => { };
152
+ try {
153
+ assert.equal(substituteEnv('value: "${TEST_MISSING}"'), 'value: ""');
154
+ }
155
+ finally {
156
+ console.warn = origWarn;
157
+ }
158
+ });
159
+ it("leaves yaml without placeholders unchanged", () => {
160
+ const yaml = "sources:\n - name: prom\n url: http://localhost:9090\n";
161
+ assert.equal(substituteEnv(yaml), yaml);
162
+ });
163
+ it("handles multiple substitutions in one string", () => {
164
+ process.env.A = "1";
165
+ process.env.B = "2";
166
+ assert.equal(substituteEnv("${A}-${B}-${C:-3}"), "1-2-3");
167
+ delete process.env.A;
168
+ delete process.env.B;
169
+ });
170
+ it("substitutes inside loaded YAML config", async () => {
171
+ process.env.GRAFANA_USER = "12345";
172
+ process.env.GRAFANA_TOKEN = "secret-token";
173
+ const configPath = join(TMP_DIR, "envsubst.yaml");
174
+ writeFileSync(configPath, `
175
+ sources:
176
+ - name: grafana
177
+ type: prometheus
178
+ url: https://grafana.example.com
179
+ enabled: true
180
+ auth:
181
+ type: basic
182
+ username: "\${GRAFANA_USER}"
183
+ password: "\${GRAFANA_TOKEN}"
184
+ `);
185
+ process.env.CONFIG_PATH = configPath;
186
+ const mod = await import("./loader.js?" + Date.now());
187
+ const config = mod.loadConfig();
188
+ assert.equal(config.sources[0].auth?.username, "12345");
189
+ assert.equal(config.sources[0].auth?.password, "secret-token");
190
+ delete process.env.GRAFANA_USER;
191
+ delete process.env.GRAFANA_TOKEN;
192
+ });
193
+ });
132
194
  describe("config merging", () => {
133
195
  it("merges partial settings with defaults", async () => {
134
196
  const configPath = join(TMP_DIR, "partial.yaml");
@@ -30,13 +30,14 @@ export class LokiConnector {
30
30
  async healthCheck() {
31
31
  const start = Date.now();
32
32
  try {
33
- const res = await fetch(`${this.baseUrl}/ready`, this.fetchOptions());
34
- const text = await res.text();
35
- const isReady = res.ok && text.trim() === "ready";
33
+ // Use the labels query API instead of /ready: managed Loki (Grafana
34
+ // Cloud, etc.) does not expose the operational health endpoint.
35
+ // /loki/api/v1/labels returns 200 with auth on any reachable Loki.
36
+ const res = await fetch(`${this.baseUrl}/loki/api/v1/labels`, this.fetchOptions());
36
37
  return {
37
- status: isReady ? "up" : "down",
38
+ status: res.ok ? "up" : "down",
38
39
  latencyMs: Date.now() - start,
39
- message: isReady ? "Loki is ready" : `HTTP ${res.status}: ${text}`,
40
+ message: res.ok ? "Loki is ready" : `HTTP ${res.status}`,
40
41
  };
41
42
  }
42
43
  catch (err) {
@@ -15,6 +15,7 @@ export declare class PrometheusConnector implements ObservabilityConnector {
15
15
  healthCheck(): Promise<ConnectorHealth>;
16
16
  disconnect(): Promise<void>;
17
17
  listServices(): Promise<ServiceInfo[]>;
18
+ private listServicesFromJobLabel;
18
19
  listAvailableMetrics(_service: string): Promise<MetricInfo[]>;
19
20
  queryMetrics(params: MetricQuery): Promise<MetricResult>;
20
21
  private buildQuery;
@@ -38,7 +38,11 @@ export class PrometheusConnector {
38
38
  async healthCheck() {
39
39
  const start = Date.now();
40
40
  try {
41
- const res = await fetch(`${this.baseUrl}/-/ready`, this.fetchOptions());
41
+ // Use the query API instead of /-/ready: works on both OSS Prometheus
42
+ // and managed offerings (Grafana Cloud / Mimir, AWS Managed Prometheus,
43
+ // Chronosphere) which do not expose the operational health endpoint.
44
+ // 'up' is recorded for every scrape target; even when it matches no series the query still returns 200, which is all this check needs.
45
+ const res = await fetch(`${this.baseUrl}/api/v1/query?query=up`, this.fetchOptions());
42
46
  return {
43
47
  status: res.ok ? "up" : "down",
44
48
  latencyMs: Date.now() - start,
@@ -51,21 +55,47 @@ export class PrometheusConnector {
51
55
  }
52
56
  async disconnect() { }
53
57
  async listServices() {
54
- const data = await this.apiGet("/api/v1/targets");
55
- const targets = data?.data?.activeTargets || [];
56
- const services = new Map();
57
- for (const t of targets) {
58
- const name = t.labels?.service || t.labels?.job || t.discoveredLabels?.__address__ || "unknown";
59
- if (!services.has(name)) {
60
- services.set(name, {
61
- name,
62
- source: this.name,
63
- signalType: "metrics",
64
- labels: t.labels,
65
- });
58
+ // Prefer /api/v1/targets — gives full label detail incl. service/job/address.
59
+ // Managed Prometheus (Mimir, AMP, Chronosphere) returns 404 on this path
60
+ // because targets are an operational concept of the OSS scraper. Fall back
61
+ // to /api/v1/label/job/values, which is the canonical query-time source
62
+ // for service names and is supported everywhere.
63
+ try {
64
+ const data = await this.apiGet("/api/v1/targets");
65
+ const targets = data?.data?.activeTargets || [];
66
+ if (targets.length === 0) {
67
+ return await this.listServicesFromJobLabel();
68
+ }
69
+ const services = new Map();
70
+ for (const t of targets) {
71
+ const name = t.labels?.service || t.labels?.job || t.discoveredLabels?.__address__ || "unknown";
72
+ if (!services.has(name)) {
73
+ services.set(name, {
74
+ name,
75
+ source: this.name,
76
+ signalType: "metrics",
77
+ labels: t.labels,
78
+ });
79
+ }
80
+ }
81
+ return Array.from(services.values());
82
+ }
83
+ catch (err) {
84
+ const msg = String(err);
85
+ if (msg.includes("404")) {
86
+ return await this.listServicesFromJobLabel();
66
87
  }
88
+ throw err;
67
89
  }
68
- return Array.from(services.values());
90
+ }
91
+ async listServicesFromJobLabel() {
92
+ const data = await this.apiGet("/api/v1/label/job/values");
93
+ const jobs = data?.data || [];
94
+ return jobs.map((name) => ({
95
+ name,
96
+ source: this.name,
97
+ signalType: "metrics",
98
+ }));
69
99
  }
70
100
  async listAvailableMetrics(_service) {
71
101
  const data = await this.apiGet("/api/v1/metadata");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thotischner/observability-mcp",
3
- "version": "1.0.0",
3
+ "version": "1.1.1",
4
4
  "description": "Unified observability gateway for AI agents — one MCP server for Prometheus, Loki, and any backend",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -9,11 +9,21 @@
9
9
  "type": "git",
10
10
  "url": "https://github.com/ThoTischner/observability-mcp"
11
11
  },
12
- "keywords": ["mcp", "observability", "prometheus", "loki", "model-context-protocol", "anomaly-detection"],
12
+ "keywords": [
13
+ "mcp",
14
+ "observability",
15
+ "prometheus",
16
+ "loki",
17
+ "model-context-protocol",
18
+ "anomaly-detection"
19
+ ],
13
20
  "bin": {
14
21
  "observability-mcp": "./dist/index.js"
15
22
  },
16
- "files": ["dist", "config"],
23
+ "files": [
24
+ "dist",
25
+ "config"
26
+ ],
17
27
  "scripts": {
18
28
  "dev": "tsx watch src/index.ts",
19
29
  "build": "tsc && cp -r src/ui dist/ui",