@thotischner/observability-mcp 1.3.3 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,15 @@
1
1
  import { describe, it, beforeEach, afterEach } from "node:test";
2
2
  import assert from "node:assert/strict";
3
- import { writeFileSync, mkdirSync, rmSync, existsSync } from "node:fs";
3
+ import { writeFileSync, mkdtempSync, rmSync, existsSync } from "node:fs";
4
4
  import { join } from "node:path";
5
5
  import { tmpdir } from "node:os";
6
6
  import { substituteEnv } from "./loader.js";
7
7
  // We test the helper functions by importing the module fresh with different env vars.
8
8
  // Since the config path is resolved at import time, we use dynamic imports.
9
- const TMP_DIR = join(tmpdir(), "observability-mcp-test-" + Date.now());
9
+ let TMP_DIR;
10
10
  describe("config/loader", () => {
11
11
  beforeEach(() => {
12
- mkdirSync(TMP_DIR, { recursive: true });
12
+ TMP_DIR = mkdtempSync(join(tmpdir(), "observability-mcp-test-"));
13
13
  });
14
14
  afterEach(() => {
15
15
  rmSync(TMP_DIR, { recursive: true, force: true });
@@ -0,0 +1,43 @@
1
+ import type { ObservabilityConnector } from "./interface.js";
2
+ import type { ConnectorFactory, ConnectorManifest } from "../sdk/index.js";
3
+ export interface LoadedConnector {
4
+ /** Connector type id, e.g. "prometheus". Matches `source.type` in sources.yaml. */
5
+ name: string;
6
+ /** Where this connector came from (debug + UI display). */
7
+ source: "builtin" | "filesystem" | "config";
8
+ /** Optional metadata for plugins that ship a manifest.json. */
9
+ manifest?: ConnectorManifest;
10
+ factory: ConnectorFactory;
11
+ }
12
+ /**
13
+ * Resolves which connector implementations the server should know about,
14
+ * applying three sources in order (later overrides earlier):
15
+ * 1. builtin shim — Prometheus/Loki bundled with the server
16
+ * 2. filesystem — every subdir of PLUGINS_DIR whose package.json carries an `observabilityMcp` marker of kind "connector"
17
+ * 3. config-pinned — `plugins:` block in sources.yaml (not yet wired)
18
+ *
19
+ * The legacy `connectorFactories` map in registry.ts can be replaced
20
+ * with this loader's output without changing observable behaviour.
21
+ */
22
+ export declare class PluginLoader {
23
+ private connectors;
24
+ private pluginsDir;
25
+ private disabled;
26
+ constructor(opts?: {
27
+ pluginsDir?: string;
28
+ disabled?: string[];
29
+ });
30
+ load(): Promise<void>;
31
+ list(): LoadedConnector[];
32
+ get(name: string): LoadedConnector | undefined;
33
+ has(name: string): boolean;
34
+ supportedTypes(): string[];
35
+ /** Create a fresh instance of a connector. Returns undefined for unknown types. */
36
+ create(name: string): ObservabilityConnector | undefined;
37
+ private loadBuiltins;
38
+ private loadFilesystem;
39
+ private loadFilesystemPlugin;
40
+ private register;
41
+ }
42
+ export declare function getPluginLoader(): PluginLoader;
43
+ export declare function setPluginLoader(loader: PluginLoader): void;
@@ -0,0 +1,170 @@
1
+ import { readdirSync, readFileSync, existsSync, statSync } from "node:fs";
2
+ import { join, resolve } from "node:path";
3
+ import { pathToFileURL } from "node:url";
4
+ import { manifestSchema } from "../sdk/manifest-schema.js";
5
+ import { PrometheusConnector } from "./prometheus.js";
6
+ import { LokiConnector } from "./loki.js";
7
+ import { sanitizeForLog } from "../util/sanitize.js";
8
+ import { instrumentConnector } from "../metrics/instrument-connector.js";
9
/**
 * Resolves which connector implementations the server should know about,
 * applying three sources in order (later overrides earlier):
 *   1. builtin shim  — Prometheus/Loki bundled with the server
 *   2. filesystem    — every subdir of PLUGINS_DIR whose package.json carries
 *                      an `observabilityMcp` marker of kind "connector"
 *   3. config-pinned — `plugins:` block in sources.yaml (not yet wired)
 *
 * The legacy `connectorFactories` map in registry.ts can be replaced
 * with this loader's output without changing observable behaviour.
 */
export class PluginLoader {
    /** Registered connectors, keyed by connector type id. */
    connectors = new Map();
    /** Directory scanned for filesystem plugins. */
    pluginsDir;
    /** Names of connectors that must never be registered. */
    disabled;
    /**
     * @param opts.pluginsDir Directory to scan; falls back to the PLUGINS_DIR
     *   env var and then to "/app/plugins".
     * @param opts.disabled Connector names to skip; merged with the
     *   comma-separated PLUGINS_DISABLED env var.
     */
    constructor(opts = {}) {
        this.pluginsDir = opts.pluginsDir
            ?? process.env.PLUGINS_DIR
            ?? "/app/plugins";
        // Per-plugin disable via env: PLUGINS_DISABLED="prometheus,loki"
        const envDisabled = (process.env.PLUGINS_DISABLED ?? "")
            .split(",")
            .map((s) => s.trim())
            .filter(Boolean);
        this.disabled = new Set([...(opts.disabled ?? []), ...envDisabled]);
    }
    /** Registers the builtin connectors, then scans the plugins directory. */
    async load() {
        this.loadBuiltins();
        await this.loadFilesystem();
    }
    /** @returns every registered connector entry. */
    list() {
        return Array.from(this.connectors.values());
    }
    /** @returns the entry registered under `name`, or undefined. */
    get(name) {
        return this.connectors.get(name);
    }
    /** @returns true when a connector is registered under `name`. */
    has(name) {
        return this.connectors.has(name);
    }
    /** @returns the names of all registered connectors. */
    supportedTypes() {
        return Array.from(this.connectors.keys());
    }
    /**
     * Create a fresh instance of a connector, wrapped by `instrumentConnector`.
     *
     * @returns the instance, or undefined for unknown types.
     * @throws Error when the registered factory returns a Promise.
     */
    create(name) {
        const entry = this.connectors.get(name);
        if (!entry)
            return undefined;
        const c = entry.factory();
        if (c instanceof Promise) {
            // Connectors are sync-constructed for now. Nothing awaits an
            // async factory yet, so fail loudly instead of handing the
            // caller a Promise where it expects a connector.
            throw new Error(`Connector ${name} returned a Promise; async factories not yet wired`);
        }
        return instrumentConnector(c);
    }
    /** Registers the connectors bundled with the server. */
    loadBuiltins() {
        this.register({
            name: "prometheus",
            source: "builtin",
            factory: () => new PrometheusConnector(),
        });
        this.register({
            name: "loki",
            source: "builtin",
            factory: () => new LokiConnector(),
        });
    }
    /**
     * Tries to load every subdirectory of `pluginsDir` as a plugin. A
     * failure in one plugin is logged and does not stop the scan.
     */
    async loadFilesystem() {
        const dir = this.pluginsDir;
        if (!existsSync(dir))
            return;
        let entries;
        try {
            entries = readdirSync(dir);
        }
        catch (err) {
            // Best-effort: an unreadable plugins dir must not stop startup,
            // but the operator should be told why no plugins were loaded.
            console.warn("Failed to read plugins directory %s: %s", sanitizeForLog(dir), sanitizeForLog(String(err)));
            return;
        }
        for (const entry of entries) {
            const pluginRoot = join(dir, entry);
            try {
                if (!statSync(pluginRoot).isDirectory())
                    continue;
                await this.loadFilesystemPlugin(pluginRoot);
            }
            catch (err) {
                console.warn("Failed to load plugin %s: %s", sanitizeForLog(entry), sanitizeForLog(String(err)));
            }
        }
    }
    /**
     * Loads a single plugin directory. Directories without a package.json,
     * or whose package.json lacks an `observabilityMcp` marker with
     * `kind: "connector"` and a name, are ignored silently. Parse and
     * import errors propagate to the caller.
     */
    async loadFilesystemPlugin(pluginRoot) {
        const pkgPath = join(pluginRoot, "package.json");
        if (!existsSync(pkgPath))
            return;
        const pkg = JSON.parse(readFileSync(pkgPath, "utf8"));
        const marker = pkg.observabilityMcp;
        if (!marker || marker.kind !== "connector" || !marker.name)
            return;
        if (typeof marker.name !== "string") {
            // A non-string name would be stored under a key that no
            // `source.type` string lookup can ever match.
            console.warn("Plugin at %s declares a non-string observabilityMcp.name; skipping", sanitizeForLog(pluginRoot));
            return;
        }
        let manifest;
        if (marker.manifest) {
            const manifestPath = resolve(pluginRoot, marker.manifest);
            if (existsSync(manifestPath)) {
                const raw = JSON.parse(readFileSync(manifestPath, "utf8"));
                const parsed = manifestSchema.safeParse(raw);
                if (!parsed.success) {
                    const issues = parsed.error.issues
                        .map((i) => `${i.path.join(".")}: ${i.message}`)
                        .join("; ");
                    console.warn("Plugin %s has invalid manifest.json — %s; skipping", sanitizeForLog(marker.name), sanitizeForLog(issues));
                    return;
                }
                manifest = parsed.data;
                if (manifest.name !== marker.name) {
                    console.warn("Plugin %s package.json marker name does not match manifest.json (%s); skipping", sanitizeForLog(marker.name), sanitizeForLog(manifest.name));
                    return;
                }
            }
            else {
                // The plugin still loads, just without manifest metadata.
                console.warn("Plugin %s declares manifest %s but the file is missing; loading without manifest", sanitizeForLog(marker.name), sanitizeForLog(String(marker.manifest)));
            }
        }
        const entryFile = pkg.main || "index.js";
        const entryPath = resolve(pluginRoot, entryFile);
        if (!existsSync(entryPath)) {
            console.warn("Plugin %s missing entry file %s", sanitizeForLog(marker.name), sanitizeForLog(entryFile));
            return;
        }
        const mod = await import(pathToFileURL(entryPath).href);
        // Accept either a default-exported factory or a named `createConnector`.
        const factory = mod.default ?? mod.createConnector;
        if (typeof factory !== "function") {
            console.warn("Plugin %s has no default export factory", sanitizeForLog(marker.name));
            return;
        }
        this.register({
            name: marker.name,
            source: "filesystem",
            manifest,
            factory,
        });
        console.log('Connector plugin "%s" loaded from %s', sanitizeForLog(marker.name), sanitizeForLog(pluginRoot));
    }
    /** Stores `entry` under its name unless that name is disabled. */
    register(entry) {
        if (this.disabled.has(entry.name)) {
            console.log("Connector %s disabled via PLUGINS_DISABLED; skipping", sanitizeForLog(entry.name));
            return;
        }
        // Later sources override earlier ones; current call order is
        // builtin → filesystem → config-pinned, matching the design doc.
        this.connectors.set(entry.name, entry);
    }
}
157
/**
 * Process-wide loader instance consulted for connector creation.
 * `getPluginLoader` creates a default `PluginLoader` on first use; it does
 * not call `load()`, which remains the caller's job. Tests may install
 * their own instance with `setPluginLoader`.
 */
let activeLoader = null;
/** @returns the active loader, constructing a default one if none is set. */
export function getPluginLoader() {
    if (activeLoader)
        return activeLoader;
    activeLoader = new PluginLoader();
    return activeLoader;
}
/** Replaces the active loader with `loader`. */
export function setPluginLoader(loader) {
    activeLoader = loader;
}
@@ -200,8 +200,9 @@ export class LokiConnector {
200
200
  return value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
201
201
  }
202
202
  escapeLogQLRegex(value) {
203
- // Escape backticks which would break the LogQL regex delimiter
204
- return value.replace(/`/g, "\\`");
203
+ // Escape backslash first (so we don't double-escape sequences we add),
204
+ // then the backtick that delimits LogQL regex literals.
205
+ return value.replace(/\\/g, "\\\\").replace(/`/g, "\\`");
205
206
  }
206
207
  buildAuthHeaders() {
207
208
  if (!this.auth || this.auth.type === "none")
@@ -1,9 +1,12 @@
1
1
  import type { ObservabilityConnector } from "./interface.js";
2
2
  import type { Config, ConnectorHealth, SignalType, SourceConfig } from "../types.js";
3
+ import { type PluginLoader } from "./loader.js";
3
4
  export declare function getSupportedTypes(): string[];
4
5
  export declare class ConnectorRegistry {
5
6
  private connectors;
6
7
  private sourceConfigs;
8
+ private loader;
9
+ constructor(loader?: PluginLoader);
7
10
  initialize(config: Config): Promise<void>;
8
11
  private connectSource;
9
12
  addSource(source: SourceConfig): Promise<void>;
@@ -1,15 +1,15 @@
1
- import { PrometheusConnector } from "./prometheus.js";
2
- import { LokiConnector } from "./loki.js";
3
- const connectorFactories = {
4
- prometheus: () => new PrometheusConnector(),
5
- loki: () => new LokiConnector(),
6
- };
1
+ import { getPluginLoader } from "./loader.js";
2
+ import { sanitizeForLog } from "../util/sanitize.js";
7
3
  export function getSupportedTypes() {
8
- return Object.keys(connectorFactories);
4
+ return getPluginLoader().supportedTypes();
9
5
  }
10
6
  export class ConnectorRegistry {
11
7
  connectors = new Map();
12
8
  sourceConfigs = new Map();
9
+ loader;
10
+ constructor(loader = getPluginLoader()) {
11
+ this.loader = loader;
12
+ }
13
13
  async initialize(config) {
14
14
  for (const source of config.sources) {
15
15
  this.sourceConfigs.set(source.name, source);
@@ -19,19 +19,20 @@ export class ConnectorRegistry {
19
19
  }
20
20
  }
21
21
  async connectSource(source) {
22
- const factory = connectorFactories[source.type];
23
- if (!factory) {
24
- console.warn(`Unknown connector type: ${source.type}, skipping ${source.name}`);
22
+ const connector = this.loader.create(source.type);
23
+ const safeName = sanitizeForLog(source.name);
24
+ const safeType = sanitizeForLog(source.type);
25
+ if (!connector) {
26
+ console.warn("Unknown connector type: %s, skipping %s", safeType, safeName);
25
27
  return;
26
28
  }
27
- const connector = factory();
28
29
  try {
29
30
  await connector.connect(source);
30
31
  this.connectors.set(source.name, connector);
31
- console.log(`Connector "${source.name}" (${source.type}) connected`);
32
+ console.log('Connector "%s" (%s) connected', safeName, safeType);
32
33
  }
33
34
  catch (err) {
34
- console.error(`Failed to connect "${source.name}":`, err);
35
+ console.error('Failed to connect "%s":', safeName, err);
35
36
  }
36
37
  }
37
38
  async addSource(source) {
@@ -53,11 +54,10 @@ export class ConnectorRegistry {
53
54
  await this.addSource(source);
54
55
  }
55
56
  async testConnection(source) {
56
- const factory = connectorFactories[source.type];
57
- if (!factory) {
57
+ const connector = this.loader.create(source.type);
58
+ if (!connector) {
58
59
  return { status: "down", latencyMs: 0, message: `Unknown type: ${source.type}` };
59
60
  }
60
- const connector = factory();
61
61
  try {
62
62
  await connector.connect(source);
63
63
  const health = await connector.healthCheck();
@@ -1,11 +1,11 @@
1
1
  import { describe, it, before, after } from "node:test";
2
2
  import assert from "node:assert/strict";
3
3
  import { Agent } from "node:https";
4
- import { writeFileSync, mkdirSync, rmSync } from "node:fs";
4
+ import { writeFileSync, mkdtempSync, rmSync } from "node:fs";
5
5
  import { join } from "node:path";
6
6
  import { tmpdir } from "node:os";
7
7
  import { buildTlsAgent } from "./tls.js";
8
- const TMP_DIR = join(tmpdir(), "tls-test-" + Date.now());
8
+ let TMP_DIR;
9
9
  function makeConfig(overrides = {}) {
10
10
  return { name: "test", type: "prometheus", url: "https://localhost:9090", enabled: true, ...overrides };
11
11
  }
@@ -38,7 +38,7 @@ describe("buildTlsAgent", () => {
38
38
  });
39
39
  describe("with certificate files", () => {
40
40
  before(() => {
41
- mkdirSync(TMP_DIR, { recursive: true });
41
+ TMP_DIR = mkdtempSync(join(tmpdir(), "tls-test-"));
42
42
  writeFileSync(join(TMP_DIR, "ca.pem"), "-----BEGIN CERTIFICATE-----\nfake-ca\n-----END CERTIFICATE-----\n");
43
43
  writeFileSync(join(TMP_DIR, "client.pem"), "-----BEGIN CERTIFICATE-----\nfake-client\n-----END CERTIFICATE-----\n");
44
44
  writeFileSync(join(TMP_DIR, "client-key.pem"), "-----BEGIN PRIVATE KEY-----\nfake-key\n-----END PRIVATE KEY-----\n");
package/dist/index.js CHANGED
@@ -6,6 +6,9 @@ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/
6
6
  import { z } from "zod";
7
7
  import { loadConfig, saveConfig, DEFAULT_HEALTH_THRESHOLDS, DEFAULT_SETTINGS } from "./config/loader.js";
8
8
  import { ConnectorRegistry, getSupportedTypes } from "./connectors/registry.js";
9
+ import { getPluginLoader } from "./connectors/loader.js";
10
+ import { selfRegistry, withToolMetrics, apiRequests, mcpActiveSessions } from "./metrics/self.js";
11
+ import { buildOpenApiSpec } from "./openapi.js";
9
12
  import { listSourcesHandler } from "./tools/list-sources.js";
10
13
  import { listServicesHandler } from "./tools/list-services.js";
11
14
  import { queryMetricsHandler } from "./tools/query-metrics.js";
@@ -14,7 +17,19 @@ import { getServiceHealthHandler, setHealthThresholds } from "./tools/get-servic
14
17
  import { detectAnomaliesHandler } from "./tools/detect-anomalies.js";
15
18
  import { fileURLToPath } from "node:url";
16
19
  import { dirname, join } from "node:path";
20
+ import { readFileSync } from "node:fs";
17
21
  const __dirname = dirname(fileURLToPath(import.meta.url));
22
+ // Read once at startup; the file is shipped inside the image so this
23
+ // is the source of truth even when the user runs from `npx`.
24
+ const SERVER_VERSION = (() => {
25
+ try {
26
+ const pkg = JSON.parse(readFileSync(join(__dirname, "..", "package.json"), "utf8"));
27
+ return pkg.version ?? "unknown";
28
+ }
29
+ catch {
30
+ return "unknown";
31
+ }
32
+ })();
18
33
  function applyConfigToRuntime(config, registry) {
19
34
  setHealthThresholds(config.healthThresholds);
20
35
  }
@@ -54,6 +69,7 @@ function validateSourceUrl(url) {
54
69
  }
55
70
  async function main() {
56
71
  let config = loadConfig();
72
+ await getPluginLoader().load();
57
73
  const registry = new ConnectorRegistry();
58
74
  await registry.initialize(config);
59
75
  applyConfigToRuntime(config, registry);
@@ -64,11 +80,11 @@ async function main() {
64
80
  function createMcpServer() {
65
81
  const mcpServer = new McpServer({
66
82
  name: "observability-mcp",
67
- version: "1.3.0",
83
+ version: SERVER_VERSION,
68
84
  });
69
85
  // --- Register tools with Zod schemas ---
70
- mcpServer.tool("list_sources", "List all configured observability backends and their connection status. Use this to discover what data sources are available.", {}, async () => listSourcesHandler(registry));
71
- mcpServer.tool("list_services", "List all monitored services discovered across all connected backends. Returns service names, their data sources, and signal types (metrics/logs).", { filter: z.string().optional().describe("Optional filter to match service names") }, async (args) => listServicesHandler(registry, args));
86
+ mcpServer.tool("list_sources", "List all configured observability backends and their connection status. Use this to discover what data sources are available.", {}, async () => withToolMetrics("list_sources", () => listSourcesHandler(registry)));
87
+ mcpServer.tool("list_services", "List all monitored services discovered across all connected backends. Returns service names, their data sources, and signal types (metrics/logs).", { filter: z.string().optional().describe("Optional filter to match service names") }, async (args) => withToolMetrics("list_services", () => listServicesHandler(registry, args)));
72
88
  const metricsList = getAvailableMetricNames(registry);
73
89
  const metricNames = registry.getBySignal("metrics").flatMap(c => c.getMetrics().map(m => m.name));
74
90
  const uniqueNames = [...new Set(metricNames)];
@@ -78,22 +94,22 @@ async function main() {
78
94
  duration: z.string().optional().describe("Time range (e.g. '5m', '1h', '24h'). Default: '5m'"),
79
95
  source: z.string().optional().describe("Specific source name. If omitted, queries all metrics backends."),
80
96
  groupBy: z.string().optional().describe("Label to break the result down by, e.g. 'instance', 'pod', 'node'. Returns one series per distinct value in 'groups'."),
81
- }, async (args) => queryMetricsHandler(registry, args));
97
+ }, async (args) => withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args)));
82
98
  mcpServer.tool("query_logs", "Query logs for a service over a given timeframe. Returns log entries with a summary including error/warning counts and top error patterns.", {
83
99
  service: z.string().describe("Service name (e.g. 'payment-service')"),
84
100
  query: z.string().optional().describe("Optional search query to filter log messages (regex supported)"),
85
101
  duration: z.string().optional().describe("Time range (e.g. '5m', '1h', '24h'). Default: '5m'"),
86
102
  level: z.string().optional().describe("Filter by log level: 'error', 'warn', 'info', 'debug'"),
87
103
  limit: z.number().optional().describe("Maximum log entries to return. Default: 100"),
88
- }, async (args) => queryLogsHandler(registry, args));
104
+ }, async (args) => withToolMetrics("query_logs", () => queryLogsHandler(registry, args)));
89
105
  mcpServer.tool("get_service_health", "Get an aggregated health overview for a service combining metrics AND logs. Returns health score (0-100), status (healthy/degraded/critical), key metrics, log error summary, anomalies, and cross-signal correlations.", {
90
106
  service: z.string().describe("Service name to check health for"),
91
- }, async (args) => getServiceHealthHandler(registry, args));
107
+ }, async (args) => withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args)));
92
108
  mcpServer.tool("detect_anomalies", "Scan for anomalies across all monitored services (or a specific one). Uses z-score analysis on metrics, checks log error spikes, and correlates signals. Returns anomalies with severity ratings.", {
93
109
  service: z.string().optional().describe("Specific service to scan. If omitted, scans all."),
94
110
  duration: z.string().optional().describe("Time range to analyze (e.g. '5m', '15m', '1h'). Default: '10m'"),
95
111
  sensitivity: z.enum(["low", "medium", "high"]).optional().describe("Detection sensitivity: low (>3σ), medium (>2σ), high (>1.5σ). Default: 'medium'"),
96
- }, async (args) => detectAnomaliesHandler(registry, args));
112
+ }, async (args) => withToolMetrics("detect_anomalies", () => detectAnomaliesHandler(registry, args)));
97
113
  return mcpServer;
98
114
  }
99
115
  // --- HTTP server ---
@@ -107,6 +123,45 @@ async function main() {
107
123
  res.setHeader("Referrer-Policy", "strict-origin-when-cross-origin");
108
124
  next();
109
125
  });
126
+ // API request counter — emitted at response time so the `status` label
127
+ // is the real outcome. /metrics itself is excluded to avoid self-scrape
128
+ // amplification.
129
+ app.use((req, res, next) => {
130
+ if (req.path === "/metrics")
131
+ return next();
132
+ res.on("finish", () => {
133
+ // Group dynamic segments by the registered Express route when we
134
+ // have one, otherwise fall back to the literal path. Matched routes
135
+ // keep label cardinality bounded; the literal-path fallback does not.
136
+ const route = req.route?.path ?? req.path;
137
+ apiRequests.inc({ route, method: req.method, status: String(res.statusCode) });
138
+ });
139
+ next();
140
+ });
141
+ // k8s-convention liveness/readiness probes at the root of the path
142
+ // tree, no /api prefix. Helm chart points its probes here. NOTE: the
143
+ // request-counter middleware above only exempts /metrics, so probe hits are counted.
144
+ let ready = false;
145
+ app.get("/healthz", (_req, res) => res.type("text").send("ok"));
146
+ app.get("/readyz", (_req, res) => {
147
+ if (ready)
148
+ return res.type("text").send("ok");
149
+ return res.status(503).type("text").send("starting");
150
+ });
151
+ // OpenAPI 3.1 document for the /api/* surface.
152
+ app.get("/api/openapi.json", (_req, res) => {
153
+ res.json(buildOpenApiSpec(SERVER_VERSION));
154
+ });
155
+ // Self-monitoring — Prometheus scrape endpoint.
156
+ // Disabled with METRICS_ENABLED=false for environments that prefer
157
+ // sidecar agents. The Helm chart's ServiceMonitor template targets
158
+ // this endpoint when enabled.
159
+ if (process.env.METRICS_ENABLED !== "false") {
160
+ app.get("/metrics", async (_req, res) => {
161
+ res.set("Content-Type", selfRegistry.contentType);
162
+ res.end(await selfRegistry.metrics());
163
+ });
164
+ }
110
165
  // Serve Web UI
111
166
  app.use(express.static(join(__dirname, "ui")));
112
167
  // --- API endpoints for Web UI ---
@@ -135,6 +190,31 @@ async function main() {
135
190
  app.get("/api/source-types", (_req, res) => {
136
191
  res.json(getSupportedTypes());
137
192
  });
193
+ // Server info — version, loaded plugins, MCP protocol version, build metadata.
194
+ // Used by the Web UI footer and by operators to confirm what's deployed.
195
+ app.get("/api/info", async (_req, res) => {
196
+ const loader = getPluginLoader();
197
+ res.json({
198
+ name: "observability-mcp",
199
+ version: SERVER_VERSION,
200
+ mcpProtocolVersion: "2025-03-26",
201
+ build: {
202
+ commit: process.env.GIT_COMMIT || null,
203
+ date: process.env.BUILD_DATE || null,
204
+ },
205
+ runtime: {
206
+ node: process.version,
207
+ platform: process.platform,
208
+ arch: process.arch,
209
+ },
210
+ plugins: loader.list().map((p) => ({
211
+ name: p.name,
212
+ source: p.source,
213
+ version: p.manifest?.version ?? null,
214
+ signalTypes: p.manifest?.signalTypes ?? null,
215
+ })),
216
+ });
217
+ });
138
218
  // Add a new source
139
219
  app.post("/api/sources", async (req, res) => {
140
220
  const { name, type, url, enabled, auth, tls } = req.body;
@@ -366,6 +446,7 @@ async function main() {
366
446
  console.log(`Session ${sid} expired (idle)`);
367
447
  }
368
448
  }
449
+ mcpActiveSessions.set(transports.size);
369
450
  }, 5 * 60 * 1000);
370
451
  app.post("/mcp", async (req, res) => {
371
452
  const sessionId = req.headers["mcp-session-id"];
@@ -385,6 +466,7 @@ async function main() {
385
466
  break;
386
467
  }
387
468
  }
469
+ mcpActiveSessions.set(transports.size);
388
470
  };
389
471
  const sessionMcpServer = createMcpServer();
390
472
  await sessionMcpServer.connect(transport);
@@ -397,6 +479,7 @@ async function main() {
397
479
  transports.set(sid, transport);
398
480
  sessionLastActive.set(sid, Date.now());
399
481
  }
482
+ mcpActiveSessions.set(transports.size);
400
483
  });
401
484
  app.get("/mcp", async (req, res) => {
402
485
  const sessionId = req.headers["mcp-session-id"];
@@ -421,6 +504,7 @@ async function main() {
421
504
  });
422
505
  const PORT = parseInt(process.env.PORT || "3000");
423
506
  app.listen(PORT, () => {
507
+ ready = true;
424
508
  console.log(`observability-mcp server running on port ${PORT}`);
425
509
  console.log(` MCP endpoint: http://localhost:${PORT}/mcp`);
426
510
  console.log(` Web UI: http://localhost:${PORT}`);
@@ -0,0 +1,8 @@
1
+ import type { ObservabilityConnector } from "../connectors/interface.js";
2
+ /**
3
+ * Decorate a connector so every observable backend call increments
4
+ * obsmcp_connector_calls_total{source,type,operation,outcome}. The
5
+ * `source` label is filled in on first `connect()` once the config
6
+ * is known. Keeps connector implementations free of metrics code.
7
+ */
8
+ export declare function instrumentConnector<T extends ObservabilityConnector>(c: T): T;
@@ -0,0 +1,41 @@
1
+ import { connectorCalls } from "./self.js";
2
/** Connector methods whose invocations are counted. */
const OPS = ["healthCheck", "listServices", "queryMetrics", "queryLogs", "listAvailableMetrics"];
/**
 * Wraps a connector in place so that each call to one of the OPS methods
 * increments obsmcp_connector_calls_total{source,type,operation,outcome}.
 * The `source` label reads "<pending>" until `connect()` has been called
 * with a config, after which it carries `config.name`. Connector
 * implementations stay free of metrics code.
 *
 * @param c Connector to decorate; its methods are reassigned.
 * @returns the same connector object.
 */
export function instrumentConnector(c) {
    let source = "";
    const type = c.type;
    // Single place that builds the label set for both outcomes.
    const tally = (operation, outcome) => {
        connectorCalls.inc({ source: source || "<pending>", type, operation, outcome });
    };
    const originalConnect = c.connect.bind(c);
    c.connect = async (config) => {
        // Record the source name before delegating, so it is set even if connect fails.
        source = config.name;
        return originalConnect(config);
    };
    OPS.forEach((op) => {
        const original = c[op];
        if (typeof original === "function") {
            const invoke = original.bind(c);
            c[op] = async (...args) => {
                try {
                    const result = await invoke(...args);
                    tally(op, "ok");
                    return result;
                }
                catch (failure) {
                    tally(op, "error");
                    throw failure;
                }
            };
        }
    });
    return c;
}
@@ -0,0 +1,12 @@
1
+ import { Registry, Counter, Histogram, Gauge } from "prom-client";
2
+ export declare const selfRegistry: Registry<"text/plain; version=0.0.4; charset=utf-8">;
3
+ export declare const mcpToolCalls: Counter<"tool" | "outcome">;
4
+ export declare const mcpToolLatency: Histogram<"tool">;
5
+ export declare const connectorCalls: Counter<"type" | "source" | "outcome" | "operation">;
6
+ export declare const apiRequests: Counter<"status" | "route" | "method">;
7
+ export declare const mcpActiveSessions: Gauge<string>;
8
+ /**
9
+ * Wrap a (potentially async) tool handler to record call count + latency.
10
+ * Outcome is "ok" or "error" — never throws on its own.
11
+ */
12
+ export declare function withToolMetrics<T>(tool: string, fn: () => Promise<T>): Promise<T>;
@@ -0,0 +1,61 @@
1
+ // Server self-metrics exposed at /metrics for Prometheus scraping.
2
+ // Pairs with the Helm chart's ServiceMonitor template.
3
+ //
4
+ // Default Node metrics (CPU, memory, event loop lag, heap) come from
5
+ // prom-client's collectDefaultMetrics. On top of that we ship five
6
+ // product-specific metrics that operators actually need to graph:
7
+ // MCP tool calls and their latency, connector backend calls, API requests,
8
+ // and the active session count.
9
+ import { Registry, collectDefaultMetrics, Counter, Histogram, Gauge, } from "prom-client";
10
+ export const selfRegistry = new Registry();
11
+ selfRegistry.setDefaultLabels({ service: "observability-mcp" });
12
+ collectDefaultMetrics({ register: selfRegistry, prefix: "obsmcp_" });
13
+ export const mcpToolCalls = new Counter({
14
+ name: "obsmcp_mcp_tool_calls_total",
15
+ help: "MCP tool invocations by tool and outcome.",
16
+ labelNames: ["tool", "outcome"],
17
+ registers: [selfRegistry],
18
+ });
19
+ export const mcpToolLatency = new Histogram({
20
+ name: "obsmcp_mcp_tool_duration_seconds",
21
+ help: "MCP tool invocation latency.",
22
+ labelNames: ["tool"],
23
+ buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
24
+ registers: [selfRegistry],
25
+ });
26
+ export const connectorCalls = new Counter({
27
+ name: "obsmcp_connector_calls_total",
28
+ help: "Calls to a configured connector, by source and outcome.",
29
+ labelNames: ["source", "type", "operation", "outcome"],
30
+ registers: [selfRegistry],
31
+ });
32
+ export const apiRequests = new Counter({
33
+ name: "obsmcp_api_requests_total",
34
+ help: "Web UI / API request count, by route and status.",
35
+ labelNames: ["route", "method", "status"],
36
+ registers: [selfRegistry],
37
+ });
38
+ export const mcpActiveSessions = new Gauge({
39
+ name: "obsmcp_mcp_active_sessions",
40
+ help: "Active MCP Streamable HTTP sessions.",
41
+ registers: [selfRegistry],
42
+ });
43
/**
 * Runs a (potentially async) tool handler while recording one call-count
 * increment and one latency observation for `tool`. The counter outcome is
 * "ok" when `fn` resolves and "error" when it rejects; a rejection is
 * re-thrown unchanged.
 *
 * @param tool Tool name used as the metric label.
 * @param fn Handler to execute.
 * @returns whatever `fn` resolves to.
 */
export async function withToolMetrics(tool, fn) {
    const stopTimer = mcpToolLatency.startTimer({ tool });
    const tally = (outcome) => mcpToolCalls.inc({ tool, outcome });
    try {
        const result = await fn();
        tally("ok");
        return result;
    }
    catch (failure) {
        tally("error");
        throw failure;
    }
    finally {
        // Latency is observed for both outcomes.
        stopTimer();
    }
}