@thotischner/observability-mcp 1.3.3 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/loader.test.js +3 -3
- package/dist/connectors/loader.d.ts +43 -0
- package/dist/connectors/loader.js +170 -0
- package/dist/connectors/loki.js +3 -2
- package/dist/connectors/registry.d.ts +3 -0
- package/dist/connectors/registry.js +16 -16
- package/dist/connectors/tls.test.js +3 -3
- package/dist/index.js +91 -7
- package/dist/metrics/instrument-connector.d.ts +8 -0
- package/dist/metrics/instrument-connector.js +41 -0
- package/dist/metrics/self.d.ts +12 -0
- package/dist/metrics/self.js +61 -0
- package/dist/openapi.d.ts +2 -0
- package/dist/openapi.js +186 -0
- package/dist/sdk/index.d.ts +46 -0
- package/dist/sdk/index.js +13 -0
- package/dist/sdk/manifest-schema.d.ts +27 -0
- package/dist/sdk/manifest-schema.js +36 -0
- package/dist/sdk/manifest-schema.test.d.ts +1 -0
- package/dist/sdk/manifest-schema.test.js +50 -0
- package/dist/tools/get-service-health.js +3 -2
- package/dist/ui/index.html +568 -111
- package/dist/util/sanitize.d.ts +1 -0
- package/dist/util/sanitize.js +6 -0
- package/package.json +13 -5
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import { describe, it, beforeEach, afterEach } from "node:test";
|
|
2
2
|
import assert from "node:assert/strict";
|
|
3
|
-
import { writeFileSync,
|
|
3
|
+
import { writeFileSync, mkdtempSync, rmSync, existsSync } from "node:fs";
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { tmpdir } from "node:os";
|
|
6
6
|
import { substituteEnv } from "./loader.js";
|
|
7
7
|
// We test the helper functions by importing the module fresh with different env vars.
|
|
8
8
|
// Since the config path is resolved at import time, we use dynamic imports.
|
|
9
|
-
|
|
9
|
+
let TMP_DIR;
|
|
10
10
|
describe("config/loader", () => {
|
|
11
11
|
beforeEach(() => {
|
|
12
|
-
|
|
12
|
+
TMP_DIR = mkdtempSync(join(tmpdir(), "observability-mcp-test-"));
|
|
13
13
|
});
|
|
14
14
|
afterEach(() => {
|
|
15
15
|
rmSync(TMP_DIR, { recursive: true, force: true });
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { ObservabilityConnector } from "./interface.js";
|
|
2
|
+
import type { ConnectorFactory, ConnectorManifest } from "../sdk/index.js";
|
|
3
|
+
/**
 * One connector implementation known to the plugin loader, together with
 * where it was discovered and how to instantiate it.
 */
export interface LoadedConnector {
    /** Connector type id, e.g. "prometheus". Matches `source.type` in sources.yaml. */
    name: string;
    /** Origin of this registration; used for debugging and shown in the UI. */
    source: "builtin" | "filesystem" | "config";
    /** Manifest metadata, present only for plugins that ship a manifest.json. */
    manifest?: ConnectorManifest;
    /** Called by the loader to obtain a connector instance. */
    factory: ConnectorFactory;
}
|
|
12
|
+
/**
 * Registry of the connector implementations available to the server.
 *
 * Sources are applied in a fixed order and a later source replaces an
 * earlier registration of the same name:
 *   1. builtin shim  — Prometheus/Loki bundled with the server
 *   2. filesystem    — every subdir of PLUGINS_DIR with a valid package.json
 *   3. config-pinned — `plugins:` block in sources.yaml (not yet wired)
 *
 * Its output can stand in for the legacy `connectorFactories` map in
 * registry.ts without changing observable behaviour.
 */
export declare class PluginLoader {
    private connectors;
    private pluginsDir;
    private disabled;
    constructor(opts?: {
        pluginsDir?: string;
        disabled?: string[];
    });
    /** Registers the builtin connectors, then scans the plugins directory. */
    load(): Promise<void>;
    /** All registered connectors. */
    list(): LoadedConnector[];
    /** The registration for `name`, or undefined when none exists. */
    get(name: string): LoadedConnector | undefined;
    /** Whether a connector called `name` is registered. */
    has(name: string): boolean;
    /** Names of all registered connectors. */
    supportedTypes(): string[];
    /** Create a fresh instance of a connector. Returns undefined for unknown types. */
    create(name: string): ObservabilityConnector | undefined;
    private loadBuiltins;
    private loadFilesystem;
    private loadFilesystemPlugin;
    private register;
}
|
|
42
|
+
/** Returns the process-wide loader, creating an empty one on first use. */
export declare function getPluginLoader(): PluginLoader;
|
|
43
|
+
/** Replaces the process-wide loader, e.g. with a test double. */
export declare function setPluginLoader(loader: PluginLoader): void;
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import { readdirSync, readFileSync, existsSync, statSync } from "node:fs";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
import { pathToFileURL } from "node:url";
|
|
4
|
+
import { manifestSchema } from "../sdk/manifest-schema.js";
|
|
5
|
+
import { PrometheusConnector } from "./prometheus.js";
|
|
6
|
+
import { LokiConnector } from "./loki.js";
|
|
7
|
+
import { sanitizeForLog } from "../util/sanitize.js";
|
|
8
|
+
import { instrumentConnector } from "../metrics/instrument-connector.js";
|
|
9
|
+
/**
|
|
10
|
+
* Resolves which connector implementations the server should know about,
|
|
11
|
+
* applying three sources in order (later overrides earlier):
|
|
12
|
+
* 1. builtin shim — Prometheus/Loki bundled with the server
|
|
13
|
+
* 2. filesystem — every subdir of PLUGINS_DIR with a valid package.json
|
|
14
|
+
* 3. config-pinned — `plugins:` block in sources.yaml (not yet wired)
|
|
15
|
+
*
|
|
16
|
+
* The legacy `connectorFactories` map in registry.ts can be replaced
|
|
17
|
+
* with this loader's output without changing observable behaviour.
|
|
18
|
+
*/
|
|
19
|
+
export class PluginLoader {
|
|
20
|
+
connectors = new Map();
|
|
21
|
+
pluginsDir;
|
|
22
|
+
disabled;
|
|
23
|
+
constructor(opts = {}) {
|
|
24
|
+
this.pluginsDir = opts.pluginsDir
|
|
25
|
+
?? process.env.PLUGINS_DIR
|
|
26
|
+
?? "/app/plugins";
|
|
27
|
+
// Per-plugin disable via env: PLUGINS_DISABLED="prometheus,loki"
|
|
28
|
+
const envDisabled = (process.env.PLUGINS_DISABLED ?? "")
|
|
29
|
+
.split(",")
|
|
30
|
+
.map((s) => s.trim())
|
|
31
|
+
.filter(Boolean);
|
|
32
|
+
this.disabled = new Set([...(opts.disabled ?? []), ...envDisabled]);
|
|
33
|
+
}
|
|
34
|
+
async load() {
|
|
35
|
+
this.loadBuiltins();
|
|
36
|
+
await this.loadFilesystem();
|
|
37
|
+
}
|
|
38
|
+
list() {
|
|
39
|
+
return Array.from(this.connectors.values());
|
|
40
|
+
}
|
|
41
|
+
get(name) {
|
|
42
|
+
return this.connectors.get(name);
|
|
43
|
+
}
|
|
44
|
+
has(name) {
|
|
45
|
+
return this.connectors.has(name);
|
|
46
|
+
}
|
|
47
|
+
supportedTypes() {
|
|
48
|
+
return Array.from(this.connectors.keys());
|
|
49
|
+
}
|
|
50
|
+
/** Create a fresh instance of a connector. Returns undefined for unknown types. */
|
|
51
|
+
create(name) {
|
|
52
|
+
const entry = this.connectors.get(name);
|
|
53
|
+
if (!entry)
|
|
54
|
+
return undefined;
|
|
55
|
+
const c = entry.factory();
|
|
56
|
+
if (c instanceof Promise) {
|
|
57
|
+
// For now connectors are sync-constructed; if a plugin returns a
|
|
58
|
+
// Promise we await it lazily in the consumer. Document if/when
|
|
59
|
+
// this becomes a real pattern.
|
|
60
|
+
throw new Error(`Connector ${name} returned a Promise; async factories not yet wired`);
|
|
61
|
+
}
|
|
62
|
+
return instrumentConnector(c);
|
|
63
|
+
}
|
|
64
|
+
loadBuiltins() {
|
|
65
|
+
this.register({
|
|
66
|
+
name: "prometheus",
|
|
67
|
+
source: "builtin",
|
|
68
|
+
factory: () => new PrometheusConnector(),
|
|
69
|
+
});
|
|
70
|
+
this.register({
|
|
71
|
+
name: "loki",
|
|
72
|
+
source: "builtin",
|
|
73
|
+
factory: () => new LokiConnector(),
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
async loadFilesystem() {
|
|
77
|
+
const dir = this.pluginsDir;
|
|
78
|
+
if (!existsSync(dir))
|
|
79
|
+
return;
|
|
80
|
+
let entries;
|
|
81
|
+
try {
|
|
82
|
+
entries = readdirSync(dir);
|
|
83
|
+
}
|
|
84
|
+
catch {
|
|
85
|
+
return;
|
|
86
|
+
}
|
|
87
|
+
for (const entry of entries) {
|
|
88
|
+
const pluginRoot = join(dir, entry);
|
|
89
|
+
try {
|
|
90
|
+
if (!statSync(pluginRoot).isDirectory())
|
|
91
|
+
continue;
|
|
92
|
+
await this.loadFilesystemPlugin(pluginRoot);
|
|
93
|
+
}
|
|
94
|
+
catch (err) {
|
|
95
|
+
console.warn("Failed to load plugin %s: %s", sanitizeForLog(entry), sanitizeForLog(String(err)));
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
async loadFilesystemPlugin(pluginRoot) {
|
|
100
|
+
const pkgPath = join(pluginRoot, "package.json");
|
|
101
|
+
if (!existsSync(pkgPath))
|
|
102
|
+
return;
|
|
103
|
+
const pkg = JSON.parse(readFileSync(pkgPath, "utf8"));
|
|
104
|
+
const marker = pkg.observabilityMcp;
|
|
105
|
+
if (!marker || marker.kind !== "connector" || !marker.name)
|
|
106
|
+
return;
|
|
107
|
+
let manifest;
|
|
108
|
+
if (marker.manifest) {
|
|
109
|
+
const manifestPath = resolve(pluginRoot, marker.manifest);
|
|
110
|
+
if (existsSync(manifestPath)) {
|
|
111
|
+
const raw = JSON.parse(readFileSync(manifestPath, "utf8"));
|
|
112
|
+
const parsed = manifestSchema.safeParse(raw);
|
|
113
|
+
if (!parsed.success) {
|
|
114
|
+
const issues = parsed.error.issues
|
|
115
|
+
.map((i) => `${i.path.join(".")}: ${i.message}`)
|
|
116
|
+
.join("; ");
|
|
117
|
+
console.warn("Plugin %s has invalid manifest.json — %s; skipping", sanitizeForLog(marker.name), sanitizeForLog(issues));
|
|
118
|
+
return;
|
|
119
|
+
}
|
|
120
|
+
manifest = parsed.data;
|
|
121
|
+
if (manifest.name !== marker.name) {
|
|
122
|
+
console.warn("Plugin %s package.json marker name does not match manifest.json (%s); skipping", sanitizeForLog(marker.name), sanitizeForLog(manifest.name));
|
|
123
|
+
return;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
const entryFile = pkg.main || "index.js";
|
|
128
|
+
const entryPath = resolve(pluginRoot, entryFile);
|
|
129
|
+
if (!existsSync(entryPath)) {
|
|
130
|
+
console.warn("Plugin %s missing entry file %s", sanitizeForLog(marker.name), sanitizeForLog(entryFile));
|
|
131
|
+
return;
|
|
132
|
+
}
|
|
133
|
+
const mod = await import(pathToFileURL(entryPath).href);
|
|
134
|
+
const factory = mod.default ?? mod.createConnector;
|
|
135
|
+
if (typeof factory !== "function") {
|
|
136
|
+
console.warn("Plugin %s has no default export factory", sanitizeForLog(marker.name));
|
|
137
|
+
return;
|
|
138
|
+
}
|
|
139
|
+
this.register({
|
|
140
|
+
name: marker.name,
|
|
141
|
+
source: "filesystem",
|
|
142
|
+
manifest,
|
|
143
|
+
factory,
|
|
144
|
+
});
|
|
145
|
+
console.log('Connector plugin "%s" loaded from %s', sanitizeForLog(marker.name), sanitizeForLog(pluginRoot));
|
|
146
|
+
}
|
|
147
|
+
register(entry) {
|
|
148
|
+
if (this.disabled.has(entry.name)) {
|
|
149
|
+
console.log("Connector %s disabled via PLUGINS_DISABLED; skipping", sanitizeForLog(entry.name));
|
|
150
|
+
return;
|
|
151
|
+
}
|
|
152
|
+
// Later sources override earlier ones; current call order is
|
|
153
|
+
// builtin → filesystem → config-pinned, matching the design doc.
|
|
154
|
+
this.connectors.set(entry.name, entry);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
/**
 * Process-wide loader instance. It is created lazily by `getPluginLoader`
 * and can be replaced through `setPluginLoader`, which is how tests
 * substitute their own instance. Creating the loader does not call
 * `load()`; the caller is responsible for that.
 */
let activeLoader = null;
/** Returns the current loader, constructing a default one if none is set. */
export function getPluginLoader() {
    activeLoader = activeLoader || new PluginLoader();
    return activeLoader;
}
/** Makes `loader` the instance returned by `getPluginLoader`. */
export function setPluginLoader(loader) {
    activeLoader = loader;
}
|
package/dist/connectors/loki.js
CHANGED
|
@@ -200,8 +200,9 @@ export class LokiConnector {
|
|
|
200
200
|
return value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
|
|
201
201
|
}
|
|
202
202
|
escapeLogQLRegex(value) {
|
|
203
|
-
// Escape
|
|
204
|
-
|
|
203
|
+
// Escape backslash first (so we don't double-escape sequences we add),
|
|
204
|
+
// then the backtick that delimits LogQL regex literals.
|
|
205
|
+
return value.replace(/\\/g, "\\\\").replace(/`/g, "\\`");
|
|
205
206
|
}
|
|
206
207
|
buildAuthHeaders() {
|
|
207
208
|
if (!this.auth || this.auth.type === "none")
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
import type { ObservabilityConnector } from "./interface.js";
|
|
2
2
|
import type { Config, ConnectorHealth, SignalType, SourceConfig } from "../types.js";
|
|
3
|
+
import { type PluginLoader } from "./loader.js";
|
|
3
4
|
export declare function getSupportedTypes(): string[];
|
|
4
5
|
export declare class ConnectorRegistry {
|
|
5
6
|
private connectors;
|
|
6
7
|
private sourceConfigs;
|
|
8
|
+
private loader;
|
|
9
|
+
constructor(loader?: PluginLoader);
|
|
7
10
|
initialize(config: Config): Promise<void>;
|
|
8
11
|
private connectSource;
|
|
9
12
|
addSource(source: SourceConfig): Promise<void>;
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
const connectorFactories = {
|
|
4
|
-
prometheus: () => new PrometheusConnector(),
|
|
5
|
-
loki: () => new LokiConnector(),
|
|
6
|
-
};
|
|
1
|
+
import { getPluginLoader } from "./loader.js";
|
|
2
|
+
import { sanitizeForLog } from "../util/sanitize.js";
|
|
7
3
|
export function getSupportedTypes() {
|
|
8
|
-
return
|
|
4
|
+
return getPluginLoader().supportedTypes();
|
|
9
5
|
}
|
|
10
6
|
export class ConnectorRegistry {
|
|
11
7
|
connectors = new Map();
|
|
12
8
|
sourceConfigs = new Map();
|
|
9
|
+
loader;
|
|
10
|
+
constructor(loader = getPluginLoader()) {
|
|
11
|
+
this.loader = loader;
|
|
12
|
+
}
|
|
13
13
|
async initialize(config) {
|
|
14
14
|
for (const source of config.sources) {
|
|
15
15
|
this.sourceConfigs.set(source.name, source);
|
|
@@ -19,19 +19,20 @@ export class ConnectorRegistry {
|
|
|
19
19
|
}
|
|
20
20
|
}
|
|
21
21
|
async connectSource(source) {
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
const connector = this.loader.create(source.type);
|
|
23
|
+
const safeName = sanitizeForLog(source.name);
|
|
24
|
+
const safeType = sanitizeForLog(source.type);
|
|
25
|
+
if (!connector) {
|
|
26
|
+
console.warn("Unknown connector type: %s, skipping %s", safeType, safeName);
|
|
25
27
|
return;
|
|
26
28
|
}
|
|
27
|
-
const connector = factory();
|
|
28
29
|
try {
|
|
29
30
|
await connector.connect(source);
|
|
30
31
|
this.connectors.set(source.name, connector);
|
|
31
|
-
console.log(
|
|
32
|
+
console.log('Connector "%s" (%s) connected', safeName, safeType);
|
|
32
33
|
}
|
|
33
34
|
catch (err) {
|
|
34
|
-
console.error(
|
|
35
|
+
console.error('Failed to connect "%s":', safeName, err);
|
|
35
36
|
}
|
|
36
37
|
}
|
|
37
38
|
async addSource(source) {
|
|
@@ -53,11 +54,10 @@ export class ConnectorRegistry {
|
|
|
53
54
|
await this.addSource(source);
|
|
54
55
|
}
|
|
55
56
|
async testConnection(source) {
|
|
56
|
-
const
|
|
57
|
-
if (!
|
|
57
|
+
const connector = this.loader.create(source.type);
|
|
58
|
+
if (!connector) {
|
|
58
59
|
return { status: "down", latencyMs: 0, message: `Unknown type: ${source.type}` };
|
|
59
60
|
}
|
|
60
|
-
const connector = factory();
|
|
61
61
|
try {
|
|
62
62
|
await connector.connect(source);
|
|
63
63
|
const health = await connector.healthCheck();
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { describe, it, before, after } from "node:test";
|
|
2
2
|
import assert from "node:assert/strict";
|
|
3
3
|
import { Agent } from "node:https";
|
|
4
|
-
import { writeFileSync,
|
|
4
|
+
import { writeFileSync, mkdtempSync, rmSync } from "node:fs";
|
|
5
5
|
import { join } from "node:path";
|
|
6
6
|
import { tmpdir } from "node:os";
|
|
7
7
|
import { buildTlsAgent } from "./tls.js";
|
|
8
|
-
|
|
8
|
+
let TMP_DIR;
|
|
9
9
|
function makeConfig(overrides = {}) {
|
|
10
10
|
return { name: "test", type: "prometheus", url: "https://localhost:9090", enabled: true, ...overrides };
|
|
11
11
|
}
|
|
@@ -38,7 +38,7 @@ describe("buildTlsAgent", () => {
|
|
|
38
38
|
});
|
|
39
39
|
describe("with certificate files", () => {
|
|
40
40
|
before(() => {
|
|
41
|
-
|
|
41
|
+
TMP_DIR = mkdtempSync(join(tmpdir(), "tls-test-"));
|
|
42
42
|
writeFileSync(join(TMP_DIR, "ca.pem"), "-----BEGIN CERTIFICATE-----\nfake-ca\n-----END CERTIFICATE-----\n");
|
|
43
43
|
writeFileSync(join(TMP_DIR, "client.pem"), "-----BEGIN CERTIFICATE-----\nfake-client\n-----END CERTIFICATE-----\n");
|
|
44
44
|
writeFileSync(join(TMP_DIR, "client-key.pem"), "-----BEGIN PRIVATE KEY-----\nfake-key\n-----END PRIVATE KEY-----\n");
|
package/dist/index.js
CHANGED
|
@@ -6,6 +6,9 @@ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/
|
|
|
6
6
|
import { z } from "zod";
|
|
7
7
|
import { loadConfig, saveConfig, DEFAULT_HEALTH_THRESHOLDS, DEFAULT_SETTINGS } from "./config/loader.js";
|
|
8
8
|
import { ConnectorRegistry, getSupportedTypes } from "./connectors/registry.js";
|
|
9
|
+
import { getPluginLoader } from "./connectors/loader.js";
|
|
10
|
+
import { selfRegistry, withToolMetrics, apiRequests, mcpActiveSessions } from "./metrics/self.js";
|
|
11
|
+
import { buildOpenApiSpec } from "./openapi.js";
|
|
9
12
|
import { listSourcesHandler } from "./tools/list-sources.js";
|
|
10
13
|
import { listServicesHandler } from "./tools/list-services.js";
|
|
11
14
|
import { queryMetricsHandler } from "./tools/query-metrics.js";
|
|
@@ -14,7 +17,19 @@ import { getServiceHealthHandler, setHealthThresholds } from "./tools/get-servic
|
|
|
14
17
|
import { detectAnomaliesHandler } from "./tools/detect-anomalies.js";
|
|
15
18
|
import { fileURLToPath } from "node:url";
|
|
16
19
|
import { dirname, join } from "node:path";
|
|
20
|
+
import { readFileSync } from "node:fs";
|
|
17
21
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
22
|
+
// Resolved a single time at module load from the package.json one level
// above this file. Any failure to read or parse it, and a missing
// `version` field, all yield "unknown".
const SERVER_VERSION = (() => {
    let version;
    try {
        const manifestPath = join(__dirname, "..", "package.json");
        version = JSON.parse(readFileSync(manifestPath, "utf8")).version;
    }
    catch {
        return "unknown";
    }
    return version ?? "unknown";
})();
|
|
18
33
|
function applyConfigToRuntime(config, registry) {
|
|
19
34
|
setHealthThresholds(config.healthThresholds);
|
|
20
35
|
}
|
|
@@ -54,6 +69,7 @@ function validateSourceUrl(url) {
|
|
|
54
69
|
}
|
|
55
70
|
async function main() {
|
|
56
71
|
let config = loadConfig();
|
|
72
|
+
await getPluginLoader().load();
|
|
57
73
|
const registry = new ConnectorRegistry();
|
|
58
74
|
await registry.initialize(config);
|
|
59
75
|
applyConfigToRuntime(config, registry);
|
|
@@ -64,11 +80,11 @@ async function main() {
|
|
|
64
80
|
function createMcpServer() {
|
|
65
81
|
const mcpServer = new McpServer({
|
|
66
82
|
name: "observability-mcp",
|
|
67
|
-
version:
|
|
83
|
+
version: SERVER_VERSION,
|
|
68
84
|
});
|
|
69
85
|
// --- Register tools with Zod schemas ---
|
|
70
|
-
mcpServer.tool("list_sources", "List all configured observability backends and their connection status. Use this to discover what data sources are available.", {}, async () => listSourcesHandler(registry));
|
|
71
|
-
mcpServer.tool("list_services", "List all monitored services discovered across all connected backends. Returns service names, their data sources, and signal types (metrics/logs).", { filter: z.string().optional().describe("Optional filter to match service names") }, async (args) => listServicesHandler(registry, args));
|
|
86
|
+
mcpServer.tool("list_sources", "List all configured observability backends and their connection status. Use this to discover what data sources are available.", {}, async () => withToolMetrics("list_sources", () => listSourcesHandler(registry)));
|
|
87
|
+
mcpServer.tool("list_services", "List all monitored services discovered across all connected backends. Returns service names, their data sources, and signal types (metrics/logs).", { filter: z.string().optional().describe("Optional filter to match service names") }, async (args) => withToolMetrics("list_services", () => listServicesHandler(registry, args)));
|
|
72
88
|
const metricsList = getAvailableMetricNames(registry);
|
|
73
89
|
const metricNames = registry.getBySignal("metrics").flatMap(c => c.getMetrics().map(m => m.name));
|
|
74
90
|
const uniqueNames = [...new Set(metricNames)];
|
|
@@ -78,22 +94,22 @@ async function main() {
|
|
|
78
94
|
duration: z.string().optional().describe("Time range (e.g. '5m', '1h', '24h'). Default: '5m'"),
|
|
79
95
|
source: z.string().optional().describe("Specific source name. If omitted, queries all metrics backends."),
|
|
80
96
|
groupBy: z.string().optional().describe("Label to break the result down by, e.g. 'instance', 'pod', 'node'. Returns one series per distinct value in 'groups'."),
|
|
81
|
-
}, async (args) => queryMetricsHandler(registry, args));
|
|
97
|
+
}, async (args) => withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args)));
|
|
82
98
|
mcpServer.tool("query_logs", "Query logs for a service over a given timeframe. Returns log entries with a summary including error/warning counts and top error patterns.", {
|
|
83
99
|
service: z.string().describe("Service name (e.g. 'payment-service')"),
|
|
84
100
|
query: z.string().optional().describe("Optional search query to filter log messages (regex supported)"),
|
|
85
101
|
duration: z.string().optional().describe("Time range (e.g. '5m', '1h', '24h'). Default: '5m'"),
|
|
86
102
|
level: z.string().optional().describe("Filter by log level: 'error', 'warn', 'info', 'debug'"),
|
|
87
103
|
limit: z.number().optional().describe("Maximum log entries to return. Default: 100"),
|
|
88
|
-
}, async (args) => queryLogsHandler(registry, args));
|
|
104
|
+
}, async (args) => withToolMetrics("query_logs", () => queryLogsHandler(registry, args)));
|
|
89
105
|
mcpServer.tool("get_service_health", "Get an aggregated health overview for a service combining metrics AND logs. Returns health score (0-100), status (healthy/degraded/critical), key metrics, log error summary, anomalies, and cross-signal correlations.", {
|
|
90
106
|
service: z.string().describe("Service name to check health for"),
|
|
91
|
-
}, async (args) => getServiceHealthHandler(registry, args));
|
|
107
|
+
}, async (args) => withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args)));
|
|
92
108
|
mcpServer.tool("detect_anomalies", "Scan for anomalies across all monitored services (or a specific one). Uses z-score analysis on metrics, checks log error spikes, and correlates signals. Returns anomalies with severity ratings.", {
|
|
93
109
|
service: z.string().optional().describe("Specific service to scan. If omitted, scans all."),
|
|
94
110
|
duration: z.string().optional().describe("Time range to analyze (e.g. '5m', '15m', '1h'). Default: '10m'"),
|
|
95
111
|
sensitivity: z.enum(["low", "medium", "high"]).optional().describe("Detection sensitivity: low (>3σ), medium (>2σ), high (>1.5σ). Default: 'medium'"),
|
|
96
|
-
}, async (args) => detectAnomaliesHandler(registry, args));
|
|
112
|
+
}, async (args) => withToolMetrics("detect_anomalies", () => detectAnomaliesHandler(registry, args)));
|
|
97
113
|
return mcpServer;
|
|
98
114
|
}
|
|
99
115
|
// --- HTTP server ---
|
|
@@ -107,6 +123,45 @@ async function main() {
|
|
|
107
123
|
res.setHeader("Referrer-Policy", "strict-origin-when-cross-origin");
|
|
108
124
|
next();
|
|
109
125
|
});
|
|
126
|
+
// API request counter — emitted at response time so the `status` label
|
|
127
|
+
// is the real outcome. /metrics itself is excluded to avoid self-scrape
|
|
128
|
+
// amplification.
|
|
129
|
+
app.use((req, res, next) => {
|
|
130
|
+
if (req.path === "/metrics")
|
|
131
|
+
return next();
|
|
132
|
+
res.on("finish", () => {
|
|
133
|
+
// Group dynamic segments by the registered Express route when we
|
|
134
|
+
// have one, otherwise fall back to the literal path. This keeps
|
|
135
|
+
// label cardinality bounded.
|
|
136
|
+
const route = req.route?.path ?? req.path;
|
|
137
|
+
apiRequests.inc({ route, method: req.method, status: String(res.statusCode) });
|
|
138
|
+
});
|
|
139
|
+
next();
|
|
140
|
+
});
|
|
141
|
+
// k8s-convention liveness/readiness probes at the root of the path
|
|
142
|
+
// tree, no /api prefix. Helm chart points its probes here. Cheap
|
|
143
|
+
// enough that counting them in the request-counter middleware above is harmless.
|
|
144
|
+
let ready = false;
|
|
145
|
+
app.get("/healthz", (_req, res) => res.type("text").send("ok"));
|
|
146
|
+
app.get("/readyz", (_req, res) => {
|
|
147
|
+
if (ready)
|
|
148
|
+
return res.type("text").send("ok");
|
|
149
|
+
return res.status(503).type("text").send("starting");
|
|
150
|
+
});
|
|
151
|
+
// OpenAPI 3.1 document for the /api/* surface.
|
|
152
|
+
app.get("/api/openapi.json", (_req, res) => {
|
|
153
|
+
res.json(buildOpenApiSpec(SERVER_VERSION));
|
|
154
|
+
});
|
|
155
|
+
// Self-monitoring — Prometheus scrape endpoint.
|
|
156
|
+
// Disabled with METRICS_ENABLED=false for environments that prefer
|
|
157
|
+
// sidecar agents. The Helm chart's ServiceMonitor template targets
|
|
158
|
+
// this endpoint when enabled.
|
|
159
|
+
if (process.env.METRICS_ENABLED !== "false") {
|
|
160
|
+
app.get("/metrics", async (_req, res) => {
|
|
161
|
+
res.set("Content-Type", selfRegistry.contentType);
|
|
162
|
+
res.end(await selfRegistry.metrics());
|
|
163
|
+
});
|
|
164
|
+
}
|
|
110
165
|
// Serve Web UI
|
|
111
166
|
app.use(express.static(join(__dirname, "ui")));
|
|
112
167
|
// --- API endpoints for Web UI ---
|
|
@@ -135,6 +190,31 @@ async function main() {
|
|
|
135
190
|
app.get("/api/source-types", (_req, res) => {
|
|
136
191
|
res.json(getSupportedTypes());
|
|
137
192
|
});
|
|
193
|
+
// Server info — version, loaded plugins, MCP protocol version, build metadata.
|
|
194
|
+
// Used by the Web UI footer and by operators to confirm what's deployed.
|
|
195
|
+
app.get("/api/info", async (_req, res) => {
|
|
196
|
+
const loader = getPluginLoader();
|
|
197
|
+
res.json({
|
|
198
|
+
name: "observability-mcp",
|
|
199
|
+
version: SERVER_VERSION,
|
|
200
|
+
mcpProtocolVersion: "2025-03-26",
|
|
201
|
+
build: {
|
|
202
|
+
commit: process.env.GIT_COMMIT || null,
|
|
203
|
+
date: process.env.BUILD_DATE || null,
|
|
204
|
+
},
|
|
205
|
+
runtime: {
|
|
206
|
+
node: process.version,
|
|
207
|
+
platform: process.platform,
|
|
208
|
+
arch: process.arch,
|
|
209
|
+
},
|
|
210
|
+
plugins: loader.list().map((p) => ({
|
|
211
|
+
name: p.name,
|
|
212
|
+
source: p.source,
|
|
213
|
+
version: p.manifest?.version ?? null,
|
|
214
|
+
signalTypes: p.manifest?.signalTypes ?? null,
|
|
215
|
+
})),
|
|
216
|
+
});
|
|
217
|
+
});
|
|
138
218
|
// Add a new source
|
|
139
219
|
app.post("/api/sources", async (req, res) => {
|
|
140
220
|
const { name, type, url, enabled, auth, tls } = req.body;
|
|
@@ -366,6 +446,7 @@ async function main() {
|
|
|
366
446
|
console.log(`Session ${sid} expired (idle)`);
|
|
367
447
|
}
|
|
368
448
|
}
|
|
449
|
+
mcpActiveSessions.set(transports.size);
|
|
369
450
|
}, 5 * 60 * 1000);
|
|
370
451
|
app.post("/mcp", async (req, res) => {
|
|
371
452
|
const sessionId = req.headers["mcp-session-id"];
|
|
@@ -385,6 +466,7 @@ async function main() {
|
|
|
385
466
|
break;
|
|
386
467
|
}
|
|
387
468
|
}
|
|
469
|
+
mcpActiveSessions.set(transports.size);
|
|
388
470
|
};
|
|
389
471
|
const sessionMcpServer = createMcpServer();
|
|
390
472
|
await sessionMcpServer.connect(transport);
|
|
@@ -397,6 +479,7 @@ async function main() {
|
|
|
397
479
|
transports.set(sid, transport);
|
|
398
480
|
sessionLastActive.set(sid, Date.now());
|
|
399
481
|
}
|
|
482
|
+
mcpActiveSessions.set(transports.size);
|
|
400
483
|
});
|
|
401
484
|
app.get("/mcp", async (req, res) => {
|
|
402
485
|
const sessionId = req.headers["mcp-session-id"];
|
|
@@ -421,6 +504,7 @@ async function main() {
|
|
|
421
504
|
});
|
|
422
505
|
const PORT = parseInt(process.env.PORT || "3000");
|
|
423
506
|
app.listen(PORT, () => {
|
|
507
|
+
ready = true;
|
|
424
508
|
console.log(`observability-mcp server running on port ${PORT}`);
|
|
425
509
|
console.log(` MCP endpoint: http://localhost:${PORT}/mcp`);
|
|
426
510
|
console.log(` Web UI: http://localhost:${PORT}`);
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { ObservabilityConnector } from "../connectors/interface.js";
|
|
2
|
+
/**
 * Wraps the backend-facing methods of a connector so that each call is
 * counted in obsmcp_connector_calls_total{source,type,operation,outcome}.
 * The `source` label is taken from the config passed to `connect()`;
 * calls made before that are labelled "<pending>". The connector is
 * modified in place and the same object is returned, which keeps
 * connector implementations free of metrics code.
 */
export declare function instrumentConnector<T extends ObservabilityConnector>(c: T): T;
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { connectorCalls } from "./self.js";
|
|
2
|
+
const OPS = [
|
|
3
|
+
"healthCheck",
|
|
4
|
+
"listServices",
|
|
5
|
+
"queryMetrics",
|
|
6
|
+
"queryLogs",
|
|
7
|
+
"listAvailableMetrics",
|
|
8
|
+
];
|
|
9
|
+
/**
|
|
10
|
+
* Decorate a connector so every observable backend call increments
|
|
11
|
+
* obsmcp_connector_calls_total{source,type,operation,outcome}. The
|
|
12
|
+
* `source` label is filled in on first `connect()` once the config
|
|
13
|
+
* is known. Keeps connector implementations free of metrics code.
|
|
14
|
+
*/
|
|
15
|
+
export function instrumentConnector(c) {
|
|
16
|
+
let source = "";
|
|
17
|
+
const type = c.type;
|
|
18
|
+
const wrappedConnect = c.connect.bind(c);
|
|
19
|
+
c.connect = async (config) => {
|
|
20
|
+
source = config.name;
|
|
21
|
+
return wrappedConnect(config);
|
|
22
|
+
};
|
|
23
|
+
for (const op of OPS) {
|
|
24
|
+
const fn = c[op];
|
|
25
|
+
if (typeof fn !== "function")
|
|
26
|
+
continue;
|
|
27
|
+
const bound = fn.bind(c);
|
|
28
|
+
c[op] = async (...args) => {
|
|
29
|
+
try {
|
|
30
|
+
const r = await bound(...args);
|
|
31
|
+
connectorCalls.inc({ source: source || "<pending>", type, operation: op, outcome: "ok" });
|
|
32
|
+
return r;
|
|
33
|
+
}
|
|
34
|
+
catch (err) {
|
|
35
|
+
connectorCalls.inc({ source: source || "<pending>", type, operation: op, outcome: "error" });
|
|
36
|
+
throw err;
|
|
37
|
+
}
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
return c;
|
|
41
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { Registry, Counter, Histogram, Gauge } from "prom-client";
|
|
2
|
+
/** Registry that holds the server's self-metrics; serialised by the /metrics endpoint. */
export declare const selfRegistry: Registry<"text/plain; version=0.0.4; charset=utf-8">;
/** MCP tool invocations, labelled by tool and outcome. */
export declare const mcpToolCalls: Counter<"tool" | "outcome">;
/** MCP tool invocation latency in seconds, labelled by tool. */
export declare const mcpToolLatency: Histogram<"tool">;
/** Calls made to configured connectors, labelled by source, type, operation and outcome. */
export declare const connectorCalls: Counter<"type" | "source" | "outcome" | "operation">;
/** Web UI / API requests, labelled by route, method and status. */
export declare const apiRequests: Counter<"status" | "route" | "method">;
/** Number of active MCP Streamable HTTP sessions. */
export declare const mcpActiveSessions: Gauge<string>;
|
|
8
|
+
/**
 * Runs a tool handler while recording one call-count increment and one
 * latency observation for `tool`. The recorded outcome is "ok" or "error".
 * The wrapper adds no failure of its own: the handler's result is passed
 * through and an error raised by the handler is rethrown.
 */
export declare function withToolMetrics<T>(tool: string, fn: () => Promise<T>): Promise<T>;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// Server self-metrics exposed at /metrics for Prometheus scraping.
|
|
2
|
+
// Pairs with the Helm chart's ServiceMonitor template.
|
|
3
|
+
//
|
|
4
|
+
// Default Node metrics (CPU, memory, event loop lag, heap) come from
|
|
5
|
+
// prom-client's collectDefaultMetrics. On top of that we ship four
|
|
6
|
+
// product-specific counters/histograms that operators actually need
|
|
7
|
+
// to graph: MCP tool calls, connector backend calls, /api/* requests,
|
|
8
|
+
// active session count.
|
|
9
|
+
import { Registry, collectDefaultMetrics, Counter, Histogram, Gauge, } from "prom-client";
|
|
10
|
+
/** Registry that every self-metric below is attached to. */
export const selfRegistry = new Registry();
selfRegistry.setDefaultLabels({ service: "observability-mcp" });
collectDefaultMetrics({ register: selfRegistry, prefix: "obsmcp_" });
// Shared `registers` option: each metric is attached to selfRegistry only.
const registers = [selfRegistry];
/** MCP tool invocations, by tool and outcome. */
export const mcpToolCalls = new Counter({
    name: "obsmcp_mcp_tool_calls_total",
    help: "MCP tool invocations by tool and outcome.",
    labelNames: ["tool", "outcome"],
    registers,
});
/** MCP tool invocation latency in seconds, by tool. */
export const mcpToolLatency = new Histogram({
    name: "obsmcp_mcp_tool_duration_seconds",
    help: "MCP tool invocation latency.",
    labelNames: ["tool"],
    buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
    registers,
});
/** Calls made to configured connectors. */
export const connectorCalls = new Counter({
    name: "obsmcp_connector_calls_total",
    help: "Calls to a configured connector, by source and outcome.",
    labelNames: ["source", "type", "operation", "outcome"],
    registers,
});
/** Web UI / API requests. */
export const apiRequests = new Counter({
    name: "obsmcp_api_requests_total",
    help: "Web UI / API request count, by route and status.",
    labelNames: ["route", "method", "status"],
    registers,
});
/** Number of active MCP Streamable HTTP sessions. */
export const mcpActiveSessions = new Gauge({
    name: "obsmcp_mcp_active_sessions",
    help: "Active MCP Streamable HTTP sessions.",
    registers,
});
|
|
43
|
+
/**
|
|
44
|
+
* Wrap a (potentially async) tool handler to record call count + latency.
|
|
45
|
+
* Outcome is "ok" or "error" — never throws on its own.
|
|
46
|
+
*/
|
|
47
|
+
export async function withToolMetrics(tool, fn) {
|
|
48
|
+
const end = mcpToolLatency.startTimer({ tool });
|
|
49
|
+
try {
|
|
50
|
+
const r = await fn();
|
|
51
|
+
mcpToolCalls.inc({ tool, outcome: "ok" });
|
|
52
|
+
return r;
|
|
53
|
+
}
|
|
54
|
+
catch (err) {
|
|
55
|
+
mcpToolCalls.inc({ tool, outcome: "error" });
|
|
56
|
+
throw err;
|
|
57
|
+
}
|
|
58
|
+
finally {
|
|
59
|
+
end();
|
|
60
|
+
}
|
|
61
|
+
}
|