@thotischner/observability-mcp 1.4.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analysis/anomaly.d.ts +89 -0
- package/dist/analysis/anomaly.js +235 -0
- package/dist/analysis/anomaly.test.js +149 -1
- package/dist/analysis/backtest.d.ts +31 -0
- package/dist/analysis/backtest.js +206 -0
- package/dist/analysis/backtest.test.d.ts +1 -0
- package/dist/analysis/backtest.test.js +34 -0
- package/dist/analysis/correlator.d.ts +35 -0
- package/dist/analysis/correlator.js +95 -0
- package/dist/analysis/correlator.test.js +60 -1
- package/dist/analysis/health.d.ts +2 -3
- package/dist/analysis/index.d.ts +32 -0
- package/dist/analysis/index.js +29 -0
- package/dist/analysis/library.test.d.ts +1 -0
- package/dist/analysis/library.test.js +44 -0
- package/dist/auth/credentials.d.ts +29 -0
- package/dist/auth/credentials.js +76 -0
- package/dist/auth/credentials.test.d.ts +1 -0
- package/dist/auth/credentials.test.js +57 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +370 -0
- package/dist/cli/lib.d.ts +95 -0
- package/dist/cli/lib.js +185 -0
- package/dist/cli/lib.test.d.ts +1 -0
- package/dist/cli/lib.test.js +134 -0
- package/dist/connectors/hub.d.ts +48 -0
- package/dist/connectors/hub.js +51 -0
- package/dist/connectors/hub.test.d.ts +1 -0
- package/dist/connectors/hub.test.js +52 -0
- package/dist/connectors/install.d.ts +24 -0
- package/dist/connectors/install.js +100 -0
- package/dist/connectors/install.test.d.ts +1 -0
- package/dist/connectors/install.test.js +58 -0
- package/dist/connectors/loader.d.ts +5 -0
- package/dist/connectors/loader.js +54 -2
- package/dist/connectors/loki.js +11 -4
- package/dist/connectors/loki.test.js +27 -0
- package/dist/connectors/verify.d.ts +19 -0
- package/dist/connectors/verify.js +87 -0
- package/dist/connectors/verify.test.d.ts +1 -0
- package/dist/connectors/verify.test.js +63 -0
- package/dist/context.d.ts +27 -0
- package/dist/context.js +18 -0
- package/dist/index.js +322 -34
- package/dist/net/egress-policy.d.ts +31 -0
- package/dist/net/egress-policy.js +37 -0
- package/dist/net/egress-policy.test.d.ts +1 -0
- package/dist/net/egress-policy.test.js +52 -0
- package/dist/sdk/index.d.ts +6 -0
- package/dist/sdk/manifest-schema.d.ts +1 -0
- package/dist/sdk/manifest-schema.js +11 -0
- package/dist/tools/context-seam.test.d.ts +1 -0
- package/dist/tools/context-seam.test.js +23 -0
- package/dist/tools/detect-anomalies.d.ts +2 -1
- package/dist/tools/detect-anomalies.js +47 -11
- package/dist/tools/get-service-health.d.ts +2 -1
- package/dist/tools/get-service-health.js +2 -1
- package/dist/tools/handlers.test.js +73 -0
- package/dist/tools/list-services.d.ts +2 -1
- package/dist/tools/list-services.js +2 -1
- package/dist/tools/list-sources.d.ts +2 -1
- package/dist/tools/list-sources.js +2 -1
- package/dist/tools/query-logs.d.ts +2 -1
- package/dist/tools/query-logs.js +2 -1
- package/dist/tools/query-metrics.d.ts +2 -1
- package/dist/tools/query-metrics.js +9 -1
- package/dist/ui/index.html +119 -4
- package/package.json +18 -5
package/dist/index.js
CHANGED
|
@@ -1,12 +1,19 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import express from "express";
|
|
3
|
+
import rateLimit from "express-rate-limit";
|
|
3
4
|
import { randomUUID } from "node:crypto";
|
|
4
5
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
5
6
|
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
7
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
6
8
|
import { z } from "zod";
|
|
7
9
|
import { loadConfig, saveConfig, DEFAULT_HEALTH_THRESHOLDS, DEFAULT_SETTINGS } from "./config/loader.js";
|
|
8
10
|
import { ConnectorRegistry, getSupportedTypes } from "./connectors/registry.js";
|
|
11
|
+
import { defaultContext, principalContext } from "./context.js";
|
|
12
|
+
import { loadCredentials, credentialsConfigured, extractToken, resolveToken, } from "./auth/credentials.js";
|
|
9
13
|
import { getPluginLoader } from "./connectors/loader.js";
|
|
14
|
+
import { resolveHubCatalogUrl, describeInstalled, mergeCatalog, fetchHubCatalog, } from "./connectors/hub.js";
|
|
15
|
+
import { isValidConnectorName, installTarball } from "./connectors/install.js";
|
|
16
|
+
import { PluginVerificationError } from "./connectors/verify.js";
|
|
10
17
|
import { selfRegistry, withToolMetrics, apiRequests, mcpActiveSessions } from "./metrics/self.js";
|
|
11
18
|
import { buildOpenApiSpec } from "./openapi.js";
|
|
12
19
|
import { listSourcesHandler } from "./tools/list-sources.js";
|
|
@@ -17,7 +24,8 @@ import { getServiceHealthHandler, setHealthThresholds } from "./tools/get-servic
|
|
|
17
24
|
import { detectAnomaliesHandler } from "./tools/detect-anomalies.js";
|
|
18
25
|
import { fileURLToPath } from "node:url";
|
|
19
26
|
import { dirname, join } from "node:path";
|
|
20
|
-
import { readFileSync } from "node:fs";
|
|
27
|
+
import { readFileSync, writeFileSync, mkdtempSync, rmSync } from "node:fs";
|
|
28
|
+
import { tmpdir } from "node:os";
|
|
21
29
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
22
30
|
// Read once at startup; the file is shipped inside the image so this
|
|
23
31
|
// is the source of truth even when the user runs from `npx`.
|
|
@@ -67,7 +75,32 @@ function validateSourceUrl(url) {
|
|
|
67
75
|
return `Invalid URL: "${url}"`;
|
|
68
76
|
}
|
|
69
77
|
}
|
|
78
|
+
// Hard cap for a downloaded/uploaded connector tarball (defence against
|
|
79
|
+
// a hostile or accidental huge artifact OOM-ing the server).
|
|
80
|
+
const MAX_CONNECTOR_TGZ_BYTES = 64 * 1024 * 1024;
|
|
81
|
+
// Per-client rate limiter for the expensive runtime routes (connector
|
|
82
|
+
// install/upload: fetch + extract + verify + fs write + loader rescan;
|
|
83
|
+
// add/test source: outbound backend connect). Uses express-rate-limit
|
|
84
|
+
// so the control is explicit and well-tested. Bounds abuse even with
|
|
85
|
+
// ENABLE_UI_INSTALL on.
|
|
86
|
+
const installRateLimit = rateLimit({
|
|
87
|
+
windowMs: 60_000,
|
|
88
|
+
limit: 5,
|
|
89
|
+
standardHeaders: true,
|
|
90
|
+
legacyHeaders: false,
|
|
91
|
+
message: { error: "rate limit exceeded — too many attempts, slow down" },
|
|
92
|
+
});
|
|
70
93
|
async function main() {
|
|
94
|
+
// Stdio transport mode (MCP catalogs / desktop clients / Glama's
|
|
95
|
+
// mcp-proxy spawn a stdio MCP server and read JSON-RPC from stdout).
|
|
96
|
+
// The protocol stream MUST be the only thing on stdout, so route all
|
|
97
|
+
// console.log to stderr before anything logs.
|
|
98
|
+
const STDIO = process.argv.includes("--stdio") ||
|
|
99
|
+
process.env.MCP_TRANSPORT === "stdio" ||
|
|
100
|
+
!!process.env.MCP_STDIO;
|
|
101
|
+
if (STDIO) {
|
|
102
|
+
console.log = (...a) => console.error(...a);
|
|
103
|
+
}
|
|
71
104
|
let config = loadConfig();
|
|
72
105
|
await getPluginLoader().load();
|
|
73
106
|
const registry = new ConnectorRegistry();
|
|
@@ -77,50 +110,133 @@ async function main() {
|
|
|
77
110
|
// so we cannot share a single McpServer across HTTP sessions. Each new
|
|
78
111
|
// session needs its own server. The factory captures the live registry
|
|
79
112
|
// by reference so tool handlers always see the current configuration.
|
|
80
|
-
function createMcpServer() {
|
|
113
|
+
function createMcpServer(ctx) {
|
|
81
114
|
const mcpServer = new McpServer({
|
|
82
115
|
name: "observability-mcp",
|
|
83
116
|
version: SERVER_VERSION,
|
|
84
117
|
});
|
|
85
118
|
// --- Register tools with Zod schemas ---
|
|
86
|
-
mcpServer.tool("list_sources",
|
|
87
|
-
|
|
119
|
+
mcpServer.tool("list_sources", [
|
|
120
|
+
"List the configured observability backends (Prometheus, Loki, and any connector) and whether each is currently reachable.",
|
|
121
|
+
"When to use: call this first to learn which source names exist and are healthy before passing `source` to other tools, or to debug why a query returns no data.",
|
|
122
|
+
"Behavior: read-only, no side effects. Returns one entry per source with its name, type, configured URL, signal types (metrics/logs), and a live up/down status. Never throws for an unreachable backend — the backend is reported as down instead.",
|
|
123
|
+
"Related: use `list_services` to see what is monitored within these sources.",
|
|
124
|
+
].join(" "), {}, async () => withToolMetrics("list_sources", () => listSourcesHandler(registry, ctx)));
|
|
125
|
+
mcpServer.tool("list_services", [
|
|
126
|
+
"Discover the service names that can be queried, aggregated across every connected backend.",
|
|
127
|
+
"When to use: call this before `query_metrics`, `query_logs`, or `get_service_health` to obtain the exact, case-sensitive service name those tools require.",
|
|
128
|
+
"Behavior: read-only, no side effects. Returns one entry per service with the service name, the source(s) it was discovered in, and which signals are available for it (metrics, logs, or both).",
|
|
129
|
+
"Related: `list_sources` for backend health; `get_service_health` for a per-service overview.",
|
|
130
|
+
].join(" "), {
|
|
131
|
+
filter: z
|
|
132
|
+
.string()
|
|
133
|
+
.optional()
|
|
134
|
+
.describe("Optional case-insensitive substring to narrow the result to matching service names (e.g. 'payment'). Omit to list every discovered service."),
|
|
135
|
+
}, async (args) => withToolMetrics("list_services", () => listServicesHandler(registry, args, ctx)));
|
|
88
136
|
const metricsList = getAvailableMetricNames(registry);
|
|
89
137
|
const metricNames = registry.getBySignal("metrics").flatMap(c => c.getMetrics().map(m => m.name));
|
|
90
138
|
const uniqueNames = [...new Set(metricNames)];
|
|
91
|
-
mcpServer.tool("query_metrics",
|
|
92
|
-
service
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
139
|
+
mcpServer.tool("query_metrics", [
|
|
140
|
+
"Fetch the raw time-series for ONE metric of ONE service over a look-back window, returned together with pre-computed summary statistics.",
|
|
141
|
+
"When to use: when you need the actual numeric values or the trend of a known metric. For a 'is this service OK?' verdict use `get_service_health`; to find which services are misbehaving use `detect_anomalies`.",
|
|
142
|
+
"Prerequisites: get the exact service name from `list_services` and choose a metric from the list at the end of this description.",
|
|
143
|
+
"Behavior: read-only, no side effects. Returns an ordered array of {timestamp, value} points plus a summary {current, average, min, max, trend}. With `groupBy` set, returns one labelled series per distinct label value under `groups` instead of a single aggregated series. Units depend on the metric (e.g. CPU as %, latency as ms, rates as per-second). An unknown service/metric or an unreachable backend yields a structured explanatory error, never an exception.",
|
|
144
|
+
`Available metrics: ${metricsList}`,
|
|
145
|
+
].join(" "), {
|
|
146
|
+
service: z
|
|
147
|
+
.string()
|
|
148
|
+
.describe("Required. Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'api-gateway', 'payment-service')."),
|
|
149
|
+
metric: z
|
|
150
|
+
.string()
|
|
151
|
+
.describe(`Required. Exact metric name to query. One of: ${uniqueNames.join(", ")}.`),
|
|
152
|
+
duration: z
|
|
153
|
+
.string()
|
|
154
|
+
.optional()
|
|
155
|
+
.describe("Optional. Look-back window ending at 'now', written as <number><unit> with unit s|m|h|d (e.g. '5m', '90m', '1h', '24h'). Default: '5m'."),
|
|
156
|
+
source: z
|
|
157
|
+
.string()
|
|
158
|
+
.optional()
|
|
159
|
+
.describe("Optional. Restrict the query to a single backend by its source name (see `list_sources`). Default: query and merge all metrics backends."),
|
|
160
|
+
groupBy: z
|
|
161
|
+
.string()
|
|
162
|
+
.optional()
|
|
163
|
+
.describe("Optional. Metric label to break the result down by, e.g. 'instance', 'pod', 'node'. When set, the response contains one series per distinct label value under `groups`. Default: a single aggregated series."),
|
|
164
|
+
}, async (args) => withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args, ctx)));
|
|
165
|
+
mcpServer.tool("query_logs", [
|
|
166
|
+
"Fetch recent log entries for ONE service over a look-back window, with a pre-computed summary (error/warning counts and the most frequent error patterns).",
|
|
167
|
+
"When to use: to inspect what a service actually logged, or to investigate an error spike surfaced by `detect_anomalies` / `get_service_health`. For numeric metrics use `query_metrics` instead.",
|
|
168
|
+
"Prerequisites: get the exact service name from `list_services` (the service must expose a logs signal).",
|
|
169
|
+
"Behavior: read-only, no side effects. Returns the matching log entries (newest first, capped by `limit`) plus a summary with total/error/warn counts and top recurring error patterns. No matches yields an empty result with a zeroed summary; an unreachable backend yields a structured explanatory error, never an exception.",
|
|
170
|
+
].join(" "), {
|
|
171
|
+
service: z
|
|
172
|
+
.string()
|
|
173
|
+
.describe("Required. Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'payment-service')."),
|
|
174
|
+
query: z
|
|
175
|
+
.string()
|
|
176
|
+
.optional()
|
|
177
|
+
.describe("Optional. Filter expression matched against the log message; regular expressions are supported. Omit to return all entries in the window."),
|
|
178
|
+
duration: z
|
|
179
|
+
.string()
|
|
180
|
+
.optional()
|
|
181
|
+
.describe("Optional. Look-back window ending at 'now', written as <number><unit> with unit s|m|h|d (e.g. '5m', '1h', '24h'). Default: '5m'."),
|
|
182
|
+
level: z
|
|
183
|
+
.enum(["error", "warn", "info", "debug"])
|
|
184
|
+
.optional()
|
|
185
|
+
.describe("Optional. Return only entries at this severity. Default: all levels."),
|
|
186
|
+
limit: z
|
|
187
|
+
.number()
|
|
188
|
+
.int()
|
|
189
|
+
.positive()
|
|
190
|
+
.optional()
|
|
191
|
+
.describe("Optional. Maximum number of log entries to return (most recent first). Default: 100."),
|
|
192
|
+
}, async (args) => withToolMetrics("query_logs", () => queryLogsHandler(registry, args, ctx)));
|
|
193
|
+
mcpServer.tool("get_service_health", [
|
|
194
|
+
"Produce a single aggregated health verdict for ONE service by combining its metrics and logs.",
|
|
195
|
+
"When to use: the fastest way to answer 'is this service healthy right now and why?'. Use `query_metrics`/`query_logs` to drill into the underlying numbers, or `detect_anomalies` to scan many services at once.",
|
|
196
|
+
"Prerequisites: get the exact service name from `list_services`.",
|
|
197
|
+
"Behavior: read-only, no side effects. Returns a weighted health score (0–100), a status of healthy | degraded | critical, the key contributing metrics, a log error summary, detected anomalies, and cross-signal correlations explaining the score. A service with no data yields an explanatory result rather than an exception.",
|
|
198
|
+
].join(" "), {
|
|
199
|
+
service: z
|
|
200
|
+
.string()
|
|
201
|
+
.describe("Required. Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'payment-service')."),
|
|
202
|
+
}, async (args) => withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args, ctx)));
|
|
203
|
+
mcpServer.tool("detect_anomalies", [
|
|
204
|
+
"Scan one or all monitored services for abnormal behavior and return the findings ranked by severity.",
|
|
205
|
+
"When to use: the entry point for 'is anything wrong anywhere?' triage. Once a service is flagged, follow up with `get_service_health` for the verdict or `query_metrics`/`query_logs` for the raw evidence.",
|
|
206
|
+
"Behavior: read-only, no side effects. Applies z-score analysis to metrics, detects log error-rate spikes, and correlates the two. Returns a list of anomalies, each with the affected service, metric/signal, severity, the deviation (e.g. σ and % change), and a short explanation. No anomalies yields an empty list, not an error.",
|
|
207
|
+
"Related: `get_service_health` (single-service verdict), `query_metrics` (raw series behind a flagged metric).",
|
|
208
|
+
].join(" "), {
|
|
209
|
+
service: z
|
|
210
|
+
.string()
|
|
211
|
+
.optional()
|
|
212
|
+
.describe("Optional. Restrict the scan to one service (exact, case-sensitive name from `list_services`). Default: scan every monitored service."),
|
|
213
|
+
duration: z
|
|
214
|
+
.string()
|
|
215
|
+
.optional()
|
|
216
|
+
.describe("Optional. Look-back window analyzed for anomalies, written as <number><unit> with unit s|m|h|d (e.g. '5m', '15m', '1h'). Default: '10m'."),
|
|
217
|
+
sensitivity: z
|
|
218
|
+
.enum(["low", "medium", "high"])
|
|
219
|
+
.optional()
|
|
220
|
+
.describe("Optional. Detection threshold: 'low' flags only strong deviations (>3σ), 'medium' is balanced (>2σ), 'high' is most sensitive and noisier (>1.5σ). Default: 'medium'."),
|
|
221
|
+
}, async (args) => withToolMetrics("detect_anomalies", () => detectAnomaliesHandler(registry, args, ctx)));
|
|
113
222
|
return mcpServer;
|
|
114
223
|
}
|
|
115
224
|
// --- HTTP server ---
|
|
116
225
|
const app = express();
|
|
117
226
|
app.use(express.json({ limit: "1mb" }));
|
|
118
227
|
// Security headers
|
|
119
|
-
app.use((
|
|
228
|
+
app.use((req, res, next) => {
|
|
120
229
|
res.setHeader("X-Content-Type-Options", "nosniff");
|
|
121
230
|
res.setHeader("X-Frame-Options", "DENY");
|
|
122
231
|
res.setHeader("X-XSS-Protection", "1; mode=block");
|
|
123
232
|
res.setHeader("Referrer-Policy", "strict-origin-when-cross-origin");
|
|
233
|
+
// Dynamic API responses must never be served from the browser/proxy
|
|
234
|
+
// cache: after a mutation (e.g. installing a connector) the UI
|
|
235
|
+
// re-fetches these GETs immediately, and a heuristically-cached stale
|
|
236
|
+
// body would make the change "not show up until a page reload".
|
|
237
|
+
if (req.path.startsWith("/api/")) {
|
|
238
|
+
res.setHeader("Cache-Control", "no-store");
|
|
239
|
+
}
|
|
124
240
|
next();
|
|
125
241
|
});
|
|
126
242
|
// API request counter — emitted at response time so the `status` label
|
|
@@ -215,8 +331,147 @@ async function main() {
|
|
|
215
331
|
})),
|
|
216
332
|
});
|
|
217
333
|
});
|
|
334
|
+
// Connectors currently loaded into this server (builtin + filesystem
|
|
335
|
+
// plugins), with manifest metadata — drives the UI "Connectors" page.
|
|
336
|
+
app.get("/api/connectors", (_req, res) => {
|
|
337
|
+
res.json({ connectors: describeInstalled(getPluginLoader().list()) });
|
|
338
|
+
});
|
|
339
|
+
// Server-side proxy of the connector hub catalog (so the browser
|
|
340
|
+
// needn't reach the hub directly — works behind a proxy / against a
|
|
341
|
+
// mirror via HUB_CATALOG_URL). Installed status merged in.
|
|
342
|
+
// GET /api/hub/catalog — fetch the hub catalog server-side and annotate it
// with what is currently loaded in this process. Any failure while
// fetching or merging is reported as 502 with an empty connector list; the
// resolved catalog URL is echoed in both outcomes.
app.get("/api/hub/catalog", async (_req, res) => {
    const url = resolveHubCatalogUrl();
    try {
        const catalog = await fetchHubCatalog(url);
        const installed = describeInstalled(getPluginLoader().list());
        const connectors = mergeCatalog(catalog, installed);
        res.json({ url, connectors });
    }
    catch (err) {
        const error = err instanceof Error ? err.message : String(err);
        res.status(502).json({ url, error, connectors: [] });
    }
});
|
|
355
|
+
// Install a connector from the hub into the running server.
|
|
356
|
+
//
|
|
357
|
+
// Runtime code-load is powerful, so this is doubly gated:
|
|
358
|
+
// 1. ENABLE_UI_INSTALL=true must be set (default OFF).
|
|
359
|
+
// 2. PLUGIN_TRUST_ROOT must be configured — install is ALWAYS
|
|
360
|
+
// fail-closed verified (no insecure bypass over HTTP).
|
|
361
|
+
// Only catalog tarballUrls are fetched (no arbitrary URL in the body)
|
|
362
|
+
// to avoid SSRF. The connector persists to PLUGINS_DIR (back it with
|
|
363
|
+
// a PVC on k8s so it survives restarts).
|
|
364
|
+
/**
 * Read a fetch() response body into a Buffer, giving up as soon as more
 * than `maxBytes` have arrived.
 *
 * `resp.arrayBuffer()` would buffer the entire body before any size check
 * could run, so a response that omits or understates Content-Length could
 * still exhaust memory. Reading chunk by chunk keeps the cap a real bound.
 *
 * @param resp     fetch Response whose body has not been consumed yet.
 * @param maxBytes maximum number of body bytes to accept.
 * @returns the body as a Buffer, or null when the cap was exceeded.
 */
async function readBodyCapped(resp, maxBytes) {
    if (!resp.body)
        return Buffer.alloc(0);
    const reader = resp.body.getReader();
    const chunks = [];
    let total = 0;
    for (;;) {
        const { done, value } = await reader.read();
        if (done)
            break;
        total += value.byteLength;
        if (total > maxBytes) {
            // Stop the transfer; a failing cancel must not mask the size error.
            await reader.cancel().catch(() => { });
            return null;
        }
        chunks.push(value);
    }
    return Buffer.concat(chunks, total);
}
// POST /api/connectors/install — body: { name, version? }.
// Looks the connector up in the hub catalog, downloads the catalog's
// tarballUrl (never a URL from the request body), and hands the file to
// installTarball() together with PLUGIN_TRUST_ROOT. Responds 403/412 when
// the feature is not enabled/configured, 400 for an invalid name or a
// PluginVerificationError, 404/409/422 for catalog mismatches, 413 for an
// oversized tarball, 502 for a failed download, 500 for anything else.
app.post("/api/connectors/install", installRateLimit, async (req, res) => {
    if (process.env.ENABLE_UI_INSTALL !== "true") {
        return res.status(403).json({
            error: "UI install is disabled. Set ENABLE_UI_INSTALL=true and PLUGIN_TRUST_ROOT to enable it.",
        });
    }
    const trustRootPath = process.env.PLUGIN_TRUST_ROOT;
    if (!trustRootPath) {
        return res.status(412).json({
            error: "PLUGIN_TRUST_ROOT not configured — refusing to install unverified code.",
        });
    }
    const name = (req.body || {}).name;
    const version = (req.body || {}).version;
    if (!isValidConnectorName(name)) {
        return res.status(400).json({ error: "invalid connector name" });
    }
    const pluginsDir = process.env.PLUGINS_DIR ?? "/app/plugins";
    // Temp dir is created only once there is something to write; the
    // finally block removes it on every exit path after that point.
    let work = null;
    try {
        const catalog = await fetchHubCatalog(resolveHubCatalogUrl());
        const entry = catalog.connectors.find((c) => c.name === name);
        if (!entry)
            return res.status(404).json({ error: `'${name}' is not in the catalog` });
        if (entry.builtin)
            return res.status(409).json({ error: `'${name}' is builtin — no install needed` });
        // Explicit version must match exactly; otherwise prefer `latest`
        // and fall back to the first listed version.
        const v = version
            ? entry.versions.find((x) => x.version === version)
            : entry.versions.find((x) => x.version === (entry.latest ?? entry.versions[0]?.version)) ?? entry.versions[0];
        if (!v || !v.tarballUrl) {
            return res.status(422).json({ error: `no tarball for ${name}@${version ?? "latest"}` });
        }
        const resp = await fetch(v.tarballUrl);
        if (!resp.ok)
            return res.status(502).json({ error: `tarball download HTTP ${resp.status}` });
        // Fast rejection when the server announces an oversized body.
        const declared = Number(resp.headers.get("content-length") || 0);
        if (declared > MAX_CONNECTOR_TGZ_BYTES) {
            // Release the connection instead of leaving the body unread.
            if (resp.body)
                await resp.body.cancel().catch(() => { });
            return res.status(413).json({ error: `tarball too large (${declared} bytes)` });
        }
        // Content-Length may be absent or wrong, so the cap is enforced
        // again while streaming.
        const buf = await readBodyCapped(resp, MAX_CONNECTOR_TGZ_BYTES);
        if (buf === null) {
            return res.status(413).json({ error: `tarball too large (exceeds ${MAX_CONNECTOR_TGZ_BYTES} bytes)` });
        }
        work = mkdtempSync(join(tmpdir(), "obsmcp-dl-"));
        const tgz = join(work, "c.tgz");
        writeFileSync(tgz, buf);
        const result = installTarball({ tgzPath: tgz, pluginsDir, trustRootPath, expectedName: name });
        await getPluginLoader().load(); // re-scan so /api/connectors reflects it
        res.json({
            ok: true,
            ...result,
            note: "installed & persisted to PLUGINS_DIR. Add a source of this type to use it; a server restart is recommended for full availability in existing MCP sessions.",
        });
    }
    catch (e) {
        const msg = e instanceof Error ? e.message : String(e);
        const code = e instanceof PluginVerificationError ? 400 : 500;
        res.status(code).json({ error: `install failed (fail-closed): ${msg}` });
    }
    finally {
        if (work)
            rmSync(work, { recursive: true, force: true });
    }
});
|
|
428
|
+
// Upload a connector bundle (.tgz) and install it into the running
|
|
429
|
+
// server. Same fail-closed guardrails as /install: the upload is
|
|
430
|
+
// ALWAYS verified against PLUGIN_TRUST_ROOT (signature + integrity),
|
|
431
|
+
// so an unsigned/tampered bundle is rejected. Body is the raw tarball
|
|
432
|
+
// bytes (application/octet-stream). Persists to PLUGINS_DIR.
|
|
433
|
+
/**
 * Gate for the upload route: rejects the request before any body parsing
 * when UI install is disabled (403) or PLUGIN_TRUST_ROOT is unset (412).
 * On success the trust-root path is stored in res.locals.trustRootPath for
 * the handler.
 *
 * Running this ahead of express.raw() means a server with the feature
 * switched off does not buffer upload bodies at all.
 */
function requireUiInstall(_req, res, next) {
    if (process.env.ENABLE_UI_INSTALL !== "true") {
        return res.status(403).json({
            error: "UI install is disabled. Set ENABLE_UI_INSTALL=true and PLUGIN_TRUST_ROOT to enable it.",
        });
    }
    const trustRootPath = process.env.PLUGIN_TRUST_ROOT;
    if (!trustRootPath) {
        return res.status(412).json({
            error: "PLUGIN_TRUST_ROOT not configured — refusing to install unverified code.",
        });
    }
    res.locals.trustRootPath = trustRootPath;
    next();
}
// POST /api/connectors/upload — body: raw .tgz bytes sent as
// application/octet-stream. The bytes are written to a temp file and passed
// to installTarball() with PLUGIN_TRUST_ROOT. The body-size limit is the
// shared MAX_CONNECTOR_TGZ_BYTES cap so uploads and hub downloads accept the
// same maximum. Responds 400 for an empty/non-binary body or a
// PluginVerificationError, 500 for any other failure.
app.post("/api/connectors/upload", installRateLimit, requireUiInstall, express.raw({ type: "application/octet-stream", limit: MAX_CONNECTOR_TGZ_BYTES }), async (req, res) => {
    const trustRootPath = res.locals.trustRootPath;
    const body = req.body;
    // express.raw() only yields a Buffer for the declared content type.
    if (!Buffer.isBuffer(body) || body.length === 0) {
        return res.status(400).json({ error: "empty body — POST the connector .tgz as application/octet-stream" });
    }
    const pluginsDir = process.env.PLUGINS_DIR ?? "/app/plugins";
    let work = null;
    try {
        work = mkdtempSync(join(tmpdir(), "obsmcp-up-"));
        const tgz = join(work, "c.tgz");
        writeFileSync(tgz, body);
        const result = installTarball({ tgzPath: tgz, pluginsDir, trustRootPath });
        await getPluginLoader().load(); // re-scan so /api/connectors reflects it
        res.json({
            ok: true,
            ...result,
            note: "uploaded, verified & persisted to PLUGINS_DIR. Add a source of this type to use it; a server restart is recommended for full availability in existing MCP sessions.",
        });
    }
    catch (e) {
        const msg = e instanceof Error ? e.message : String(e);
        const code = e instanceof PluginVerificationError ? 400 : 500;
        res.status(code).json({ error: `upload install failed (fail-closed): ${msg}` });
    }
    finally {
        // Always remove the temp copy, whether the install succeeded or not.
        if (work)
            rmSync(work, { recursive: true, force: true });
    }
});
|
|
218
473
|
// Add a new source
|
|
219
|
-
app.post("/api/sources", async (req, res) => {
|
|
474
|
+
app.post("/api/sources", installRateLimit, async (req, res) => {
|
|
220
475
|
const { name, type, url, enabled, auth, tls } = req.body;
|
|
221
476
|
if (!name || !type || !url) {
|
|
222
477
|
res.status(400).json({ error: "name, type, and url are required" });
|
|
@@ -279,7 +534,7 @@ async function main() {
|
|
|
279
534
|
res.json({ ok: true });
|
|
280
535
|
});
|
|
281
536
|
// Test a source connection (without saving)
|
|
282
|
-
app.post("/api/sources/test", async (req, res) => {
|
|
537
|
+
app.post("/api/sources/test", installRateLimit, async (req, res) => {
|
|
283
538
|
const { name, type, url, enabled, auth, tls } = req.body;
|
|
284
539
|
if (!type || !url) {
|
|
285
540
|
res.status(400).json({ error: "type and url are required" });
|
|
@@ -325,7 +580,7 @@ async function main() {
|
|
|
325
580
|
// List discovered services
|
|
326
581
|
app.get("/api/services", async (_req, res) => {
|
|
327
582
|
try {
|
|
328
|
-
const result = await listServicesHandler(registry, {});
|
|
583
|
+
const result = await listServicesHandler(registry, {}, defaultContext());
|
|
329
584
|
res.json(parseToolResult(result));
|
|
330
585
|
}
|
|
331
586
|
catch {
|
|
@@ -335,7 +590,7 @@ async function main() {
|
|
|
335
590
|
// Health endpoint for UI dashboard
|
|
336
591
|
app.get("/api/health/:service", async (req, res) => {
|
|
337
592
|
try {
|
|
338
|
-
const result = await getServiceHealthHandler(registry, { service: req.params.service });
|
|
593
|
+
const result = await getServiceHealthHandler(registry, { service: req.params.service }, defaultContext());
|
|
339
594
|
res.json(parseToolResult(result));
|
|
340
595
|
}
|
|
341
596
|
catch {
|
|
@@ -345,13 +600,13 @@ async function main() {
|
|
|
345
600
|
// Health for all services
|
|
346
601
|
app.get("/api/health", async (_req, res) => {
|
|
347
602
|
try {
|
|
348
|
-
const servicesResult = await listServicesHandler(registry, {});
|
|
603
|
+
const servicesResult = await listServicesHandler(registry, {}, defaultContext());
|
|
349
604
|
const parsed = parseToolResult(servicesResult);
|
|
350
605
|
const services = parsed?.services || [];
|
|
351
606
|
const health = {};
|
|
352
607
|
for (const svc of services) {
|
|
353
608
|
try {
|
|
354
|
-
const result = await getServiceHealthHandler(registry, { service: svc.name });
|
|
609
|
+
const result = await getServiceHealthHandler(registry, { service: svc.name }, defaultContext());
|
|
355
610
|
health[svc.name] = parseToolResult(result);
|
|
356
611
|
}
|
|
357
612
|
catch {
|
|
@@ -432,6 +687,16 @@ async function main() {
|
|
|
432
687
|
saveConfig(config);
|
|
433
688
|
res.json({ ok: true });
|
|
434
689
|
});
|
|
690
|
+
// Stdio transport: one server over stdin/stdout, no HTTP listener.
|
|
691
|
+
if (STDIO) {
|
|
692
|
+
const server = createMcpServer(defaultContext());
|
|
693
|
+
await server.connect(new StdioServerTransport());
|
|
694
|
+
console.error(`observability-mcp running on stdio transport · connectors: ${registry
|
|
695
|
+
.getAll()
|
|
696
|
+
.map((c) => c.name)
|
|
697
|
+
.join(", ")}`);
|
|
698
|
+
return;
|
|
699
|
+
}
|
|
435
700
|
// MCP Streamable HTTP transport — stateful sessions
|
|
436
701
|
const transports = new Map();
|
|
437
702
|
const sessionLastActive = new Map();
|
|
@@ -448,7 +713,26 @@ async function main() {
|
|
|
448
713
|
}
|
|
449
714
|
mcpActiveSessions.set(transports.size);
|
|
450
715
|
}, 5 * 60 * 1000);
|
|
716
|
+
// Single-tenant auth gate. No credentials configured → anonymous (current
|
|
717
|
+
// behaviour, fully backward compatible). Configured → require a valid
|
|
718
|
+
// Bearer/X-API-Key on every /mcp request; resolve the principal + its
|
|
719
|
+
// coarse source allow-list into the RequestContext.
|
|
720
|
+
/**
 * Resolve the request context for an /mcp request.
 *
 * When no credentials are configured the default context is returned and
 * the request headers are not inspected. Otherwise the token taken from
 * the request headers is resolved against the loaded credentials: a match
 * yields a principal context built from the credential's name and
 * allowedSources; no match sends a 401 JSON response and returns null, in
 * which case the caller must stop handling the request.
 *
 * @param req incoming Express request (only `headers` is read).
 * @param res Express response, written to only on rejection.
 * @returns the request context, or null after a 401 has been sent.
 */
function gateCtx(req, res) {
    let context = null;
    if (credentialsConfigured()) {
        const token = extractToken(req.headers);
        const match = resolveToken(token, loadCredentials());
        if (match) {
            context = principalContext(match.name, match.allowedSources);
        }
        else {
            res.status(401);
            res.json({ error: "unauthorized: valid Bearer token or X-API-Key required" });
        }
    }
    else {
        context = defaultContext();
    }
    return context;
}
|
|
451
732
|
app.post("/mcp", async (req, res) => {
|
|
733
|
+
const ctx = gateCtx(req, res);
|
|
734
|
+
if (!ctx)
|
|
735
|
+
return;
|
|
452
736
|
const sessionId = req.headers["mcp-session-id"];
|
|
453
737
|
let transport;
|
|
454
738
|
if (sessionId && transports.has(sessionId)) {
|
|
@@ -468,7 +752,7 @@ async function main() {
|
|
|
468
752
|
}
|
|
469
753
|
mcpActiveSessions.set(transports.size);
|
|
470
754
|
};
|
|
471
|
-
const sessionMcpServer = createMcpServer();
|
|
755
|
+
const sessionMcpServer = createMcpServer(ctx);
|
|
472
756
|
await sessionMcpServer.connect(transport);
|
|
473
757
|
}
|
|
474
758
|
await transport.handleRequest(req, res, req.body);
|
|
@@ -482,6 +766,8 @@ async function main() {
|
|
|
482
766
|
mcpActiveSessions.set(transports.size);
|
|
483
767
|
});
|
|
484
768
|
app.get("/mcp", async (req, res) => {
|
|
769
|
+
if (!gateCtx(req, res))
|
|
770
|
+
return;
|
|
485
771
|
const sessionId = req.headers["mcp-session-id"];
|
|
486
772
|
const transport = transports.get(sessionId);
|
|
487
773
|
if (!transport) {
|
|
@@ -491,6 +777,8 @@ async function main() {
|
|
|
491
777
|
await transport.handleRequest(req, res);
|
|
492
778
|
});
|
|
493
779
|
app.delete("/mcp", async (req, res) => {
|
|
780
|
+
if (!gateCtx(req, res))
|
|
781
|
+
return;
|
|
494
782
|
const sessionId = req.headers["mcp-session-id"];
|
|
495
783
|
const transport = transports.get(sessionId);
|
|
496
784
|
if (transport) {
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verifiable offline mode — egress policy.
|
|
3
|
+
*
|
|
4
|
+
* The server performs **no telemetry, analytics, phone-home, or update
|
|
5
|
+
* checks**. The only outbound network calls it ever makes are to backends
|
|
6
|
+
* the operator explicitly configures (Prometheus/Loki/... source URLs) or to
|
|
7
|
+
* an artifact URL the operator/registry explicitly asks it to install.
|
|
8
|
+
*
|
|
9
|
+
* This module is the machine-checkable statement of that guarantee:
|
|
10
|
+
* `egress-policy.test.ts` fails CI if any source file outside the allowlist
|
|
11
|
+
* introduces an outbound call — so the "no data egress" property cannot
|
|
12
|
+
* silently regress.
|
|
13
|
+
*/
|
|
14
|
+
export declare const OFFLINE_STATEMENT: string;
|
|
15
|
+
/** Regex of outbound-call shapes the guard scans for. */
|
|
16
|
+
export declare const OUTBOUND_PATTERN: RegExp;
|
|
17
|
+
/**
|
|
18
|
+
* Files/prefixes permitted to make outbound calls, each with the reason.
|
|
19
|
+
* Anything matching OUTBOUND_PATTERN outside these paths is a policy breach
|
|
20
|
+
* (e.g. a newly added analytics/telemetry module).
|
|
21
|
+
*/
|
|
22
|
+
export declare const EGRESS_ALLOWLIST: ReadonlyArray<{
|
|
23
|
+
prefix: string;
|
|
24
|
+
reason: string;
|
|
25
|
+
}>;
|
|
26
|
+
/**
|
|
27
|
+
* Hard-blocked analytics/telemetry SDKs — matches an *import/require of the
|
|
28
|
+
* package*, not the word in prose, so comments/policy text don't false-positive.
|
|
29
|
+
*/
|
|
30
|
+
export declare const FORBIDDEN_TELEMETRY: RegExp;
|
|
31
|
+
export declare function isEgressAllowed(relPath: string): boolean;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verifiable offline mode — egress policy.
|
|
3
|
+
*
|
|
4
|
+
* The server performs **no telemetry, analytics, phone-home, or update
|
|
5
|
+
* checks**. The only outbound network calls it ever makes are to backends
|
|
6
|
+
* the operator explicitly configures (Prometheus/Loki/... source URLs) or to
|
|
7
|
+
* an artifact URL the operator/registry explicitly asks it to install.
|
|
8
|
+
*
|
|
9
|
+
* This module is the machine-checkable statement of that guarantee:
|
|
10
|
+
* `egress-policy.test.ts` fails CI if any source file outside the allowlist
|
|
11
|
+
* introduces an outbound call — so the "no data egress" property cannot
|
|
12
|
+
* silently regress.
|
|
13
|
+
*/
|
|
14
|
+
// Human-readable summary of the egress guarantee, assembled from its three
// sentences-worth of fragments (the fragments carry their own trailing spaces).
export const OFFLINE_STATEMENT = [
    "observability-mcp makes no telemetry/analytics/phone-home/update calls. ",
    "Outbound traffic goes only to operator-configured source backends and ",
    "operator/registry-requested plugin artifacts. It runs fully air-gapped.",
].join("");
|
|
17
|
+
/** Regex of outbound-call shapes the guard scans for. */
|
|
18
|
+
// Alternatives, each required to start at a word boundary (`\b`):
//   - `fetch(`
//   - `http.request(` / `https.request(`
//   - `new WebSocket(`
//   - a dynamic `import(` whose argument is a quoted http:/https: URL
// NOTE(review): `http.get(` / `https.get(` and third-party HTTP clients are not
// among these alternatives, so they would not be flagged by this pattern.
export const OUTBOUND_PATTERN = /\b(fetch\s*\(|https?\.request\s*\(|new\s+WebSocket\s*\(|import\s*\(\s*['"]https?:)/;
|
|
19
|
+
/**
|
|
20
|
+
* Files/prefixes permitted to make outbound calls, each with the reason.
|
|
21
|
+
* Anything matching OUTBOUND_PATTERN outside these paths is a policy breach
|
|
22
|
+
* (e.g. a newly added analytics/telemetry module).
|
|
23
|
+
*/
|
|
24
|
+
export const EGRESS_ALLOWLIST = [
    { prefix: "connectors/", reason: "connectors query operator-configured source backends" },
    { prefix: "cli/index.ts", reason: "CLI fetches a source location the operator passed explicitly" },
    { prefix: "index.ts", reason: "connector-hub plugin install of an operator/registry-requested tarball URL" },
];
/**
 * Hard-blocked analytics/telemetry SDKs — matches an *import/require of the
 * package*, not the word in prose, so comments/policy text don't false-positive.
 */
export const FORBIDDEN_TELEMETRY = /(?:from\s*['"]|require\(\s*['"])[^'"]*(sentry|posthog|mixpanel|amplitude|@segment|datadog-rum|analytics-node|google-analytics)/i;
/**
 * Reports whether a source file is permitted to make outbound calls.
 *
 * Backslashes in `relPath` are normalised to `/` before matching. An
 * allowlist entry ending in `/` is a directory and matches every path beneath
 * it; any other entry names a single file and must match the whole path.
 * Matching file entries exactly keeps an entry such as `index.ts` from also
 * whitelisting look-alike siblings (`index.tsx`, `index.ts.telemetry.ts`, ...).
 *
 * @param relPath Path of the file relative to the source root.
 * @returns `true` when the path is covered by an EGRESS_ALLOWLIST entry.
 */
export function isEgressAllowed(relPath) {
    const normalized = relPath.replace(/\\/g, "/");
    return EGRESS_ALLOWLIST.some(({ prefix }) => prefix.endsWith("/")
        ? normalized.startsWith(prefix)
        : normalized === prefix);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { describe, it } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { dirname, join, relative } from "node:path";
|
|
6
|
+
import { OUTBOUND_PATTERN, FORBIDDEN_TELEMETRY, isEgressAllowed, EGRESS_ALLOWLIST, } from "./egress-policy.js";
|
|
7
|
+
// Verifiable offline mode: static guard so the "no data egress" guarantee
|
|
8
|
+
// cannot silently regress. Any new outbound call outside the documented
|
|
9
|
+
// allowlist, or any analytics/telemetry SDK anywhere, fails CI here.
|
|
10
|
+
// Parent of the directory containing this test module; scanned paths are
// made relative to it before being checked against the policy.
const srcRoot = join(dirname(fileURLToPath(import.meta.url)), "..");
|
|
11
|
+
function walk(dir) {
|
|
12
|
+
const out = [];
|
|
13
|
+
for (const e of readdirSync(dir)) {
|
|
14
|
+
const p = join(dir, e);
|
|
15
|
+
if (statSync(p).isDirectory())
|
|
16
|
+
out.push(...walk(p));
|
|
17
|
+
else if (e.endsWith(".ts") && !e.endsWith(".test.ts"))
|
|
18
|
+
out.push(p);
|
|
19
|
+
}
|
|
20
|
+
return out;
|
|
21
|
+
}
|
|
22
|
+
describe("verifiable offline mode — egress policy", () => {
    // Load every scanned file once; the policy module itself names the
    // scanned-for tokens by design, so it is left out of the checks.
    const files = [];
    for (const abs of walk(srcRoot)) {
        const rel = relative(srcRoot, abs).replace(/\\/g, "/");
        const src = readFileSync(abs, "utf8");
        if (rel !== "net/egress-policy.ts")
            files.push({ rel, src });
    }
    // Sanity check that the walk actually found the source tree.
    it("scans a non-trivial number of source files", () => {
        assert.ok(files.length > 20, `only scanned ${files.length} files`);
    });
    it("no outbound call outside the egress allowlist", () => {
        const breaches = [];
        for (const { rel, src } of files) {
            if (OUTBOUND_PATTERN.test(src) && !isEgressAllowed(rel))
                breaches.push(rel);
        }
        const allowedPrefixes = EGRESS_ALLOWLIST.map((a) => a.prefix).join(", ");
        assert.deepEqual(breaches, [], `outbound calls found outside allowlist (${allowedPrefixes}): ` +
            `${breaches.join(", ")} — telemetry/phone-home is forbidden; if legitimate, extend EGRESS_ALLOWLIST with a reason`);
    });
    it("no analytics/telemetry SDK anywhere in source", () => {
        const hits = [];
        for (const { rel, src } of files) {
            if (FORBIDDEN_TELEMETRY.test(src))
                hits.push(rel);
        }
        assert.deepEqual(hits, [], `forbidden telemetry/analytics identifiers in: ${hits.join(", ")}`);
    });
    it("allowlisted files are still present (allowlist not stale)", () => {
        for (const { prefix } of EGRESS_ALLOWLIST) {
            // startsWith already covers the exact-equality case.
            const covered = files.some(({ rel }) => rel.startsWith(prefix));
            assert.ok(covered, `allowlist entry "${prefix}" matches no source file — prune it`);
        }
    });
});
|
package/dist/sdk/index.d.ts
CHANGED
|
@@ -34,6 +34,12 @@ export interface ConnectorManifest {
|
|
|
34
34
|
/** Semver range of mcp-server versions this connector supports. */
|
|
35
35
|
serverVersion?: string;
|
|
36
36
|
};
|
|
37
|
+
/**
|
|
38
|
+
* Subresource-integrity-style digest of the entry file
|
|
39
|
+
* ("sha256-<base64>"). Required (and signature-checked) when the
|
|
40
|
+
* server runs with VERIFY_PLUGINS=true. See docs/plugin-architecture.md.
|
|
41
|
+
*/
|
|
42
|
+
integrity?: string;
|
|
37
43
|
}
|
|
38
44
|
/**
|
|
39
45
|
* The default export shape a connector plugin module must provide.
|
|
@@ -23,5 +23,6 @@ export declare const manifestSchema: z.ZodObject<{
|
|
|
23
23
|
compat: z.ZodOptional<z.ZodObject<{
|
|
24
24
|
serverVersion: z.ZodOptional<z.ZodString>;
|
|
25
25
|
}, z.core.$strip>>;
|
|
26
|
+
integrity: z.ZodOptional<z.ZodString>;
|
|
26
27
|
}, z.core.$strip>;
|
|
27
28
|
export type ValidatedConnectorManifest = z.infer<typeof manifestSchema>;
|