@thotischner/observability-mcp 1.4.1 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analysis/anomaly.d.ts +89 -0
- package/dist/analysis/anomaly.js +235 -0
- package/dist/analysis/anomaly.test.js +149 -1
- package/dist/analysis/backtest.d.ts +31 -0
- package/dist/analysis/backtest.js +206 -0
- package/dist/analysis/backtest.test.d.ts +1 -0
- package/dist/analysis/backtest.test.js +34 -0
- package/dist/analysis/correlator.d.ts +35 -0
- package/dist/analysis/correlator.js +95 -0
- package/dist/analysis/correlator.test.js +60 -1
- package/dist/analysis/health.d.ts +2 -3
- package/dist/analysis/index.d.ts +32 -0
- package/dist/analysis/index.js +29 -0
- package/dist/analysis/library.test.d.ts +1 -0
- package/dist/analysis/library.test.js +44 -0
- package/dist/auth/credentials.d.ts +29 -0
- package/dist/auth/credentials.js +76 -0
- package/dist/auth/credentials.test.d.ts +1 -0
- package/dist/auth/credentials.test.js +57 -0
- package/dist/context.d.ts +27 -0
- package/dist/context.js +18 -0
- package/dist/index.js +53 -44
- package/dist/net/egress-policy.d.ts +31 -0
- package/dist/net/egress-policy.js +37 -0
- package/dist/net/egress-policy.test.d.ts +1 -0
- package/dist/net/egress-policy.test.js +52 -0
- package/dist/tools/context-seam.test.d.ts +1 -0
- package/dist/tools/context-seam.test.js +23 -0
- package/dist/tools/detect-anomalies.d.ts +2 -1
- package/dist/tools/detect-anomalies.js +47 -11
- package/dist/tools/get-service-health.d.ts +2 -1
- package/dist/tools/get-service-health.js +2 -1
- package/dist/tools/handlers.test.js +73 -0
- package/dist/tools/list-services.d.ts +2 -1
- package/dist/tools/list-services.js +2 -1
- package/dist/tools/list-sources.d.ts +2 -1
- package/dist/tools/list-sources.js +2 -1
- package/dist/tools/query-logs.d.ts +2 -1
- package/dist/tools/query-logs.js +2 -1
- package/dist/tools/query-metrics.d.ts +2 -1
- package/dist/tools/query-metrics.js +9 -1
- package/package.json +10 -2
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single-tenant authentication primitive (opt-in, backward compatible).
|
|
3
|
+
*
|
|
4
|
+
* If no credentials are configured the server behaves exactly as before
|
|
5
|
+
* (anonymous, all access). If `OMCP_API_KEYS` is set, the `/mcp` endpoint
|
|
6
|
+
* requires a valid `Authorization: Bearer <token>` or `X-API-Key: <token>`.
|
|
7
|
+
*
|
|
8
|
+
* Config (env, no secrets in files):
|
|
9
|
+
* OMCP_API_KEYS="ci:tok_abc,agent:tok_def" # name:token, comma-separated
|
|
10
|
+
* (a bare "tok_xyz" is allowed; name defaults to "key")
|
|
11
|
+
* OMCP_KEY_SOURCES="agent=prom-prod|loki-prod;ci=prom-staging"
|
|
12
|
+
* # optional coarse per-key source allow-list
|
|
13
|
+
*
|
|
14
|
+
* Rich role-based access control (tools/services/lookback/read-only, the
|
|
15
|
+
* full governance object) is intentionally NOT here — this is only the
|
|
16
|
+
* authentication + identity + coarse source-scoping primitive.
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Parse the `OMCP_KEY_SOURCES` value into a map of credential name → list of
 * allowed source names.
 *
 * Format: `name=src1|src2;other=src3`. Entries are separated by `;`, the
 * credential name ends at the FIRST `=` of the entry (mirroring how
 * `loadCredentials` splits `name:token` on the first `:`), and source names
 * are separated by `|`. Whitespace around every piece is ignored.
 *
 * Entries without `=`, with a blank name, or with no non-blank source name
 * are ignored. When a name appears more than once the last entry wins.
 *
 * @param raw Raw env value; may be undefined or empty.
 * @returns Map keyed by credential name; empty when `raw` is falsy.
 */
function parseKeySources(raw) {
    const m = new Map();
    if (!raw)
        return m;
    for (const entry of raw.split(";")) {
        const eq = entry.indexOf("=");
        if (eq < 0)
            continue;
        const name = entry.slice(0, eq).trim();
        // Everything after the first "=" is the list, so a later "=" is kept
        // as part of a source name instead of being silently dropped.
        const sources = entry
            .slice(eq + 1)
            .split("|")
            .map((s) => s.trim())
            .filter(Boolean);
        if (!name || sources.length === 0)
            continue;
        m.set(name, sources);
    }
    return m;
}
|
|
30
|
+
/**
 * Build the credential list from the environment.
 *
 * Reads `OMCP_API_KEYS` (comma-separated `name:token` items; an item without
 * a name before the first `:` is treated as a bare token named "key") and
 * attaches the per-name allow-list parsed from `OMCP_KEY_SOURCES`.
 *
 * @param env Environment map to read from; defaults to `process.env`.
 * @returns Array of `{ name, token, allowedSources }`; empty when
 *          `OMCP_API_KEYS` is unset or blank. Items with an empty token are
 *          dropped.
 */
export function loadCredentials(env = process.env) {
    const rawKeys = env.OMCP_API_KEYS?.trim();
    if (!rawKeys) {
        return [];
    }
    const sourcesByName = parseKeySources(env.OMCP_KEY_SOURCES);
    return rawKeys
        .split(",")
        .map((piece) => piece.trim())
        .filter(Boolean)
        .flatMap((piece) => {
            const colonAt = piece.indexOf(":");
            // A colon at position 0 (or no colon) means there is no name part.
            const named = colonAt > 0;
            const name = named ? piece.slice(0, colonAt).trim() : "key";
            const token = (named ? piece.slice(colonAt + 1) : piece).trim();
            return token
                ? [{ name, token, allowedSources: sourcesByName.get(name) }]
                : [];
        });
}
|
|
47
|
+
/**
 * Report whether authentication is switched on.
 *
 * @param env Environment map to read from; defaults to `process.env`.
 * @returns `true` when `loadCredentials(env)` yields at least one credential.
 */
export function credentialsConfigured(env = process.env) {
    const configured = loadCredentials(env);
    return configured.length !== 0;
}
|
|
50
|
+
/**
 * Pull the caller's token out of a request-header map.
 *
 * An `authorization` header using the `Bearer` scheme (case-insensitive) takes
 * precedence: its trimmed remainder is returned, or `null` when that remainder
 * is empty. Otherwise the trimmed `x-api-key` header is used.
 *
 * @param headers Header map keyed by lower-case header name.
 * @returns The token string, or `null` when none is present.
 */
export function extractToken(headers) {
    const authorization = headers["authorization"];
    if (typeof authorization === "string") {
        const scheme = /^Bearer\s+/i.exec(authorization);
        if (scheme !== null) {
            const bearer = authorization.slice(scheme[0].length).trim();
            return bearer === "" ? null : bearer;
        }
    }
    const apiKey = headers["x-api-key"];
    if (typeof apiKey !== "string") {
        return null;
    }
    const trimmedKey = apiKey.trim();
    return trimmedKey === "" ? null : trimmedKey;
}
|
|
61
|
+
/**
 * Constant-time-ish token match → resolved credential, or null.
 *
 * Every configured credential is compared, even after a match has been found,
 * so the work done does not depend on which entry (if any) matched. When
 * several credentials share the same token the first one in `creds` wins.
 * This is best-effort hardening only; JavaScript gives no hard timing
 * guarantees.
 *
 * @param token Token presented by the caller; `null`/empty resolves to `null`.
 * @param creds Credentials as returned by `loadCredentials`.
 * @returns The matching credential object, or `null`.
 */
export function resolveToken(token, creds) {
    if (!token)
        return null;
    let matched = null;
    for (const c of creds) {
        // safeEqual is evaluated first so it runs for every credential.
        if (safeEqual(c.token, token) && matched === null)
            matched = c;
    }
    return matched;
}
/**
 * Compare two strings without stopping at the first differing character.
 *
 * The length difference is folded into the accumulator, so strings of unequal
 * length never compare equal (a bare character loop would accept a prefix, or
 * a trailing NUL, as a match). The loop runs over `b`, so callers should pass
 * the untrusted value as `b` to keep the iteration count independent of the
 * secret's length.
 *
 * @param a Expected (secret) value.
 * @param b Presented value.
 * @returns `true` only when both strings are identical.
 */
function safeEqual(a, b) {
    let diff = a.length ^ b.length;
    for (let i = 0; i < b.length; i++) {
        // charCodeAt past the end of `a` is NaN, which `^` coerces to 0; the
        // length term above has already marked such inputs as different.
        diff |= a.charCodeAt(i) ^ b.charCodeAt(i);
    }
    return diff === 0;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { describe, it } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { loadCredentials, credentialsConfigured, extractToken, resolveToken, } from "./credentials.js";
|
|
4
|
+
import { queryMetricsHandler } from "../tools/query-metrics.js";
|
|
5
|
+
import { principalContext, defaultContext } from "../context.js";
|
|
6
|
+
// Tests for the single-tenant auth primitive: env parsing, header extraction,
// token resolution, and the coarse per-key source-scoping guard.
describe("single-tenant auth primitive", () => {
    it("unconfigured → no credentials, anonymous (backward compatible)", () => {
        assert.equal(credentialsConfigured({}), false);
        assert.deepEqual(loadCredentials({}), []);
    });
    it("parses name:token and bare token", () => {
        const creds = loadCredentials({ OMCP_API_KEYS: "ci:tok_abc, tok_bare " });
        assert.equal(creds.length, 2);
        assert.deepEqual(creds[0], { name: "ci", token: "tok_abc", allowedSources: undefined });
        assert.equal(creds[1].name, "key");
        assert.equal(creds[1].token, "tok_bare");
    });
    it("parses per-key source allow-list", () => {
        const creds = loadCredentials({
            OMCP_API_KEYS: "agent:tok1,ci:tok2",
            OMCP_KEY_SOURCES: "agent=prom-prod|loki-prod; ci=prom-staging",
        });
        assert.deepEqual(creds[0].allowedSources, ["prom-prod", "loki-prod"]);
        assert.deepEqual(creds[1].allowedSources, ["prom-staging"]);
    });
    it("extractToken handles Bearer and X-API-Key", () => {
        assert.equal(extractToken({ authorization: "Bearer abc" }), "abc");
        assert.equal(extractToken({ authorization: "bearer xyz " }), "xyz");
        assert.equal(extractToken({ "x-api-key": "k1" }), "k1");
        assert.equal(extractToken({}), null);
    });
    it("resolveToken matches only an exact token", () => {
        const creds = loadCredentials({ OMCP_API_KEYS: "a:secret123" });
        assert.equal(resolveToken("secret123", creds)?.name, "a");
        assert.equal(resolveToken("secret12", creds), null);
        assert.equal(resolveToken("wrong", creds), null);
        assert.equal(resolveToken(null, creds), null);
    });
    it("coarse source scoping denies an out-of-scope source", async () => {
        const ctx = principalContext("agent", ["prom-prod"]);
        const res = await queryMetricsHandler({}, { service: "svc", metric: "cpu", source: "prom-secret" }, ctx);
        const text = res.content[0].text;
        assert.match(text, /forbidden: source.*prom-secret.*not in your allowed sources/);
    });
    it("anonymous (no allow-list) does not trigger the scoping guard", async () => {
        // No allowedSources → guard is a no-op. It must NOT short-circuit with a
        // forbidden message (it falls through to normal handling, which on a stub
        // registry may throw — that's fine, it means we passed the guard).
        let res;
        try {
            res = await queryMetricsHandler({}, { service: "svc", metric: "cpu", source: "anything" }, defaultContext());
        }
        catch {
            // threw past the guard → guard correctly did not fire
            return;
        }
        // Asserted outside the try block: a bare catch around the assertion
        // would also swallow its AssertionError and the test could never fail.
        assert.doesNotMatch(res.content[0].text, /allowed sources/);
    });
});
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Request-scoped context threaded from the transport boundary (HTTP `/mcp`,
|
|
3
|
+
* stdio, and the internal REST/dashboard call sites) into every tool handler.
|
|
4
|
+
*
|
|
5
|
+
* It carries the calling principal, how that principal was authenticated, an
|
|
6
|
+
* optional coarse source allow-list, and a correlation id. It is the single
|
|
7
|
+
* seam that later access-control / scoping / audit work attaches to, so those
|
|
8
|
+
* features become additive rather than a cross-cutting rewrite.
|
|
9
|
+
*/
|
|
10
|
+
export interface RequestContext {
|
|
11
|
+
/** Stable id for the calling principal. "anonymous" when no auth configured. */
|
|
12
|
+
principalId: string;
|
|
13
|
+
/** How the principal was authenticated. */
|
|
14
|
+
auth: "anonymous" | "apikey";
|
|
15
|
+
/**
|
|
16
|
+
* Coarse per-credential source allow-list (single-tenant primitive). When
|
|
17
|
+
* set, the principal may only target these source names. Rich role-based
|
|
18
|
+
* scoping (tools/services/lookback/read-only) is a separate concern.
|
|
19
|
+
*/
|
|
20
|
+
allowedSources?: string[];
|
|
21
|
+
/** Correlates all tool calls within one transport request/session. */
|
|
22
|
+
correlationId: string;
|
|
23
|
+
}
|
|
24
|
+
/** Default all-access anonymous context — preserves current behaviour. */
|
|
25
|
+
export declare function defaultContext(): RequestContext;
|
|
26
|
+
/** Context for an authenticated API-key principal. */
|
|
27
|
+
export declare function principalContext(principalId: string, allowedSources?: string[]): RequestContext;
|
package/dist/context.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
/**
 * Build the context for a caller with no credential: principal and auth mode
 * are both "anonymous" and no source allow-list is set.
 *
 * @returns A new context object with a freshly generated `correlationId`.
 */
export function defaultContext() {
    const principalId = "anonymous";
    const auth = "anonymous";
    const correlationId = randomUUID();
    return { principalId, auth, correlationId };
}
|
|
10
|
+
/**
 * Build the context for a caller authenticated with an API key.
 *
 * @param principalId Identifier of the authenticated principal.
 * @param allowedSources Optional source allow-list for this principal. A
 *        missing or empty list is stored as `undefined`.
 *        NOTE(review): an empty list therefore looks the same as "no
 *        restriction" to consumers — confirm that is the intended outcome.
 * @returns A new context object with `auth: "apikey"` and a freshly generated
 *          `correlationId`.
 */
export function principalContext(principalId, allowedSources) {
    const hasAllowList = Boolean(allowedSources) && allowedSources.length > 0;
    const context = {
        principalId,
        auth: "apikey",
        allowedSources: hasAllowList ? allowedSources : undefined,
        correlationId: randomUUID(),
    };
    return context;
}
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import express from "express";
|
|
3
|
+
import rateLimit from "express-rate-limit";
|
|
3
4
|
import { randomUUID } from "node:crypto";
|
|
4
5
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
5
6
|
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
@@ -7,6 +8,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
|
|
|
7
8
|
import { z } from "zod";
|
|
8
9
|
import { loadConfig, saveConfig, DEFAULT_HEALTH_THRESHOLDS, DEFAULT_SETTINGS } from "./config/loader.js";
|
|
9
10
|
import { ConnectorRegistry, getSupportedTypes } from "./connectors/registry.js";
|
|
11
|
+
import { defaultContext, principalContext } from "./context.js";
|
|
12
|
+
import { loadCredentials, credentialsConfigured, extractToken, resolveToken, } from "./auth/credentials.js";
|
|
10
13
|
import { getPluginLoader } from "./connectors/loader.js";
|
|
11
14
|
import { resolveHubCatalogUrl, describeInstalled, mergeCatalog, fetchHubCatalog, } from "./connectors/hub.js";
|
|
12
15
|
import { isValidConnectorName, installTarball } from "./connectors/install.js";
|
|
@@ -75,35 +78,18 @@ function validateSourceUrl(url) {
|
|
|
75
78
|
// Hard cap for a downloaded/uploaded connector tarball (defence against
|
|
76
79
|
// a hostile or accidental huge artifact OOM-ing the server).
|
|
77
80
|
const MAX_CONNECTOR_TGZ_BYTES = 64 * 1024 * 1024;
|
|
78
|
-
//
|
|
79
|
-
//
|
|
80
|
-
//
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
}
|
|
91
|
-
const key = req.ip || "unknown";
|
|
92
|
-
let s = installRateState.get(key);
|
|
93
|
-
if (!s || s.resetAt < now) {
|
|
94
|
-
s = { count: 0, resetAt: now + WINDOW_MS };
|
|
95
|
-
installRateState.set(key, s);
|
|
96
|
-
}
|
|
97
|
-
s.count++;
|
|
98
|
-
if (s.count > MAX) {
|
|
99
|
-
res.setHeader("Retry-After", String(Math.ceil((s.resetAt - now) / 1000)));
|
|
100
|
-
res.status(429).json({
|
|
101
|
-
error: "rate limit exceeded — too many connector install attempts, slow down",
|
|
102
|
-
});
|
|
103
|
-
return;
|
|
104
|
-
}
|
|
105
|
-
next();
|
|
106
|
-
}
|
|
81
|
+
// Per-client rate limiter for the expensive runtime routes (connector
|
|
82
|
+
// install/upload: fetch + extract + verify + fs write + loader rescan;
|
|
83
|
+
// add/test source: outbound backend connect). Uses express-rate-limit
|
|
84
|
+
// so the control is explicit and well-tested. Bounds abuse even with
|
|
85
|
+
// ENABLE_UI_INSTALL on.
|
|
86
|
+
const installRateLimit = rateLimit({
|
|
87
|
+
windowMs: 60_000,
|
|
88
|
+
limit: 5,
|
|
89
|
+
standardHeaders: true,
|
|
90
|
+
legacyHeaders: false,
|
|
91
|
+
message: { error: "rate limit exceeded — too many attempts, slow down" },
|
|
92
|
+
});
|
|
107
93
|
async function main() {
|
|
108
94
|
// Stdio transport mode (MCP catalogs / desktop clients / Glama's
|
|
109
95
|
// mcp-proxy spawn a stdio MCP server and read JSON-RPC from stdout).
|
|
@@ -124,7 +110,7 @@ async function main() {
|
|
|
124
110
|
// so we cannot share a single McpServer across HTTP sessions. Each new
|
|
125
111
|
// session needs its own server. The factory captures the live registry
|
|
126
112
|
// by reference so tool handlers always see the current configuration.
|
|
127
|
-
function createMcpServer() {
|
|
113
|
+
function createMcpServer(ctx) {
|
|
128
114
|
const mcpServer = new McpServer({
|
|
129
115
|
name: "observability-mcp",
|
|
130
116
|
version: SERVER_VERSION,
|
|
@@ -135,7 +121,7 @@ async function main() {
|
|
|
135
121
|
"When to use: call this first to learn which source names exist and are healthy before passing `source` to other tools, or to debug why a query returns no data.",
|
|
136
122
|
"Behavior: read-only, no side effects. Returns one entry per source with its name, type, configured URL, signal types (metrics/logs), and a live up/down status. Never throws for an unreachable backend — the backend is reported as down instead.",
|
|
137
123
|
"Related: use `list_services` to see what is monitored within these sources.",
|
|
138
|
-
].join(" "), {}, async () => withToolMetrics("list_sources", () => listSourcesHandler(registry)));
|
|
124
|
+
].join(" "), {}, async () => withToolMetrics("list_sources", () => listSourcesHandler(registry, ctx)));
|
|
139
125
|
mcpServer.tool("list_services", [
|
|
140
126
|
"Discover the service names that can be queried, aggregated across every connected backend.",
|
|
141
127
|
"When to use: call this before `query_metrics`, `query_logs`, or `get_service_health` to obtain the exact, case-sensitive service name those tools require.",
|
|
@@ -146,7 +132,7 @@ async function main() {
|
|
|
146
132
|
.string()
|
|
147
133
|
.optional()
|
|
148
134
|
.describe("Optional case-insensitive substring to narrow the result to matching service names (e.g. 'payment'). Omit to list every discovered service."),
|
|
149
|
-
}, async (args) => withToolMetrics("list_services", () => listServicesHandler(registry, args)));
|
|
135
|
+
}, async (args) => withToolMetrics("list_services", () => listServicesHandler(registry, args, ctx)));
|
|
150
136
|
const metricsList = getAvailableMetricNames(registry);
|
|
151
137
|
const metricNames = registry.getBySignal("metrics").flatMap(c => c.getMetrics().map(m => m.name));
|
|
152
138
|
const uniqueNames = [...new Set(metricNames)];
|
|
@@ -175,7 +161,7 @@ async function main() {
|
|
|
175
161
|
.string()
|
|
176
162
|
.optional()
|
|
177
163
|
.describe("Optional. Metric label to break the result down by, e.g. 'instance', 'pod', 'node'. When set, the response contains one series per distinct label value under `groups`. Default: a single aggregated series."),
|
|
178
|
-
}, async (args) => withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args)));
|
|
164
|
+
}, async (args) => withToolMetrics("query_metrics", () => queryMetricsHandler(registry, args, ctx)));
|
|
179
165
|
mcpServer.tool("query_logs", [
|
|
180
166
|
"Fetch recent log entries for ONE service over a look-back window, with a pre-computed summary (error/warning counts and the most frequent error patterns).",
|
|
181
167
|
"When to use: to inspect what a service actually logged, or to investigate an error spike surfaced by `detect_anomalies` / `get_service_health`. For numeric metrics use `query_metrics` instead.",
|
|
@@ -203,7 +189,7 @@ async function main() {
|
|
|
203
189
|
.positive()
|
|
204
190
|
.optional()
|
|
205
191
|
.describe("Optional. Maximum number of log entries to return (most recent first). Default: 100."),
|
|
206
|
-
}, async (args) => withToolMetrics("query_logs", () => queryLogsHandler(registry, args)));
|
|
192
|
+
}, async (args) => withToolMetrics("query_logs", () => queryLogsHandler(registry, args, ctx)));
|
|
207
193
|
mcpServer.tool("get_service_health", [
|
|
208
194
|
"Produce a single aggregated health verdict for ONE service by combining its metrics and logs.",
|
|
209
195
|
"When to use: the fastest way to answer 'is this service healthy right now and why?'. Use `query_metrics`/`query_logs` to drill into the underlying numbers, or `detect_anomalies` to scan many services at once.",
|
|
@@ -213,7 +199,7 @@ async function main() {
|
|
|
213
199
|
service: z
|
|
214
200
|
.string()
|
|
215
201
|
.describe("Required. Exact, case-sensitive service name exactly as returned by `list_services` (e.g. 'payment-service')."),
|
|
216
|
-
}, async (args) => withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args)));
|
|
202
|
+
}, async (args) => withToolMetrics("get_service_health", () => getServiceHealthHandler(registry, args, ctx)));
|
|
217
203
|
mcpServer.tool("detect_anomalies", [
|
|
218
204
|
"Scan one or all monitored services for abnormal behavior and return the findings ranked by severity.",
|
|
219
205
|
"When to use: the entry point for 'is anything wrong anywhere?' triage. Once a service is flagged, follow up with `get_service_health` for the verdict or `query_metrics`/`query_logs` for the raw evidence.",
|
|
@@ -232,7 +218,7 @@ async function main() {
|
|
|
232
218
|
.enum(["low", "medium", "high"])
|
|
233
219
|
.optional()
|
|
234
220
|
.describe("Optional. Detection threshold: 'low' flags only strong deviations (>3σ), 'medium' is balanced (>2σ), 'high' is most sensitive and noisier (>1.5σ). Default: 'medium'."),
|
|
235
|
-
}, async (args) => withToolMetrics("detect_anomalies", () => detectAnomaliesHandler(registry, args)));
|
|
221
|
+
}, async (args) => withToolMetrics("detect_anomalies", () => detectAnomaliesHandler(registry, args, ctx)));
|
|
236
222
|
return mcpServer;
|
|
237
223
|
}
|
|
238
224
|
// --- HTTP server ---
|
|
@@ -485,7 +471,7 @@ async function main() {
|
|
|
485
471
|
}
|
|
486
472
|
});
|
|
487
473
|
// Add a new source
|
|
488
|
-
app.post("/api/sources", async (req, res) => {
|
|
474
|
+
app.post("/api/sources", installRateLimit, async (req, res) => {
|
|
489
475
|
const { name, type, url, enabled, auth, tls } = req.body;
|
|
490
476
|
if (!name || !type || !url) {
|
|
491
477
|
res.status(400).json({ error: "name, type, and url are required" });
|
|
@@ -548,7 +534,7 @@ async function main() {
|
|
|
548
534
|
res.json({ ok: true });
|
|
549
535
|
});
|
|
550
536
|
// Test a source connection (without saving)
|
|
551
|
-
app.post("/api/sources/test", async (req, res) => {
|
|
537
|
+
app.post("/api/sources/test", installRateLimit, async (req, res) => {
|
|
552
538
|
const { name, type, url, enabled, auth, tls } = req.body;
|
|
553
539
|
if (!type || !url) {
|
|
554
540
|
res.status(400).json({ error: "type and url are required" });
|
|
@@ -594,7 +580,7 @@ async function main() {
|
|
|
594
580
|
// List discovered services
|
|
595
581
|
app.get("/api/services", async (_req, res) => {
|
|
596
582
|
try {
|
|
597
|
-
const result = await listServicesHandler(registry, {});
|
|
583
|
+
const result = await listServicesHandler(registry, {}, defaultContext());
|
|
598
584
|
res.json(parseToolResult(result));
|
|
599
585
|
}
|
|
600
586
|
catch {
|
|
@@ -604,7 +590,7 @@ async function main() {
|
|
|
604
590
|
// Health endpoint for UI dashboard
|
|
605
591
|
app.get("/api/health/:service", async (req, res) => {
|
|
606
592
|
try {
|
|
607
|
-
const result = await getServiceHealthHandler(registry, { service: req.params.service });
|
|
593
|
+
const result = await getServiceHealthHandler(registry, { service: req.params.service }, defaultContext());
|
|
608
594
|
res.json(parseToolResult(result));
|
|
609
595
|
}
|
|
610
596
|
catch {
|
|
@@ -614,13 +600,13 @@ async function main() {
|
|
|
614
600
|
// Health for all services
|
|
615
601
|
app.get("/api/health", async (_req, res) => {
|
|
616
602
|
try {
|
|
617
|
-
const servicesResult = await listServicesHandler(registry, {});
|
|
603
|
+
const servicesResult = await listServicesHandler(registry, {}, defaultContext());
|
|
618
604
|
const parsed = parseToolResult(servicesResult);
|
|
619
605
|
const services = parsed?.services || [];
|
|
620
606
|
const health = {};
|
|
621
607
|
for (const svc of services) {
|
|
622
608
|
try {
|
|
623
|
-
const result = await getServiceHealthHandler(registry, { service: svc.name });
|
|
609
|
+
const result = await getServiceHealthHandler(registry, { service: svc.name }, defaultContext());
|
|
624
610
|
health[svc.name] = parseToolResult(result);
|
|
625
611
|
}
|
|
626
612
|
catch {
|
|
@@ -703,7 +689,7 @@ async function main() {
|
|
|
703
689
|
});
|
|
704
690
|
// Stdio transport: one server over stdin/stdout, no HTTP listener.
|
|
705
691
|
if (STDIO) {
|
|
706
|
-
const server = createMcpServer();
|
|
692
|
+
const server = createMcpServer(defaultContext());
|
|
707
693
|
await server.connect(new StdioServerTransport());
|
|
708
694
|
console.error(`observability-mcp running on stdio transport · connectors: ${registry
|
|
709
695
|
.getAll()
|
|
@@ -727,7 +713,26 @@ async function main() {
|
|
|
727
713
|
}
|
|
728
714
|
mcpActiveSessions.set(transports.size);
|
|
729
715
|
}, 5 * 60 * 1000);
|
|
716
|
+
// Single-tenant auth gate. No credentials configured → anonymous (current
// behaviour, fully backward compatible). Configured → require a valid
// Bearer/X-API-Key on every /mcp request; resolve the principal + its
// coarse source allow-list into the RequestContext.
//
// Returns the RequestContext for the request, or null after a 401 response
// has already been sent (the caller must then stop handling the request).
function gateCtx(req, res) {
    // Parse the credential environment once per request and reuse the result
    // for both the "is auth enabled?" check and the token lookup.
    const creds = loadCredentials();
    if (creds.length === 0)
        return defaultContext();
    const cred = resolveToken(extractToken(req.headers), creds);
    if (!cred) {
        res
            .status(401)
            .json({ error: "unauthorized: valid Bearer token or X-API-Key required" });
        return null;
    }
    return principalContext(cred.name, cred.allowedSources);
}
|
|
730
732
|
app.post("/mcp", async (req, res) => {
|
|
733
|
+
const ctx = gateCtx(req, res);
|
|
734
|
+
if (!ctx)
|
|
735
|
+
return;
|
|
731
736
|
const sessionId = req.headers["mcp-session-id"];
|
|
732
737
|
let transport;
|
|
733
738
|
if (sessionId && transports.has(sessionId)) {
|
|
@@ -747,7 +752,7 @@ async function main() {
|
|
|
747
752
|
}
|
|
748
753
|
mcpActiveSessions.set(transports.size);
|
|
749
754
|
};
|
|
750
|
-
const sessionMcpServer = createMcpServer();
|
|
755
|
+
const sessionMcpServer = createMcpServer(ctx);
|
|
751
756
|
await sessionMcpServer.connect(transport);
|
|
752
757
|
}
|
|
753
758
|
await transport.handleRequest(req, res, req.body);
|
|
@@ -761,6 +766,8 @@ async function main() {
|
|
|
761
766
|
mcpActiveSessions.set(transports.size);
|
|
762
767
|
});
|
|
763
768
|
app.get("/mcp", async (req, res) => {
|
|
769
|
+
if (!gateCtx(req, res))
|
|
770
|
+
return;
|
|
764
771
|
const sessionId = req.headers["mcp-session-id"];
|
|
765
772
|
const transport = transports.get(sessionId);
|
|
766
773
|
if (!transport) {
|
|
@@ -770,6 +777,8 @@ async function main() {
|
|
|
770
777
|
await transport.handleRequest(req, res);
|
|
771
778
|
});
|
|
772
779
|
app.delete("/mcp", async (req, res) => {
|
|
780
|
+
if (!gateCtx(req, res))
|
|
781
|
+
return;
|
|
773
782
|
const sessionId = req.headers["mcp-session-id"];
|
|
774
783
|
const transport = transports.get(sessionId);
|
|
775
784
|
if (transport) {
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verifiable offline mode — egress policy.
|
|
3
|
+
*
|
|
4
|
+
* The server performs **no telemetry, analytics, phone-home, or update
|
|
5
|
+
* checks**. The only outbound network calls it ever makes are to backends
|
|
6
|
+
* the operator explicitly configures (Prometheus/Loki/... source URLs) or to
|
|
7
|
+
* an artifact URL the operator/registry explicitly asks it to install.
|
|
8
|
+
*
|
|
9
|
+
* This module is the machine-checkable statement of that guarantee:
|
|
10
|
+
* `egress-policy.test.ts` fails CI if any source file outside the allowlist
|
|
11
|
+
* introduces an outbound call — so the "no data egress" property cannot
|
|
12
|
+
* silently regress.
|
|
13
|
+
*/
|
|
14
|
+
export declare const OFFLINE_STATEMENT: string;
|
|
15
|
+
/** Regex of outbound-call shapes the guard scans for. */
|
|
16
|
+
export declare const OUTBOUND_PATTERN: RegExp;
|
|
17
|
+
/**
|
|
18
|
+
* Files/prefixes permitted to make outbound calls, each with the reason.
|
|
19
|
+
* Anything matching OUTBOUND_PATTERN outside these paths is a policy breach
|
|
20
|
+
* (e.g. a newly added analytics/telemetry module).
|
|
21
|
+
*/
|
|
22
|
+
export declare const EGRESS_ALLOWLIST: ReadonlyArray<{
|
|
23
|
+
prefix: string;
|
|
24
|
+
reason: string;
|
|
25
|
+
}>;
|
|
26
|
+
/**
|
|
27
|
+
* Hard-blocked analytics/telemetry SDKs — matches an *import/require of the
|
|
28
|
+
* package*, not the word in prose, so comments/policy text don't false-positive.
|
|
29
|
+
*/
|
|
30
|
+
export declare const FORBIDDEN_TELEMETRY: RegExp;
|
|
31
|
+
export declare function isEgressAllowed(relPath: string): boolean;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verifiable offline mode — egress policy.
|
|
3
|
+
*
|
|
4
|
+
* The server performs **no telemetry, analytics, phone-home, or update
|
|
5
|
+
* checks**. The only outbound network calls it ever makes are to backends
|
|
6
|
+
* the operator explicitly configures (Prometheus/Loki/... source URLs) or to
|
|
7
|
+
* an artifact URL the operator/registry explicitly asks it to install.
|
|
8
|
+
*
|
|
9
|
+
* This module is the machine-checkable statement of that guarantee:
|
|
10
|
+
* `egress-policy.test.ts` fails CI if any source file outside the allowlist
|
|
11
|
+
* introduces an outbound call — so the "no data egress" property cannot
|
|
12
|
+
* silently regress.
|
|
13
|
+
*/
|
|
14
|
+
export const OFFLINE_STATEMENT = "observability-mcp makes no telemetry/analytics/phone-home/update calls. " +
|
|
15
|
+
"Outbound traffic goes only to operator-configured source backends and " +
|
|
16
|
+
"operator/registry-requested plugin artifacts. It runs fully air-gapped.";
|
|
17
|
+
/** Regex of outbound-call shapes the guard scans for. */
|
|
18
|
+
export const OUTBOUND_PATTERN = /\b(fetch\s*\(|https?\.request\s*\(|new\s+WebSocket\s*\(|import\s*\(\s*['"]https?:)/;
|
|
19
|
+
/**
 * Files/prefixes permitted to make outbound calls, each with the reason.
 * Anything matching OUTBOUND_PATTERN outside these paths is a policy breach
 * (e.g. a newly added analytics/telemetry module).
 *
 * An entry ending in "/" is a directory prefix; any other entry names exactly
 * one file (see `isEgressAllowed`).
 */
export const EGRESS_ALLOWLIST = [
    { prefix: "connectors/", reason: "connectors query operator-configured source backends" },
    { prefix: "cli/index.ts", reason: "CLI fetches a source location the operator passed explicitly" },
    { prefix: "index.ts", reason: "connector-hub plugin install of an operator/registry-requested tarball URL" },
];
/**
 * Hard-blocked analytics/telemetry SDKs — matches an *import/require of the
 * package*, not the word in prose, so comments/policy text don't false-positive.
 */
export const FORBIDDEN_TELEMETRY = /(?:from\s*['"]|require\(\s*['"])[^'"]*(sentry|posthog|mixpanel|amplitude|@segment|datadog-rum|analytics-node|google-analytics)/i;
/**
 * Decide whether a source file may contain outbound network calls.
 *
 * Directory entries (ending in "/") match every path beneath them. File
 * entries must match the whole path, so a sibling such as `index.tsx` or
 * `index.ts.telemetry.ts` is not covered by the `index.ts` entry.
 *
 * @param relPath Path relative to the source root; `\` separators are
 *        normalised to `/` before matching.
 * @returns `true` when the path is covered by `EGRESS_ALLOWLIST`.
 */
export function isEgressAllowed(relPath) {
    const p = relPath.replace(/\\/g, "/");
    return EGRESS_ALLOWLIST.some((a) => a.prefix.endsWith("/") ? p.startsWith(a.prefix) : p === a.prefix);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { describe, it } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { dirname, join, relative } from "node:path";
|
|
6
|
+
import { OUTBOUND_PATTERN, FORBIDDEN_TELEMETRY, isEgressAllowed, EGRESS_ALLOWLIST, } from "./egress-policy.js";
|
|
7
|
+
// Verifiable offline mode: static guard so the "no data egress" guarantee
|
|
8
|
+
// cannot silently regress. Any new outbound call outside the documented
|
|
9
|
+
// allowlist, or any analytics/telemetry SDK anywhere, fails CI here.
|
|
10
|
+
const srcRoot = join(dirname(fileURLToPath(import.meta.url)), "..");
|
|
11
|
+
/**
 * Recursively list the non-test `.ts` files below `dir`.
 *
 * Sub-directories are expanded in place, so results follow the order in which
 * `readdirSync` reports the entries. Files ending in `.test.ts` are left out.
 *
 * @param dir Directory to scan.
 * @returns Paths of matching files, each built by joining onto `dir`.
 */
function walk(dir) {
    return readdirSync(dir).flatMap((entry) => {
        const full = join(dir, entry);
        if (statSync(full).isDirectory()) {
            return walk(full);
        }
        const isSource = entry.endsWith(".ts") && !entry.endsWith(".test.ts");
        return isSource ? [full] : [];
    });
}
|
|
22
|
+
// Static scan of the source tree: every non-test source file is read once and
// checked against the egress allowlist and the forbidden-telemetry pattern.
describe("verifiable offline mode — egress policy", () => {
    const files = [];
    for (const abs of walk(srcRoot)) {
        const rel = relative(srcRoot, abs).replace(/\\/g, "/");
        const src = readFileSync(abs, "utf8");
        // The policy module itself names these tokens by design.
        if (rel === "net/egress-policy.ts") {
            continue;
        }
        files.push({ rel, src });
    }
    it("scans a non-trivial number of source files", () => {
        const scanned = files.length;
        assert.ok(scanned > 20, `only scanned ${scanned} files`);
    });
    it("no outbound call outside the egress allowlist", () => {
        const breaches = [];
        for (const { rel, src } of files) {
            if (OUTBOUND_PATTERN.test(src) && !isEgressAllowed(rel)) {
                breaches.push(rel);
            }
        }
        const allowed = EGRESS_ALLOWLIST.map((a) => a.prefix).join(", ");
        assert.deepEqual(breaches, [], `outbound calls found outside allowlist (${allowed}): ` +
            `${breaches.join(", ")} — telemetry/phone-home is forbidden; if legitimate, extend EGRESS_ALLOWLIST with a reason`);
    });
    it("no analytics/telemetry SDK anywhere in source", () => {
        const hits = [];
        for (const { rel, src } of files) {
            if (FORBIDDEN_TELEMETRY.test(src)) {
                hits.push(rel);
            }
        }
        assert.deepEqual(hits, [], `forbidden telemetry/analytics identifiers in: ${hits.join(", ")}`);
    });
    it("allowlisted files are still present (allowlist not stale)", () => {
        for (const { prefix } of EGRESS_ALLOWLIST) {
            const match = files.find((f) => f.rel === prefix || f.rel.startsWith(prefix));
            assert.ok(match !== undefined, `allowlist entry "${prefix}" matches no source file — prune it`);
        }
    });
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { describe, it } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { readFileSync, readdirSync } from "node:fs";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { dirname, join } from "node:path";
|
|
6
|
+
// Keystone guard: every tool handler must accept the RequestContext seam.
|
|
7
|
+
// This prevents a new handler (or a refactor) from silently bypassing the
|
|
8
|
+
// request-scoped context that access-control / scoping / audit attach to.
|
|
9
|
+
const here = dirname(fileURLToPath(import.meta.url));
|
|
10
|
+
// Registers one test per file in this directory that exports a `*Handler`
// function; each test checks the file's text for the `_ctx: RequestContext`
// parameter and the `../context.js` import.
describe("RequestContext seam", () => {
    const exportsHandler = /export\s+(async\s+)?function\s+\w*Handler\s*\(/;
    readdirSync(here)
        .filter((name) => name.endsWith(".ts") && !name.endsWith(".test.ts"))
        .map((name) => ({ name, src: readFileSync(join(here, name), "utf8") }))
        .filter(({ src }) => exportsHandler.test(src))
        .forEach(({ name, src }) => {
            it(`${name}: handler accepts a RequestContext`, () => {
                assert.match(src, /_ctx:\s*RequestContext/, `${name} exports a *Handler but does not thread RequestContext — ` +
                    `add the ctx seam (see context.ts)`);
                assert.match(src, /from "\.\.\/context\.js"/, `${name} must import from ../context.js`);
            });
        });
});
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ConnectorRegistry } from "../connectors/registry.js";
|
|
2
|
+
import { type RequestContext } from "../context.js";
|
|
2
3
|
export declare const detectAnomaliesDefinition: {
|
|
3
4
|
name: "detect_anomalies";
|
|
4
5
|
description: string;
|
|
@@ -25,7 +26,7 @@ export declare function detectAnomaliesHandler(registry: ConnectorRegistry, args
|
|
|
25
26
|
service?: string;
|
|
26
27
|
duration?: string;
|
|
27
28
|
sensitivity?: string;
|
|
28
|
-
}): Promise<{
|
|
29
|
+
}, _ctx?: RequestContext): Promise<{
|
|
29
30
|
content: {
|
|
30
31
|
type: "text";
|
|
31
32
|
text: string;
|