@thotischner/observability-mcp 1.8.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analysis/history.d.ts +70 -0
- package/dist/analysis/history.js +170 -0
- package/dist/analysis/history.test.d.ts +1 -0
- package/dist/analysis/history.test.js +141 -0
- package/dist/audit/log.d.ts +9 -0
- package/dist/audit/log.js +20 -0
- package/dist/audit/redaction-bypass.d.ts +67 -0
- package/dist/audit/redaction-bypass.js +64 -0
- package/dist/audit/redaction-bypass.test.d.ts +1 -0
- package/dist/audit/redaction-bypass.test.js +72 -0
- package/dist/audit/sinks/types.d.ts +18 -0
- package/dist/audit/sinks/types.js +1 -0
- package/dist/audit/sinks/webhook.d.ts +45 -0
- package/dist/audit/sinks/webhook.js +111 -0
- package/dist/audit/sinks/webhook.test.d.ts +1 -0
- package/dist/audit/sinks/webhook.test.js +162 -0
- package/dist/auth/credentials.d.ts +11 -0
- package/dist/auth/credentials.js +27 -0
- package/dist/auth/credentials.test.js +21 -1
- package/dist/auth/csrf.d.ts +26 -0
- package/dist/auth/csrf.js +128 -0
- package/dist/auth/csrf.test.d.ts +1 -0
- package/dist/auth/csrf.test.js +143 -0
- package/dist/auth/local-users.d.ts +6 -0
- package/dist/auth/local-users.js +11 -0
- package/dist/auth/local-users.test.js +41 -0
- package/dist/auth/middleware.d.ts +7 -6
- package/dist/auth/oidc/dcr.d.ts +70 -0
- package/dist/auth/oidc/dcr.js +160 -0
- package/dist/auth/oidc/dcr.test.d.ts +1 -0
- package/dist/auth/oidc/dcr.test.js +109 -0
- package/dist/auth/oidc/endpoints.js +44 -0
- package/dist/auth/oidc/profiles.d.ts +22 -0
- package/dist/auth/oidc/profiles.js +95 -0
- package/dist/auth/oidc/profiles.test.d.ts +1 -0
- package/dist/auth/oidc/profiles.test.js +51 -0
- package/dist/auth/oidc/runtime.d.ts +3 -0
- package/dist/auth/oidc/runtime.js +16 -3
- package/dist/auth/oidc/runtime.test.js +1 -0
- package/dist/auth/policy/batch-dry-run.d.ts +56 -0
- package/dist/auth/policy/batch-dry-run.js +129 -0
- package/dist/auth/policy/batch-dry-run.test.d.ts +1 -0
- package/dist/auth/policy/batch-dry-run.test.js +140 -0
- package/dist/auth/policy/engine.d.ts +20 -4
- package/dist/auth/policy/engine.js +16 -2
- package/dist/auth/policy/loader.d.ts +11 -1
- package/dist/auth/policy/loader.js +37 -0
- package/dist/auth/policy/loader.test.d.ts +1 -0
- package/dist/auth/policy/loader.test.js +86 -0
- package/dist/auth/policy/opa.d.ts +5 -5
- package/dist/auth/policy/opa.js +25 -14
- package/dist/auth/policy/opa.test.js +48 -0
- package/dist/auth/rbac.d.ts +23 -1
- package/dist/auth/rbac.js +43 -1
- package/dist/auth/rbac.test.js +62 -0
- package/dist/cli/index.js +3 -0
- package/dist/cli/inspector-config.d.ts +9 -0
- package/dist/cli/inspector-config.js +28 -0
- package/dist/cli/inspector-config.test.d.ts +1 -0
- package/dist/cli/inspector-config.test.js +33 -0
- package/dist/cli/lib.d.ts +1 -1
- package/dist/cli/lib.js +1 -0
- package/dist/conformance/mcp-2025-11-25.test.d.ts +1 -0
- package/dist/conformance/mcp-2025-11-25.test.js +206 -0
- package/dist/connectors/interface.d.ts +5 -1
- package/dist/connectors/loader.js +6 -4
- package/dist/connectors/loader.test.d.ts +1 -0
- package/dist/connectors/loader.test.js +78 -0
- package/dist/connectors/prometheus.test.js +31 -13
- package/dist/connectors/registry.d.ts +13 -0
- package/dist/connectors/registry.js +30 -0
- package/dist/connectors/registry.test.js +56 -2
- package/dist/context.d.ts +32 -0
- package/dist/context.js +35 -0
- package/dist/context.test.d.ts +1 -0
- package/dist/context.test.js +58 -0
- package/dist/federation/registry.d.ts +32 -0
- package/dist/federation/registry.js +77 -0
- package/dist/federation/registry.test.d.ts +1 -0
- package/dist/federation/registry.test.js +130 -0
- package/dist/federation/upstream.d.ts +60 -0
- package/dist/federation/upstream.js +114 -0
- package/dist/index.js +1188 -120
- package/dist/middleware/ssrfGuard.d.ts +15 -0
- package/dist/middleware/ssrfGuard.js +103 -0
- package/dist/middleware/ssrfGuard.test.d.ts +1 -0
- package/dist/middleware/ssrfGuard.test.js +81 -0
- package/dist/observability/otel.d.ts +20 -0
- package/dist/observability/otel.js +118 -0
- package/dist/observability/otel.test.d.ts +1 -0
- package/dist/observability/otel.test.js +56 -0
- package/dist/openapi.js +215 -7
- package/dist/openapi.test.js +34 -0
- package/dist/postmortem/synthesizer.d.ts +83 -0
- package/dist/postmortem/synthesizer.js +205 -0
- package/dist/postmortem/synthesizer.test.d.ts +1 -0
- package/dist/postmortem/synthesizer.test.js +141 -0
- package/dist/products/loader.d.ts +31 -3
- package/dist/products/loader.js +77 -4
- package/dist/products/loader.test.js +90 -1
- package/dist/quota/charge.d.ts +28 -0
- package/dist/quota/charge.js +30 -0
- package/dist/quota/charge.test.d.ts +1 -0
- package/dist/quota/charge.test.js +83 -0
- package/dist/quota/limiter.d.ts +29 -4
- package/dist/quota/limiter.js +64 -8
- package/dist/quota/limiter.test.js +86 -0
- package/dist/scim/group-role-map.d.ts +4 -0
- package/dist/scim/group-role-map.js +33 -0
- package/dist/scim/group-role-map.test.d.ts +1 -0
- package/dist/scim/group-role-map.test.js +33 -0
- package/dist/scim/routes.d.ts +15 -0
- package/dist/scim/routes.js +249 -0
- package/dist/scim/store.d.ts +37 -0
- package/dist/scim/store.js +178 -0
- package/dist/scim/store.test.d.ts +1 -0
- package/dist/scim/store.test.js +121 -0
- package/dist/scim/types.d.ts +73 -0
- package/dist/scim/types.js +29 -0
- package/dist/sdk/hooks.d.ts +77 -0
- package/dist/sdk/hooks.js +72 -0
- package/dist/sdk/hooks.test.d.ts +1 -0
- package/dist/sdk/hooks.test.js +159 -0
- package/dist/sdk/index.d.ts +2 -0
- package/dist/sdk/index.js +1 -0
- package/dist/sdk/manifest-schema.d.ts +17 -0
- package/dist/sdk/manifest-schema.js +21 -0
- package/dist/tools/context-seam.test.js +6 -1
- package/dist/tools/detect-anomalies.d.ts +1 -1
- package/dist/tools/detect-anomalies.js +5 -4
- package/dist/tools/generate-postmortem.d.ts +35 -0
- package/dist/tools/generate-postmortem.js +191 -0
- package/dist/tools/get-anomaly-history.d.ts +35 -0
- package/dist/tools/get-anomaly-history.js +126 -0
- package/dist/tools/get-service-health.d.ts +1 -1
- package/dist/tools/get-service-health.js +4 -3
- package/dist/tools/list-services.d.ts +1 -1
- package/dist/tools/list-services.js +3 -2
- package/dist/tools/list-sources.d.ts +1 -1
- package/dist/tools/list-sources.js +6 -2
- package/dist/tools/query-logs.d.ts +1 -1
- package/dist/tools/query-logs.js +2 -2
- package/dist/tools/query-metrics.d.ts +1 -1
- package/dist/tools/query-metrics.js +19 -6
- package/dist/tools/query-traces.d.ts +47 -0
- package/dist/tools/query-traces.js +145 -0
- package/dist/tools/query-traces.test.d.ts +1 -0
- package/dist/tools/query-traces.test.js +110 -0
- package/dist/tools/registry-names.d.ts +35 -0
- package/dist/tools/registry-names.js +54 -0
- package/dist/tools/registry-names.test.d.ts +1 -0
- package/dist/tools/registry-names.test.js +61 -0
- package/dist/tools/topology.d.ts +3 -3
- package/dist/tools/topology.js +10 -6
- package/dist/topology/merge.d.ts +22 -0
- package/dist/topology/merge.js +178 -0
- package/dist/topology/merge.test.d.ts +1 -0
- package/dist/topology/merge.test.js +110 -0
- package/dist/transport/sessionStore.d.ts +66 -0
- package/dist/transport/sessionStore.js +138 -0
- package/dist/transport/sessionStore.test.d.ts +1 -0
- package/dist/transport/sessionStore.test.js +118 -0
- package/dist/transport/websocket.d.ts +35 -0
- package/dist/transport/websocket.js +133 -0
- package/dist/transport/websocket.test.d.ts +1 -0
- package/dist/transport/websocket.test.js +124 -0
- package/dist/types.d.ts +51 -0
- package/dist/ui/index.html +1729 -100
- package/package.json +13 -3
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import { test } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { HookRegistry } from "./hooks.js";
|
|
4
|
+
/**
 * Builds the hook context fixture passed to `HookRegistry.fire` in this suite.
 *
 * @param target Value for the context's `target` field; defaults to "list_services".
 * @returns A context for principal "alice" in tenant "default" with kind "tool_pre_invoke".
 */
function ctx(target = "list_services") {
    const fixedFields = { principal: "alice", tenant: "default", kind: "tool_pre_invoke" };
    return { ...fixedFields, target };
}
|
|
12
|
+
// A registration that omits `priority` and `mode` must come back from
// `list` with the defaults this suite pins: priority 100, mode "enforce".
test("HookRegistry.register: adds an entry with defaults applied", () => {
    const registry = new HookRegistry();
    const registration = {
        pluginName: "p1",
        kind: "tool_pre_invoke",
        handler: () => ({ allow: true }),
    };
    registry.register(registration);
    const entries = registry.list("tool_pre_invoke");
    assert.equal(entries.length, 1);
    const only = entries[0];
    assert.equal(only?.priority, 100);
    assert.equal(only?.mode, "enforce");
});
|
|
24
|
+
// Registering the same (pluginName, kind) pair twice must leave a single
// entry carrying the values of the later registration.
test("HookRegistry.register: re-registering same (plugin,kind) replaces prior entry", () => {
    const registry = new HookRegistry();
    for (const priority of [10, 20]) {
        registry.register({
            pluginName: "p",
            kind: "tool_pre_invoke",
            priority,
            handler: () => ({ allow: true }),
        });
    }
    const entries = registry.list("tool_pre_invoke");
    assert.equal(entries.length, 1);
    assert.equal(entries[0]?.priority, 20);
});
|
|
32
|
+
// Hooks are registered out of order on purpose; `list` must return them
// sorted by ascending priority.
test("HookRegistry.list: orders by priority (lower runs first)", () => {
    const registry = new HookRegistry();
    const registrations = [
        ["a", 50],
        ["b", 10],
        ["c", 99],
    ];
    for (const [pluginName, priority] of registrations) {
        registry.register({
            pluginName,
            kind: "tool_pre_invoke",
            priority,
            handler: () => ({ allow: true }),
        });
    }
    const orderedNames = registry.list("tool_pre_invoke").map((entry) => entry.pluginName);
    assert.deepEqual(orderedNames, ["b", "a", "c"]);
});
|
|
40
|
+
// A hook registered with mode "disabled" must not appear in `list`.
test("HookRegistry.list: disabled hooks are filtered out", () => {
    const registry = new HookRegistry();
    const allowAll = () => ({ allow: true });
    registry.register({ pluginName: "a", kind: "tool_pre_invoke", handler: allowAll });
    registry.register({ pluginName: "b", kind: "tool_pre_invoke", mode: "disabled", handler: allowAll });
    const listedNames = registry.list("tool_pre_invoke").map((entry) => entry.pluginName);
    assert.deepEqual(listedNames, ["a"]);
});
|
|
47
|
+
// Plugin "p" owns two hooks of different kinds, plugin "q" owns one.
// Unregistering "p" must report 2 dropped entries and leave only "q".
test("HookRegistry.unregisterPlugin: drops every entry for a plugin", () => {
    const registry = new HookRegistry();
    const hooks = [
        { pluginName: "p", kind: "tool_pre_invoke" },
        { pluginName: "p", kind: "tool_post_invoke" },
        { pluginName: "q", kind: "tool_pre_invoke" },
    ];
    for (const hook of hooks) {
        registry.register({ ...hook, handler: () => ({ allow: true }) });
    }
    const droppedCount = registry.unregisterPlugin("p");
    assert.equal(droppedCount, 2);
    assert.equal(registry.all().length, 1);
    assert.equal(registry.all()[0]?.pluginName, "q");
});
|
|
57
|
+
// Each handler returns a payload extended with its own key; the result of
// `fire` must contain the initial payload plus both additions.
test("HookRegistry.fire: chains payload mutations and returns the final", async () => {
    const registry = new HookRegistry();
    const addA = (_context, payload) => ({ allow: true, payload: { ...payload, a: 1 } });
    const addB = (_context, payload) => ({ allow: true, payload: { ...payload, b: 2 } });
    registry.register({ pluginName: "a", kind: "tool_pre_invoke", priority: 10, handler: addA });
    registry.register({ pluginName: "b", kind: "tool_pre_invoke", priority: 20, handler: addB });
    const outcome = await registry.fire("tool_pre_invoke", ctx(), { initial: true });
    assert.equal(outcome.allow, true);
    assert.deepEqual(outcome.payload, { initial: true, a: 1, b: 2 });
});
|
|
75
|
+
// The priority-10 hook denies; the priority-20 hook records whether it ran.
// `fire` must surface the denial and its reason without calling the second hook.
test("HookRegistry.fire: first allow:false short-circuits subsequent hooks", async () => {
    const registry = new HookRegistry();
    let secondHookRan = false;
    const deny = () => ({ allow: false, reason: "denied by policy" });
    const recordAndAllow = () => {
        secondHookRan = true;
        return { allow: true };
    };
    registry.register({ pluginName: "a", kind: "tool_pre_invoke", priority: 10, handler: deny });
    registry.register({ pluginName: "b", kind: "tool_pre_invoke", priority: 20, handler: recordAndAllow });
    const outcome = await registry.fire("tool_pre_invoke", ctx(), {});
    assert.equal(outcome.allow, false);
    assert.equal(outcome.reason, "denied by policy");
    assert.equal(secondHookRan, false);
});
|
|
98
|
+
// Hook "a" is registered without a mode or priority and throws; hook "b"
// (priority 200) records whether it ran. `fire` must report a denial whose
// reason mentions the thrown message, and "b" must never be called.
test("HookRegistry.fire: enforce-mode throw blocks the chain", async () => {
    const registry = new HookRegistry();
    let secondHookRan = false;
    const explode = () => {
        throw new Error("boom");
    };
    const recordAndAllow = () => {
        secondHookRan = true;
        return { allow: true };
    };
    registry.register({ pluginName: "a", kind: "tool_pre_invoke", handler: explode });
    registry.register({ pluginName: "b", kind: "tool_pre_invoke", priority: 200, handler: recordAndAllow });
    const outcome = await registry.fire("tool_pre_invoke", ctx(), {});
    assert.equal(outcome.allow, false);
    assert.match(outcome.reason ?? "", /boom/);
    assert.equal(secondHookRan, false);
});
|
|
122
|
+
// Three hooks run in priority order: "a" adds a key, "b" (permissive) throws,
// "c" adds another key. The chain must still allow, carry the keys from "a"
// and "c", and emit exactly one "warn" log line that names b/tool_pre_invoke.
test("HookRegistry.fire: permissive-mode throw is logged + chain continues with prior payload", async () => {
    const registry = new HookRegistry();
    const addA = (_context, payload) => ({ allow: true, payload: { ...payload, a: 1 } });
    const failIntermittently = () => {
        throw new Error("intermittent failure");
    };
    const addC = (_context, payload) => ({ allow: true, payload: { ...payload, c: 3 } });
    registry.register({ pluginName: "a", kind: "tool_pre_invoke", priority: 10, handler: addA });
    registry.register({
        pluginName: "b",
        kind: "tool_pre_invoke",
        priority: 20,
        mode: "permissive",
        handler: failIntermittently,
    });
    registry.register({ pluginName: "c", kind: "tool_pre_invoke", priority: 30, handler: addC });
    const warnings = [];
    // Only warn-level messages are collected; other levels are ignored.
    const collectWarnings = (level, message) => {
        if (level === "warn")
            warnings.push(message);
    };
    const outcome = await registry.fire("tool_pre_invoke", ctx(), {}, collectWarnings);
    assert.equal(outcome.allow, true);
    assert.deepEqual(outcome.payload, { a: 1, c: 3 });
    assert.equal(warnings.length, 1);
    assert.match(warnings[0] ?? "", /b\/tool_pre_invoke/);
});
|
|
155
|
+
// With nothing registered, `fire` must allow and hand back the payload it was given.
test("HookRegistry.fire: no hooks => allow with the initial payload", async () => {
    const emptyRegistry = new HookRegistry();
    const initialPayload = { x: 1 };
    const outcome = await emptyRegistry.fire("tool_pre_invoke", ctx(), initialPayload);
    assert.deepEqual(outcome, { allow: true, payload: { x: 1 } });
});
|
package/dist/sdk/index.d.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
export type { ObservabilityConnector } from "../connectors/interface.js";
|
|
2
2
|
export { manifestSchema } from "./manifest-schema.js";
|
|
3
3
|
export type { ValidatedConnectorManifest } from "./manifest-schema.js";
|
|
4
|
+
export { HookRegistry } from "./hooks.js";
|
|
5
|
+
export type { HookKind, HookContext, HookPayload, HookResult, HookRegistration, } from "./hooks.js";
|
|
4
6
|
export type { SignalType, SourceConfig, SourceAuth, SourceTls, ConnectorHealth, ServiceInfo, MetricInfo, MetricQuery, MetricResult, MetricSummary, DataPoint, LogQuery, LogResult, LogEntry, LogSummary, MetricDefinition, Resource, Edge, TopologySnapshot, TopologyChangeEvent, TopologyChangeListener, } from "../types.js";
|
|
5
7
|
/**
|
|
6
8
|
* Manifest shape declared in a plugin's `manifest.json`. The server
|
package/dist/sdk/index.js
CHANGED
|
@@ -25,5 +25,22 @@ export declare const manifestSchema: z.ZodObject<{
|
|
|
25
25
|
serverVersion: z.ZodOptional<z.ZodString>;
|
|
26
26
|
}, z.core.$strip>>;
|
|
27
27
|
integrity: z.ZodOptional<z.ZodString>;
|
|
28
|
+
hooks: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
29
|
+
kind: z.ZodEnum<{
|
|
30
|
+
tool_pre_invoke: "tool_pre_invoke";
|
|
31
|
+
tool_post_invoke: "tool_post_invoke";
|
|
32
|
+
resource_pre_fetch: "resource_pre_fetch";
|
|
33
|
+
resource_post_fetch: "resource_post_fetch";
|
|
34
|
+
prompt_pre_fetch: "prompt_pre_fetch";
|
|
35
|
+
prompt_post_fetch: "prompt_post_fetch";
|
|
36
|
+
}>;
|
|
37
|
+
module: z.ZodString;
|
|
38
|
+
priority: z.ZodOptional<z.ZodNumber>;
|
|
39
|
+
mode: z.ZodOptional<z.ZodEnum<{
|
|
40
|
+
enforce: "enforce";
|
|
41
|
+
permissive: "permissive";
|
|
42
|
+
disabled: "disabled";
|
|
43
|
+
}>>;
|
|
44
|
+
}, z.core.$strip>>>;
|
|
28
45
|
}, z.core.$strip>;
|
|
29
46
|
export type ValidatedConnectorManifest = z.infer<typeof manifestSchema>;
|
|
@@ -44,4 +44,25 @@ export const manifestSchema = z.object({
|
|
|
44
44
|
message: 'integrity must be "sha256-<base64>"',
|
|
45
45
|
})
|
|
46
46
|
.optional(),
|
|
47
|
+
// Lifecycle hooks the plugin wants to fire at. Each entry points to
|
|
48
|
+
// a module path INSIDE the plugin's bundled files. The loader
|
|
49
|
+
// resolves the module relative to the plugin root, imports its
|
|
50
|
+
// default export as the handler, and registers it on the gateway's
|
|
51
|
+
// HookRegistry. Hot-reloadable: install/upgrade of a plugin
|
|
52
|
+
// re-registers its hooks without restart.
|
|
53
|
+
hooks: z
|
|
54
|
+
.array(z.object({
|
|
55
|
+
kind: z.enum([
|
|
56
|
+
"tool_pre_invoke",
|
|
57
|
+
"tool_post_invoke",
|
|
58
|
+
"resource_pre_fetch",
|
|
59
|
+
"resource_post_fetch",
|
|
60
|
+
"prompt_pre_fetch",
|
|
61
|
+
"prompt_post_fetch",
|
|
62
|
+
]),
|
|
63
|
+
module: z.string().min(1),
|
|
64
|
+
priority: z.number().int().optional(),
|
|
65
|
+
mode: z.enum(["enforce", "permissive", "disabled"]).optional(),
|
|
66
|
+
}))
|
|
67
|
+
.optional(),
|
|
47
68
|
});
|
|
@@ -15,7 +15,12 @@ describe("RequestContext seam", () => {
|
|
|
15
15
|
if (!hasHandler)
|
|
16
16
|
continue;
|
|
17
17
|
it(`${file}: handler accepts a RequestContext`, () => {
|
|
18
|
-
|
|
18
|
+
// Accept both the read-and-use form (`ctx: RequestContext`) and
|
|
19
|
+
// the historic placeholder form (`_ctx: RequestContext`) — the
|
|
20
|
+
// seam is the same; the underscore was only there to silence
|
|
21
|
+
// unused-param lints. Handlers that actually consume the ctx
|
|
22
|
+
// (tenant-aware tools, post-E7) drop it.
|
|
23
|
+
assert.match(src, /\b_?ctx:\s*RequestContext/, `${file} exports a *Handler but does not thread RequestContext — ` +
|
|
19
24
|
`add the ctx seam (see context.ts)`);
|
|
20
25
|
assert.match(src, /from "\.\.\/context\.js"/, `${file} must import from ../context.js`);
|
|
21
26
|
});
|
|
@@ -26,7 +26,7 @@ export declare function detectAnomaliesHandler(registry: ConnectorRegistry, args
|
|
|
26
26
|
service?: string;
|
|
27
27
|
duration?: string;
|
|
28
28
|
sensitivity?: string;
|
|
29
|
-
},
|
|
29
|
+
}, ctx?: RequestContext): Promise<{
|
|
30
30
|
content: {
|
|
31
31
|
type: "text";
|
|
32
32
|
text: string;
|
|
@@ -33,12 +33,13 @@ const KEY_METRICS = ["cpu", "memory", "error_rate", "latency_p99", "request_rate
|
|
|
33
33
|
// the overall error ratio is low (e.g. a memory leak emits a handful of
|
|
34
34
|
// "OutOfMemoryWarning" lines long before it turns into 5xx errors).
|
|
35
35
|
const CRITICAL_LOG_PATTERN = /\b(out\s?of\s?memory|oom|outofmemory|heap (usage|exhaust)|memory leak|panic|fatal|deadlock|segfault|stack overflow|cannot allocate)\b/i;
|
|
36
|
-
export async function detectAnomaliesHandler(registry, args,
|
|
36
|
+
export async function detectAnomaliesHandler(registry, args, ctx = defaultContext()) {
|
|
37
37
|
const duration = args.duration || "10m";
|
|
38
38
|
const threshold = SENSITIVITY_THRESHOLDS[args.sensitivity || "medium"] || 2.0;
|
|
39
|
-
// Discover services to scan
|
|
40
|
-
const
|
|
41
|
-
const
|
|
39
|
+
// Discover services to scan — tenant-scoped.
|
|
40
|
+
const tenantConnectors = registry.getByTenant(ctx.tenant);
|
|
41
|
+
const metricsConnectors = tenantConnectors.filter((c) => c.signalType === "metrics");
|
|
42
|
+
const logConnectors = tenantConnectors.filter((c) => c.signalType === "logs");
|
|
42
43
|
let serviceNames = [];
|
|
43
44
|
if (args.service) {
|
|
44
45
|
serviceNames = [args.service];
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import type { ConnectorRegistry } from "../connectors/registry.js";
|
|
2
|
+
import { type RequestContext } from "../context.js";
|
|
3
|
+
/**
 * Tool definition for `generate_postmortem`: the literal tool name, a
 * description string, and an object input schema with `service`,
 * `duration` and `format` properties.
 */
export declare const generatePostmortemDefinition: {
    name: "generate_postmortem";
    description: string;
    inputSchema: {
        type: "object";
        properties: {
            service: { type: string; description: string; };
            duration: { type: string; description: string; };
            format: { type: string; description: string; };
        };
        required: string[];
    };
};
|
|
25
|
+
/**
 * Handler for the `generate_postmortem` tool.
 *
 * @param registry Connector registry the handler reads from.
 * @param args Tool arguments: required `service`, optional `duration` and `format`.
 * @param ctx Optional request context.
 * @returns A promise of a text-content result with an `isError` flag.
 */
export declare function generatePostmortemHandler(
    registry: ConnectorRegistry,
    args: { service: string; duration?: string; format?: string; },
    ctx?: RequestContext,
): Promise<{
    content: Array<{ type: "text"; text: string; }>;
    isError: boolean;
}>;
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
// generate_postmortem — Phase F19a.
|
|
2
|
+
//
|
|
3
|
+
// Stitches together anomaly history (F15), trace summaries (F13),
|
|
4
|
+
// and the topology blast-radius (existing get_blast_radius
|
|
5
|
+
// machinery) into a single markdown post-mortem report.
|
|
6
|
+
//
|
|
7
|
+
// The synthesizer is pure compute (see ./../postmortem/synthesizer);
|
|
8
|
+
// this handler is just the orchestration: pull each upstream
|
|
9
|
+
// primitive in parallel, hand the result to the synthesizer.
|
|
10
|
+
import { defaultContext } from "../context.js";
|
|
11
|
+
import { validateDuration, validateServiceName, errorResponse } from "./validation.js";
|
|
12
|
+
import { synthesizePostmortem, } from "../postmortem/synthesizer.js";
|
|
13
|
+
export const generatePostmortemDefinition = {
|
|
14
|
+
name: "generate_postmortem",
|
|
15
|
+
description: [
|
|
16
|
+
"Stitch the gateway's primitives (anomaly history, blast-radius, traces, log highlights) into a single markdown post-mortem report for one service over a given window.",
|
|
17
|
+
"When to use: after an incident, when the operator or LLM wants 'one document the on-call can read in 60 seconds' instead of poking the individual tools.",
|
|
18
|
+
"Prerequisites: anomaly history requires OMCP_ANOMALY_HISTORY_REMOTE_WRITE configured AND a Prometheus source pointed at the same TSDB (see docs/anomaly-history.md). Traces require a Tempo / Jaeger source. Blast-radius requires a topology provider.",
|
|
19
|
+
"Behavior: read-only. Returns BOTH a structured JSON shape AND a markdown body suitable to paste straight into a ticket. Output is capped (timeline truncated to 20 rows in the markdown, 30 nodes in the blast radius table, 10 traces) — the structured shape carries the full data.",
|
|
20
|
+
"Related: `get_anomaly_history` for the raw scores; `query_traces` for individual traces; `get_blast_radius` for the topology.",
|
|
21
|
+
].join(" "),
|
|
22
|
+
inputSchema: {
|
|
23
|
+
type: "object",
|
|
24
|
+
properties: {
|
|
25
|
+
service: { type: "string", description: "Suspected root-cause service (the operator's first guess)." },
|
|
26
|
+
duration: { type: "string", description: "Rolling window the incident took place in, e.g. '1h', '6h'. Default '1h'." },
|
|
27
|
+
format: { type: "string", description: "Output format: 'markdown' (default) or 'json'." },
|
|
28
|
+
},
|
|
29
|
+
required: ["service"],
|
|
30
|
+
},
|
|
31
|
+
};
|
|
32
|
+
/**
 * Handler for the `generate_postmortem` tool.
 *
 * Validates the arguments, gathers anomalies, traces, a blast-radius view and
 * log highlights for the service in parallel, and passes them to
 * `synthesizePostmortem`.
 *
 * @param registry Connector registry; connectors are looked up per tenant.
 * @param args Tool arguments: `service` (required), `duration` (defaults to
 *   "1h") and `format` ("json" selects the JSON output, anything else markdown).
 * @param ctx Request context; defaults to `defaultContext()`.
 * @returns The validation error response, or a text result holding either the
 *   JSON-serialised report or its markdown body.
 */
export async function generatePostmortemHandler(registry, args, ctx = defaultContext()) {
    const svcErr = validateServiceName(args.service);
    if (svcErr)
        return errorResponse(svcErr);
    const duration = args.duration || "1h";
    const durationErr = validateDuration(duration);
    if (durationErr)
        return errorResponse(durationErr);
    // Read the clock once so both ends of the window derive from the same instant.
    const now = new Date();
    const fromIso = new Date(now.getTime() - parseDurationMs(duration)).toISOString();
    const toIso = now.toISOString();
    // Every fetch helper catches its own connector errors and resolves to an
    // empty result, so this Promise.all does not reject on a failing source
    // and the report is always synthesised (even if it says "no signal found").
    const [anomalies, traces, blastRadius, logHighlights] = await Promise.all([
        fetchAnomalies(registry, args.service, duration, ctx),
        fetchTraces(registry, args.service, duration, ctx),
        fetchBlastRadius(registry, args.service, ctx),
        fetchLogHighlights(registry, args.service, duration, ctx),
    ]);
    const report = synthesizePostmortem({
        service: args.service,
        window: duration,
        tenant: ctx.tenant || "default",
        fromIso,
        toIso,
        anomalies,
        blastRadius,
        traces,
        logHighlights,
    });
    // `format` comes from the tool call and may not be a string. Calling
    // `.toLowerCase()` on a truthy non-string would throw, so anything that is
    // not a string is treated like the default ("markdown").
    const format = typeof args.format === "string" ? args.format.toLowerCase() : "markdown";
    // JSON carries the whole report object; the default is the markdown body only.
    const text = format === "json" ? JSON.stringify(report) : report.markdown;
    return {
        content: [{ type: "text", text }],
        isError: false,
    };
}
|
|
76
|
+
/**
 * Converts a duration such as "30s", "5m", "2h" or "1d" into milliseconds.
 *
 * @param d Duration string: decimal digits followed by one of s, m, h, d.
 * @returns The duration in milliseconds, or one hour's worth when `d` does
 *   not match that shape.
 */
function parseDurationMs(d) {
    const parts = d.match(/^(\d+)([smhd])$/);
    if (!parts) {
        // Unrecognised input falls back to one hour.
        return 60 * 60 * 1000;
    }
    const count = parseInt(parts[1], 10);
    switch (parts[2]) {
        case "s":
            return count * 1000;
        case "m":
            return count * 60000;
        case "h":
            return count * 3600000;
        default:
            // The pattern only admits s/m/h/d, so this is the "d" case.
            return count * 86400000;
    }
}
|
|
87
|
+
/**
 * Reads recorded anomaly scores for a service from the first tenant
 * connector that returns any.
 *
 * Connectors exposing `queryMetrics` are tried in registry order with the
 * selector `omcp_anomaly_score{service="..."}`. A connector that throws or
 * returns no values is skipped.
 *
 * @param registry Connector registry; queried with `ctx.tenant`.
 * @param service Service name, escaped into the selector via `escLabel`.
 * @param duration Window forwarded to the connector unchanged.
 * @param ctx Request context supplying `tenant`.
 * @returns One entry per returned point, or an empty array when no connector
 *   produced values.
 */
async function fetchAnomalies(registry, service, duration, ctx) {
    const metric = `omcp_anomaly_score{service="${escLabel(service)}"}`;
    const metricSources = registry
        .getByTenant(ctx.tenant)
        .filter((candidate) => typeof candidate.queryMetrics === "function");
    // `method` and `severity` are fixed values; nothing is read from the
    // query result to fill them.
    // NOTE(review): numeric timestamps go straight to `new Date`, which treats
    // them as epoch milliseconds — confirm connectors do not return seconds.
    const toAnomaly = (point) => ({
        ts: typeof point.timestamp === "number" ? new Date(point.timestamp).toISOString() : String(point.timestamp),
        service,
        score: typeof point.value === "number" ? point.value : Number(point.value) || 0,
        method: "mad",
        severity: "warn",
    });
    for (const source of metricSources) {
        try {
            const result = await source.queryMetrics({ service, metric, duration });
            const points = result && result.values;
            if (!points || !(points.length > 0))
                continue;
            // Mapping stays inside the try so a bad point falls through to
            // the next source like any other connector failure.
            return points.map(toAnomaly);
        }
        catch {
            /* fall through to next source */
        }
    }
    return [];
}
|
|
108
|
+
async function fetchTraces(registry, service, duration, ctx) {
|
|
109
|
+
for (const c of registry.getByTenant(ctx.tenant).filter((x) => typeof x.queryTraces === "function")) {
|
|
110
|
+
try {
|
|
111
|
+
const r = await c.queryTraces({ service, duration, limit: 10 });
|
|
112
|
+
if (r && r.traces && r.traces.length > 0) {
|
|
113
|
+
return r.traces.map((t) => ({
|
|
114
|
+
traceId: t.traceId,
|
|
115
|
+
rootName: t.rootName,
|
|
116
|
+
rootService: t.rootService,
|
|
117
|
+
durationMs: t.durationMs,
|
|
118
|
+
hasError: t.hasError,
|
|
119
|
+
}));
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
catch {
|
|
123
|
+
/* fall through */
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
return [];
|
|
127
|
+
}
|
|
128
|
+
async function fetchBlastRadius(registry, service, ctx) {
|
|
129
|
+
// We don't have a direct "give me blast radius for service X" helper at
|
|
130
|
+
// this layer — the existing get_blast_radius is a tool that takes a
|
|
131
|
+
// resource id. For the post-mortem we settle for the full topology
|
|
132
|
+
// snapshot of the caller's tenant and let the synthesizer mark the
|
|
133
|
+
// suspect-named node as root. Future F19b can plumb the real walker.
|
|
134
|
+
for (const c of registry.getByTenant(ctx.tenant)) {
|
|
135
|
+
if (typeof c.getTopologySnapshot !== "function")
|
|
136
|
+
continue;
|
|
137
|
+
try {
|
|
138
|
+
const snap = await c.getTopologySnapshot();
|
|
139
|
+
if (!snap?.resources?.length)
|
|
140
|
+
continue;
|
|
141
|
+
// Pick nodes whose name matches the suspected service (case-
|
|
142
|
+
// insensitive substring is conservative-enough for the
|
|
143
|
+
// synopsis; the real walker can be precise later).
|
|
144
|
+
const needle = service.toLowerCase();
|
|
145
|
+
const matching = snap.resources.filter((r) => r.name?.toLowerCase().includes(needle) ||
|
|
146
|
+
(r.labels && Object.values(r.labels).some((v) => String(v).toLowerCase() === needle)));
|
|
147
|
+
if (matching.length === 0)
|
|
148
|
+
continue;
|
|
149
|
+
const matchedIds = new Set(matching.map((r) => r.id));
|
|
150
|
+
const connected = snap.edges.filter((e) => matchedIds.has(e.from) || matchedIds.has(e.to));
|
|
151
|
+
const neighborIds = new Set([
|
|
152
|
+
...matching.map((r) => r.id),
|
|
153
|
+
...connected.map((e) => e.from),
|
|
154
|
+
...connected.map((e) => e.to),
|
|
155
|
+
]);
|
|
156
|
+
const nodes = snap.resources
|
|
157
|
+
.filter((r) => neighborIds.has(r.id))
|
|
158
|
+
.map((r) => ({
|
|
159
|
+
id: r.id,
|
|
160
|
+
kind: r.kind,
|
|
161
|
+
name: r.name,
|
|
162
|
+
root: matchedIds.has(r.id),
|
|
163
|
+
}));
|
|
164
|
+
return {
|
|
165
|
+
nodes,
|
|
166
|
+
edges: connected.map((e) => ({ from: e.from, to: e.to, relation: e.relation })),
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
catch {
|
|
170
|
+
/* fall through */
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
return { nodes: [], edges: [] };
|
|
174
|
+
}
|
|
175
|
+
async function fetchLogHighlights(registry, service, duration, ctx) {
|
|
176
|
+
for (const c of registry.getByTenant(ctx.tenant).filter((x) => typeof x.queryLogs === "function")) {
|
|
177
|
+
try {
|
|
178
|
+
const r = await c.queryLogs({ service, duration, limit: 5 });
|
|
179
|
+
if (r?.summary?.errorCount && r.summary.errorCount > 0) {
|
|
180
|
+
return [`${service}: ${r.summary.errorCount} error log line(s) in window (source: ${r.source}).`];
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
catch {
|
|
184
|
+
/* skip */
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
return [];
|
|
188
|
+
}
|
|
189
|
+
/**
 * Escapes a value for use inside a double-quoted PromQL label matcher.
 *
 * Backslashes and double quotes are prefixed with a backslash, and a line
 * feed is written as the two-character sequence `\n`, because a raw newline
 * inside a double-quoted PromQL string leaves the string unterminated.
 *
 * @param v Raw label value (a string).
 * @returns The value, safe to place between double quotes in a selector.
 */
function escLabel(v) {
    // Single pass, so backslashes added by the escaping are never re-escaped.
    return v.replace(/[\\"\n]/g, (ch) => (ch === "\n" ? "\\n" : `\\${ch}`));
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import type { ConnectorRegistry } from "../connectors/registry.js";
|
|
2
|
+
import { type RequestContext } from "../context.js";
|
|
3
|
+
/**
 * Tool definition for `get_anomaly_history`: the literal tool name, a
 * description string, and an object input schema with `service`,
 * `duration` and `method` properties.
 */
export declare const getAnomalyHistoryDefinition: {
    name: "get_anomaly_history";
    description: string;
    inputSchema: {
        type: "object";
        properties: {
            service: { type: string; description: string; };
            duration: { type: string; description: string; };
            method: { type: string; description: string; };
        };
        required: string[];
    };
};
|
|
25
|
+
/**
 * Handler for the `get_anomaly_history` tool.
 *
 * @param registry Connector registry the handler reads from.
 * @param args Tool arguments: required `service`, optional `duration` and `method`.
 * @param ctx Optional request context.
 * @returns A promise of a text-content result with an `isError` flag.
 */
export declare function getAnomalyHistoryHandler(
    registry: ConnectorRegistry,
    args: { service: string; duration?: string; method?: string; },
    ctx?: RequestContext,
): Promise<{
    content: Array<{ type: "text"; text: string; }>;
    isError: boolean;
}>;
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// get_anomaly_history — Phase F15.
|
|
2
|
+
//
|
|
3
|
+
// Reads anomaly scores previously written to the TSDB by the
|
|
4
|
+
// AnomalyHistory writer. The tool is a thin convenience wrapper: it
|
|
5
|
+
// builds the PromQL query `omcp_anomaly_score{service="..."}` and
|
|
6
|
+
// dispatches via any Prometheus-shaped connector in the caller's
|
|
7
|
+
// tenant.
|
|
8
|
+
//
|
|
9
|
+
// Operators wire the round-trip themselves (Prometheus scrapes the
|
|
10
|
+
// same remote-write endpoint the writer pushes to) — the gateway
|
|
11
|
+
// doesn't need a direct TSDB query path because it already speaks
|
|
12
|
+
// PromQL via the Prometheus connector.
|
|
13
|
+
import { defaultContext } from "../context.js";
|
|
14
|
+
import { validateDuration, validateServiceName, errorResponse } from "./validation.js";
|
|
15
|
+
export const getAnomalyHistoryDefinition = {
|
|
16
|
+
name: "get_anomaly_history",
|
|
17
|
+
description: [
|
|
18
|
+
"Replay historical anomaly scores for a service from the TSDB the gateway writes to (omcp_anomaly_score series).",
|
|
19
|
+
"When to use: post-mortem reconstruction (what did the gateway see at 03:42?), trend analysis on detector noise, or pulling context for the LLM when an incident is reviewed after the fact.",
|
|
20
|
+
"Prerequisites: the operator must have OMCP_ANOMALY_HISTORY_REMOTE_WRITE configured AND a Prometheus connector pointed at the same TSDB so the round-trip closes.",
|
|
21
|
+
"Behavior: read-only. Returns the time-series of scores with per-method/severity labels. Empty result means either no anomalies in the window or history is disabled.",
|
|
22
|
+
"Related: `detect_anomalies` for the live scores; `query_metrics` if you want to write the PromQL by hand.",
|
|
23
|
+
].join(" "),
|
|
24
|
+
inputSchema: {
|
|
25
|
+
type: "object",
|
|
26
|
+
properties: {
|
|
27
|
+
service: { type: "string", description: "Service name to filter on." },
|
|
28
|
+
duration: { type: "string", description: "Rolling window (e.g. '1h', '24h'). Default '1h'." },
|
|
29
|
+
method: { type: "string", description: "Filter by detector method ('mad', 'seasonality', 'correlator'). Optional." },
|
|
30
|
+
},
|
|
31
|
+
required: ["service"],
|
|
32
|
+
},
|
|
33
|
+
};
|
|
34
|
+
/**
 * Handler for the `get_anomaly_history` tool.
 *
 * Validates `service` and `duration` (default "1h"), builds the selector
 * `omcp_anomaly_score{service="...",method="..."}` and asks each of the
 * tenant's connectors that expose `queryMetrics`, in order, for that
 * series. The first connector returning a non-empty `values` array wins.
 *
 * @param registry Connector registry; only `getByTenant` is used.
 * @param args     Tool arguments: `service` (required), `duration` and
 *                 `method` (optional).
 * @param ctx      Request context; defaults to `defaultContext()`. Only
 *                 `ctx.tenant` is read.
 * @returns A tool result with one JSON text item. `isError` is true when
 *          validation fails or no metrics connector exists; an empty
 *          history is reported as a successful result with a hint.
 */
export async function getAnomalyHistoryHandler(registry, args, ctx = defaultContext()) {
    // Wrap a payload as a single JSON text item in the tool result shape.
    const respond = (payload, isError) => ({
        content: [
            {
                type: "text",
                text: JSON.stringify(payload),
            },
        ],
        isError,
    });

    const serviceProblem = validateServiceName(args.service);
    if (serviceProblem) {
        return errorResponse(serviceProblem);
    }
    const duration = args.duration || "1h";
    const durationProblem = validateDuration(duration);
    if (durationProblem) {
        return errorResponse(durationProblem);
    }

    // Any connector able to answer metric queries is a candidate; no
    // attempt is made to pick "the right one" because the query is global
    // by metric name.
    const metricSources = registry
        .getByTenant(ctx.tenant)
        .filter((source) => typeof source.queryMetrics === "function");
    if (metricSources.length === 0) {
        return respond({
            error: "No metrics backend configured to query the TSDB. Configure a Prometheus source pointed at the same TSDB OMCP_ANOMALY_HISTORY_REMOTE_WRITE writes to.",
        }, true);
    }

    // Label matchers: always the service, plus the method when one is given.
    const selector = [
        `service="${escLabel(args.service)}"`,
        ...(args.method ? [`method="${escLabel(args.method)}"`] : []),
    ].join(",");
    const metric = `omcp_anomaly_score{${selector}}`;

    // Sequential fan-out: stop at the first connector with data. A throwing
    // connector is logged and the next one is tried.
    for (const source of metricSources) {
        try {
            const result = await source.queryMetrics({
                service: args.service,
                metric,
                duration,
            });
            const points = result?.values;
            if (Array.isArray(points) && points.length > 0) {
                return respond({
                    service: args.service,
                    duration,
                    method: args.method,
                    source: result.source,
                    values: points,
                    summary: result.summary,
                    metric,
                }, false);
            }
        }
        catch (err) {
            console.warn("get_anomaly_history: %s threw: %s", source.name, err instanceof Error ? err.message : String(err));
        }
    }

    // Nothing came back: either the series is missing or the window is
    // clean. Both are valid, non-error answers.
    return respond({
        service: args.service,
        duration,
        method: args.method,
        values: [],
        summary: { count: 0 },
        metric,
        hint: "No anomaly history found. Either the window is clean, or OMCP_ANOMALY_HISTORY_REMOTE_WRITE was unset when the anomalies fired, or the configured Prometheus source isn't scraping the TSDB this writer pushes to.",
    }, false);
}
|
|
123
|
+
/**
 * Escape a PromQL label value for use inside double quotes.
 *
 * Backslashes and double quotes are prefixed with a backslash. A raw line
 * feed is emitted as the two-character escape `\n`, because a literal
 * newline inside a double-quoted PromQL string makes the query unparsable.
 * All characters are handled in a single pass so already-inserted escape
 * backslashes are never escaped again.
 *
 * @param v Raw label value (must be a string).
 * @returns The escaped value, without surrounding quotes.
 */
function escLabel(v) {
    return v.replace(/[\\"\n]/g, (ch) => (ch === "\n" ? "\\n" : `\\${ch}`));
}
|
|
@@ -18,7 +18,7 @@ export declare const getServiceHealthDefinition: {
|
|
|
18
18
|
};
|
|
19
19
|
export declare function getServiceHealthHandler(registry: ConnectorRegistry, args: {
|
|
20
20
|
service: string;
|
|
21
|
-
},
|
|
21
|
+
}, ctx?: RequestContext): Promise<{
|
|
22
22
|
content: {
|
|
23
23
|
type: "text";
|
|
24
24
|
text: string;
|