@thotischner/observability-mcp 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/dist/analysis/history.d.ts +36 -2
  2. package/dist/analysis/history.js +60 -2
  3. package/dist/analysis/history.test.js +46 -0
  4. package/dist/audit/sinks/s3.d.ts +61 -0
  5. package/dist/audit/sinks/s3.js +179 -0
  6. package/dist/audit/sinks/s3.test.d.ts +1 -0
  7. package/dist/audit/sinks/s3.test.js +175 -0
  8. package/dist/auth/csrf.d.ts +6 -0
  9. package/dist/auth/csrf.js +4 -0
  10. package/dist/auth/csrf.test.js +22 -0
  11. package/dist/auth/lockout.d.ts +72 -0
  12. package/dist/auth/lockout.js +134 -0
  13. package/dist/auth/lockout.test.d.ts +1 -0
  14. package/dist/auth/lockout.test.js +133 -0
  15. package/dist/auth/middleware.d.ts +5 -0
  16. package/dist/auth/middleware.js +6 -1
  17. package/dist/auth/middleware.test.js +31 -0
  18. package/dist/auth/password-policy.d.ts +52 -0
  19. package/dist/auth/password-policy.js +125 -0
  20. package/dist/auth/password-policy.test.d.ts +1 -0
  21. package/dist/auth/password-policy.test.js +111 -0
  22. package/dist/auth/policy/batch-dry-run.js +15 -0
  23. package/dist/auth/revocation.d.ts +93 -0
  24. package/dist/auth/revocation.js +193 -0
  25. package/dist/auth/revocation.test.d.ts +1 -0
  26. package/dist/auth/revocation.test.js +136 -0
  27. package/dist/auth/session.d.ts +7 -0
  28. package/dist/auth/session.js +6 -0
  29. package/dist/auth/session.test.js +21 -0
  30. package/dist/connectors/interface.d.ts +5 -1
  31. package/dist/connectors/loader.d.ts +8 -0
  32. package/dist/connectors/loader.js +49 -0
  33. package/dist/connectors/loki.d.ts +45 -1
  34. package/dist/connectors/loki.js +141 -8
  35. package/dist/connectors/loki.test.js +171 -1
  36. package/dist/connectors/manifest-hooks.test.d.ts +1 -0
  37. package/dist/connectors/manifest-hooks.test.js +206 -0
  38. package/dist/federation/registry.d.ts +27 -5
  39. package/dist/federation/registry.js +49 -4
  40. package/dist/federation/registry.test.js +79 -3
  41. package/dist/federation/upstream.d.ts +32 -6
  42. package/dist/federation/upstream.js +60 -12
  43. package/dist/federation/upstream.test.d.ts +1 -0
  44. package/dist/federation/upstream.test.js +118 -0
  45. package/dist/index.js +522 -67
  46. package/dist/metrics/self.d.ts +1 -0
  47. package/dist/metrics/self.js +8 -0
  48. package/dist/openapi.js +39 -0
  49. package/dist/openapi.test.js +1 -0
  50. package/dist/policy/redact.js +1 -1
  51. package/dist/postmortem/store.d.ts +34 -0
  52. package/dist/postmortem/store.js +113 -0
  53. package/dist/postmortem/store.test.d.ts +1 -0
  54. package/dist/postmortem/store.test.js +118 -0
  55. package/dist/scim/compliance.test.d.ts +1 -0
  56. package/dist/scim/compliance.test.js +169 -0
  57. package/dist/scim/factory.test.d.ts +1 -0
  58. package/dist/scim/factory.test.js +54 -0
  59. package/dist/scim/patch-ops.test.d.ts +1 -0
  60. package/dist/scim/patch-ops.test.js +100 -0
  61. package/dist/scim/redis-store.d.ts +38 -0
  62. package/dist/scim/redis-store.js +178 -0
  63. package/dist/scim/redis-store.test.d.ts +1 -0
  64. package/dist/scim/redis-store.test.js +138 -0
  65. package/dist/scim/routes.d.ts +27 -2
  66. package/dist/scim/routes.js +161 -15
  67. package/dist/scim/store.d.ts +40 -1
  68. package/dist/scim/store.js +23 -5
  69. package/dist/sdk/hook-wrappers.d.ts +39 -0
  70. package/dist/sdk/hook-wrappers.js +113 -0
  71. package/dist/sdk/hook-wrappers.test.d.ts +1 -0
  72. package/dist/sdk/hook-wrappers.test.js +204 -0
  73. package/dist/sdk/index.d.ts +13 -0
  74. package/dist/security/csp.d.ts +64 -0
  75. package/dist/security/csp.js +135 -0
  76. package/dist/security/csp.test.d.ts +1 -0
  77. package/dist/security/csp.test.js +97 -0
  78. package/dist/tools/detect-anomalies.d.ts +12 -1
  79. package/dist/tools/detect-anomalies.js +22 -2
  80. package/dist/tools/query-logs.d.ts +40 -0
  81. package/dist/tools/query-logs.js +69 -3
  82. package/dist/tools/topology.js +23 -5
  83. package/dist/tools/topology.test.js +45 -0
  84. package/dist/tools/validation.d.ts +13 -0
  85. package/dist/tools/validation.js +74 -0
  86. package/dist/tools/validation.test.js +54 -1
  87. package/dist/transport/transportSessionMap.d.ts +70 -0
  88. package/dist/transport/transportSessionMap.js +128 -0
  89. package/dist/transport/transportSessionMap.test.d.ts +1 -0
  90. package/dist/transport/transportSessionMap.test.js +111 -0
  91. package/dist/types.d.ts +48 -0
  92. package/dist/ui/index.html +898 -116
  93. package/package.json +1 -1
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Content-Security-Policy for the management-plane Web UI.
3
+ *
4
+ * Two policies ship together, by design:
5
+ *
6
+ * - **Enforced** (`Content-Security-Policy`): a real, non-breaking policy.
7
+ * It locks down everything the UI doesn't need — no remote scripts
8
+ * (`script-src 'self'`), no plugins (`object-src 'none'`), no `<base>`
9
+ * hijack (`base-uri 'self'`), no framing (`frame-ancestors 'none'`),
10
+ * and same-origin-only XHR via `connect-src 'self'`. It keeps
11
+ * `'unsafe-inline'` for `script-src` because the single-file UI uses
12
+ * ~200 inline event-handler attributes (`onclick=`, …) that a nonce
13
+ * cannot cover — a nonce in `script-src` would *disable* `'unsafe-inline'`
14
+ * in CSP3 and break every button. So the enforced policy is a genuine
15
+ * improvement over no CSP without regressing the UI.
16
+ *
17
+ * - **Report-Only** (`Content-Security-Policy-Report-Only`): the strict
18
+ * target policy — `script-src 'self' 'nonce-…'`, no `'unsafe-inline'`.
19
+ * The two legitimate inline `<script>` blocks carry the per-request
20
+ * nonce, so this policy flags ONLY the inline event-handler debt. It
21
+ * blocks nothing; it just reports, giving an actionable migration list
22
+ * (move the handlers to addEventListener) before a future slice can
23
+ * promote the strict policy to enforced.
24
+ *
25
+ * It is **opt-in** (`OMCP_CSP_STRICT_REPORT=true`): with ~200 inline
26
+ * handlers it would otherwise emit a `[Report Only]` console message
27
+ * per handler on every page load — noise an operator with devtools
28
+ * open shouldn't eat by default. Enable it when you're actively
29
+ * working the migration. The enforced policy + reporting endpoint are
30
+ * always on regardless.
31
+ *
32
+ * Both policies report to `/api/csp-violations` via the modern Reporting
33
+ * API (`Reporting-Endpoints` + `report-to`) and the legacy `report-uri`.
34
+ */
35
+ /** Placeholder substituted with the per-request nonce when serving the UI HTML. */
36
+ export declare const CSP_NONCE_PLACEHOLDER = "__CSP_NONCE__";
37
+ /** The named reporting group used in the Report-To / Reporting-Endpoints headers. */
38
+ export declare const CSP_REPORT_GROUP = "omcp-csp";
39
+ /** Where violation reports are POSTed. */
40
+ export declare const CSP_REPORT_PATH = "/api/csp-violations";
41
+ /** Fresh base64 nonce (128 bits). */
42
+ export declare function generateNonce(): string;
43
+ /** The enforced policy — non-breaking, keeps the UI working. */
44
+ export declare function enforcedCsp(): string;
45
+ /** The strict target policy, run in report-only mode against the nonce. */
46
+ export declare function reportOnlyCsp(nonce: string): string;
47
+ /** Whether the strict Report-Only policy is enabled. Default off — see
48
+ * the module header for why (console noise from ~200 inline handlers). */
49
+ export declare function cspStrictReportFromEnv(env?: NodeJS.ProcessEnv): boolean;
50
+ /** Value for the modern `Reporting-Endpoints` header. */
51
+ export declare function reportingEndpointsHeader(): string;
52
+ /** Value for the legacy `Report-To` header (Reporting API v0). */
53
+ export declare function reportToHeader(): string;
54
+ /**
55
+ * Normalise a posted CSP violation (either the legacy
56
+ * `application/csp-report` `{ "csp-report": {...} }` envelope or a modern
57
+ * Reporting-API `application/reports+json` array element) into a compact,
58
+ * log-safe summary. Returns null when the body isn't a recognisable report.
59
+ */
60
+ export declare function summariseViolation(body: unknown): {
61
+ directive: string;
62
+ blockedUri: string;
63
+ documentUri: string;
64
+ } | null;
@@ -0,0 +1,135 @@
1
+ /**
2
+ * Content-Security-Policy for the management-plane Web UI.
3
+ *
4
+ * Two policies ship together, by design:
5
+ *
6
+ * - **Enforced** (`Content-Security-Policy`): a real, non-breaking policy.
7
+ * It locks down everything the UI doesn't need — no remote scripts
8
+ * (`script-src 'self'`), no plugins (`object-src 'none'`), no `<base>`
9
+ * hijack (`base-uri 'self'`), no framing (`frame-ancestors 'none'`),
10
+ * and same-origin-only XHR via `connect-src 'self'`. It keeps
11
+ * `'unsafe-inline'` for `script-src` because the single-file UI uses
12
+ * ~200 inline event-handler attributes (`onclick=`, …) that a nonce
13
+ * cannot cover — a nonce in `script-src` would *disable* `'unsafe-inline'`
14
+ * in CSP3 and break every button. So the enforced policy is a genuine
15
+ * improvement over no CSP without regressing the UI.
16
+ *
17
+ * - **Report-Only** (`Content-Security-Policy-Report-Only`): the strict
18
+ * target policy — `script-src 'self' 'nonce-…'`, no `'unsafe-inline'`.
19
+ * The two legitimate inline `<script>` blocks carry the per-request
20
+ * nonce, so this policy flags ONLY the inline event-handler debt. It
21
+ * blocks nothing; it just reports, giving an actionable migration list
22
+ * (move the handlers to addEventListener) before a future slice can
23
+ * promote the strict policy to enforced.
24
+ *
25
+ * It is **opt-in** (`OMCP_CSP_STRICT_REPORT=true`): with ~200 inline
26
+ * handlers it would otherwise emit a `[Report Only]` console message
27
+ * per handler on every page load — noise an operator with devtools
28
+ * open shouldn't eat by default. Enable it when you're actively
29
+ * working the migration. The enforced policy + reporting endpoint are
30
+ * always on regardless.
31
+ *
32
+ * Both policies report to `/api/csp-violations` via the modern Reporting
33
+ * API (`Reporting-Endpoints` + `report-to`) and the legacy `report-uri`.
34
+ */
35
+ import { randomBytes } from "node:crypto";
36
+ /** Placeholder substituted with the per-request nonce when serving the UI HTML. */
37
+ export const CSP_NONCE_PLACEHOLDER = "__CSP_NONCE__";
38
+ /** The named reporting group used in the Report-To / Reporting-Endpoints headers. */
39
+ export const CSP_REPORT_GROUP = "omcp-csp";
40
+ /** Where violation reports are POSTed. */
41
+ export const CSP_REPORT_PATH = "/api/csp-violations";
42
/**
 * Produce a fresh nonce value.
 *
 * @returns {string} 16 random bytes (128 bits) from `node:crypto`,
 *   encoded as standard base64.
 */
export function generateNonce() {
    const entropy = randomBytes(16);
    return entropy.toString("base64");
}
46
/**
 * Serialise the enforced policy.
 *
 * The directives are emitted in a fixed order and joined with "; ".
 * Scripts and styles keep `'unsafe-inline'`, and the policy closes with
 * both the `report-uri` and `report-to` reporting directives.
 *
 * @returns {string} The policy string.
 */
export function enforcedCsp() {
    // Structural locks that do not depend on how the UI loads its assets.
    const lockdown = [
        "default-src 'self'",
        "base-uri 'self'",
        "object-src 'none'",
        "frame-ancestors 'none'",
        "form-action 'self'",
    ];
    // Per-resource source lists.
    const sources = [
        "script-src 'self' 'unsafe-inline'",
        "style-src 'self' 'unsafe-inline'",
        "img-src 'self' data:",
        "font-src 'self' data:",
        "connect-src 'self'",
    ];
    // Reporting directives: endpoint path first, then the named group.
    const reporting = [
        `report-uri ${CSP_REPORT_PATH}`,
        `report-to ${CSP_REPORT_GROUP}`,
    ];
    return [...lockdown, ...sources, ...reporting].join("; ");
}
63
/**
 * Serialise the strict policy that is bound to a nonce.
 *
 * It matches the enforced policy directive for directive, except that
 * `script-src` allows `'self'` plus the supplied nonce instead of
 * `'unsafe-inline'`.
 *
 * @param nonce Nonce value to embed in the `script-src` directive; it is
 *   interpolated as-is.
 * @returns {string} The policy string, directives joined with "; ".
 */
export function reportOnlyCsp(nonce) {
    const directives = [];
    directives.push("default-src 'self'");
    directives.push("base-uri 'self'");
    directives.push("object-src 'none'");
    directives.push("frame-ancestors 'none'");
    directives.push("form-action 'self'");
    // The only directive that differs from the enforced policy.
    directives.push(`script-src 'self' 'nonce-${nonce}'`);
    directives.push("style-src 'self' 'unsafe-inline'");
    directives.push("img-src 'self' data:");
    directives.push("font-src 'self' data:");
    directives.push("connect-src 'self'");
    directives.push(`report-uri ${CSP_REPORT_PATH}`);
    directives.push(`report-to ${CSP_REPORT_GROUP}`);
    return directives.join("; ");
}
80
/**
 * Decide whether the strict Report-Only policy is switched on.
 *
 * Reads `OMCP_CSP_STRICT_REPORT`, trims it and lower-cases it; only
 * "1", "true" and "yes" enable the policy. An unset variable (or any
 * other value) leaves it off.
 *
 * @param env Environment map to read from; defaults to `process.env`.
 * @returns {boolean} `true` when the flag holds one of the accepted values.
 */
export function cspStrictReportFromEnv(env = process.env) {
    const raw = env.OMCP_CSP_STRICT_REPORT;
    if (raw == null) {
        return false;
    }
    const flag = raw.trim().toLowerCase();
    return ["1", "true", "yes"].includes(flag);
}
86
/**
 * Build the value for the `Reporting-Endpoints` header.
 *
 * @returns {string} The reporting group name, an equals sign, then the
 *   report path wrapped in double quotes.
 */
export function reportingEndpointsHeader() {
    const quotedPath = '"' + CSP_REPORT_PATH + '"';
    return CSP_REPORT_GROUP + "=" + quotedPath;
}
90
/**
 * Build the value for the `Report-To` header.
 *
 * @returns {string} JSON text of an object with `group`, `max_age`
 *   (10886400) and a single-entry `endpoints` list pointing at the
 *   report path, in that key order.
 */
export function reportToHeader() {
    const endpoint = { url: CSP_REPORT_PATH };
    const descriptor = {
        group: CSP_REPORT_GROUP,
        max_age: 10886400,
        endpoints: [endpoint],
    };
    return JSON.stringify(descriptor);
}
98
/** Longest value (in UTF-16 code units) kept for any summarised field. */
const VIOLATION_FIELD_MAX_LENGTH = 256;
/** C0/C1 control characters plus the Unicode line and paragraph separators. */
const VIOLATION_UNSAFE_CHARS = /[\u0000-\u001f\u007f-\u009f\u2028\u2029]/g;
/**
 * Normalise a posted CSP violation into a compact, log-safe summary.
 *
 * Accepted shapes:
 *  - the legacy `application/csp-report` envelope `{ "csp-report": {...} }`;
 *  - a single Reporting-API report `{ type, body: {...} }`;
 *  - a Reporting-API `application/reports+json` array of such reports, in
 *    which case the first element that yields a summary wins;
 *  - a bare report object carrying the fields directly.
 *
 * The body is untrusted input, so the function is defensive:
 *  - arrays are scanned one level deep only. Report batches are flat, and
 *    recursing into nested arrays would let a small, deeply nested payload
 *    exhaust the call stack;
 *  - each field is capped at 256 characters and has control characters
 *    (including CR/LF) removed, so a value cannot forge extra log lines.
 *
 * @param body Parsed request body of unknown shape.
 * @returns `{ directive, blockedUri, documentUri }` (absent fields are ""),
 *   or `null` when the body is not a recognisable report.
 */
export function summariseViolation(body) {
    if (!body || typeof body !== "object")
        return null;
    if (Array.isArray(body)) {
        for (const item of body) {
            // A nested array is never a valid report; skipping it bounds the recursion depth.
            if (Array.isArray(item))
                continue;
            const summary = summariseViolation(item);
            if (summary)
                return summary;
        }
        return null;
    }
    const envelope = body;
    // Unwrap the legacy envelope or the Reporting-API wrapper; otherwise treat the object itself as the report.
    const report = envelope["csp-report"] ?? envelope.body ?? envelope;
    if (!report || typeof report !== "object")
        return null;
    // First non-empty string among `keys`, truncated and then stripped of control characters.
    const pick = (...keys) => {
        for (const key of keys) {
            const raw = report[key];
            if (typeof raw !== "string")
                continue;
            // Truncate before stripping so the regex only ever sees a bounded string.
            const clean = raw
                .slice(0, VIOLATION_FIELD_MAX_LENGTH)
                .replace(VIOLATION_UNSAFE_CHARS, "");
            if (clean)
                return clean;
        }
        return "";
    };
    const directive = pick("effective-directive", "effectiveDirective", "violated-directive", "violatedDirective");
    const blockedUri = pick("blocked-uri", "blockedURL", "blockedURI");
    const documentUri = pick("document-uri", "documentURL", "documentURI");
    if (!directive && !blockedUri && !documentUri)
        return null;
    return { directive, blockedUri, documentUri };
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,97 @@
1
+ import { test } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { generateNonce, enforcedCsp, reportOnlyCsp, reportingEndpointsHeader, reportToHeader, summariseViolation, cspStrictReportFromEnv, CSP_NONCE_PLACEHOLDER, CSP_REPORT_GROUP, CSP_REPORT_PATH, } from "./csp.js";
4
+ test("generateNonce returns a fresh base64 value each call", () => {
5
+ const a = generateNonce();
6
+ const b = generateNonce();
7
+ assert.notEqual(a, b);
8
+ assert.match(a, /^[A-Za-z0-9+/]+=*$/);
9
+ // 16 bytes → 24 base64 chars (with padding).
10
+ assert.ok(a.length >= 22);
11
+ });
12
+ test("enforced policy keeps the UI working but locks the rest down", () => {
13
+ const csp = enforcedCsp();
14
+ // Inline handlers survive: unsafe-inline present, NO nonce (which would disable it).
15
+ assert.match(csp, /script-src 'self' 'unsafe-inline'/);
16
+ assert.ok(!csp.includes("nonce-"), "enforced policy must not carry a nonce");
17
+ // Hard locks.
18
+ assert.match(csp, /object-src 'none'/);
19
+ assert.match(csp, /base-uri 'self'/);
20
+ assert.match(csp, /frame-ancestors 'none'/);
21
+ assert.match(csp, /default-src 'self'/);
22
+ assert.match(csp, /connect-src 'self'/);
23
+ // Reporting wired both ways.
24
+ assert.match(csp, new RegExp(`report-uri ${CSP_REPORT_PATH}`));
25
+ assert.match(csp, new RegExp(`report-to ${CSP_REPORT_GROUP}`));
26
+ });
27
+ test("report-only policy is strict and nonce-bound, no unsafe-inline on scripts", () => {
28
+ const nonce = generateNonce();
29
+ const csp = reportOnlyCsp(nonce);
30
+ assert.match(csp, new RegExp(`script-src 'self' 'nonce-${nonce.replace(/[+/]/g, "\\$&")}'`));
31
+ // Strict: the script directive must NOT allow unsafe-inline.
32
+ const scriptDirective = csp.split(";").find((d) => d.trim().startsWith("script-src"));
33
+ assert.ok(!scriptDirective.includes("unsafe-inline"));
34
+ assert.match(csp, /object-src 'none'/);
35
+ });
36
+ test("reporting headers name the same group + endpoint", () => {
37
+ assert.equal(reportingEndpointsHeader(), `${CSP_REPORT_GROUP}="${CSP_REPORT_PATH}"`);
38
+ const parsed = JSON.parse(reportToHeader());
39
+ assert.equal(parsed.group, CSP_REPORT_GROUP);
40
+ assert.equal(parsed.endpoints[0].url, CSP_REPORT_PATH);
41
+ assert.ok(parsed.max_age > 0);
42
+ });
43
+ test("the nonce placeholder is a stable token", () => {
44
+ assert.equal(CSP_NONCE_PLACEHOLDER, "__CSP_NONCE__");
45
+ });
46
+ test("strict report-only is opt-in (default off)", () => {
47
+ assert.equal(cspStrictReportFromEnv({}), false);
48
+ assert.equal(cspStrictReportFromEnv({ OMCP_CSP_STRICT_REPORT: "true" }), true);
49
+ assert.equal(cspStrictReportFromEnv({ OMCP_CSP_STRICT_REPORT: "1" }), true);
50
+ assert.equal(cspStrictReportFromEnv({ OMCP_CSP_STRICT_REPORT: "no" }), false);
51
+ assert.equal(cspStrictReportFromEnv({ OMCP_CSP_STRICT_REPORT: "false" }), false);
52
+ });
53
+ test("summariseViolation parses the legacy csp-report envelope", () => {
54
+ const s = summariseViolation({
55
+ "csp-report": {
56
+ "effective-directive": "script-src-attr",
57
+ "blocked-uri": "inline",
58
+ "document-uri": "https://gw.example/",
59
+ "extra": "ignored",
60
+ },
61
+ });
62
+ assert.deepEqual(s, {
63
+ directive: "script-src-attr",
64
+ blockedUri: "inline",
65
+ documentUri: "https://gw.example/",
66
+ });
67
+ });
68
+ test("summariseViolation parses a modern Reporting-API array", () => {
69
+ const s = summariseViolation([
70
+ {
71
+ type: "csp-violation",
72
+ body: {
73
+ effectiveDirective: "script-src-elem",
74
+ blockedURL: "https://evil.example/x.js",
75
+ documentURL: "https://gw.example/",
76
+ },
77
+ },
78
+ ]);
79
+ assert.equal(s?.directive, "script-src-elem");
80
+ assert.equal(s?.blockedUri, "https://evil.example/x.js");
81
+ });
82
+ test("summariseViolation falls back to violated-directive", () => {
83
+ const s = summariseViolation({ "csp-report": { "violated-directive": "img-src", "blocked-uri": "data" } });
84
+ assert.equal(s?.directive, "img-src");
85
+ });
86
+ test("summariseViolation returns null for junk", () => {
87
+ assert.equal(summariseViolation(null), null);
88
+ assert.equal(summariseViolation("nope"), null);
89
+ assert.equal(summariseViolation({}), null);
90
+ assert.equal(summariseViolation({ random: "field" }), null);
91
+ });
92
+ test("summariseViolation truncates over-long fields", () => {
93
+ const long = "a".repeat(5000);
94
+ const s = summariseViolation({ "csp-report": { "blocked-uri": long } });
95
+ assert.ok(s);
96
+ assert.ok((s.blockedUri).length <= 256);
97
+ });
@@ -22,11 +22,22 @@ export declare const detectAnomaliesDefinition: {
22
22
  };
23
23
  };
24
24
  };
25
+ export interface AnomalyHistorySink {
26
+ record(entry: {
27
+ ts: string;
28
+ service: string;
29
+ tenant: string;
30
+ score: number;
31
+ method: string;
32
+ severity: string;
33
+ signal?: string;
34
+ }): Promise<void> | void;
35
+ }
25
36
  export declare function detectAnomaliesHandler(registry: ConnectorRegistry, args: {
26
37
  service?: string;
27
38
  duration?: string;
28
39
  sensitivity?: string;
29
- }, ctx?: RequestContext): Promise<{
40
+ }, ctx?: RequestContext, history?: AnomalyHistorySink): Promise<{
30
41
  content: {
31
42
  type: "text";
32
43
  text: string;
@@ -33,7 +33,7 @@ const KEY_METRICS = ["cpu", "memory", "error_rate", "latency_p99", "request_rate
33
33
  // the overall error ratio is low (e.g. a memory leak emits a handful of
34
34
  // "OutOfMemoryWarning" lines long before it turns into 5xx errors).
35
35
  const CRITICAL_LOG_PATTERN = /\b(out\s?of\s?memory|oom|outofmemory|heap (usage|exhaust)|memory leak|panic|fatal|deadlock|segfault|stack overflow|cannot allocate)\b/i;
36
- export async function detectAnomaliesHandler(registry, args, ctx = defaultContext()) {
36
+ export async function detectAnomaliesHandler(registry, args, ctx = defaultContext(), history) {
37
37
  const duration = args.duration || "10m";
38
38
  const threshold = SENSITIVITY_THRESHOLDS[args.sensitivity || "medium"] || 2.0;
39
39
  // Discover services to scan — tenant-scoped.
@@ -72,9 +72,10 @@ export async function detectAnomaliesHandler(registry, args, ctx = defaultContex
72
72
  const deviationPercent = anomaly.baselineValue === 0
73
73
  ? 100
74
74
  : Math.round(((anomaly.recentValue - anomaly.baselineValue) / anomaly.baselineValue) * 100);
75
+ const severityLabel = Math.abs(anomaly.score) >= 6 ? "high" : Math.abs(anomaly.score) >= 4 ? "medium" : "low";
75
76
  allAnomalies.push({
76
77
  metric,
77
- severity: Math.abs(anomaly.score) >= 6 ? "high" : Math.abs(anomaly.score) >= 4 ? "medium" : "low",
78
+ severity: severityLabel,
78
79
  description: `${metric}: ${anomaly.reason}`,
79
80
  currentValue: anomaly.recentValue,
80
81
  baselineValue: anomaly.baselineValue,
@@ -82,6 +83,25 @@ export async function detectAnomaliesHandler(registry, args, ctx = defaultContex
82
83
  source: connector.name,
83
84
  service: serviceName,
84
85
  });
86
+ // Phase P1: mirror the score to the TSDB sink (no-op if no
87
+ // sink wired). Best-effort — a slow / down sink must never
88
+ // block the detector loop, which is why we don't await.
89
+ if (history) {
90
+ try {
91
+ void history.record({
92
+ ts: new Date().toISOString(),
93
+ service: serviceName,
94
+ tenant: ctx.tenant || "default",
95
+ score: Math.abs(anomaly.score),
96
+ method: anomaly.method === "seasonal" ? "seasonality"
97
+ : anomaly.method === "robust-z" ? "mad"
98
+ : anomaly.method,
99
+ severity: severityLabel === "high" ? "critical" : severityLabel === "medium" ? "warn" : "info",
100
+ signal: metric,
101
+ });
102
+ }
103
+ catch { /* swallow — best-effort */ }
104
+ }
85
105
  }
86
106
  }
87
107
  catch {
@@ -22,10 +22,43 @@ export declare const queryLogsDefinition: {
22
22
  type: string;
23
23
  description: string;
24
24
  };
25
+ labels: {
26
+ type: string;
27
+ additionalProperties: {
28
+ type: string;
29
+ };
30
+ description: string;
31
+ };
25
32
  limit: {
26
33
  type: string;
27
34
  description: string;
28
35
  };
36
+ aggregate: {
37
+ type: string;
38
+ description: string;
39
+ properties: {
40
+ op: {
41
+ type: string;
42
+ enum: string[];
43
+ };
44
+ by: {
45
+ type: string;
46
+ items: {
47
+ type: string;
48
+ };
49
+ description: string;
50
+ };
51
+ k: {
52
+ type: string;
53
+ description: string;
54
+ };
55
+ step: {
56
+ type: string;
57
+ description: string;
58
+ };
59
+ };
60
+ required: string[];
61
+ };
29
62
  };
30
63
  required: string[];
31
64
  };
@@ -36,6 +69,13 @@ export declare function queryLogsHandler(registry: ConnectorRegistry, args: {
36
69
  duration?: string;
37
70
  level?: string;
38
71
  limit?: number;
72
+ labels?: Record<string, string>;
73
+ aggregate?: {
74
+ op: "count_over_time" | "sum" | "topk";
75
+ by?: string[];
76
+ k?: number;
77
+ step?: string;
78
+ };
39
79
  }, ctx?: RequestContext): Promise<{
40
80
  content: {
41
81
  type: "text";
@@ -1,8 +1,8 @@
1
1
  import { defaultContext } from "../context.js";
2
- import { validateDuration, validateServiceName, errorResponse } from "./validation.js";
2
+ import { validateDuration, validateServiceName, validateLogLabels, validateLogAggregate, errorResponse } from "./validation.js";
3
3
  export const queryLogsDefinition = {
4
4
  name: "query_logs",
5
- description: "Query logs for a service over a given timeframe. Returns log entries with a summary including error/warning counts and top error patterns. Supports filtering by log level and search query.",
5
+ description: "Query logs for a service over a given timeframe. Returns log entries with a summary including error/warning counts and top error patterns. Filter by log level, a free-text/regex search, OR structured `labels` (exact-match on backend-extracted fields like method/status/url/environment — far more reliable than regex on structured JSON logs).",
6
6
  inputSchema: {
7
7
  type: "object",
8
8
  properties: {
@@ -22,9 +22,25 @@ export const queryLogsDefinition = {
22
22
  type: "string",
23
23
  description: "Filter by log level: 'error', 'warn', 'info', 'debug'",
24
24
  },
25
+ labels: {
26
+ type: "object",
27
+ additionalProperties: { type: "string" },
28
+ description: "Structured equality filters on backend-extracted fields, AND'd together, e.g. {\"method\":\"GET\",\"url\":\"/\",\"status\":\"200\",\"environment\":\"prod\"}. Prefer this over `query` for structured JSON logs — the literal text rarely appears verbatim. Label names must be [a-zA-Z_][a-zA-Z0-9_]* (max 20).",
29
+ },
25
30
  limit: {
26
31
  type: "number",
27
- description: "Maximum number of log entries to return. Default: 100",
32
+ description: "Maximum number of log entries to return. Default: 100. Ignored when `aggregate` is set.",
33
+ },
34
+ aggregate: {
35
+ type: "object",
36
+ description: "Server-side aggregation — returns grouped counts, not raw rows, so you get a number instead of a haystack. op: 'count_over_time' (time series of counts per bucket), 'sum' (total per group over the window), 'topk' (top-k groups by total). Example: {\"op\":\"topk\",\"by\":[\"url\"],\"k\":10} for the busiest paths. Honours `labels`/`query` filters.",
37
+ properties: {
38
+ op: { type: "string", enum: ["count_over_time", "sum", "topk"] },
39
+ by: { type: "array", items: { type: "string" }, description: "Group-by label names (required for topk)." },
40
+ k: { type: "number", description: "Top-k count (default 10)." },
41
+ step: { type: "string", description: "Bucket size for count_over_time, e.g. '15m'. Defaults to ~1/60th of the window." },
42
+ },
43
+ required: ["op"],
28
44
  },
29
45
  },
30
46
  required: ["service"],
@@ -38,6 +54,12 @@ export async function queryLogsHandler(registry, args, ctx = defaultContext()) {
38
54
  const durationErr = validateDuration(duration);
39
55
  if (durationErr)
40
56
  return errorResponse(durationErr);
57
+ const labelsErr = validateLogLabels(args.labels);
58
+ if (labelsErr)
59
+ return errorResponse(labelsErr);
60
+ const aggErr = validateLogAggregate(args.aggregate);
61
+ if (aggErr)
62
+ return errorResponse(aggErr);
41
63
  const connectors = registry.getByTenant(ctx.tenant).filter((c) => c.signalType === "logs");
42
64
  if (connectors.length === 0) {
43
65
  return {
@@ -47,6 +69,49 @@ export async function queryLogsHandler(registry, args, ctx = defaultContext()) {
47
69
  isError: true,
48
70
  };
49
71
  }
72
+ // Aggregate mode (Q-LOG2): route to the connector's queryLogAggregate.
73
+ if (args.aggregate) {
74
+ const aggResults = [];
75
+ const aggErrors = [];
76
+ let capable = 0;
77
+ for (const connector of connectors) {
78
+ if (!connector.queryLogAggregate)
79
+ continue;
80
+ capable++;
81
+ try {
82
+ const q = {
83
+ service: args.service,
84
+ duration,
85
+ labels: args.labels,
86
+ query: args.query,
87
+ op: args.aggregate.op,
88
+ by: args.aggregate.by,
89
+ k: args.aggregate.k,
90
+ step: args.aggregate.step,
91
+ };
92
+ aggResults.push(await connector.queryLogAggregate(q));
93
+ }
94
+ catch (err) {
95
+ const msg = err instanceof Error ? err.message : String(err);
96
+ console.error(`Log aggregate failed on ${connector.name}:`, msg);
97
+ aggErrors.push(`${connector.name}: ${msg}`);
98
+ }
99
+ }
100
+ if (capable === 0) {
101
+ return errorResponse("No log backend supports aggregation (queryLogAggregate).");
102
+ }
103
+ if (aggResults.length === 0) {
104
+ return {
105
+ content: [{ type: "text", text: JSON.stringify({ error: aggErrors.length ? `Aggregate failed: ${aggErrors.join("; ")}` : "No data returned", service: args.service, duration }) }],
106
+ isError: aggErrors.length > 0,
107
+ };
108
+ }
109
+ return {
110
+ content: [
111
+ { type: "text", text: JSON.stringify(aggResults.length === 1 ? aggResults[0] : aggResults, null, 2) },
112
+ ],
113
+ };
114
+ }
50
115
  const results = [];
51
116
  const errors = [];
52
117
  for (const connector of connectors) {
@@ -59,6 +124,7 @@ export async function queryLogsHandler(registry, args, ctx = defaultContext()) {
59
124
  duration,
60
125
  level: args.level,
61
126
  limit: args.limit,
127
+ labels: args.labels,
62
128
  });
63
129
  results.push(result);
64
130
  }
@@ -14,10 +14,10 @@
14
14
  // connector later requires zero changes here.
15
15
  import { isTopologyProvider } from "../connectors/interface.js";
16
16
  import { defaultContext } from "../context.js";
17
+ import { mergeTopologies } from "../topology/merge.js";
17
18
  export async function aggregateTopology(registry, tenant) {
18
19
  const sources = [];
19
- const resources = [];
20
- const edges = [];
20
+ const snapshots = [];
21
21
  // Tenant-scoped when a tenant is supplied (call sites at the MCP
22
22
  // tool layer pass ctx.tenant); undefined preserves the original
23
23
  // global behaviour for internal / non-request callers.
@@ -34,14 +34,32 @@ export async function aggregateTopology(registry, tenant) {
34
34
  resources: snap.resources.length,
35
35
  edges: snap.edges.length,
36
36
  });
37
- resources.push(...snap.resources);
38
- edges.push(...snap.edges);
37
+ snapshots.push(snap);
39
38
  }
40
39
  catch {
41
40
  // A misbehaving connector must not poison the agent's view of the graph.
42
41
  }
43
42
  }
44
- return { sources, resources, edges };
43
+ // P1: run the snapshots through mergeTopologies so workloads
44
+ // surfaced by more than one provider (e.g. the same Deployment
45
+ // observed by both Kubernetes + a service-mesh connector) collapse
46
+ // into a single canonical node and edges are rewritten to match.
47
+ //
48
+ // ONLY engages for multi-source topologies — with a single snapshot
49
+ // the merger would mis-group intra-source siblings that happen to
50
+ // share a canonical label (e.g. two pod replicas with
51
+ // `app.kubernetes.io/name=api`). The merger is designed for
52
+ // cross-provider de-duplication, not intra-provider.
53
+ if (snapshots.length <= 1) {
54
+ const only = snapshots[0];
55
+ return {
56
+ sources,
57
+ resources: only?.resources ?? [],
58
+ edges: only?.edges ?? [],
59
+ };
60
+ }
61
+ const merged = mergeTopologies(snapshots);
62
+ return { sources, resources: merged.resources, edges: merged.edges };
45
63
  }
46
64
  /**
47
65
  * Resolve a caller-supplied identifier to a Resource. Accepts:
@@ -208,3 +208,48 @@ describe("get_blast_radius tool", () => {
208
208
  assert.equal(apiBucket.ownershipRootKind, "deployment");
209
209
  });
210
210
  });
211
+ // --- Multi-source merge (Phase P1 wiring) ----------------------------
212
+ // `aggregateTopology` now delegates to `mergeTopologies` when 2+
213
+ // snapshots are present so the same logical workload reported by
214
+ // e.g. Kubernetes + a cloud connector collapses into one node.
215
+ // Single-snapshot calls pass through unchanged (guarded so we don't
216
+ // mis-merge intra-source siblings that share an `app:` label).
217
+ describe("aggregateTopology — multi-source merger (P1 wire)", () => {
218
+ it("collapses cross-source duplicates that share a canonical label", async () => {
219
+ // Source A (k8s): one Deployment "checkout" in prod
220
+ const aRes = [
221
+ { id: "k8s:deployment:prod/checkout", kind: "deployment", name: "checkout", source: "k8s",
222
+ labels: { "app.kubernetes.io/name": "checkout" } },
223
+ ];
224
+ // Source B (trace provider): the same logical service
225
+ const bRes = [
226
+ { id: "tempo:service:checkout", kind: "trace_service", name: "checkout", source: "tempo",
227
+ labels: { "service.name": "checkout" } },
228
+ ];
229
+ const loader = new PluginLoader();
230
+ const reg = new ConnectorRegistry(loader);
231
+ const connA = new FakeTopologyConnector(aRes, []);
232
+ const connB = new FakeTopologyConnector(bRes, []);
233
+ await connA.connect({ name: "k8s", type: "fake", url: "", enabled: true });
234
+ await connB.connect({ name: "tempo", type: "fake", url: "", enabled: true });
235
+ const loaderInternal = loader;
236
+ loaderInternal.connectors.set("fake-a", { name: "fake-a", source: "builtin", factory: () => connA });
237
+ loaderInternal.connectors.set("fake-b", { name: "fake-b", source: "builtin", factory: () => connB });
238
+ await reg.addSource({ name: "k8s", type: "fake-a", url: "", enabled: true });
239
+ await reg.addSource({ name: "tempo", type: "fake-b", url: "", enabled: true });
240
+ const out = parseTool(await getTopologyHandler(reg, {}));
241
+ // 2 sources reported in summary
242
+ assert.equal(out.sources.length, 2);
243
+ // But ONE resource after merge (deployment + trace_service of the
244
+ // same canonical name collapse via MERGEABLE_KIND_PAIRS).
245
+ assert.equal(out.resources.length, 1);
246
+ assert.equal(out.resources[0].name, "checkout");
247
+ });
248
+ it("single-source passes through unchanged (no intra-source merging)", async () => {
249
+ // The existing 4-pod fixture has two pods sharing `app: api`.
250
+ // With a single snapshot the merger must NOT collapse them.
251
+ const reg = await makeRegistry();
252
+ const out = parseTool(await getTopologyHandler(reg, {}));
253
+ assert.equal(out.resources.length, fixture().resources.length);
254
+ });
255
+ });
@@ -8,6 +8,19 @@ export declare function validateMetricName(metric: string, registry: ConnectorRe
8
8
  */
9
9
  export declare function sanitizeLabelValue(value: string): string | null;
10
10
  export declare function validateServiceName(service: string): string | null;
11
+ /**
12
+ * Validate a structured `labels` filter map for query_logs. Fail-closed:
13
+ * any bad key/value rejects the whole request rather than silently
14
+ * dropping a filter (a dropped filter could widen results past what the
15
+ * caller intended). Bounds the map size + value length so a crafted input
16
+ * can't build a pathological query.
17
+ */
18
+ export declare function validateLogLabels(labels: unknown): string | null;
19
+ /**
20
+ * Validate the query_logs `aggregate` spec. Fail-closed, like the labels
21
+ * validator. Returns an error string or null.
22
+ */
23
+ export declare function validateLogAggregate(aggregate: unknown): string | null;
11
24
  export declare function errorResponse(message: string): {
12
25
  content: {
13
26
  type: "text";