@thotischner/observability-mcp 1.8.1 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. package/dist/analysis/history.d.ts +70 -0
  2. package/dist/analysis/history.js +170 -0
  3. package/dist/analysis/history.test.d.ts +1 -0
  4. package/dist/analysis/history.test.js +141 -0
  5. package/dist/audit/log.d.ts +9 -0
  6. package/dist/audit/log.js +20 -0
  7. package/dist/audit/redaction-bypass.d.ts +67 -0
  8. package/dist/audit/redaction-bypass.js +64 -0
  9. package/dist/audit/redaction-bypass.test.d.ts +1 -0
  10. package/dist/audit/redaction-bypass.test.js +72 -0
  11. package/dist/audit/sinks/s3.d.ts +61 -0
  12. package/dist/audit/sinks/s3.js +179 -0
  13. package/dist/audit/sinks/s3.test.d.ts +1 -0
  14. package/dist/audit/sinks/s3.test.js +175 -0
  15. package/dist/audit/sinks/types.d.ts +18 -0
  16. package/dist/audit/sinks/types.js +1 -0
  17. package/dist/audit/sinks/webhook.d.ts +45 -0
  18. package/dist/audit/sinks/webhook.js +111 -0
  19. package/dist/audit/sinks/webhook.test.d.ts +1 -0
  20. package/dist/audit/sinks/webhook.test.js +162 -0
  21. package/dist/auth/credentials.d.ts +11 -0
  22. package/dist/auth/credentials.js +27 -0
  23. package/dist/auth/credentials.test.js +21 -1
  24. package/dist/auth/csrf.d.ts +26 -0
  25. package/dist/auth/csrf.js +128 -0
  26. package/dist/auth/csrf.test.d.ts +1 -0
  27. package/dist/auth/csrf.test.js +143 -0
  28. package/dist/auth/local-users.d.ts +6 -0
  29. package/dist/auth/local-users.js +11 -0
  30. package/dist/auth/local-users.test.js +41 -0
  31. package/dist/auth/middleware.d.ts +7 -6
  32. package/dist/auth/oidc/dcr.d.ts +70 -0
  33. package/dist/auth/oidc/dcr.js +160 -0
  34. package/dist/auth/oidc/dcr.test.d.ts +1 -0
  35. package/dist/auth/oidc/dcr.test.js +109 -0
  36. package/dist/auth/oidc/endpoints.js +44 -0
  37. package/dist/auth/oidc/profiles.d.ts +22 -0
  38. package/dist/auth/oidc/profiles.js +95 -0
  39. package/dist/auth/oidc/profiles.test.d.ts +1 -0
  40. package/dist/auth/oidc/profiles.test.js +51 -0
  41. package/dist/auth/oidc/runtime.d.ts +3 -0
  42. package/dist/auth/oidc/runtime.js +16 -3
  43. package/dist/auth/oidc/runtime.test.js +1 -0
  44. package/dist/auth/policy/batch-dry-run.d.ts +56 -0
  45. package/dist/auth/policy/batch-dry-run.js +144 -0
  46. package/dist/auth/policy/batch-dry-run.test.d.ts +1 -0
  47. package/dist/auth/policy/batch-dry-run.test.js +140 -0
  48. package/dist/auth/policy/engine.d.ts +20 -4
  49. package/dist/auth/policy/engine.js +16 -2
  50. package/dist/auth/policy/loader.d.ts +11 -1
  51. package/dist/auth/policy/loader.js +37 -0
  52. package/dist/auth/policy/loader.test.d.ts +1 -0
  53. package/dist/auth/policy/loader.test.js +86 -0
  54. package/dist/auth/policy/opa.d.ts +5 -5
  55. package/dist/auth/policy/opa.js +25 -14
  56. package/dist/auth/policy/opa.test.js +48 -0
  57. package/dist/auth/rbac.d.ts +23 -1
  58. package/dist/auth/rbac.js +43 -1
  59. package/dist/auth/rbac.test.js +62 -0
  60. package/dist/cli/index.js +3 -0
  61. package/dist/cli/inspector-config.d.ts +9 -0
  62. package/dist/cli/inspector-config.js +28 -0
  63. package/dist/cli/inspector-config.test.d.ts +1 -0
  64. package/dist/cli/inspector-config.test.js +33 -0
  65. package/dist/cli/lib.d.ts +1 -1
  66. package/dist/cli/lib.js +1 -0
  67. package/dist/conformance/mcp-2025-11-25.test.d.ts +1 -0
  68. package/dist/conformance/mcp-2025-11-25.test.js +206 -0
  69. package/dist/connectors/interface.d.ts +5 -1
  70. package/dist/connectors/loader.d.ts +8 -0
  71. package/dist/connectors/loader.js +55 -4
  72. package/dist/connectors/loader.test.d.ts +1 -0
  73. package/dist/connectors/loader.test.js +78 -0
  74. package/dist/connectors/manifest-hooks.test.d.ts +1 -0
  75. package/dist/connectors/manifest-hooks.test.js +206 -0
  76. package/dist/connectors/prometheus.test.js +31 -13
  77. package/dist/connectors/registry.d.ts +13 -0
  78. package/dist/connectors/registry.js +30 -0
  79. package/dist/connectors/registry.test.js +56 -2
  80. package/dist/context.d.ts +32 -0
  81. package/dist/context.js +35 -0
  82. package/dist/context.test.d.ts +1 -0
  83. package/dist/context.test.js +58 -0
  84. package/dist/federation/registry.d.ts +54 -0
  85. package/dist/federation/registry.js +122 -0
  86. package/dist/federation/registry.test.d.ts +1 -0
  87. package/dist/federation/registry.test.js +206 -0
  88. package/dist/federation/upstream.d.ts +86 -0
  89. package/dist/federation/upstream.js +162 -0
  90. package/dist/federation/upstream.test.d.ts +1 -0
  91. package/dist/federation/upstream.test.js +118 -0
  92. package/dist/index.js +1435 -126
  93. package/dist/metrics/self.d.ts +1 -0
  94. package/dist/metrics/self.js +8 -0
  95. package/dist/middleware/ssrfGuard.d.ts +15 -0
  96. package/dist/middleware/ssrfGuard.js +103 -0
  97. package/dist/middleware/ssrfGuard.test.d.ts +1 -0
  98. package/dist/middleware/ssrfGuard.test.js +81 -0
  99. package/dist/observability/otel.d.ts +20 -0
  100. package/dist/observability/otel.js +118 -0
  101. package/dist/observability/otel.test.d.ts +1 -0
  102. package/dist/observability/otel.test.js +56 -0
  103. package/dist/openapi.js +215 -7
  104. package/dist/openapi.test.js +34 -0
  105. package/dist/policy/redact.js +1 -1
  106. package/dist/postmortem/store.d.ts +34 -0
  107. package/dist/postmortem/store.js +113 -0
  108. package/dist/postmortem/store.test.d.ts +1 -0
  109. package/dist/postmortem/store.test.js +118 -0
  110. package/dist/postmortem/synthesizer.d.ts +83 -0
  111. package/dist/postmortem/synthesizer.js +205 -0
  112. package/dist/postmortem/synthesizer.test.d.ts +1 -0
  113. package/dist/postmortem/synthesizer.test.js +141 -0
  114. package/dist/products/loader.d.ts +31 -3
  115. package/dist/products/loader.js +77 -4
  116. package/dist/products/loader.test.js +90 -1
  117. package/dist/quota/charge.d.ts +28 -0
  118. package/dist/quota/charge.js +30 -0
  119. package/dist/quota/charge.test.d.ts +1 -0
  120. package/dist/quota/charge.test.js +83 -0
  121. package/dist/quota/limiter.d.ts +29 -4
  122. package/dist/quota/limiter.js +64 -8
  123. package/dist/quota/limiter.test.js +86 -0
  124. package/dist/scim/compliance.test.d.ts +1 -0
  125. package/dist/scim/compliance.test.js +169 -0
  126. package/dist/scim/factory.test.d.ts +1 -0
  127. package/dist/scim/factory.test.js +54 -0
  128. package/dist/scim/group-role-map.d.ts +4 -0
  129. package/dist/scim/group-role-map.js +33 -0
  130. package/dist/scim/group-role-map.test.d.ts +1 -0
  131. package/dist/scim/group-role-map.test.js +33 -0
  132. package/dist/scim/patch-ops.test.d.ts +1 -0
  133. package/dist/scim/patch-ops.test.js +100 -0
  134. package/dist/scim/redis-store.d.ts +38 -0
  135. package/dist/scim/redis-store.js +178 -0
  136. package/dist/scim/redis-store.test.d.ts +1 -0
  137. package/dist/scim/redis-store.test.js +138 -0
  138. package/dist/scim/routes.d.ts +40 -0
  139. package/dist/scim/routes.js +395 -0
  140. package/dist/scim/store.d.ts +76 -0
  141. package/dist/scim/store.js +196 -0
  142. package/dist/scim/store.test.d.ts +1 -0
  143. package/dist/scim/store.test.js +121 -0
  144. package/dist/scim/types.d.ts +73 -0
  145. package/dist/scim/types.js +29 -0
  146. package/dist/sdk/hook-wrappers.d.ts +39 -0
  147. package/dist/sdk/hook-wrappers.js +113 -0
  148. package/dist/sdk/hook-wrappers.test.d.ts +1 -0
  149. package/dist/sdk/hook-wrappers.test.js +204 -0
  150. package/dist/sdk/hooks.d.ts +77 -0
  151. package/dist/sdk/hooks.js +72 -0
  152. package/dist/sdk/hooks.test.d.ts +1 -0
  153. package/dist/sdk/hooks.test.js +159 -0
  154. package/dist/sdk/index.d.ts +15 -0
  155. package/dist/sdk/index.js +1 -0
  156. package/dist/sdk/manifest-schema.d.ts +17 -0
  157. package/dist/sdk/manifest-schema.js +21 -0
  158. package/dist/tools/context-seam.test.js +6 -1
  159. package/dist/tools/detect-anomalies.d.ts +12 -1
  160. package/dist/tools/detect-anomalies.js +26 -5
  161. package/dist/tools/generate-postmortem.d.ts +35 -0
  162. package/dist/tools/generate-postmortem.js +191 -0
  163. package/dist/tools/get-anomaly-history.d.ts +35 -0
  164. package/dist/tools/get-anomaly-history.js +126 -0
  165. package/dist/tools/get-service-health.d.ts +1 -1
  166. package/dist/tools/get-service-health.js +4 -3
  167. package/dist/tools/list-services.d.ts +1 -1
  168. package/dist/tools/list-services.js +3 -2
  169. package/dist/tools/list-sources.d.ts +1 -1
  170. package/dist/tools/list-sources.js +6 -2
  171. package/dist/tools/query-logs.d.ts +1 -1
  172. package/dist/tools/query-logs.js +2 -2
  173. package/dist/tools/query-metrics.d.ts +1 -1
  174. package/dist/tools/query-metrics.js +19 -6
  175. package/dist/tools/query-traces.d.ts +47 -0
  176. package/dist/tools/query-traces.js +145 -0
  177. package/dist/tools/query-traces.test.d.ts +1 -0
  178. package/dist/tools/query-traces.test.js +110 -0
  179. package/dist/tools/registry-names.d.ts +35 -0
  180. package/dist/tools/registry-names.js +54 -0
  181. package/dist/tools/registry-names.test.d.ts +1 -0
  182. package/dist/tools/registry-names.test.js +61 -0
  183. package/dist/tools/topology.d.ts +3 -3
  184. package/dist/tools/topology.js +33 -11
  185. package/dist/tools/topology.test.js +45 -0
  186. package/dist/topology/merge.d.ts +22 -0
  187. package/dist/topology/merge.js +178 -0
  188. package/dist/topology/merge.test.d.ts +1 -0
  189. package/dist/topology/merge.test.js +110 -0
  190. package/dist/transport/sessionStore.d.ts +66 -0
  191. package/dist/transport/sessionStore.js +138 -0
  192. package/dist/transport/sessionStore.test.d.ts +1 -0
  193. package/dist/transport/sessionStore.test.js +118 -0
  194. package/dist/transport/transportSessionMap.d.ts +70 -0
  195. package/dist/transport/transportSessionMap.js +128 -0
  196. package/dist/transport/transportSessionMap.test.d.ts +1 -0
  197. package/dist/transport/transportSessionMap.test.js +111 -0
  198. package/dist/transport/websocket.d.ts +35 -0
  199. package/dist/transport/websocket.js +133 -0
  200. package/dist/transport/websocket.test.d.ts +1 -0
  201. package/dist/transport/websocket.test.js +124 -0
  202. package/dist/types.d.ts +51 -0
  203. package/dist/ui/index.html +2529 -145
  204. package/package.json +13 -3
@@ -0,0 +1,83 @@
1
/**
 * One anomaly observation handed to the post-mortem synthesizer.
 *
 * - `ts`: timestamp string; the synthesizer orders the report timeline
 *   ascending by this value.
 * - `service`, `score`, `severity`, `method`: copied verbatim into each
 *   timeline row of the report.
 * - `score`: the highest score across all samples is reported as the peak.
 * - `signal`: optional grouping label for "contributing signals"; when it
 *   is absent the sample is grouped under its `method` instead.
 *
 * NOTE(review): the accompanying tests use methods such as "mad",
 * "seasonality", "correlator" and severities "warn" / "critical"; the full
 * set of allowed values is not visible here.
 */
+ export interface AnomalySample {
2
+ ts: string;
3
+ service: string;
4
+ score: number;
5
+ method: string;
6
+ severity: string;
7
+ signal?: string;
8
+ }
9
/**
 * One node of the blast-radius graph.
 *
 * The rendered report lists each node by `name` and `kind`. The first node
 * with `root` set is presented as the "Root node"; when no node is flagged,
 * the first node in the list is used instead.
 *
 * NOTE(review): `id` is presumably what the graph's edge `from` / `to`
 * fields refer to — confirm against the topology producer.
 */
+ export interface BlastRadiusNode {
10
+ id: string;
11
+ kind: string;
12
+ name: string;
13
+ /** Whether this node is the suspected root cause (the input service). */
14
+ root?: boolean;
15
+ }
16
/**
 * Compact summary of one trace.
 *
 * The report's "Related traces" table shows `traceId`, `rootService`,
 * `durationMs` and `hasError` (as yes/no). Traces with `hasError` set are
 * counted to produce the "carried error spans" figures. `rootName` is
 * carried through in the structured `topTraces` section but is not shown
 * in the Markdown table.
 */
+ export interface TraceSummary {
17
+ traceId: string;
18
+ rootName: string;
19
+ rootService: string;
20
+ durationMs: number;
21
+ hasError: boolean;
22
+ }
23
/**
 * Everything `synthesizePostmortem` needs, fetched by the caller
 * beforehand — the synthesizer itself performs no queries.
 *
 * Notes on how the fields are consumed:
 * - `anomalies` may be empty; the report then says no anomalies were
 *   recorded and suggests checking the history sink.
 * - `traces` are used in the order given: the first 10 become the
 *   report's top traces; the synthesizer does not re-sort them.
 * - `logHighlights` is optional and treated as an empty list when absent.
 */
+ export interface PostmortemInput {
24
+ /** Suspected root-cause service (the operator's first guess). */
25
+ service: string;
26
+ /** Rolling window the incident took place in, e.g. "2h", "6h". */
27
+ window: string;
28
+ /** Tenant the incident occurred in. */
29
+ tenant: string;
30
+ /** RFC-3339 start + end of the incident window for human display. */
31
+ fromIso: string;
32
+ toIso: string;
33
+ /** Live anomaly samples within the window. */
34
+ anomalies: AnomalySample[];
35
+ /** Blast-radius graph at peak. */
36
+ blastRadius: {
37
+ nodes: BlastRadiusNode[];
38
+ edges: Array<{
39
+ from: string;
40
+ to: string;
41
+ relation: string;
42
+ }>;
43
+ };
44
+ /** Trace summaries (top by duration). */
45
+ traces: TraceSummary[];
46
+ /** Optional log-error summary lines, e.g. ["payment-service: 412 5xx in window"]. */
47
+ logHighlights?: string[];
48
+ }
49
/**
 * Result of `synthesizePostmortem`.
 *
 * `service`, `window`, `fromIso` and `toIso` are echoed from the input.
 * Within `sections`:
 * - `timeline` holds every anomaly sample in ascending timestamp order
 *   (the Markdown table shows only the first 20 of them).
 * - `blastRadius.nodes` is the input node list; `edgeCount` is the number
 *   of input edges.
 * - `topTraces` is the first 10 input traces, in input order.
 * - `contributingSignals` is ranked by `meanScore`, highest first;
 *   `meanScore` is rounded to two decimals.
 * - `followUps` always contains at least one suggestion.
 * - `logHighlights` is the input list, or an empty list when none was given.
 */
+ export interface PostmortemReport {
50
+ service: string;
51
+ window: string;
52
+ fromIso: string;
53
+ toIso: string;
54
+ /** Compact synopsis the UI puts at the top of the report. */
55
+ synopsis: string;
56
+ /** Markdown body of the full report. */
57
+ markdown: string;
58
+ /** Structured form for callers that want to render their own UI. */
59
+ sections: {
60
+ timeline: Array<{
61
+ ts: string;
62
+ service: string;
63
+ score: number;
64
+ severity: string;
65
+ method: string;
66
+ }>;
67
+ blastRadius: {
68
+ nodes: BlastRadiusNode[];
69
+ edgeCount: number;
70
+ };
71
+ topTraces: TraceSummary[];
72
+ contributingSignals: Array<{
73
+ signal: string;
74
+ count: number;
75
+ meanScore: number;
76
+ }>;
77
+ followUps: string[];
78
+ logHighlights: string[];
79
+ };
80
+ }
81
+ /** Synthesise one report from already-fetched primitives. Pure
82
+ * compute — no I/O.
 *
 * @param input Pre-fetched incident data (anomalies, blast-radius graph,
 *   traces, optional log highlights); nothing is queried here.
 * @returns The report: synopsis, Markdown body and structured sections.
 */
83
+ export declare function synthesizePostmortem(input: PostmortemInput): PostmortemReport;
@@ -0,0 +1,205 @@
1
+ // Auto-post-mortem synthesizer — Phase F19.
2
+ //
3
+ // Stitches together the existing observability primitives — anomaly
4
+ // history (F15), blast-radius (F13/topology), trace summaries (F13),
5
+ // log-derived error patterns (existing query_logs) — into a single
6
+ // markdown report a human (or LLM) can read in one shot.
7
+ //
8
+ // The synthesizer is pure: it accepts the already-fetched results of
9
+ // the upstream queries as plain data, so the tool layer can compose
10
+ // them without the synthesizer depending on the entire
11
+ // ConnectorRegistry API. Tests pass fake data and don't need a live demo stack.
12
/**
 * Synthesise one post-mortem report from already-fetched primitives.
 * Pure compute — no I/O.
 *
 * @param {object} input - Pre-fetched incident data (`PostmortemInput`):
 *   `service`, `window`, `tenant`, `fromIso`, `toIso`, `anomalies`,
 *   `blastRadius` ({ nodes, edges }), `traces` and optional `logHighlights`.
 * @returns {object} The report (`PostmortemReport`): the echoed
 *   service/window/bounds, a one-paragraph `synopsis`, the full `markdown`
 *   body, and structured `sections` for callers that render their own UI.
 */
export function synthesizePostmortem(input) {
    // Chronological order. Comparing parsed instants (instead of the raw
    // strings) keeps the order right when samples mix UTC offsets or
    // precisions, and does not depend on the host locale's collation.
    const timeline = [...input.anomalies]
        .sort((a, b) => compareTimestamps(a.ts, b.ts))
        .map((a) => ({ ts: a.ts, service: a.service, score: a.score, severity: a.severity, method: a.method }));
    const contributingSignals = aggregateBySignal(input.anomalies);
    // Peak starts at 0, so an empty sample list reports a peak of 0.
    const peakScore = input.anomalies.reduce((m, a) => Math.max(m, a.score), 0);
    const errorTraces = input.traces.filter((t) => t.hasError).length;
    // Prefer the node flagged as root; otherwise fall back to the first
    // node (undefined when the graph is empty).
    const peakNode = input.blastRadius.nodes.find((n) => n.root) ?? input.blastRadius.nodes[0];
    const blastSize = input.blastRadius.nodes.length;
    const followUps = inferFollowUps(input, { peakScore, errorTraces, blastSize });
    const synopsis = synopsisFor(input, peakScore, errorTraces, blastSize);
    const markdown = renderMarkdown({
        input,
        timeline,
        contributingSignals,
        peakNode,
        peakScore,
        errorTraces,
        blastSize,
        followUps,
        synopsis,
    });
    return {
        service: input.service,
        window: input.window,
        fromIso: input.fromIso,
        toIso: input.toIso,
        synopsis,
        markdown,
        sections: {
            timeline,
            blastRadius: { nodes: input.blastRadius.nodes, edgeCount: input.blastRadius.edges.length },
            // Traces are taken in the order supplied by the caller.
            topTraces: input.traces.slice(0, 10),
            contributingSignals,
            followUps,
            logHighlights: input.logHighlights ?? [],
        },
    };
}
/**
 * Total order over timestamp strings: parseable timestamps first, ordered
 * by instant; unparseable ones last. Ties (and unparseable pairs) fall
 * back to plain code-unit string comparison so the result is deterministic.
 */
function compareTimestamps(left, right) {
    const leftMs = Date.parse(left);
    const rightMs = Date.parse(right);
    const leftOk = Number.isFinite(leftMs);
    const rightOk = Number.isFinite(rightMs);
    if (leftOk && rightOk && leftMs !== rightMs) {
        return leftMs - rightMs;
    }
    if (leftOk !== rightOk) {
        return leftOk ? -1 : 1;
    }
    if (left === right) {
        return 0;
    }
    return left < right ? -1 : 1;
}
53
/**
 * Group anomaly scores by signal label and rank the groups.
 *
 * A sample without a `signal` is grouped under its `method`. Each group
 * reports its sample count and its mean score rounded to two decimals;
 * groups are returned highest mean first.
 *
 * @param {Array<object>} anomalies - Samples with `score`, `method` and optional `signal`.
 * @returns {Array<{signal: string, count: number, meanScore: number}>}
 */
function aggregateBySignal(anomalies) {
    const scoresByLabel = new Map();
    for (const sample of anomalies) {
        const label = sample.signal ?? sample.method;
        if (!scoresByLabel.has(label)) {
            scoresByLabel.set(label, []);
        }
        scoresByLabel.get(label).push(sample.score);
    }
    const ranked = [];
    for (const [signal, scores] of scoresByLabel) {
        let total = 0;
        for (const score of scores) {
            total += score;
        }
        const meanScore = Math.round((total / scores.length) * 100) / 100;
        ranked.push({ signal, count: scores.length, meanScore });
    }
    ranked.sort((left, right) => right.meanScore - left.meanScore);
    return ranked;
}
71
/**
 * Derive the "Suggested follow-ups" list for a report.
 *
 * With no anomaly samples at all, the only suggestion is to check the
 * history sink. Otherwise one suggestion is added per triggered condition
 * (critical peak, error traces, large blast radius, log highlights); if
 * none triggers, a single "all stable" suggestion is returned.
 *
 * @param {object} input - The post-mortem input (`service`, `anomalies`, optional `logHighlights`).
 * @param {{peakScore: number, errorTraces: number, blastSize: number}} ctx - Pre-computed figures.
 * @returns {string[]} At least one suggestion.
 */
function inferFollowUps(input, ctx) {
    if (input.anomalies.length === 0) {
        return [
            "No anomaly history found for this service in the window — confirm OMCP_ANOMALY_HISTORY_REMOTE_WRITE is wired and Prometheus is scraping the same TSDB.",
        ];
    }
    const { peakScore, errorTraces, blastSize } = ctx;
    const highlightCount = (input.logHighlights ?? []).length;
    const suggestions = [];
    // 0.9 and above is treated as a critical peak.
    if (peakScore >= 0.9) {
        const method = dominantMethod(input.anomalies);
        suggestions.push(`Peak anomaly score ${peakScore} is critical — review the detector's threshold for service '${input.service}' and consider whether the chosen method (${method}) suits this signal's distribution.`);
    }
    if (errorTraces > 0) {
        suggestions.push(`${errorTraces} trace(s) carried error spans during the window — drill into the slowest via \`query_traces(service="${input.service}", errorsOnly=true)\`.`);
    }
    // More than 5 nodes counts as a large radius.
    if (blastSize > 5) {
        suggestions.push(`Blast radius spans ${blastSize} nodes — verify that the dependency edges are still accurate (a stale topology snapshot can blow up the radius and miss the real cause).`);
    }
    if (highlightCount > 0) {
        suggestions.push("Log highlights above point at concrete error patterns — promote the recurring ones to an alert or SLO so the next regression catches itself.");
    }
    if (suggestions.length > 0) {
        return suggestions;
    }
    return [
        "All signals look stable for this window — consider closing the incident as a transient anomaly or expanding the time window.",
    ];
}
94
/**
 * Return the detection method that occurs most often among the samples.
 * On a tie the method seen first wins; with no samples the result is
 * "unknown".
 *
 * @param {Array<{method: string}>} anomalies
 * @returns {string}
 */
function dominantMethod(anomalies) {
    const tally = new Map();
    for (const sample of anomalies) {
        const seen = tally.get(sample.method) ?? 0;
        tally.set(sample.method, seen + 1);
    }
    let winner;
    let winnerCount = 0;
    // Strict ">" keeps the earliest-seen method when counts are equal.
    for (const [method, count] of tally) {
        if (count > winnerCount) {
            winner = method;
            winnerCount = count;
        }
    }
    return winner ?? "unknown";
}
100
/**
 * Build the short synopsis shown at the top of the report.
 *
 * @param {object} input - Post-mortem input (`service`, `fromIso`, `toIso`, `anomalies`).
 * @param {number} peakScore - Highest anomaly score in the window.
 * @param {number} errorTraces - Number of traces that carried error spans.
 * @param {number} blastSize - Number of nodes in the blast radius.
 * @returns {string} A "no anomalies" sentence when there are no samples,
 *   otherwise two sentences summarising the sample count, peak, blast
 *   radius and error traces.
 */
function synopsisFor(input, peakScore, errorTraces, blastSize) {
    const { service, fromIso, toIso } = input;
    const sampleCount = input.anomalies.length;
    if (sampleCount === 0) {
        return `No anomalies recorded for service '${service}' between ${fromIso} and ${toIso}. Either the window was clean, or the history sink wasn't writing at the time.`;
    }
    const headline = `Service '${service}' produced ${sampleCount} anomaly sample(s) between ${fromIso} and ${toIso}, peaking at ${peakScore}.`;
    const impact = `Blast radius at peak covered ${blastSize} node(s); ${errorTraces} trace(s) carried error spans.`;
    return `${headline} ${impact}`;
}
110
/**
 * Render the post-mortem as a single Markdown document.
 *
 * Sections, in order: title and header blockquote, Synopsis, Anomaly
 * timeline, Blast radius at peak, Contributing signals (ranked), Related
 * traces, Log highlights (only when present), Suggested follow-ups, footer.
 *
 * Every table is capped (20 timeline rows, 30 nodes, 10 signals, 10
 * traces) so a pathological input cannot blow the document up; a capped
 * table ends with an ellipsis row stating how many entries were left out,
 * so truncation is never silent. The log-highlight and follow-up lists
 * are rendered in full. Values placed in table cells have `|` escaped and
 * line breaks flattened so they cannot break the table layout.
 *
 * @param {object} ctx - Pre-computed pieces: `input`, `timeline`,
 *   `contributingSignals`, `peakNode` (may be undefined), `errorTraces`,
 *   `followUps` and `synopsis`. Other properties are ignored.
 * @returns {string} The document, lines joined with "\n".
 */
function renderMarkdown(ctx) {
    const { input, timeline, contributingSignals, peakNode, errorTraces, followUps, synopsis } = ctx;
    const TIMELINE_CAP = 20;
    const NODE_CAP = 30;
    const SIGNAL_CAP = 10;
    const TRACE_CAP = 10;
    // Two trailing spaces are a Markdown hard line break; without them the
    // consecutive blockquote lines below collapse into one paragraph.
    const HARD_BREAK = "  ";
    const cell = escapeTableCell;
    const nodes = input.blastRadius.nodes;
    const traces = input.traces;
    const highlights = input.logHighlights ?? [];
    const lines = [];
    lines.push(`# Post-mortem — ${input.service}`);
    lines.push("");
    lines.push(`> **Window:** \`${input.fromIso}\` → \`${input.toIso}\` (\`${input.window}\`)${HARD_BREAK}`);
    lines.push(`> **Tenant:** \`${input.tenant}\`${HARD_BREAK}`);
    lines.push(`> **Generated by:** observability-mcp \`generate_postmortem\``);
    lines.push("");
    lines.push("## Synopsis");
    lines.push("");
    lines.push(synopsis);
    lines.push("");
    lines.push("## Anomaly timeline");
    lines.push("");
    if (timeline.length === 0) {
        lines.push("_No anomaly samples in this window._");
    }
    else {
        lines.push("| ts | service | score | severity | method |");
        lines.push("|---|---|---|---|---|");
        for (const t of timeline.slice(0, TIMELINE_CAP)) {
            lines.push(`| \`${cell(t.ts)}\` | \`${cell(t.service)}\` | ${cell(t.score)} | ${cell(t.severity)} | ${cell(t.method)} |`);
        }
        if (timeline.length > TIMELINE_CAP) {
            lines.push(`| … | _${timeline.length - TIMELINE_CAP} more rows_ | | | |`);
        }
    }
    lines.push("");
    lines.push("## Blast radius at peak");
    lines.push("");
    if (peakNode) {
        lines.push(`Root node: **\`${peakNode.name}\`** (\`${peakNode.kind}\`).`);
    }
    else {
        lines.push("_Topology snapshot empty._");
    }
    lines.push("");
    if (nodes.length > 0) {
        lines.push("| node | kind |");
        lines.push("|---|---|");
        for (const n of nodes.slice(0, NODE_CAP)) {
            lines.push(`| \`${cell(n.name)}\`${n.root ? " *(root)*" : ""} | \`${cell(n.kind)}\` |`);
        }
        if (nodes.length > NODE_CAP) {
            lines.push(`| … | _${nodes.length - NODE_CAP} more nodes_ |`);
        }
    }
    lines.push("");
    lines.push(`Edges in radius: **${input.blastRadius.edges.length}**.`);
    lines.push("");
    lines.push("## Contributing signals (ranked)");
    lines.push("");
    if (contributingSignals.length === 0) {
        lines.push("_No anomaly samples to rank._");
    }
    else {
        lines.push("| signal | samples | mean score |");
        lines.push("|---|---|---|");
        for (const s of contributingSignals.slice(0, SIGNAL_CAP)) {
            lines.push(`| \`${cell(s.signal)}\` | ${cell(s.count)} | ${cell(s.meanScore)} |`);
        }
        if (contributingSignals.length > SIGNAL_CAP) {
            lines.push(`| … | _${contributingSignals.length - SIGNAL_CAP} more signals_ | |`);
        }
    }
    lines.push("");
    lines.push("## Related traces");
    lines.push("");
    if (traces.length === 0) {
        lines.push("_No traces returned for the window. Configure a Tempo / Jaeger source if traces are expected._");
    }
    else {
        lines.push("| trace | service | duration ms | error |");
        lines.push("|---|---|---|---|");
        for (const t of traces.slice(0, TRACE_CAP)) {
            lines.push(`| \`${cell(t.traceId)}\` | \`${cell(t.rootService)}\` | ${cell(t.durationMs)} | ${t.hasError ? "yes" : "no"} |`);
        }
        if (traces.length > TRACE_CAP) {
            lines.push(`| … | _${traces.length - TRACE_CAP} more traces_ | | |`);
        }
        // The leading "\n" leaves a blank line so the note is not parsed
        // as another table row.
        if (errorTraces > 0) {
            lines.push(`\n_${errorTraces} of the returned traces carried error spans._`);
        }
    }
    lines.push("");
    if (highlights.length > 0) {
        lines.push("## Log highlights");
        lines.push("");
        for (const l of highlights) {
            lines.push(`- ${l}`);
        }
        lines.push("");
    }
    lines.push("## Suggested follow-ups");
    lines.push("");
    for (const f of followUps) {
        lines.push(`- ${f}`);
    }
    lines.push("");
    lines.push("---");
    lines.push("");
    lines.push(`*Generated by observability-mcp \`generate_postmortem\` — see \`docs/postmortems.md\` for the prompt sources.*`);
    lines.push("");
    return lines.join("\n");
}
/**
 * Make a value safe to place inside a Markdown (GFM) table cell: line
 * breaks become spaces and `|` is backslash-escaped (GFM honours the
 * escape even inside code spans).
 */
function escapeTableCell(value) {
    return String(value).replace(/\r?\n/g, " ").replace(/\|/g, "\\|");
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,141 @@
1
+ import { test } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { synthesizePostmortem, } from "./synthesizer.js";
4
/**
 * Build a minimal post-mortem input fixture: service "payment", a one-hour
 * window, and empty anomalies / blast radius / traces. Any key present in
 * `overrides` replaces the corresponding default.
 */
function input(overrides = {}) {
    const defaults = {
        service: "payment",
        window: "1h",
        tenant: "default",
        fromIso: "2026-06-06T00:00:00.000Z",
        toIso: "2026-06-06T01:00:00.000Z",
        anomalies: [],
        blastRadius: { nodes: [], edges: [] },
        traces: [],
    };
    return Object.assign({}, defaults, overrides);
}
17
/**
 * Build one anomaly sample for service "payment". `method` defaults to
 * "mad" and `severity` to "warn"; `signal` stays undefined unless given.
 */
function anomaly(ts, score, method = "mad", severity = "warn", signal) {
    const sample = {
        ts: ts,
        service: "payment",
        score: score,
        method: method,
        severity: severity,
        signal: signal,
    };
    return sample;
}
20
+ test("synthesizePostmortem: empty input returns synopsis + 'no anomalies' follow-up", () => {
21
+ const r = synthesizePostmortem(input());
22
+ assert.match(r.synopsis, /No anomalies recorded/);
23
+ assert.equal(r.sections.timeline.length, 0);
24
+ assert.equal(r.sections.followUps.length, 1);
25
+ assert.match(r.sections.followUps[0], /OMCP_ANOMALY_HISTORY_REMOTE_WRITE/);
26
+ });
27
+ test("synthesizePostmortem: timeline is sorted by ts ascending", () => {
28
+ const r = synthesizePostmortem(input({
29
+ anomalies: [
30
+ anomaly("2026-06-06T00:30:00Z", 0.5),
31
+ anomaly("2026-06-06T00:10:00Z", 0.4),
32
+ anomaly("2026-06-06T00:50:00Z", 0.9),
33
+ ],
34
+ }));
35
+ assert.deepEqual(r.sections.timeline.map((t) => t.ts), ["2026-06-06T00:10:00Z", "2026-06-06T00:30:00Z", "2026-06-06T00:50:00Z"]);
36
+ });
37
+ test("synthesizePostmortem: contributing signals aggregated by signal label + ranked by mean score desc", () => {
38
+ const r = synthesizePostmortem(input({
39
+ anomalies: [
40
+ anomaly("2026-06-06T00:10Z", 0.5, "mad", "warn", "request_latency"),
41
+ anomaly("2026-06-06T00:20Z", 0.4, "mad", "warn", "request_latency"),
42
+ anomaly("2026-06-06T00:30Z", 0.95, "seasonality", "critical", "error_rate"),
43
+ ],
44
+ }));
45
+ const sigs = r.sections.contributingSignals;
46
+ assert.equal(sigs.length, 2);
47
+ // error_rate (0.95 mean) ranks above request_latency (0.45 mean)
48
+ assert.equal(sigs[0].signal, "error_rate");
49
+ assert.equal(sigs[0].count, 1);
50
+ assert.equal(sigs[0].meanScore, 0.95);
51
+ assert.equal(sigs[1].signal, "request_latency");
52
+ assert.equal(sigs[1].count, 2);
53
+ assert.equal(sigs[1].meanScore, 0.45);
54
+ });
55
+ test("synthesizePostmortem: missing signal label falls back to method", () => {
56
+ const r = synthesizePostmortem(input({ anomalies: [anomaly("2026-06-06T00:10Z", 0.6, "correlator")] }));
57
+ assert.equal(r.sections.contributingSignals[0].signal, "correlator");
58
+ });
59
+ test("synthesizePostmortem: critical peak triggers a follow-up mentioning the threshold", () => {
60
+ const r = synthesizePostmortem(input({ anomalies: [anomaly("2026-06-06T00:30Z", 0.95)] }));
61
+ assert.ok(r.sections.followUps.some((f) => /Peak anomaly score 0\.95/.test(f)));
62
+ });
63
+ test("synthesizePostmortem: errors-in-traces triggers errorsOnly drill-in suggestion", () => {
64
+ const r = synthesizePostmortem(input({
65
+ anomalies: [anomaly("2026-06-06T00:10Z", 0.6)],
66
+ traces: [
67
+ { traceId: "aaa", rootName: "GET /pay", rootService: "payment", durationMs: 800, hasError: true },
68
+ ],
69
+ }));
70
+ assert.ok(r.sections.followUps.some((f) => /errorsOnly=true/.test(f)));
71
+ });
72
+ test("synthesizePostmortem: large blast radius triggers stale-topology hint", () => {
73
+ const nodes = Array.from({ length: 7 }, (_, i) => ({ id: `n${i}`, kind: "pod", name: `n${i}`, root: i === 0 }));
74
+ const r = synthesizePostmortem(input({
75
+ anomalies: [anomaly("2026-06-06T00:10Z", 0.6)],
76
+ blastRadius: { nodes, edges: [{ from: "n0", to: "n1", relation: "CALLS" }] },
77
+ }));
78
+ assert.ok(r.sections.followUps.some((f) => /7 nodes/.test(f) && /stale topology/i.test(f)));
79
+ });
80
+ test("synthesizePostmortem: clean window returns a 'stable, consider closing' follow-up", () => {
81
+ // The "all signals stable" branch fires only when:
82
+ // anomalies present (not zero)
83
+ // peak < 0.9
84
+ // no error traces
85
+ // blast radius <= 5
86
+ // no log highlights
87
+ const r = synthesizePostmortem(input({
88
+ anomalies: [anomaly("2026-06-06T00:10Z", 0.3)],
89
+ blastRadius: { nodes: [{ id: "n0", kind: "pod", name: "n0", root: true }], edges: [] },
90
+ }));
91
+ assert.ok(r.sections.followUps.some((f) => /stable for this window/.test(f)));
92
+ });
93
// Verifies that the rendered markdown contains every section heading and
// that the headings appear in the documented order. Each heading is
// searched for only after the position of the previous one, so a missing
// OR misplaced heading fails the test (the earlier version compared just
// one pair of headings despite its title).
test("synthesizePostmortem: markdown contains every section header in order", () => {
    const r = synthesizePostmortem(input({
        anomalies: [anomaly("2026-06-06T00:10Z", 0.7)],
        blastRadius: {
            nodes: [{ id: "p", kind: "deployment", name: "payment", root: true }],
            edges: [{ from: "p", to: "rds", relation: "READS_FROM" }],
        },
        traces: [{ traceId: "t", rootName: "GET /pay", rootService: "payment", durationMs: 200, hasError: false }],
        logHighlights: ["payment-service: 12 5xx in window"],
    }));
    let previousAt = -1;
    for (const heading of [
        "# Post-mortem — payment",
        "## Synopsis",
        "## Anomaly timeline",
        "## Blast radius at peak",
        "## Contributing signals (ranked)",
        "## Related traces",
        "## Log highlights",
        "## Suggested follow-ups",
    ]) {
        assert.ok(r.markdown.includes(heading), `markdown missing section: ${heading}`);
        const at = r.markdown.indexOf(heading, previousAt + 1);
        assert.ok(at > previousAt, `markdown section out of order: ${heading}`);
        previousAt = at;
    }
});
118
+ test("synthesizePostmortem: timeline > 20 rows is truncated with an ellipsis row", () => {
119
+ const anomalies = Array.from({ length: 25 }, (_, i) => anomaly(`2026-06-06T00:${String(i).padStart(2, "0")}:00Z`, 0.5 + i * 0.01));
120
+ const r = synthesizePostmortem(input({ anomalies }));
121
+ // The structured section has all 25
122
+ assert.equal(r.sections.timeline.length, 25);
123
+ // The markdown table is capped at 20 data rows + an ellipsis row
124
+ // — count rows specifically inside the Anomaly timeline section
125
+ // (other sections also use | ` ... | tables and would inflate a
126
+ // global grep).
127
+ const md = r.markdown;
128
+ const timelineStart = md.indexOf("## Anomaly timeline");
129
+ const blastStart = md.indexOf("## Blast radius at peak");
130
+ const timelineSection = md.slice(timelineStart, blastStart);
131
+ const tableRows = timelineSection.split("\n").filter((l) => l.startsWith("| `")).length;
132
+ assert.equal(tableRows, 20);
133
+ assert.match(timelineSection, /_5 more rows_/);
134
+ });
135
+ test("synthesizePostmortem: report carries the input window + iso bounds back into the structured shape", () => {
136
+ const r = synthesizePostmortem(input({ window: "6h" }));
137
+ assert.equal(r.service, "payment");
138
+ assert.equal(r.window, "6h");
139
+ assert.equal(r.fromIso, "2026-06-06T00:00:00.000Z");
140
+ assert.equal(r.toIso, "2026-06-06T01:00:00.000Z");
141
+ });
@@ -12,8 +12,13 @@
12
12
  * (YAML or JSON). Missing/empty file → empty catalog.
13
13
  * - Strict validation: unknown action / unknown resource /
14
14
  * unexpected keys reject loudly.
15
- * - Hot-reload on next /api/products call (slice 2 wires the
16
- * reload trigger; for now the file is read once at boot).
15
+ * - Mtime-poll hot-reload: callers (e.g. each /api/products
16
+ * handler) `await store.maybeReload()` before reading. If the
17
+ * file mtime advanced since the last load, the store re-parses
18
+ * and atomically swaps the in-memory file; parse errors keep
19
+ * the previous good state and log loudly. One `stat()` call per
20
+ * reload-aware request — too cheap to matter vs. the network
21
+ * round-trip, no FSWatcher platform fragility (WSL / NFS).
17
22
  */
18
23
  export interface Product {
19
24
  /** Stable identifier — used in URLs, audit entries, /api/products/{id}. */
@@ -47,7 +52,30 @@ export declare function parseProductsText(text: string, origin: string): Product
47
52
  /** In-memory store with tenant- and status-aware queries. */
48
53
  export declare class ProductsStore {
49
54
  private file;
50
- constructor(file?: ProductsFile);
55
+ /** Optional source file path. When set, `maybeReload()` polls its
56
+ * mtime and re-parses on change. Mutations via upsert/delete update
57
+ * `lastMtimeMs` after the caller persists, so the store does not
58
+ * reload its own writes. */
59
+ private path?;
60
+ private lastMtimeMs;
61
+ constructor(file?: ProductsFile, opts?: {
62
+ path?: string;
63
+ initialMtimeMs?: number;
64
+ });
65
+ /** Re-read the source file if its mtime has advanced since the last
66
+ * load. No-op when no path was supplied at construction. Parse or
67
+ * IO errors are logged and the previous good state is kept — the
68
+ * invariant is "the store always reflects a valid catalogue", so a
69
+ * broken edit on disk never takes the running server down. */
70
+ maybeReload(): Promise<{
71
+ reloaded: boolean;
72
+ }>;
73
+ /** Re-stat the source file and pin the mtime cursor to its current
74
+ * value. Call this after a successful write so the store does not
75
+ * treat its own change as an external reload trigger. Best-effort:
76
+ * if the stat fails, the next maybeReload() will simply reload the
77
+ * file once and find it identical. */
78
+ pinMtimeAfterWrite(): Promise<void>;
51
79
  /** Return the product list. When `tenant` is set, filters to that
52
80
  * tenant (entries without a tenant field treated as "default").
53
81
  * When `includeStaging` is false (default), staging products are
@@ -12,10 +12,15 @@
12
12
  * (YAML or JSON). Missing/empty file → empty catalog.
13
13
  * - Strict validation: unknown action / unknown resource /
14
14
  * unexpected keys reject loudly.
15
- * - Hot-reload on next /api/products call (slice 2 wires the
16
- * reload trigger; for now the file is read once at boot).
15
+ * - Mtime-poll hot-reload: callers (e.g. each /api/products
16
+ * handler) `await store.maybeReload()` before reading. If the
17
+ * file mtime advanced since the last load, the store re-parses
18
+ * and atomically swaps the in-memory file; parse errors keep
19
+ * the previous good state and log loudly. One `stat()` call per
20
+ * reload-aware request — too cheap to matter vs. the network
21
+ * round-trip, no FSWatcher platform fragility (WSL / NFS).
17
22
  */
18
- import { readFile, writeFile, rename } from "node:fs/promises";
23
+ import { readFile, writeFile, rename, stat } from "node:fs/promises";
19
24
  import yaml from "js-yaml";
20
25
  const EMPTY = { products: [] };
21
26
  const VALID_STATUS = new Set(["published", "staging"]);
@@ -134,8 +139,76 @@ export function parseProductsText(text, origin) {
134
139
  /** In-memory store with tenant- and status-aware queries. */
135
140
  export class ProductsStore {
136
141
  file;
137
- constructor(file = EMPTY) {
142
+ /** Optional source file path. When set, `maybeReload()` polls its
143
+ * mtime and re-parses on change. After persisting an upsert/delete,
144
+ * the caller invokes `pinMtimeAfterWrite()` to refresh `lastMtimeMs`,
145
+ * so the store does not reload its own writes. */
146
+ path;
147
+ lastMtimeMs = 0;
148
+ constructor(file = EMPTY, opts = {}) {
138
149
  this.file = file;
150
+ this.path = opts.path;
151
+ this.lastMtimeMs = opts.initialMtimeMs ?? 0;
152
+ }
153
+ /** Re-read the source file if its mtime has advanced since the last
154
+ * load. No-op when no path was supplied at construction. Parse or
155
+ * IO errors are logged and the previous good state is kept — the
156
+ * invariant is "the store always reflects a valid catalogue", so a
157
+ * broken edit on disk never takes the running server down. */
158
+ async maybeReload() {
159
+ if (!this.path)
160
+ return { reloaded: false };
161
+ let mtimeMs;
162
+ try {
163
+ const s = await stat(this.path);
164
+ mtimeMs = s.mtimeMs;
165
+ }
166
+ catch (e) {
167
+ const code = e.code;
168
+ // File gone (ENOENT) — keep last good state. Once the file is
169
+ // re-created, stat succeeds again on a later call and its newer
170
+ // mtime triggers a normal reload.
171
+ if (code !== "ENOENT") {
172
+ console.warn(`[products] hot-reload stat(${this.path}) failed: ${e.message} — keeping previous catalogue`);
173
+ }
174
+ return { reloaded: false };
175
+ }
176
+ if (mtimeMs <= this.lastMtimeMs)
177
+ return { reloaded: false };
178
+ let next;
179
+ try {
180
+ next = await readProductsFile(this.path);
181
+ }
182
+ catch (e) {
183
+ // readProductsFile downgrades IO errors to EMPTY but lets
184
+ // parse errors (ProductsLoadError) propagate — so a broken
185
+ // YAML edit lands here, and we explicitly do NOT swap state.
186
+ console.warn(`[products] hot-reload of ${this.path} failed: ${e.message} — keeping previous catalogue`);
187
+ // Bump the mtime cursor anyway so we don't re-log the same
188
+ // failure on every subsequent request until the operator fixes
189
+ // the file (next save advances mtime past this value).
190
+ this.lastMtimeMs = mtimeMs;
191
+ return { reloaded: false };
192
+ }
193
+ this.file = next;
194
+ this.lastMtimeMs = mtimeMs;
195
+ return { reloaded: true };
196
+ }
197
+ /** Re-stat the source file and pin the mtime cursor to its current
198
+ * value. Call this after a successful write so the store does not
199
+ * treat its own change as an external reload trigger. Best-effort:
200
+ * if the stat fails, the next maybeReload() will simply reload the
201
+ * file once and find it identical. */
202
+ async pinMtimeAfterWrite() {
203
+ if (!this.path)
204
+ return;
205
+ try {
206
+ const s = await stat(this.path);
207
+ this.lastMtimeMs = s.mtimeMs;
208
+ }
209
+ catch {
210
+ // Silent — see method JSDoc.
211
+ }
139
212
  }
140
213
  /** Return the product list. When `tenant` is set, filters to that
141
214
  * tenant (entries without a tenant field treated as "default").
@@ -1,6 +1,6 @@
1
1
  import { test } from "node:test";
2
2
  import assert from "node:assert/strict";
3
- import { parseProductsText, ProductsStore, ProductsLoadError } from "./loader.js";
3
+ import { parseProductsText, ProductsStore, ProductsLoadError, readProductsFile } from "./loader.js";
4
4
  test("parseProductsText — empty/minimal products array", () => {
5
5
  const f = parseProductsText("products: []", "test");
6
6
  assert.deepEqual(f.products, []);
@@ -166,3 +166,92 @@ test("ProductsLoadError is the throw class", () => {
166
166
  }
167
167
  assert.fail("expected throw");
168
168
  });
169
+ test("ProductsStore.maybeReload — picks up out-of-band edits on next call", async () => {
170
+ const { mkdtemp, rm, writeFile, utimes } = await import("node:fs/promises");
171
+ const { tmpdir } = await import("node:os");
172
+ const { join } = await import("node:path");
173
+ const dir = await mkdtemp(join(tmpdir(), "omcp-products-reload-"));
174
+ try {
175
+ const file = join(dir, "products.yaml");
176
+ await writeFile(file, "products:\n - id: a\n name: A\n", "utf8");
177
+ const initial = await readProductsFile(file);
178
+ const store = new ProductsStore(initial, { path: file });
179
+ await store.pinMtimeAfterWrite();
180
+ assert.equal(store.list().length, 1);
181
+ assert.equal(store.list()[0].id, "a");
182
+ // Simulate an out-of-band edit. Bump mtime explicitly because
183
+ // some filesystems (WSL → 9P) round mtime to the second, so a
184
+ // back-to-back write can land in the same second and look
185
+ // unchanged to stat().
186
+ await writeFile(file, "products:\n - id: a\n name: A\n - id: b\n name: B\n", "utf8");
187
+ const future = new Date(Date.now() + 5_000);
188
+ await utimes(file, future, future);
189
+ const { reloaded } = await store.maybeReload();
190
+ assert.equal(reloaded, true);
191
+ assert.equal(store.list().length, 2);
192
+ // A second call with no further edit is a no-op.
193
+ const r2 = await store.maybeReload();
194
+ assert.equal(r2.reloaded, false);
195
+ }
196
+ finally {
197
+ await rm(dir, { recursive: true, force: true });
198
+ }
199
+ });
200
+ test("ProductsStore.maybeReload — broken YAML on disk keeps previous good state", async () => {
201
+ const { mkdtemp, rm, writeFile, utimes } = await import("node:fs/promises");
202
+ const { tmpdir } = await import("node:os");
203
+ const { join } = await import("node:path");
204
+ const dir = await mkdtemp(join(tmpdir(), "omcp-products-broken-"));
205
+ try {
206
+ const file = join(dir, "products.yaml");
207
+ await writeFile(file, "products:\n - id: a\n name: A\n", "utf8");
208
+ const store = new ProductsStore(await readProductsFile(file), { path: file });
209
+ await store.pinMtimeAfterWrite();
210
+ // Corrupt the file with an unexpected `junk` key inside the product
211
+ // entry — fails the strict typo guard inside parseProductsText.
212
+ await writeFile(file, "products:\n - id: a\n name: A\n junk: true\n", "utf8");
213
+ const future = new Date(Date.now() + 5_000);
214
+ await utimes(file, future, future);
215
+ const { reloaded } = await store.maybeReload();
216
+ // We did NOT swap state — caller sees the previous good catalogue.
217
+ assert.equal(reloaded, false);
218
+ assert.equal(store.list().length, 1);
219
+ assert.equal(store.list()[0].name, "A");
220
+ }
221
+ finally {
222
+ await rm(dir, { recursive: true, force: true });
223
+ }
224
+ });
225
+ test("ProductsStore.maybeReload — no path = no-op", async () => {
226
+ const store = new ProductsStore({ products: [{ id: "a", name: "A" }] });
227
+ const r = await store.maybeReload();
228
+ assert.equal(r.reloaded, false);
229
+ assert.equal(store.list().length, 1);
230
+ });
231
+ test("ProductsStore.pinMtimeAfterWrite — own writes do not trigger a redundant reload", async () => {
232
+ const { mkdtemp, rm, writeFile, utimes } = await import("node:fs/promises");
233
+ const { tmpdir } = await import("node:os");
234
+ const { join } = await import("node:path");
235
+ const { writeProductsFile } = await import("./loader.js");
236
+ const dir = await mkdtemp(join(tmpdir(), "omcp-products-pin-"));
237
+ try {
238
+ const file = join(dir, "products.yaml");
239
+ await writeFile(file, "products:\n - id: a\n name: A\n", "utf8");
240
+ const store = new ProductsStore(await readProductsFile(file), { path: file });
241
+ await store.pinMtimeAfterWrite();
242
+ // Simulate the server-side mutate-then-persist path.
243
+ store.upsert({ id: "b", name: "B" });
244
+ // Push mtime into the future right after writeProductsFile so it is
245
+ // genuinely past our cursor (1-second-resolution FS guard).
246
+ const future = new Date(Date.now() + 5_000);
247
+ await writeProductsFile(file, store.snapshot());
248
+ await utimes(file, future, future);
249
+ await store.pinMtimeAfterWrite();
250
+ const { reloaded } = await store.maybeReload();
251
+ assert.equal(reloaded, false, "own write must not re-trigger maybeReload");
252
+ assert.equal(store.list().length, 2);
253
+ }
254
+ finally {
255
+ await rm(dir, { recursive: true, force: true });
256
+ }
257
+ });