@thotischner/observability-mcp 1.4.1 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/analysis/anomaly.d.ts +89 -0
  2. package/dist/analysis/anomaly.js +235 -0
  3. package/dist/analysis/anomaly.test.js +149 -1
  4. package/dist/analysis/backtest.d.ts +31 -0
  5. package/dist/analysis/backtest.js +206 -0
  6. package/dist/analysis/backtest.test.d.ts +1 -0
  7. package/dist/analysis/backtest.test.js +34 -0
  8. package/dist/analysis/correlator.d.ts +35 -0
  9. package/dist/analysis/correlator.js +95 -0
  10. package/dist/analysis/correlator.test.js +60 -1
  11. package/dist/analysis/health.d.ts +2 -3
  12. package/dist/analysis/index.d.ts +32 -0
  13. package/dist/analysis/index.js +29 -0
  14. package/dist/analysis/library.test.d.ts +1 -0
  15. package/dist/analysis/library.test.js +44 -0
  16. package/dist/auth/credentials.d.ts +29 -0
  17. package/dist/auth/credentials.js +76 -0
  18. package/dist/auth/credentials.test.d.ts +1 -0
  19. package/dist/auth/credentials.test.js +57 -0
  20. package/dist/context.d.ts +27 -0
  21. package/dist/context.js +18 -0
  22. package/dist/index.js +53 -44
  23. package/dist/net/egress-policy.d.ts +31 -0
  24. package/dist/net/egress-policy.js +37 -0
  25. package/dist/net/egress-policy.test.d.ts +1 -0
  26. package/dist/net/egress-policy.test.js +52 -0
  27. package/dist/tools/context-seam.test.d.ts +1 -0
  28. package/dist/tools/context-seam.test.js +23 -0
  29. package/dist/tools/detect-anomalies.d.ts +2 -1
  30. package/dist/tools/detect-anomalies.js +47 -11
  31. package/dist/tools/get-service-health.d.ts +2 -1
  32. package/dist/tools/get-service-health.js +2 -1
  33. package/dist/tools/handlers.test.js +73 -0
  34. package/dist/tools/list-services.d.ts +2 -1
  35. package/dist/tools/list-services.js +2 -1
  36. package/dist/tools/list-sources.d.ts +2 -1
  37. package/dist/tools/list-sources.js +2 -1
  38. package/dist/tools/query-logs.d.ts +2 -1
  39. package/dist/tools/query-logs.js +2 -1
  40. package/dist/tools/query-metrics.d.ts +2 -1
  41. package/dist/tools/query-metrics.js +9 -1
  42. package/package.json +10 -2
@@ -0,0 +1,206 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Backtesting harness — a labelled synthetic suite + scorer for the anomaly
3
+ // engine. Each case carries ground truth (anomalous or not); the harness runs
4
+ // the production detector over it and computes a confusion matrix →
5
+ // precision / recall / F1. backtest.test.ts asserts a quality bar so a
6
+ // detector regression fails CI, and the published numbers in the README are
7
+ // regenerated from exactly this suite (they cannot silently drift).
8
+ // ---------------------------------------------------------------------------
9
+ import { detectAnomaly, } from "./anomaly.js";
10
+ /** Deterministic LCG so the suite is byte-stable across runs and CI. */
11
+ function rng(seed) {
12
+ let s = seed >>> 0;
13
+ return () => {
14
+ s = (s * 1664525 + 1013904223) >>> 0;
15
+ return s / 0x100000000;
16
+ };
17
+ }
18
+ const HOUR = 3_600_000;
19
+ const T0 = Date.UTC(2026, 0, 1);
20
+ const ts = (i, stepMs = 60_000) => T0 + i * stepMs;
21
+ function series(values, stepMs = 60_000) {
22
+ return values.map((v, i) => ({ timestamp: ts(i, stepMs), value: v }));
23
+ }
24
+ /**
25
+ * Build the labelled suite. Multiple seeded variants per regime so the
26
+ * precision/recall figures are stable rather than hostage to one sample.
27
+ */
28
+ export function buildSuite() {
29
+ const cases = [];
30
+ for (let v = 0; v < 6; v++) {
31
+ const r = rng(1000 + v);
32
+ const noise = (amp) => (r() - 0.5) * 2 * amp;
33
+ // --- POSITIVES -------------------------------------------------------
34
+ // Slow memory-leak ramp toward OOM (the production false-negative).
35
+ cases.push({
36
+ name: `mem-leak-ramp#${v}`,
37
+ category: "slow-ramp",
38
+ points: series(Array.from({ length: 40 }, (_, i) => 120 + i * 7 + noise(4)), HOUR / 6),
39
+ metricKind: "saturation",
40
+ anomalous: true,
41
+ });
42
+ // Hard latency spike sustained over the recent window.
43
+ cases.push({
44
+ name: `latency-spike#${v}`,
45
+ category: "spike",
46
+ points: series([
47
+ ...Array.from({ length: 25 }, () => 50 + noise(3)),
48
+ ...Array.from({ length: 6 }, () => 480 + noise(20)),
49
+ ]),
50
+ metricKind: "latency",
51
+ anomalous: true,
52
+ });
53
+ // Error-rate step jump.
54
+ cases.push({
55
+ name: `error-step#${v}`,
56
+ category: "step",
57
+ points: series([
58
+ ...Array.from({ length: 25 }, () => 1 + Math.abs(noise(0.5))),
59
+ ...Array.from({ length: 6 }, () => 40 + noise(3)),
60
+ ]),
61
+ metricKind: "error_rate",
62
+ anomalous: true,
63
+ });
64
+ // Gradual latency creep (no single spike point).
65
+ cases.push({
66
+ name: `latency-creep#${v}`,
67
+ category: "slow-ramp",
68
+ points: series(Array.from({ length: 36 }, (_, i) => 60 + i * 5 + noise(3)), HOUR / 6),
69
+ metricKind: "latency",
70
+ anomalous: true,
71
+ });
72
+ // --- NEGATIVES -------------------------------------------------------
73
+ // Stable noisy traffic.
74
+ cases.push({
75
+ name: `stable-noisy#${v}`,
76
+ category: "stable",
77
+ points: series(Array.from({ length: 40 }, () => 100 + noise(6))),
78
+ metricKind: "generic",
79
+ anomalous: false,
80
+ });
81
+ // Single transient blip — dwell/hysteresis must suppress it.
82
+ cases.push({
83
+ name: `transient-blip#${v}`,
84
+ category: "transient",
85
+ points: series([
86
+ ...Array.from({ length: 34 }, () => 50 + noise(3)),
87
+ 520,
88
+ ...Array.from({ length: 3 }, () => 50 + noise(3)),
89
+ ]),
90
+ metricKind: "latency",
91
+ anomalous: false,
92
+ });
93
+ // Recovery: error-rate drops to zero — one-sided, not an anomaly.
94
+ cases.push({
95
+ name: `error-recovery#${v}`,
96
+ category: "one-sided",
97
+ points: series([
98
+ ...Array.from({ length: 25 }, () => 15 + Math.abs(noise(2))),
99
+ ...Array.from({ length: 6 }, () => 0),
100
+ ]),
101
+ metricKind: "error_rate",
102
+ anomalous: false,
103
+ });
104
+ // Diurnal pattern, sampled within a normal nightly trough — the
105
+ // seasonality-aware baseline must treat this as expected.
106
+ {
107
+ const pts = [];
108
+ for (let d = 0; d < 6; d++) {
109
+ for (let h = 0; h < 24; h++) {
110
+ const night = h >= 22 || h <= 5;
111
+ pts.push({ timestamp: T0 + (d * 24 + h) * HOUR, value: (night ? 10 : 100) + noise(2) });
112
+ }
113
+ }
114
+ cases.push({
115
+ name: `diurnal-trough#${v}`,
116
+ category: "seasonal",
117
+ points: pts.slice(0, 6 * 24 - 20), // ends mid-night
118
+ metricKind: "generic",
119
+ anomalous: false,
120
+ });
121
+ }
122
+ }
123
+ // --- HARD TIER -------------------------------------------------------
124
+ // Deliberately ambiguous / low-SNR cases. A perfect score here would be a
125
+ // sign the suite is too easy; we publish whatever the engine actually does.
126
+ for (let v = 0; v < 4; v++) {
127
+ const r = rng(7000 + v);
128
+ const noise = (amp) => (r() - 0.5) * 2 * amp;
129
+ // Low-SNR ramp: real leak, but noise amplitude ~ the per-step rise.
130
+ cases.push({
131
+ name: `noisy-ramp#${v}`,
132
+ category: "hard",
133
+ points: series(Array.from({ length: 38 }, (_, i) => 100 + i * 3 + noise(9)), HOUR / 6),
134
+ metricKind: "saturation",
135
+ anomalous: true,
136
+ });
137
+ // Modest step (≈3σ) just above the recent baseline.
138
+ cases.push({
139
+ name: `small-step#${v}`,
140
+ category: "hard",
141
+ points: series([
142
+ ...Array.from({ length: 25 }, () => 100 + noise(5)),
143
+ ...Array.from({ length: 6 }, () => 122 + noise(5)),
144
+ ]),
145
+ metricKind: "latency",
146
+ anomalous: true,
147
+ });
148
+ // Heavy noise, no real shift — must NOT alarm.
149
+ cases.push({
150
+ name: `heavy-noise-stable#${v}`,
151
+ category: "hard",
152
+ points: series(Array.from({ length: 40 }, () => 100 + noise(18))),
153
+ metricKind: "generic",
154
+ anomalous: false,
155
+ });
156
+ // Two-point blip (still below dwell-sustained) — must NOT alarm.
157
+ cases.push({
158
+ name: `double-blip#${v}`,
159
+ category: "hard",
160
+ points: series([
161
+ ...Array.from({ length: 32 }, () => 60 + noise(4)),
162
+ 300,
163
+ 300,
164
+ ...Array.from({ length: 4 }, () => 60 + noise(4)),
165
+ ]),
166
+ metricKind: "latency",
167
+ anomalous: false,
168
+ });
169
+ }
170
+ return cases;
171
+ }
172
+ export function runBacktest(cases = buildSuite()) {
173
+ let tp = 0, fp = 0, tn = 0, fn = 0;
174
+ const byCategory = {};
175
+ for (const c of cases) {
176
+ const verdict = detectAnomaly(c.points, { metricKind: c.metricKind }).isAnomaly;
177
+ const correct = verdict === c.anomalous;
178
+ if (verdict && c.anomalous)
179
+ tp++;
180
+ else if (verdict && !c.anomalous)
181
+ fp++;
182
+ else if (!verdict && !c.anomalous)
183
+ tn++;
184
+ else
185
+ fn++;
186
+ const cat = (byCategory[c.category] ??= { total: 0, correct: 0 });
187
+ cat.total++;
188
+ if (correct)
189
+ cat.correct++;
190
+ }
191
+ const precision = tp + fp === 0 ? 1 : tp / (tp + fp);
192
+ const recall = tp + fn === 0 ? 1 : tp / (tp + fn);
193
+ const f1 = precision + recall === 0 ? 0 : (2 * precision * recall) / (precision + recall);
194
+ return { total: cases.length, tp, fp, tn, fn, precision, recall, f1, byCategory };
195
+ }
196
+ export function formatReport(r) {
197
+ const pct = (n) => `${(n * 100).toFixed(1)}%`;
198
+ const lines = [
199
+ `Backtest: ${r.total} labelled cases`,
200
+ ` TP=${r.tp} FP=${r.fp} TN=${r.tn} FN=${r.fn}`,
201
+ ` precision=${pct(r.precision)} recall=${pct(r.recall)} F1=${pct(r.f1)}`,
202
+ ` by category:`,
203
+ ...Object.entries(r.byCategory).map(([k, v]) => ` ${k}: ${v.correct}/${v.total}`),
204
+ ];
205
+ return lines.join("\n");
206
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,34 @@
1
+ import { describe, it } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { buildSuite, runBacktest, formatReport } from "./backtest.js";
4
+ // CI quality gate. The suite is deterministic (seeded), so these bounds are
5
+ // stable; a detector change that regresses precision/recall fails CI here.
6
+ // The README "Detection quality" table is regenerated from this same suite.
7
+ describe("anomaly backtest — quality gate", () => {
8
+ const report = runBacktest(buildSuite());
9
+ it("suite is non-trivial and balanced", () => {
10
+ assert.ok(report.total >= 60, `expected ≥60 labelled cases, got ${report.total}`);
11
+ const positives = report.tp + report.fn;
12
+ const negatives = report.tn + report.fp;
13
+ assert.ok(positives >= 20 && negatives >= 20, "suite must have enough of both classes");
14
+ });
15
+ it("precision ≥ 0.95 (no spurious alerts)", () => {
16
+ assert.ok(report.precision >= 0.95, `precision ${report.precision.toFixed(3)} below gate\n${formatReport(report)}`);
17
+ });
18
+ it("recall ≥ 0.80", () => {
19
+ assert.ok(report.recall >= 0.8, `recall ${report.recall.toFixed(3)} below gate\n${formatReport(report)}`);
20
+ });
21
+ it("F1 ≥ 0.88", () => {
22
+ assert.ok(report.f1 >= 0.88, `F1 ${report.f1.toFixed(3)} below gate\n${formatReport(report)}`);
23
+ });
24
+ it("clean regimes are detected perfectly (no regression)", () => {
25
+ for (const regime of ["slow-ramp", "spike", "step", "stable", "transient", "one-sided", "seasonal"]) {
26
+ const c = report.byCategory[regime];
27
+ assert.ok(c, `missing category ${regime}`);
28
+ assert.equal(c.correct, c.total, `${regime}: ${c.correct}/${c.total} — regression\n${formatReport(report)}`);
29
+ }
30
+ });
31
+ it("prints the report (visible in CI logs)", () => {
32
+ console.log("\n" + formatReport(report) + "\n");
33
+ });
34
+ });
@@ -5,3 +5,38 @@ import type { AnomalyReport, LogResult, MetricResult } from "../types.js";
5
5
  * in the same time window, they are correlated.
6
6
  */
7
7
  export declare function correlateSignals(anomalies: AnomalyReport[], logResults: LogResult[], metricResults: MetricResult[]): string[];
8
+ /** Directed edge: `from` calls / depends on `to`. */
9
+ export interface ServiceEdge {
10
+ from: string;
11
+ to: string;
12
+ }
13
+ export interface RankInputAnomaly {
14
+ service: string;
15
+ metric: string;
16
+ severity: "low" | "medium" | "high";
17
+ /** Epoch ms when the anomaly first breached, if known. Lower = earlier. */
18
+ onsetTs?: number;
19
+ }
20
+ export interface ChangeMarker {
21
+ service: string;
22
+ /** Epoch ms of a deploy / config change / rollout. */
23
+ ts: number;
24
+ kind?: string;
25
+ }
26
+ export interface RootCauseCandidate {
27
+ service: string;
28
+ score: number;
29
+ confidence: "low" | "medium" | "high";
30
+ reasons: string[];
31
+ }
32
+ export interface RootCauseResult {
33
+ ranked: RootCauseCandidate[];
34
+ summary: string;
35
+ }
36
+ /**
37
+ * Rank likely root-cause services among co-occurring anomalies.
38
+ *
39
+ * `edges` is the (caller → callee) service graph; it may be empty, in which
40
+ * case ranking falls back to onset ordering + change markers + severity.
41
+ */
42
+ export declare function rankRootCause(anomalies: RankInputAnomaly[], edges?: ServiceEdge[], changes?: ChangeMarker[]): RootCauseResult;
@@ -29,3 +29,98 @@ export function correlateSignals(anomalies, logResults, metricResults) {
29
29
  }
30
30
  return [...new Set(correlations)]; // Deduplicate
31
31
  }
32
+ const SEV_WEIGHT = { low: 1, medium: 2, high: 3 };
33
+ /**
34
+ * Rank likely root-cause services among co-occurring anomalies.
35
+ *
36
+ * `edges` is the (caller → callee) service graph; it may be empty, in which
37
+ * case ranking falls back to onset ordering + change markers + severity.
38
+ */
39
+ export function rankRootCause(anomalies, edges = [], changes = []) {
40
+ const services = [...new Set(anomalies.map((a) => a.service))];
41
+ if (services.length === 0) {
42
+ return { ranked: [], summary: "No anomalies to attribute." };
43
+ }
44
+ // "depends on": from -> set(to). A root cause is a service that other
45
+ // anomalous services (transitively) depend on.
46
+ const deps = new Map();
47
+ for (const e of edges) {
48
+ if (!deps.has(e.from))
49
+ deps.set(e.from, new Set());
50
+ deps.get(e.from).add(e.to);
51
+ }
52
+ const dependsOn = (from, to, seen = new Set()) => {
53
+ if (seen.has(from))
54
+ return false;
55
+ seen.add(from);
56
+ const direct = deps.get(from);
57
+ if (!direct)
58
+ return false;
59
+ if (direct.has(to))
60
+ return true;
61
+ for (const mid of direct)
62
+ if (dependsOn(mid, to, seen))
63
+ return true;
64
+ return false;
65
+ };
66
+ const earliest = Math.min(...anomalies.filter((a) => a.onsetTs !== undefined).map((a) => a.onsetTs));
67
+ const haveOnset = Number.isFinite(earliest);
68
+ const candidates = services.map((svc) => {
69
+ const svcAnoms = anomalies.filter((a) => a.service === svc);
70
+ const reasons = [];
71
+ let score = 0;
72
+ // (1) Dependency position: how many *other* anomalous services depend on
73
+ // this one. Each dependent is a downstream symptom this service explains.
74
+ const dependents = services.filter((other) => other !== svc && dependsOn(other, svc));
75
+ if (dependents.length > 0) {
76
+ score += 5 * dependents.length;
77
+ reasons.push(`${dependents.length} anomalous service(s) depend on it (${dependents.join(", ")}) — their symptoms are likely downstream`);
78
+ }
79
+ // Penalty: this service depends on another anomalous one → likely a victim.
80
+ const upstreamCauses = services.filter((other) => other !== svc && dependsOn(svc, other));
81
+ if (upstreamCauses.length > 0) {
82
+ score -= 3 * upstreamCauses.length;
83
+ reasons.push(`depends on anomalous ${upstreamCauses.join(", ")} — may be a downstream victim`);
84
+ }
85
+ // (2) Onset ordering: started at/near the earliest onset.
86
+ if (haveOnset) {
87
+ const myOnset = Math.min(...svcAnoms.filter((a) => a.onsetTs !== undefined).map((a) => a.onsetTs));
88
+ if (Number.isFinite(myOnset)) {
89
+ const lagSec = Math.round((myOnset - earliest) / 1000);
90
+ if (lagSec <= 0) {
91
+ score += 4;
92
+ reasons.push("anomaly started first (earliest onset)");
93
+ }
94
+ else if (lagSec <= 60) {
95
+ score += 1;
96
+ reasons.push(`onset ${lagSec}s after the first signal`);
97
+ }
98
+ else {
99
+ reasons.push(`onset ${lagSec}s after the first signal — likely reactive`);
100
+ }
101
+ }
102
+ }
103
+ // (3) Deploy/change marker shortly before onset.
104
+ const myOnset = svcAnoms.find((a) => a.onsetTs !== undefined)?.onsetTs;
105
+ const marker = changes
106
+ .filter((c) => c.service === svc)
107
+ .find((c) => myOnset === undefined || (c.ts <= myOnset && myOnset - c.ts <= 15 * 60_000));
108
+ if (marker) {
109
+ score += 4;
110
+ reasons.push(`${marker.kind || "change"} on this service ${myOnset ? `${Math.round((myOnset - marker.ts) / 1000)}s before onset` : "near the incident"}`);
111
+ }
112
+ // Tie-breaker: signal breadth × severity (small weight).
113
+ const breadth = svcAnoms.reduce((s, a) => s + SEV_WEIGHT[a.severity], 0);
114
+ score += 0.25 * breadth;
115
+ return { service: svc, score, confidence: "low", reasons };
116
+ });
117
+ candidates.sort((a, b) => b.score - a.score);
118
+ // Confidence from the score gap between #1 and #2.
119
+ const top = candidates[0];
120
+ const gap = candidates.length > 1 ? top.score - candidates[1].score : top.score;
121
+ top.confidence = gap >= 5 ? "high" : gap >= 2 ? "medium" : "low";
122
+ const summary = candidates.length === 1
123
+ ? `Single anomalous service: ${top.service}.`
124
+ : `Likely root cause: ${top.service} (${top.confidence} confidence). ${top.reasons[0] || "ranked by severity"}. ${candidates.length - 1} other service(s) likely downstream.`;
125
+ return { ranked: candidates, summary };
126
+ }
@@ -1,6 +1,6 @@
1
1
  import { describe, it } from "node:test";
2
2
  import assert from "node:assert/strict";
3
- import { correlateSignals } from "./correlator.js";
3
+ import { correlateSignals, rankRootCause } from "./correlator.js";
4
4
  describe("correlateSignals", () => {
5
5
  it("returns empty for no anomalies", () => {
6
6
  const result = correlateSignals([], [], []);
@@ -51,3 +51,62 @@ describe("correlateSignals", () => {
51
51
  assert.equal(result.length, unique.size);
52
52
  });
53
53
  });
54
+ describe("rankRootCause", () => {
55
+ const A = (service, severity = "high", onsetTs) => ({
56
+ service,
57
+ metric: "latency_p99",
58
+ severity,
59
+ onsetTs,
60
+ });
61
+ it("returns empty result with no anomalies", () => {
62
+ const r = rankRootCause([]);
63
+ assert.deepEqual(r.ranked, []);
64
+ });
65
+ it("single anomalous service is the trivial answer", () => {
66
+ const r = rankRootCause([A("payment-service")]);
67
+ assert.equal(r.ranked.length, 1);
68
+ assert.equal(r.ranked[0].service, "payment-service");
69
+ assert.match(r.summary, /Single anomalous service/);
70
+ });
71
+ it("KEY: the depended-on service outranks its loud downstream caller", () => {
72
+ // api-gateway calls payment-service. Both anomalous, gateway has more
73
+ // signals — but payment-service is the cause; gateway is a victim.
74
+ const anomalies = [
75
+ A("api-gateway", "high"),
76
+ { service: "api-gateway", metric: "error_rate", severity: "high" },
77
+ A("payment-service", "medium"),
78
+ ];
79
+ const edges = [{ from: "api-gateway", to: "payment-service" }];
80
+ const r = rankRootCause(anomalies, edges);
81
+ assert.equal(r.ranked[0].service, "payment-service");
82
+ assert.ok(r.ranked.find((c) => c.service === "api-gateway").score <
83
+ r.ranked[0].score);
84
+ assert.match(r.summary, /payment-service/);
85
+ });
86
+ it("transitive dependency: gateway → order → payment ranks payment first", () => {
87
+ const r = rankRootCause([A("api-gateway"), A("order-service"), A("payment-service")], [
88
+ { from: "api-gateway", to: "order-service" },
89
+ { from: "order-service", to: "payment-service" },
90
+ ]);
91
+ assert.equal(r.ranked[0].service, "payment-service");
92
+ });
93
+ it("onset ordering breaks ties when no graph is available", () => {
94
+ const t = 1_700_000_000_000;
95
+ const r = rankRootCause([
96
+ A("order-service", "high", t + 90_000),
97
+ A("payment-service", "high", t),
98
+ ]);
99
+ assert.equal(r.ranked[0].service, "payment-service");
100
+ assert.ok(r.ranked[0].reasons.some((x) => /started first/.test(x)));
101
+ });
102
+ it("a deploy marker shortly before onset boosts that service", () => {
103
+ const t = 1_700_000_000_000;
104
+ const r = rankRootCause([A("payment-service", "medium", t), A("order-service", "high", t)], [], [{ service: "payment-service", ts: t - 120_000, kind: "deploy" }]);
105
+ assert.equal(r.ranked[0].service, "payment-service");
106
+ assert.ok(r.ranked[0].reasons.some((x) => /deploy/.test(x)));
107
+ });
108
+ it("confidence reflects the score gap", () => {
109
+ const clear = rankRootCause([A("api-gateway"), A("payment-service")], [{ from: "api-gateway", to: "payment-service" }]);
110
+ assert.equal(clear.ranked[0].confidence, "high");
111
+ });
112
+ });
@@ -1,12 +1,12 @@
1
1
  import type { HealthStatus, HealthThresholds } from "../types.js";
2
- interface HealthInputs {
2
+ export interface HealthInputs {
3
3
  cpu: number;
4
4
  memory: number;
5
5
  errorRate: number;
6
6
  latencyP99: number;
7
7
  logErrorRate: number;
8
8
  }
9
- interface HealthResult {
9
+ export interface HealthResult {
10
10
  score: number;
11
11
  status: HealthStatus;
12
12
  details: Record<string, {
@@ -16,4 +16,3 @@ interface HealthResult {
16
16
  }>;
17
17
  }
18
18
  export declare function calculateHealthScore(inputs: HealthInputs, thresholds: HealthThresholds): HealthResult;
19
- export {};
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Embeddable analysis library — the deterministic analysis engine
3
+ * (anomaly detection, seasonality, causal root-cause, health scoring) usable
4
+ * in-process, without running the MCP server or any transport.
5
+ *
6
+ * import { analyzeMetric, rankRootCause, calculateHealthScore }
7
+ * from "@thotischner/observability-mcp/analysis";
8
+ *
9
+ * Same code path as the MCP tools — verdicts are identical whether reached via
10
+ * the gateway or this library.
11
+ */
12
+ export { detectAnomaly, detectRobustAnomaly, detectSeasonalAnomaly, detectRecentAnomaly, detectAnomalyPoints, calculateZScore, classifyMetric, median, mad, type MetricKind, type SeasonalPoint, type SeasonalAnomalyOptions, type SeasonalAnomalyResult, type RobustAnomalyOptions, type RobustAnomalyResult, type AnomalyPoint, type ZScoreResult, } from "./anomaly.js";
13
+ export { correlateSignals, rankRootCause, type ServiceEdge, type RankInputAnomaly, type ChangeMarker, type RootCauseCandidate, type RootCauseResult, } from "./correlator.js";
14
+ export { calculateHealthScore, type HealthInputs, type HealthResult, } from "./health.js";
15
+ import { type SeasonalPoint } from "./anomaly.js";
16
+ /**
17
+ * One-call façade: classify the metric by name and run the orchestrated
18
+ * detector (seasonal when enough history, else robust). Thin convenience over
19
+ * {@link detectAnomaly}; identical result to calling it directly with the
20
+ * classified `metricKind`.
21
+ */
22
+ export declare function analyzeMetric(metric: string, points: SeasonalPoint[], opts?: {
23
+ threshold?: number;
24
+ }): {
25
+ isAnomaly: boolean;
26
+ method: "seasonal" | "robust-z" | "trend" | "none";
27
+ score: number;
28
+ recentValue: number;
29
+ baselineValue: number;
30
+ direction: "above" | "below" | "flat";
31
+ reason: string;
32
+ };
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Embeddable analysis library — the deterministic analysis engine
3
+ * (anomaly detection, seasonality, causal root-cause, health scoring) usable
4
+ * in-process, without running the MCP server or any transport.
5
+ *
6
+ * import { analyzeMetric, rankRootCause, calculateHealthScore }
7
+ * from "@thotischner/observability-mcp/analysis";
8
+ *
9
+ * Same code path as the MCP tools — verdicts are identical whether reached via
10
+ * the gateway or this library.
11
+ */
12
+ export {
13
+ // robust + seasonal + orchestrated anomaly detection
14
+ detectAnomaly, detectRobustAnomaly, detectSeasonalAnomaly, detectRecentAnomaly, detectAnomalyPoints, calculateZScore, classifyMetric, median, mad, } from "./anomaly.js";
15
+ export { correlateSignals, rankRootCause, } from "./correlator.js";
16
+ export { calculateHealthScore, } from "./health.js";
17
+ import { detectAnomaly, classifyMetric } from "./anomaly.js";
18
+ /**
19
+ * One-call façade: classify the metric by name and run the orchestrated
20
+ * detector (seasonal when enough history, else robust). Thin convenience over
21
+ * {@link detectAnomaly}; identical result to calling it directly with the
22
+ * classified `metricKind`.
23
+ */
24
+ export function analyzeMetric(metric, points, opts = {}) {
25
+ return detectAnomaly(points, {
26
+ metricKind: classifyMetric(metric),
27
+ threshold: opts.threshold,
28
+ });
29
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,44 @@
1
+ import { describe, it } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import * as lib from "./index.js";
4
+ import { detectAnomaly } from "./anomaly.js";
5
+ // Contract test for the embeddable analysis library surface. Guards that the
6
+ // public API stays importable in-process (no MCP/transport) and that the
7
+ // analyzeMetric façade is exactly the engine path, not a divergent reimpl.
8
+ describe("embeddable analysis library", () => {
9
+ it("exposes the documented public API", () => {
10
+ for (const name of [
11
+ "detectAnomaly",
12
+ "detectRobustAnomaly",
13
+ "detectSeasonalAnomaly",
14
+ "rankRootCause",
15
+ "correlateSignals",
16
+ "calculateHealthScore",
17
+ "classifyMetric",
18
+ "analyzeMetric",
19
+ ]) {
20
+ assert.equal(typeof lib[name], "function", `missing export: ${name}`);
21
+ }
22
+ });
23
+ it("analyzeMetric is identical to detectAnomaly with classified kind", () => {
24
+ const points = Array.from({ length: 40 }, (_, i) => ({
25
+ timestamp: 1_700_000_000_000 + i * 60_000,
26
+ value: i < 30 ? 50 + (i % 3) : 800,
27
+ }));
28
+ const viaFacade = lib.analyzeMetric("latency_p99", points);
29
+ const viaEngine = detectAnomaly(points, { metricKind: "latency" });
30
+ assert.deepEqual(viaFacade, viaEngine);
31
+ });
32
+ it("health scoring is callable standalone", () => {
33
+ const r = lib.calculateHealthScore({ cpu: 20, memory: 30, errorRate: 0, latencyP99: 0.2, logErrorRate: 0 }, {
34
+ weights: { errorRate: 1, latency: 1, cpu: 1, logErrors: 1 },
35
+ cpu: { good: 50, warn: 80, crit: 95 },
36
+ errorRate: { good: 1, warn: 5, crit: 10 },
37
+ latencyP99: { good: 0.5, warn: 1, crit: 2 },
38
+ logErrors: { good: 1, warn: 5, crit: 10 },
39
+ statusBoundaries: { healthy: 80, degraded: 50 },
40
+ });
41
+ assert.ok(r.score >= 0 && r.score <= 100);
42
+ assert.ok(["healthy", "degraded", "critical"].includes(r.status));
43
+ });
44
+ });
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Single-tenant authentication primitive (opt-in, backward compatible).
3
+ *
4
+ * If no credentials are configured the server behaves exactly as before
5
+ * (anonymous, all access). If `OMCP_API_KEYS` is set, the `/mcp` endpoint
6
+ * requires a valid `Authorization: Bearer <token>` or `X-API-Key: <token>`.
7
+ *
8
+ * Config (env, no secrets in files):
9
+ * OMCP_API_KEYS="ci:tok_abc,agent:tok_def" # name:token, comma-separated
10
+ * (a bare "tok_xyz" is allowed; name defaults to "key")
11
+ * OMCP_KEY_SOURCES="agent=prom-prod|loki-prod;ci=prom-staging"
12
+ * # optional coarse per-key source allow-list
13
+ *
14
+ * Rich role-based access control (tools/services/lookback/read-only, the
15
+ * full governance object) is intentionally NOT here — this is only the
16
+ * authentication + identity + coarse source-scoping primitive.
17
+ */
18
+ export interface Credential {
19
+ name: string;
20
+ token: string;
21
+ allowedSources?: string[];
22
+ }
23
+ /** Parse credentials from env. Returns an empty list when unconfigured. */
24
+ export declare function loadCredentials(env?: NodeJS.ProcessEnv): Credential[];
25
+ export declare function credentialsConfigured(env?: NodeJS.ProcessEnv): boolean;
26
+ /** Extract a bearer/api-key token from request headers. */
27
+ export declare function extractToken(headers: Record<string, unknown>): string | null;
28
+ /** Constant-time-ish token match → resolved credential, or null. */
29
+ export declare function resolveToken(token: string | null, creds: Credential[]): Credential | null;