@thotischner/observability-mcp 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/analysis/anomaly.d.ts +89 -0
  2. package/dist/analysis/anomaly.js +235 -0
  3. package/dist/analysis/anomaly.test.js +149 -1
  4. package/dist/analysis/backtest.d.ts +31 -0
  5. package/dist/analysis/backtest.js +206 -0
  6. package/dist/analysis/backtest.test.d.ts +1 -0
  7. package/dist/analysis/backtest.test.js +34 -0
  8. package/dist/analysis/correlator.d.ts +35 -0
  9. package/dist/analysis/correlator.js +95 -0
  10. package/dist/analysis/correlator.test.js +60 -1
  11. package/dist/analysis/health.d.ts +2 -3
  12. package/dist/analysis/index.d.ts +32 -0
  13. package/dist/analysis/index.js +29 -0
  14. package/dist/analysis/library.test.d.ts +1 -0
  15. package/dist/analysis/library.test.js +44 -0
  16. package/dist/auth/credentials.d.ts +29 -0
  17. package/dist/auth/credentials.js +76 -0
  18. package/dist/auth/credentials.test.d.ts +1 -0
  19. package/dist/auth/credentials.test.js +57 -0
  20. package/dist/context.d.ts +27 -0
  21. package/dist/context.js +18 -0
  22. package/dist/enterprise-gate.d.ts +132 -0
  23. package/dist/enterprise-gate.js +510 -0
  24. package/dist/enterprise-gate.test.d.ts +1 -0
  25. package/dist/enterprise-gate.test.js +178 -0
  26. package/dist/index.js +125 -44
  27. package/dist/net/egress-policy.d.ts +31 -0
  28. package/dist/net/egress-policy.js +37 -0
  29. package/dist/net/egress-policy.test.d.ts +1 -0
  30. package/dist/net/egress-policy.test.js +52 -0
  31. package/dist/tools/context-seam.test.d.ts +1 -0
  32. package/dist/tools/context-seam.test.js +23 -0
  33. package/dist/tools/detect-anomalies.d.ts +2 -1
  34. package/dist/tools/detect-anomalies.js +47 -11
  35. package/dist/tools/get-service-health.d.ts +2 -1
  36. package/dist/tools/get-service-health.js +13 -9
  37. package/dist/tools/handlers.test.js +104 -0
  38. package/dist/tools/list-services.d.ts +2 -1
  39. package/dist/tools/list-services.js +2 -1
  40. package/dist/tools/list-sources.d.ts +2 -1
  41. package/dist/tools/list-sources.js +2 -1
  42. package/dist/tools/query-logs.d.ts +2 -1
  43. package/dist/tools/query-logs.js +2 -1
  44. package/dist/tools/query-metrics.d.ts +2 -1
  45. package/dist/tools/query-metrics.js +9 -1
  46. package/dist/ui/index.html +1510 -67
  47. package/package.json +10 -2
@@ -12,6 +12,95 @@ export interface AnomalyPoint {
12
12
  severity: AnomalySeverity;
13
13
  }
14
14
  export declare function detectAnomalyPoints(values: number[], threshold?: number): AnomalyPoint[];
15
+ export declare function median(values: number[]): number;
16
+ /** Median Absolute Deviation, scaled to be a consistent estimator of stdDev. */
17
+ export declare function mad(values: number[], center?: number): number;
18
+ export type MetricKind = "latency" | "error_rate" | "saturation" | "throughput" | "generic";
19
+ export declare function classifyMetric(metric: string): MetricKind;
20
+ export interface RobustAnomalyOptions {
20
+ /** Minimum samples before any detection (cold-start guard). Default: 15. The detector additionally requires at least 3 × recentWindow samples. */
21
+ minSamples?: number;
22
+ /** Number of trailing points evaluated as "recent". Default: 5. */
23
+ recentWindow?: number;
24
+ /** Robust-z threshold. Default: 3.0. */
25
+ threshold?: number;
26
+ /** Consecutive breaching recent points required to fire (dwell/hysteresis). Default: 2. */
27
+ dwell?: number;
28
+ /** Metric family. "error_rate", "latency" and "saturation" only fire on upward deviations; "saturation" and "latency" also run the slow-ramp trend detector. Default: "generic". */ metricKind?: MetricKind;
29
+ }
31
+ export interface RobustAnomalyResult {
32
+ isAnomaly: boolean;
33
+ /** Robust z = (median(recent) - median(baseline)) / MAD(baseline). */
34
+ score: number;
35
+ method: "robust-z" | "trend" | "none";
36
+ direction: "above" | "below" | "flat";
37
+ recentValue: number;
38
+ baselineValue: number;
39
+ reason: string;
40
+ }
41
+ /**
42
+ * Robust anomaly detection.
43
+ *
44
+ * Unlike {@link detectRecentAnomaly}, the baseline is the *early stable* portion
45
+ * of the series (it excludes the recent window AND the trailing ramp), so a slow
46
+ * monotonic increase — e.g. a memory leak heading toward OOM — no longer poisons
47
+ * its own baseline. Saturation/latency metrics additionally run a trend detector
48
+ * that catches gradual ramps even when no single point is a spike.
49
+ */
50
+ export declare function detectRobustAnomaly(values: number[], opts?: RobustAnomalyOptions): RobustAnomalyResult;
51
+ export interface SeasonalPoint {
52
+ /** Unix epoch milliseconds, or an ISO-8601 timestamp string. Numeric values not greater than 1e12 are read as epoch seconds instead of milliseconds. */
53
+ timestamp: number | string;
54
+ /** Sample value. The seasonal detector drops points whose timestamp or value is not finite. */ value: number;
55
+ }
56
+ export interface SeasonalAnomalyOptions {
57
+ /** Season length in seconds. Default: 86400 (daily / time-of-day). */
58
+ periodSeconds?: number;
59
+ /** Phase tolerance in seconds — how close in-phase a historical sample
60
+ * must be to count toward the baseline. Default: periodSeconds / 48
61
+ * (≈30 min for a daily period). */
62
+ phaseToleranceSeconds?: number;
63
+ /** Trailing points treated as "recent". Default: 5. The phase of the last recent point selects the same-phase baseline. */
64
+ recentWindow?: number;
65
+ /** Robust-z threshold against the same-phase distribution. Default: 3.5. */
66
+ threshold?: number;
67
+ /** Minimum same-phase historical samples required to trust the baseline. Default: 4. */
68
+ minPhaseSamples?: number;
69
+ /** Metric family. "error_rate", "latency" and "saturation" only flag upward deviations; other kinds flag both directions. Default: "generic". */ metricKind?: MetricKind;
70
+ }
71
+ export interface SeasonalAnomalyResult {
72
+ isAnomaly: boolean;
73
+ /** false when there is not enough multi-period history — caller should
74
+ * fall back to {@link detectRobustAnomaly}. */
75
+ applicable: boolean;
76
+ score: number;
77
+ expected: number;
78
+ recentValue: number;
79
+ direction: "above" | "below" | "flat";
80
+ phaseSamples: number;
81
+ reason: string;
82
+ }
83
+ /**
84
+ * Seasonal-naive detection: predict the recent value from the robust
85
+ * (median/MAD) distribution of historical points at the same phase of the
86
+ * season, and flag a deviation. Falls back (applicable=false) when the series
87
+ * does not span enough periods to build a same-phase baseline.
88
+ */
89
+ export declare function detectSeasonalAnomaly(points: SeasonalPoint[], opts?: SeasonalAnomalyOptions): SeasonalAnomalyResult;
90
+ /**
91
+ * Orchestrator: prefer the seasonality-aware baseline when the series spans
92
+ * enough periods to build a same-phase distribution; otherwise fall back to
93
+ * the robust windowed detector. Returns a normalized verdict.
94
+ */
95
+ export declare function detectAnomaly(points: SeasonalPoint[], opts?: SeasonalAnomalyOptions & RobustAnomalyOptions): {
96
+ isAnomaly: boolean;
97
+ method: "seasonal" | "robust-z" | "trend" | "none";
98
+ score: number;
99
+ recentValue: number;
100
+ baselineValue: number;
101
+ direction: "above" | "below" | "flat";
102
+ reason: string;
103
+ };
15
104
  /**
16
105
  * Check if the most recent values deviate significantly from the baseline.
17
106
  * Compares the last `recentWindow` values against the rest.
@@ -27,6 +27,241 @@ export function detectAnomalyPoints(values, threshold = 2.0) {
27
27
  }
28
28
  return anomalies;
29
29
  }
30
+ // ---------------------------------------------------------------------------
31
+ // Robust detection (median/MAD) — resistant to the trend & outliers that skew
32
+ // mean/stdDev. Adds warmup, dwell/hysteresis, a slow-ramp trend detector, and
33
+ // per-metric-type behaviour.
34
+ // ---------------------------------------------------------------------------
35
/**
 * Median of a numeric sample.
 *
 * NaN entries are ignored. With a NaN present the `a - b` comparator returns
 * NaN, which leaves the sort order (and therefore the "median") arbitrary, so
 * such samples are dropped before sorting instead.
 *
 * @param values Samples to summarise; the array is not mutated.
 * @returns The middle value (mean of the two middle values for an even
 *   count), or 0 when there is no usable sample.
 */
export function median(values) {
    // filter() already returns a fresh array, so sorting in place is safe.
    const sorted = values.filter((v) => !Number.isNaN(v)).sort((a, b) => a - b);
    if (sorted.length === 0)
        return 0;
    const mid = sorted.length >> 1;
    return sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
}
42
/**
 * Median Absolute Deviation, multiplied by 1.4826 so that it is a consistent
 * estimator of the standard deviation.
 *
 * @param values Samples to measure.
 * @param center Optional precomputed centre; the median of `values` is used
 *   when it is omitted (or null).
 * @returns The scaled MAD, or 0 for an empty input.
 */
export function mad(values, center) {
    if (values.length === 0) {
        return 0;
    }
    const pivot = center ?? median(values);
    const distanceFromPivot = (sample) => Math.abs(sample - pivot);
    const absDeviations = values.map(distanceFromPivot);
    return 1.4826 * median(absDeviations);
}
50
/**
 * Guess the metric family from a metric name.
 *
 * The name is lower-cased and checked against an ordered list of patterns;
 * the first pattern that matches wins, so a name such as
 * "request_duration_errors" is classified as latency, not error_rate.
 *
 * @param metric Metric name.
 * @returns The matching kind, or "generic" when no pattern matches.
 */
export function classifyMetric(metric) {
    const name = metric.toLowerCase();
    // Order matters: earlier rules take precedence over later ones.
    const rules = [
        { kind: "latency", pattern: /(latency|duration|response_time|p\d{2,3})/ },
        { kind: "error_rate", pattern: /(error|fail|5xx|4xx)/ },
        { kind: "saturation", pattern: /(cpu|mem|memory|heap|disk|saturat|util|queue|pool|fd|gc)/ },
        { kind: "throughput", pattern: /(request_rate|rps|qps|throughput|traffic)/ },
    ];
    const firstMatch = rules.find((rule) => rule.pattern.test(name));
    return firstMatch ? firstMatch.kind : "generic";
}
62
/** Verdict template returned while the series is too short to evaluate (warmup). */
const NONE = {
    isAnomaly: false,
    score: 0,
    method: "none",
    direction: "flat",
    recentValue: 0,
    baselineValue: 0,
    reason: "insufficient data (warmup)",
};
/**
 * Robust anomaly detection.
 *
 * Unlike {@link detectRecentAnomaly}, the baseline is the *early stable* portion
 * of the series (it excludes the recent window AND the trailing ramp), so a slow
 * monotonic increase — e.g. a memory leak heading toward OOM — no longer poisons
 * its own baseline. Saturation/latency metrics additionally run a trend detector
 * that catches gradual ramps even when no single point is a spike.
 *
 * @param values Samples; the trailing elements are treated as the most recent.
 * @param opts Tuning options. Defaults: minSamples 15, recentWindow 5,
 *   threshold 3.0, dwell 2, metricKind "generic". `recentWindow` is floored and
 *   clamped to at least 1, `dwell` is floored and clamped to at least 0
 *   (0 disables the dwell requirement); non-finite values fall back to the
 *   defaults.
 * @returns A verdict whose `method` is "robust-z" (windowed robust z-score
 *   breach), "trend" (sustained climb) or "none" (no anomaly / warmup).
 */
export function detectRobustAnomaly(values, opts = {}) {
    // Count-like options feed `slice(-n)`. `slice(-0)` returns the WHOLE array,
    // so an unchecked 0 would turn "the last n points" into "every point".
    const toCount = (raw, fallback, min) => Number.isFinite(raw) ? Math.max(min, Math.floor(raw)) : fallback;
    const minSamples = opts.minSamples ?? 15;
    const recentWindow = toCount(opts.recentWindow, 5, 1);
    const threshold = opts.threshold ?? 3.0;
    const dwell = toCount(opts.dwell, 2, 0);
    const kind = opts.metricKind ?? "generic";
    // Warmup guard.
    if (values.length < Math.max(minSamples, recentWindow * 3))
        return { ...NONE };
    const recent = values.slice(-recentWindow);
    // Baseline = leading stable portion only; exclude the recent window and a
    // trailing margin so a ramp that ends in `recent` cannot inflate it.
    const baselineEnd = Math.max(Math.floor(values.length * 0.5), values.length - recentWindow * 3);
    const baseline = values.slice(0, baselineEnd);
    if (baseline.length < 3)
        return { ...NONE };
    const baseMed = median(baseline);
    const baseMad = mad(baseline, baseMed);
    const recentMed = median(recent);
    // One-sided metrics: a drop in error_rate / latency / saturation is good news.
    const oneSidedUp = kind === "error_rate" || kind === "latency" || kind === "saturation";
    // Robust z. Guard against MAD == 0 (perfectly flat baseline) with a tiny
    // relative epsilon so a real shift off a flat baseline still registers.
    const scale = baseMad > 0 ? baseMad : Math.max(Math.abs(baseMed) * 1e-3, 1e-9);
    const z = (recentMed - baseMed) / scale;
    const direction = z > 0 ? "above" : z < 0 ? "below" : "flat";
    const breachesThreshold = (score) => (oneSidedUp ? score >= threshold : Math.abs(score) >= threshold);
    // Dwell: require the last `dwell` points to each individually breach.
    // With dwell === 0 there is nothing to check and the requirement is met.
    const tail = dwell > 0 ? values.slice(-dwell) : [];
    const breaches = tail.filter((v) => breachesThreshold((v - baseMed) / scale));
    const dwellMet = breaches.length >= dwell;
    if (breachesThreshold(z) && dwellMet) {
        return {
            isAnomaly: true,
            score: z,
            method: "robust-z",
            direction,
            recentValue: recentMed,
            baselineValue: baseMed,
            reason: `recent median ${recentMed.toFixed(2)} is ${z.toFixed(1)} robust-σ ${direction} baseline ${baseMed.toFixed(2)} (dwell ${breaches.length}/${dwell})`,
        };
    }
    // Trend detector for slow ramps (saturation/latency). Catches a sustained
    // monotonic climb even when the windowed robust-z is still sub-threshold.
    if ((kind === "saturation" || kind === "latency") && values.length >= minSamples) {
        let ups = 0;
        for (let i = 1; i < values.length; i++)
            if (values[i] > values[i - 1])
                ups++;
        const monotonicFrac = ups / (values.length - 1);
        // Net rise is the same robust z as above, with a lower bar (2.0).
        const netRise = z;
        if (monotonicFrac >= 0.7 && netRise >= 2.0) {
            return {
                isAnomaly: true,
                score: netRise,
                method: "trend",
                direction: "above",
                recentValue: recentMed,
                baselineValue: baseMed,
                reason: `sustained upward trend: ${Math.round(monotonicFrac * 100)}% of steps rising, +${netRise.toFixed(1)} robust-σ net`,
            };
        }
    }
    return {
        isAnomaly: false,
        score: z,
        method: "none",
        direction,
        recentValue: recentMed,
        baselineValue: baseMed,
        reason: "within robust baseline",
    };
}
162
/**
 * Normalise a timestamp to epoch seconds.
 *
 * Numbers greater than 1e12 are taken to be milliseconds and divided by 1000;
 * any other number is returned unchanged. Non-numeric input is parsed with
 * `Date`, which yields NaN when the text is not a valid date.
 */
function toEpochSeconds(t) {
    if (typeof t !== "number") {
        const parsedMillis = new Date(t).getTime();
        return parsedMillis / 1000;
    }
    const looksLikeMillis = t > 1e12;
    if (looksLikeMillis) {
        return t / 1000;
    }
    return t;
}
167
/**
 * Seasonal-naive detector.
 *
 * The expected value for "now" is the median of historical samples that sit
 * at the same phase of the season as the latest point (within a tolerance,
 * wrapping around the period). The median of the recent window is then scored
 * against that same-phase distribution using a MAD-based robust z.
 *
 * The result has `applicable: false` when a same-phase baseline cannot be
 * built: too few points, less than two periods of history, or fewer than
 * `minPhaseSamples` same-phase samples.
 */
export function detectSeasonalAnomaly(points, opts = {}) {
    const seasonLength = opts.periodSeconds ?? 86400;
    const phaseTolerance = opts.phaseToleranceSeconds ?? seasonLength / 48;
    const tailSize = opts.recentWindow ?? 5;
    const zLimit = opts.threshold ?? 3.5;
    const requiredPhaseSamples = opts.minPhaseSamples ?? 4;
    const metricKind = opts.metricKind ?? "generic";
    // Verdict used for every "cannot judge" exit below.
    const notApplicable = {
        isAnomaly: false,
        applicable: false,
        score: 0,
        expected: 0,
        recentValue: 0,
        direction: "flat",
        phaseSamples: 0,
        reason: "insufficient multi-period history",
    };
    const fewestUsable = tailSize + 2;
    if (points.length < fewestUsable) {
        return notApplicable;
    }
    // Normalise timestamps, discard unusable samples, order by time.
    const normalised = points.map((p) => ({ t: toEpochSeconds(p.timestamp), v: p.value }));
    const usable = normalised.filter((sample) => Number.isFinite(sample.t) && Number.isFinite(sample.v));
    usable.sort((left, right) => left.t - right.t);
    if (usable.length < fewestUsable) {
        return notApplicable;
    }
    // Need at least ~2 full periods of history to have any same-phase samples.
    const coveredSeconds = usable[usable.length - 1].t - usable[0].t;
    if (coveredSeconds < seasonLength * 2) {
        return notApplicable;
    }
    // Position within the season, always in [0, seasonLength).
    const phaseOf = (seconds) => ((seconds % seasonLength) + seasonLength) % seasonLength;
    const recent = usable.slice(-tailSize);
    const history = usable.slice(0, -tailSize);
    const recentPhase = phaseOf(recent[recent.length - 1].t);
    // Historical values whose wrapped phase distance is within the tolerance.
    const samePhase = [];
    for (const sample of history) {
        const gap = Math.abs(phaseOf(sample.t) - recentPhase);
        if (Math.min(gap, seasonLength - gap) <= phaseTolerance) {
            samePhase.push(sample.v);
        }
    }
    if (samePhase.length < requiredPhaseSamples) {
        return notApplicable;
    }
    const expected = median(samePhase);
    const spread = mad(samePhase, expected);
    const recentMed = median(recent.map((sample) => sample.v));
    // Flat same-phase history (MAD 0): fall back to a tiny relative scale.
    const scale = spread > 0 ? spread : Math.max(Math.abs(expected) * 1e-3, 1e-9);
    const z = (recentMed - expected) / scale;
    let direction = "flat";
    if (z > 0) {
        direction = "above";
    }
    else if (z < 0) {
        direction = "below";
    }
    // For these kinds only an increase counts as an anomaly.
    const upOnly = metricKind === "error_rate" || metricKind === "latency" || metricKind === "saturation";
    const hit = upOnly ? z >= zLimit : Math.abs(z) >= zLimit;
    let reason;
    if (hit) {
        reason = `recent ${recentMed.toFixed(2)} is ${z.toFixed(1)} robust-σ ${direction} the seasonal baseline ${expected.toFixed(2)} (n=${samePhase.length} same-phase samples)`;
    }
    else {
        reason = `within seasonal baseline (${expected.toFixed(2)}, n=${samePhase.length})`;
    }
    return {
        isAnomaly: hit,
        applicable: true,
        score: z,
        expected,
        recentValue: recentMed,
        direction,
        phaseSamples: samePhase.length,
        reason,
    };
}
236
/**
 * Orchestrator. Runs the seasonality-aware detector first and uses its
 * verdict whenever it reports itself applicable; otherwise the values are
 * handed to the robust windowed detector. Both paths are mapped onto the same
 * result shape, with the seasonal `expected` exposed as `baselineValue`.
 */
export function detectAnomaly(points, opts = {}) {
    const seasonal = detectSeasonalAnomaly(points, opts);
    if (!seasonal.applicable) {
        // Not enough multi-period history: fall back to the windowed detector.
        const values = points.map((point) => point.value);
        const { isAnomaly, method, score, recentValue, baselineValue, direction, reason } = detectRobustAnomaly(values, opts);
        return { isAnomaly, method, score, recentValue, baselineValue, direction, reason };
    }
    const { isAnomaly, score, recentValue, expected, direction, reason } = seasonal;
    return {
        isAnomaly,
        method: "seasonal",
        score,
        recentValue,
        baselineValue: expected,
        direction,
        reason,
    };
}
30
265
  /**
31
266
  * Check if the most recent values deviate significantly from the baseline.
32
267
  * Compares the last `recentWindow` values against the rest.
@@ -1,6 +1,6 @@
1
1
  import { describe, it } from "node:test";
2
2
  import assert from "node:assert/strict";
3
- import { calculateZScore, detectAnomalyPoints, detectRecentAnomaly } from "./anomaly.js";
3
+ import { calculateZScore, detectAnomalyPoints, detectRecentAnomaly, detectRobustAnomaly, detectSeasonalAnomaly, detectAnomaly, classifyMetric, median, mad, } from "./anomaly.js";
4
4
  describe("calculateZScore", () => {
5
5
  it("returns zeros for empty array", () => {
6
6
  const result = calculateZScore([]);
@@ -85,3 +85,151 @@ describe("detectRecentAnomaly", () => {
85
85
  assert.ok(lowResult.isAnomaly || !highResult.isAnomaly);
86
86
  });
87
87
  });
88
// Unit tests for the robust location/scale helpers.
describe("median / mad", () => {
    it("median handles odd and even lengths", () => {
        const cases = [
            { input: [3, 1, 2], expected: 2 },
            { input: [1, 2, 3, 4], expected: 2.5 },
            { input: [], expected: 0 },
        ];
        for (const { input, expected } of cases) {
            assert.equal(median(input), expected);
        }
    });
    it("mad is robust to outliers", () => {
        const stable = [8, 10, 12, 9, 11, 10, 13, 7];
        const withOutlier = stable.concat(100000);
        // Population standard deviation, used only as the non-robust contrast.
        const stdDev = (xs) => {
            const mean = xs.reduce((sum, x) => sum + x, 0) / xs.length;
            const sumOfSquares = xs.reduce((sum, x) => sum + (x - mean) ** 2, 0);
            return Math.sqrt(sumOfSquares / xs.length);
        };
        const stableMad = mad(stable);
        // MAD barely moves; stdDev explodes by orders of magnitude.
        assert.ok(stableMad > 0);
        assert.ok(mad(withOutlier) < stableMad * 2);
        assert.ok(stdDev(withOutlier) > stdDev(stable) * 100);
    });
});
107
// Name-based metric classification, one representative name per kind.
describe("classifyMetric", () => {
    it("classifies by name", () => {
        const expectations = [
            ["latency_p99", "latency"],
            ["error_rate", "error_rate"],
            ["cpu", "saturation"],
            ["memory_used_bytes", "saturation"],
            ["request_rate", "throughput"],
            ["widgets_total", "generic"],
        ];
        for (const [metricName, kind] of expectations) {
            assert.equal(classifyMetric(metricName), kind);
        }
    });
});
117
// Behavioural tests for the median/MAD windowed detector.
describe("detectRobustAnomaly", () => {
    // `length` samples cycling through level, level + 1, level + 2.
    const wobble = (length, level) => Array.from({ length }, (_, i) => level + (i % 3));
    // `count` copies of the same value.
    const flat = (count, value) => new Array(count).fill(value);
    it("warmup: no detection below minSamples", () => {
        const verdict = detectRobustAnomaly([1, 2, 3, 4, 5]);
        assert.equal(verdict.isAnomaly, false);
        assert.equal(verdict.method, "none");
    });
    it("no anomaly for stable noisy data", () => {
        const verdict = detectRobustAnomaly(wobble(40, 99));
        assert.equal(verdict.isAnomaly, false);
    });
    // The exact production false-negative: a slow memory-leak ramp toward OOM.
    // detectRecentAnomaly misses it because the rising baseline poisons its own
    // mean/stdDev; detectRobustAnomaly must catch it.
    it("REGRESSION: detects slow memory-leak ramp the legacy detector misses", () => {
        // The query window opened AFTER the leak began, so there is no flat
        // baseline: the metric climbs monotonically across the whole window.
        // The legacy windowed z-score stays sub-threshold because its baseline
        // already contains the ramp; the robust trend detector must fire.
        const ramp = Array.from({ length: 40 }, (_, step) => 120 + step * 7);
        const legacyVerdict = detectRecentAnomaly(ramp, 5, 2.0);
        assert.equal(legacyVerdict.isAnomaly, false, "legacy detector misses the leak spanning the window");
        const robustVerdict = detectRobustAnomaly(ramp, { metricKind: "saturation" });
        assert.equal(robustVerdict.isAnomaly, true, "robust detector must catch the leak");
        assert.equal(robustVerdict.method, "trend");
        assert.equal(robustVerdict.direction, "above");
    });
    it("detects a hard spike via robust-z", () => {
        const samples = wobble(25, 50).concat(flat(5, 500));
        const verdict = detectRobustAnomaly(samples, { metricKind: "latency" });
        assert.equal(verdict.isAnomaly, true);
        assert.equal(verdict.method, "robust-z");
    });
    it("dwell/hysteresis: a single transient spike does not fire", () => {
        // One lone spike at the very end of an otherwise quiet series.
        const samples = wobble(30, 50).concat([50, 51, 49, 500]);
        const verdict = detectRobustAnomaly(samples, { metricKind: "latency", dwell: 2 });
        assert.equal(verdict.isAnomaly, false, "single point should not satisfy dwell");
    });
    it("one-sided: a drop in error_rate is not an anomaly", () => {
        const samples = wobble(25, 20).concat(flat(5, 0));
        const verdict = detectRobustAnomaly(samples, { metricKind: "error_rate" });
        assert.equal(verdict.isAnomaly, false);
    });
    it("two-sided generic metric flags a drop", () => {
        const samples = wobble(25, 100).concat(flat(5, 5));
        const verdict = detectRobustAnomaly(samples, { metricKind: "generic" });
        assert.equal(verdict.isAnomaly, true);
        assert.equal(verdict.direction, "below");
    });
});
171
// Behavioural tests for the same-phase (seasonal) detector.
describe("detectSeasonalAnomaly", () => {
    const MS_PER_HOUR = 3600_000;
    const isNightHour = (hour) => hour >= 22 || hour <= 5;
    // Hourly samples with a strong diurnal pattern (night ~10, day ~100) plus
    // a small per-day offset so the MAD is non-zero. `lastDayNight`, when
    // given, replaces the night values of the final day to inject a
    // regression. The last 20 hours of the final day are left out, so the
    // series ends at hour 3 of that day (…23, 0, 1, 2, 3).
    function diurnal(days, lastDayNight) {
        const origin = Date.UTC(2026, 0, 1, 0, 0, 0);
        const sampleCount = days * 24 - 20;
        const samples = [];
        for (let idx = 0; idx < sampleCount; idx++) {
            const day = Math.floor(idx / 24);
            const hour = idx % 24;
            const night = isNightHour(hour);
            const overridden = day === days - 1 && night && lastDayNight !== undefined;
            const value = overridden ? lastDayNight : (night ? 10 : 100) + ((day % 3) - 1);
            samples.push({ timestamp: origin + idx * MS_PER_HOUR, value });
        }
        return samples;
    }
    it("not applicable with <2 periods of history", () => {
        const verdict = detectSeasonalAnomaly(diurnal(1));
        assert.equal(verdict.applicable, false);
        assert.equal(verdict.isAnomaly, false);
    });
    it("KEY: a normal nightly trough is NOT an anomaly (robust would false-positive)", () => {
        // Six days of data ending inside an ordinary low-night window.
        const samples = diurnal(6);
        const seasonalVerdict = detectSeasonalAnomaly(samples);
        assert.equal(seasonalVerdict.applicable, true);
        assert.equal(seasonalVerdict.isAnomaly, false, "night low is expected at this phase");
        // The windowed detector has no notion of phase and flags the trough.
        const values = samples.map((sample) => sample.value);
        const robustVerdict = detectRobustAnomaly(values, { metricKind: "generic" });
        assert.equal(robustVerdict.isAnomaly, true, "robust mistakes the diurnal trough for a drop");
    });
    it("flags a real same-phase regression (night value where day-level is wrong)", () => {
        // The final night sits at the daytime level.
        const samples = diurnal(6, 100);
        const verdict = detectSeasonalAnomaly(samples, { metricKind: "saturation" });
        assert.equal(verdict.applicable, true);
        assert.equal(verdict.isAnomaly, true);
        assert.equal(verdict.direction, "above");
        assert.ok(verdict.phaseSamples >= 4);
    });
});
215
// Tests that the orchestrator picks the right detector for the history it is given.
describe("detectAnomaly orchestrator", () => {
    it("uses seasonal when multi-period history is available", () => {
        // Five days of hourly samples with a day/night pattern.
        const HOUR = 3600_000;
        const start = Date.UTC(2026, 0, 1);
        const pts = [];
        for (let i = 0; i < 24 * 5; i++) {
            const h = i % 24;
            pts.push({ timestamp: start + i * HOUR, value: (h >= 22 || h <= 5 ? 10 : 100) + (i % 3) });
        }
        const r = detectAnomaly(pts, { metricKind: "generic" });
        assert.equal(r.method, "seasonal");
    });
    it("falls back to robust when history is too short", () => {
        // Thirty one-minute samples span far less than two daily periods, so
        // the seasonal detector is not applicable and the robust detector
        // decides. The series is flat (100..102), so the expected verdict is
        // exactly "no anomaly"; asserting it pins both the fallback and its
        // result instead of accepting any non-seasonal method.
        const pts = Array.from({ length: 30 }, (_, i) => ({
            timestamp: 1_700_000_000_000 + i * 60_000,
            value: 100 + (i % 3),
        }));
        const r = detectAnomaly(pts, { metricKind: "generic" });
        assert.equal(r.method, "none");
        assert.equal(r.isAnomaly, false);
    });
});
@@ -0,0 +1,31 @@
1
+ import { type MetricKind, type SeasonalPoint } from "./anomaly.js";
2
+ export interface BacktestCase {
3
+ name: string;
4
+ /** Regime label for the per-category breakdown. */
5
+ category: string;
6
+ points: SeasonalPoint[];
7
+ metricKind: MetricKind;
8
+ /** Ground truth. */
9
+ anomalous: boolean;
10
+ }
11
+ /**
12
+ * Build the labelled suite. Multiple seeded variants per regime so the
13
+ * precision/recall figures are stable rather than hostage to one sample.
14
+ */
15
+ export declare function buildSuite(): BacktestCase[];
16
+ export interface BacktestReport {
17
+ total: number;
18
+ tp: number;
19
+ fp: number;
20
+ tn: number;
21
+ fn: number;
22
+ precision: number;
23
+ recall: number;
24
+ f1: number;
25
+ byCategory: Record<string, {
26
+ total: number;
27
+ correct: number;
28
+ }>;
29
+ }
30
+ export declare function runBacktest(cases?: BacktestCase[]): BacktestReport;
31
+ export declare function formatReport(r: BacktestReport): string;