@thotischner/observability-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/sources.yaml +45 -0
- package/dist/analysis/anomaly.d.ts +24 -0
- package/dist/analysis/anomaly.js +50 -0
- package/dist/analysis/anomaly.test.d.ts +1 -0
- package/dist/analysis/anomaly.test.js +87 -0
- package/dist/analysis/correlator.d.ts +7 -0
- package/dist/analysis/correlator.js +31 -0
- package/dist/analysis/correlator.test.d.ts +1 -0
- package/dist/analysis/correlator.test.js +53 -0
- package/dist/analysis/health.d.ts +19 -0
- package/dist/analysis/health.js +34 -0
- package/dist/analysis/health.test.d.ts +1 -0
- package/dist/analysis/health.test.js +70 -0
- package/dist/config/loader.d.ts +5 -0
- package/dist/config/loader.js +81 -0
- package/dist/config/loader.test.d.ts +1 -0
- package/dist/config/loader.test.js +163 -0
- package/dist/connectors/interface.d.ts +17 -0
- package/dist/connectors/interface.js +1 -0
- package/dist/connectors/loki.d.ts +25 -0
- package/dist/connectors/loki.js +182 -0
- package/dist/connectors/loki.test.d.ts +1 -0
- package/dist/connectors/loki.test.js +111 -0
- package/dist/connectors/prometheus.d.ts +28 -0
- package/dist/connectors/prometheus.js +196 -0
- package/dist/connectors/prometheus.test.d.ts +1 -0
- package/dist/connectors/prometheus.test.js +103 -0
- package/dist/connectors/registry.d.ts +18 -0
- package/dist/connectors/registry.js +90 -0
- package/dist/connectors/registry.test.d.ts +1 -0
- package/dist/connectors/registry.test.js +93 -0
- package/dist/connectors/tls.d.ts +7 -0
- package/dist/connectors/tls.js +25 -0
- package/dist/connectors/tls.test.d.ts +1 -0
- package/dist/connectors/tls.test.js +99 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +421 -0
- package/dist/tools/detect-anomalies.d.ts +33 -0
- package/dist/tools/detect-anomalies.js +137 -0
- package/dist/tools/get-service-health.d.ts +25 -0
- package/dist/tools/get-service-health.js +111 -0
- package/dist/tools/handlers.test.d.ts +1 -0
- package/dist/tools/handlers.test.js +138 -0
- package/dist/tools/list-services.d.ts +22 -0
- package/dist/tools/list-services.js +57 -0
- package/dist/tools/list-sources.d.ts +15 -0
- package/dist/tools/list-sources.js +27 -0
- package/dist/tools/query-logs.d.ts +49 -0
- package/dist/tools/query-logs.js +93 -0
- package/dist/tools/query-metrics.d.ts +44 -0
- package/dist/tools/query-metrics.js +91 -0
- package/dist/tools/validation.d.ts +17 -0
- package/dist/tools/validation.js +45 -0
- package/dist/tools/validation.test.d.ts +1 -0
- package/dist/tools/validation.test.js +84 -0
- package/dist/types.d.ts +171 -0
- package/dist/types.js +1 -0
- package/dist/ui/index.html +675 -0
- package/package.json +35 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
sources:
|
|
2
|
+
- name: prometheus
|
|
3
|
+
type: prometheus
|
|
4
|
+
url: http://prometheus:9090
|
|
5
|
+
enabled: true
|
|
6
|
+
# metrics: # Uncomment to override default Prometheus metrics
|
|
7
|
+
# - name: custom_metric
|
|
8
|
+
# query: 'my_custom_metric{job="{{service}}"}'
|
|
9
|
+
# unit: requests
|
|
10
|
+
# description: My custom metric
|
|
11
|
+
|
|
12
|
+
- name: loki
|
|
13
|
+
type: loki
|
|
14
|
+
url: http://loki:3100
|
|
15
|
+
enabled: true
|
|
16
|
+
|
|
17
|
+
settings:
|
|
18
|
+
checkIntervalMs: 30000
|
|
19
|
+
defaultSensitivity: medium
|
|
20
|
+
|
|
21
|
+
healthThresholds:
|
|
22
|
+
weights:
|
|
23
|
+
errorRate: 0.35
|
|
24
|
+
latency: 0.25
|
|
25
|
+
cpu: 0.20
|
|
26
|
+
logErrors: 0.20
|
|
27
|
+
cpu:
|
|
28
|
+
good: 50
|
|
29
|
+
warn: 80
|
|
30
|
+
crit: 95
|
|
31
|
+
errorRate:
|
|
32
|
+
good: 0.01
|
|
33
|
+
warn: 0.1
|
|
34
|
+
crit: 0.5
|
|
35
|
+
latencyP99:
|
|
36
|
+
good: 0.5
|
|
37
|
+
warn: 1.0
|
|
38
|
+
crit: 3.0
|
|
39
|
+
logErrors:
|
|
40
|
+
good: 1
|
|
41
|
+
warn: 5
|
|
42
|
+
crit: 20
|
|
43
|
+
statusBoundaries:
|
|
44
|
+
healthy: 80
|
|
45
|
+
degraded: 50
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { AnomalySeverity } from "../types.js";
|
|
2
|
+
export interface ZScoreResult {
|
|
3
|
+
mean: number;
|
|
4
|
+
stdDev: number;
|
|
5
|
+
zScores: number[];
|
|
6
|
+
}
|
|
7
|
+
export declare function calculateZScore(values: number[]): ZScoreResult;
|
|
8
|
+
export interface AnomalyPoint {
|
|
9
|
+
index: number;
|
|
10
|
+
value: number;
|
|
11
|
+
zScore: number;
|
|
12
|
+
severity: AnomalySeverity;
|
|
13
|
+
}
|
|
14
|
+
export declare function detectAnomalyPoints(values: number[], threshold?: number): AnomalyPoint[];
|
|
15
|
+
/**
|
|
16
|
+
* Check if the most recent values deviate significantly from the baseline.
|
|
17
|
+
* Compares the last `recentWindow` values against the rest.
|
|
18
|
+
*/
|
|
19
|
+
export declare function detectRecentAnomaly(values: number[], recentWindow?: number, threshold?: number): {
|
|
20
|
+
isAnomaly: boolean;
|
|
21
|
+
zScore: number;
|
|
22
|
+
recentAvg: number;
|
|
23
|
+
baselineAvg: number;
|
|
24
|
+
};
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
 * Compute the population mean, population standard deviation and the
 * per-value z-scores of a series.
 *
 * An empty series yields zeros and an empty z-score list. When every value is
 * identical (standard deviation of 0) all z-scores are reported as 0 instead
 * of dividing by zero.
 *
 * @param values Numeric samples.
 * @returns `{ mean, stdDev, zScores }`, where `zScores[i]` belongs to `values[i]`.
 */
export function calculateZScore(values) {
    const count = values.length;
    if (count === 0) {
        return { mean: 0, stdDev: 0, zScores: [] };
    }
    let total = 0;
    for (const sample of values) {
        total += sample;
    }
    const mean = total / count;
    let squaredDeviationSum = 0;
    for (const sample of values) {
        squaredDeviationSum += (sample - mean) ** 2;
    }
    // Population variance: divide by the number of samples, not count - 1.
    const stdDev = Math.sqrt(squaredDeviationSum / count);
    const toZScore = stdDev === 0
        ? () => 0
        : (sample) => (sample - mean) / stdDev;
    return { mean, stdDev, zScores: values.map(toZScore) };
}
|
|
12
|
+
/**
 * Report every value whose absolute z-score reaches `threshold`.
 *
 * Severity is derived from the absolute z-score: `>= 3` is "high", `>= 2` is
 * "medium", anything lower (only reachable with a threshold below 2) is "low".
 * A series with zero standard deviation has no anomalies.
 *
 * @param values Numeric samples.
 * @param threshold Minimum absolute z-score for a point to be reported.
 * @returns Anomalous points in series order, each with its index, value, z-score and severity.
 */
export function detectAnomalyPoints(values, threshold = 2.0) {
    const { stdDev, zScores } = calculateZScore(values);
    if (stdDev === 0) {
        return [];
    }
    const severityFor = (magnitude) => {
        if (magnitude >= 3) {
            return "high";
        }
        if (magnitude >= 2) {
            return "medium";
        }
        return "low";
    };
    const anomalies = [];
    zScores.forEach((zScore, index) => {
        const magnitude = Math.abs(zScore);
        if (magnitude < threshold) {
            return;
        }
        anomalies.push({
            index,
            value: values[index],
            zScore,
            severity: severityFor(magnitude),
        });
    });
    return anomalies;
}
|
|
30
|
+
/**
 * Check if the most recent values deviate significantly from the baseline.
 * Compares the average of the last `recentWindow` values against the mean and
 * population standard deviation of everything before them.
 *
 * The neutral result (`isAnomaly: false`, all numbers 0) is returned when
 * there is not enough data (fewer than `recentWindow + 5` values) or when
 * `recentWindow` is below 1. A baseline with zero variance always produces a
 * z-score of 0.
 *
 * @param values Numeric samples, oldest first.
 * @param recentWindow Number of trailing values treated as "recent".
 * @param threshold Minimum absolute z-score for the recent average to count as an anomaly.
 * @returns `{ isAnomaly, zScore, recentAvg, baselineAvg }`.
 */
export function detectRecentAnomaly(values, recentWindow = 5, threshold = 2.0) {
    // A window below 1 truncates to -0 in `slice`, so `slice(0, -recentWindow)`
    // would be empty and `slice(-recentWindow)` would be the whole series,
    // turning every statistic into NaN. The negated comparison also rejects NaN.
    if (!(recentWindow >= 1) || values.length < recentWindow + 5) {
        return { isAnomaly: false, zScore: 0, recentAvg: 0, baselineAvg: 0 };
    }
    const baseline = values.slice(0, -recentWindow);
    const recent = values.slice(-recentWindow);
    const baselineAvg = baseline.reduce((a, b) => a + b, 0) / baseline.length;
    const baselineStdDev = Math.sqrt(baseline.reduce((sum, v) => sum + (v - baselineAvg) ** 2, 0) / baseline.length);
    const recentAvg = recent.reduce((a, b) => a + b, 0) / recent.length;
    // A perfectly flat baseline gives no scale to measure deviation against.
    const zScore = baselineStdDev === 0 ? 0 : (recentAvg - baselineAvg) / baselineStdDev;
    return {
        isAnomaly: Math.abs(zScore) >= threshold,
        zScore,
        recentAvg,
        baselineAvg,
    };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { describe, it } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { calculateZScore, detectAnomalyPoints, detectRecentAnomaly } from "./anomaly.js";
|
|
4
|
+
describe("calculateZScore", () => {
|
|
5
|
+
it("returns zeros for empty array", () => {
|
|
6
|
+
const result = calculateZScore([]);
|
|
7
|
+
assert.equal(result.mean, 0);
|
|
8
|
+
assert.equal(result.stdDev, 0);
|
|
9
|
+
assert.deepEqual(result.zScores, []);
|
|
10
|
+
});
|
|
11
|
+
it("calculates correct mean and stdDev", () => {
|
|
12
|
+
const result = calculateZScore([2, 4, 4, 4, 5, 5, 7, 9]);
|
|
13
|
+
assert.ok(Math.abs(result.mean - 5) < 0.001);
|
|
14
|
+
assert.ok(result.stdDev > 0);
|
|
15
|
+
});
|
|
16
|
+
it("returns zero z-scores for constant values", () => {
|
|
17
|
+
const result = calculateZScore([5, 5, 5, 5]);
|
|
18
|
+
assert.equal(result.mean, 5);
|
|
19
|
+
assert.equal(result.stdDev, 0);
|
|
20
|
+
assert.deepEqual(result.zScores, [0, 0, 0, 0]);
|
|
21
|
+
});
|
|
22
|
+
it("outlier has high z-score", () => {
|
|
23
|
+
const values = [10, 10, 10, 10, 10, 10, 10, 10, 10, 100];
|
|
24
|
+
const result = calculateZScore(values);
|
|
25
|
+
const lastZ = result.zScores[result.zScores.length - 1];
|
|
26
|
+
assert.ok(lastZ > 2, `Expected z-score > 2, got ${lastZ}`);
|
|
27
|
+
});
|
|
28
|
+
});
|
|
29
|
+
describe("detectAnomalyPoints", () => {
|
|
30
|
+
it("returns empty for constant values", () => {
|
|
31
|
+
assert.deepEqual(detectAnomalyPoints([5, 5, 5, 5]), []);
|
|
32
|
+
});
|
|
33
|
+
it("detects outlier with default threshold", () => {
|
|
34
|
+
const values = [10, 10, 10, 10, 10, 10, 10, 10, 10, 50];
|
|
35
|
+
const anomalies = detectAnomalyPoints(values);
|
|
36
|
+
assert.ok(anomalies.length > 0);
|
|
37
|
+
assert.equal(anomalies[0].index, 9);
|
|
38
|
+
assert.ok(anomalies[0].value === 50);
|
|
39
|
+
});
|
|
40
|
+
it("respects custom threshold", () => {
|
|
41
|
+
const values = [10, 10, 10, 10, 10, 10, 10, 10, 10, 15];
|
|
42
|
+
const lowThreshold = detectAnomalyPoints(values, 1.0);
|
|
43
|
+
const highThreshold = detectAnomalyPoints(values, 3.0);
|
|
44
|
+
assert.ok(lowThreshold.length >= highThreshold.length);
|
|
45
|
+
});
|
|
46
|
+
it("assigns correct severity", () => {
|
|
47
|
+
const values = [10, 10, 10, 10, 10, 10, 10, 10, 10, 100];
|
|
48
|
+
const anomalies = detectAnomalyPoints(values);
|
|
49
|
+
assert.ok(anomalies.length > 0);
|
|
50
|
+
assert.ok(["low", "medium", "high"].includes(anomalies[0].severity));
|
|
51
|
+
});
|
|
52
|
+
});
|
|
53
|
+
// Behavioural tests for detectRecentAnomaly: insufficient data, flat data,
// spikes, drops, and sensitivity to the z-score threshold.
describe("detectRecentAnomaly", () => {
    it("returns no anomaly for insufficient data", () => {
        const result = detectRecentAnomaly([1, 2, 3]);
        assert.equal(result.isAnomaly, false);
    });
    it("returns no anomaly for stable data", () => {
        const values = Array(20).fill(10);
        const result = detectRecentAnomaly(values);
        assert.equal(result.isAnomaly, false);
    });
    it("detects spike in recent values", () => {
        // Baseline needs some variance so stdDev > 0
        const baseline = Array.from({ length: 20 }, (_, i) => 10 + (i % 3));
        const spike = Array(5).fill(50);
        const result = detectRecentAnomaly([...baseline, ...spike]);
        assert.equal(result.isAnomaly, true);
        assert.ok(result.zScore > 0);
        assert.ok(result.recentAvg > 40);
    });
    it("detects drop in recent values", () => {
        const baseline = Array.from({ length: 20 }, (_, i) => 50 + (i % 3));
        const drop = Array(5).fill(5);
        const result = detectRecentAnomaly([...baseline, ...drop]);
        assert.equal(result.isAnomaly, true);
        assert.ok(result.zScore < 0);
    });
    it("respects custom threshold", () => {
        // The baseline must vary: with a constant baseline the z-score is always 0,
        // so neither threshold would ever be exercised.
        const baseline = Array.from({ length: 20 }, (_, i) => 10 + (i % 3));
        const slight = Array(5).fill(12.5);
        const lowResult = detectRecentAnomaly([...baseline, ...slight], 5, 1.0);
        const highResult = detectRecentAnomaly([...baseline, ...slight], 5, 3.0);
        // Baseline mean 10.95, stdDev ~0.80, so z ~1.93: above 1.0 but below 3.0.
        assert.equal(lowResult.isAnomaly, true);
        assert.equal(highResult.isAnomaly, false);
    });
});
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { AnomalyReport, LogResult, MetricResult } from "../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Cross-signal correlation: find relationships between metric anomalies and log patterns.
|
|
4
|
+
* Simple timestamp-based approach: if a metric spike and error log increase happen
|
|
5
|
+
* in the same time window, they are correlated.
|
|
6
|
+
*/
|
|
7
|
+
export declare function correlateSignals(anomalies: AnomalyReport[], logResults: LogResult[], metricResults: MetricResult[]): string[];
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
 * Cross-signal correlation: describe how each metric anomaly relates to the
 * logs and the other metrics of the same service.
 *
 * For every anomaly two kinds of human-readable statements may be produced:
 *  - one linking it to the first log result of that service, if that result
 *    reports at least one error;
 *  - one for each *other* metric of that service whose trend is "rising".
 *
 * Matching is by service name only; no timestamps are compared here.
 *
 * @param anomalies Detected metric anomalies.
 * @param logResults Log query results, matched on `service`.
 * @param metricResults Metric query results, matched on `service`.
 * @returns Unique correlation sentences in the order they were first produced.
 */
export function correlateSignals(anomalies, logResults, metricResults) {
    const messages = [];
    for (const { service, metric: anomalousMetric, severity } of anomalies) {
        // Log signal: only the first log result for the service is considered.
        const logsForService = logResults.find((entry) => entry.service === service);
        if (logsForService && logsForService.summary.errorCount > 0) {
            const { errorCount, total, topPatterns } = logsForService.summary;
            let errorPct = 0;
            if (total > 0) {
                errorPct = Math.round((errorCount / total) * 100);
            }
            const topError = topPatterns[0] || "N/A";
            messages.push(`${service}: ${anomalousMetric} anomaly (${severity}) correlates with ` +
                `${errorCount} error logs (${errorPct}% of total). ` +
                `Top error: ${topError}`);
        }
        // Metric cross-signals (e.g. CPU spike + latency increase): skip the
        // anomalous metric itself and report only rising companions.
        for (const candidate of metricResults) {
            if (candidate.service !== service || candidate.metric === anomalousMetric) {
                continue;
            }
            if (candidate.summary.trend !== "rising") {
                continue;
            }
            messages.push(`${service}: ${anomalousMetric} anomaly coincides with rising ${candidate.metric} ` +
                `(current: ${candidate.summary.current.toFixed(2)})`);
        }
    }
    // A Set keeps first-seen order while dropping repeated sentences.
    return Array.from(new Set(messages));
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { describe, it } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { correlateSignals } from "./correlator.js";
|
|
4
|
+
describe("correlateSignals", () => {
|
|
5
|
+
it("returns empty for no anomalies", () => {
|
|
6
|
+
const result = correlateSignals([], [], []);
|
|
7
|
+
assert.deepEqual(result, []);
|
|
8
|
+
});
|
|
9
|
+
it("correlates metric anomaly with error logs", () => {
|
|
10
|
+
const anomalies = [{
|
|
11
|
+
metric: "cpu", severity: "high", description: "cpu spike",
|
|
12
|
+
currentValue: 95, baselineValue: 20, deviationPercent: 375,
|
|
13
|
+
source: "prometheus", service: "api-gateway",
|
|
14
|
+
}];
|
|
15
|
+
const logs = [{
|
|
16
|
+
source: "loki", service: "api-gateway",
|
|
17
|
+
entries: [],
|
|
18
|
+
summary: { total: 100, errorCount: 30, warnCount: 5, topPatterns: ["NullPointerException (15x)"] },
|
|
19
|
+
}];
|
|
20
|
+
const result = correlateSignals(anomalies, logs, []);
|
|
21
|
+
assert.ok(result.length > 0);
|
|
22
|
+
assert.ok(result[0].includes("api-gateway"));
|
|
23
|
+
assert.ok(result[0].includes("cpu"));
|
|
24
|
+
});
|
|
25
|
+
it("correlates metric cross-signals", () => {
|
|
26
|
+
const anomalies = [{
|
|
27
|
+
metric: "cpu", severity: "high", description: "cpu spike",
|
|
28
|
+
currentValue: 95, baselineValue: 20, deviationPercent: 375,
|
|
29
|
+
source: "prometheus", service: "payment",
|
|
30
|
+
}];
|
|
31
|
+
const metrics = [{
|
|
32
|
+
source: "prometheus", service: "payment", metric: "latency_p99",
|
|
33
|
+
unit: "seconds", values: [], summary: { current: 2.5, average: 0.5, min: 0.3, max: 3.0, trend: "rising" },
|
|
34
|
+
}];
|
|
35
|
+
const result = correlateSignals(anomalies, [], metrics);
|
|
36
|
+
assert.ok(result.some(c => c.includes("latency_p99") && c.includes("rising")));
|
|
37
|
+
});
|
|
38
|
+
it("does not duplicate correlations", () => {
|
|
39
|
+
const anomalies = [{
|
|
40
|
+
metric: "cpu", severity: "medium", description: "test",
|
|
41
|
+
currentValue: 80, baselineValue: 20, deviationPercent: 300,
|
|
42
|
+
source: "prometheus", service: "svc",
|
|
43
|
+
}];
|
|
44
|
+
const logs = [{
|
|
45
|
+
source: "loki", service: "svc",
|
|
46
|
+
entries: [],
|
|
47
|
+
summary: { total: 10, errorCount: 5, warnCount: 0, topPatterns: ["err"] },
|
|
48
|
+
}];
|
|
49
|
+
const result = correlateSignals(anomalies, logs, []);
|
|
50
|
+
const unique = new Set(result);
|
|
51
|
+
assert.equal(result.length, unique.size);
|
|
52
|
+
});
|
|
53
|
+
});
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { HealthStatus, HealthThresholds } from "../types.js";
|
|
2
|
+
interface HealthInputs {
|
|
3
|
+
cpu: number;
|
|
4
|
+
memory: number;
|
|
5
|
+
errorRate: number;
|
|
6
|
+
latencyP99: number;
|
|
7
|
+
logErrorRate: number;
|
|
8
|
+
}
|
|
9
|
+
interface HealthResult {
|
|
10
|
+
score: number;
|
|
11
|
+
status: HealthStatus;
|
|
12
|
+
details: Record<string, {
|
|
13
|
+
score: number;
|
|
14
|
+
value: number;
|
|
15
|
+
threshold: string;
|
|
16
|
+
}>;
|
|
17
|
+
}
|
|
18
|
+
export declare function calculateHealthScore(inputs: HealthInputs, thresholds: HealthThresholds): HealthResult;
|
|
19
|
+
export {};
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * Combine CPU, error-rate, p99-latency and log-error readings into a single
 * 0-100 health score and a status label.
 *
 * Each reading is graded 0-100 by `scoreFromThreshold` against its
 * good/warn/crit band, the grades are combined with `thresholds.weights`,
 * and the sum is clamped to [0, 100] and rounded. The status is "healthy"
 * above `statusBoundaries.healthy`, "degraded" above
 * `statusBoundaries.degraded`, otherwise "critical" (both comparisons are
 * strict, so a score equal to a boundary falls into the lower status).
 *
 * Only `cpu`, `errorRate`, `latencyP99` and `logErrorRate` are read from
 * `inputs`; no other field takes part in the score.
 *
 * @param inputs Current readings for the service.
 * @param thresholds Weights, per-signal bands and status boundaries.
 * @returns `{ score, status, details }` with a per-signal breakdown.
 */
export function calculateHealthScore(inputs, thresholds) {
    const { weights, statusBoundaries } = thresholds;
    const grade = (value, band) => scoreFromThreshold(value, band.good, band.warn, band.crit);
    const cpuScore = grade(inputs.cpu, thresholds.cpu);
    const errorRateScore = grade(inputs.errorRate, thresholds.errorRate);
    const latencyScore = grade(inputs.latencyP99, thresholds.latencyP99);
    const logErrorScore = grade(inputs.logErrorRate, thresholds.logErrors);
    const weightedScore = cpuScore * weights.cpu +
        errorRateScore * weights.errorRate +
        latencyScore * weights.latency +
        logErrorScore * weights.logErrors;
    const score = Math.round(Math.max(0, Math.min(100, weightedScore)));
    let status;
    if (score > statusBoundaries.healthy) {
        status = "healthy";
    }
    else if (score > statusBoundaries.degraded) {
        status = "degraded";
    }
    else {
        status = "critical";
    }
    return {
        score,
        status,
        details: {
            cpu: {
                score: Math.round(cpuScore),
                value: inputs.cpu,
                threshold: `warn >${thresholds.cpu.warn}%, crit >${thresholds.cpu.crit}%`,
            },
            errorRate: {
                score: Math.round(errorRateScore),
                value: inputs.errorRate,
                threshold: `warn >${thresholds.errorRate.warn}/s, crit >${thresholds.errorRate.crit}/s`,
            },
            latencyP99: {
                score: Math.round(latencyScore),
                value: inputs.latencyP99,
                threshold: `warn >${thresholds.latencyP99.warn}s, crit >${thresholds.latencyP99.crit}s`,
            },
            logErrors: {
                score: Math.round(logErrorScore),
                value: inputs.logErrorRate,
                threshold: `warn >${thresholds.logErrors.warn}/min, crit >${thresholds.logErrors.crit}/min`,
            },
        },
    };
}
|
|
26
|
+
/**
 * Grade a reading on a 0-100 scale against a good/warn/crit band
 * (lower readings are better).
 *
 *  - at or below `good`: 100
 *  - `good`..`warn`: linear from 100 down to 60
 *  - `warn`..`crit`: linear from 60 down to 20
 *  - above `crit`: linear from 20 towards 0, reaching 0 at twice `crit`
 *
 * @param value The reading to grade.
 * @param good Upper bound of the fully healthy range.
 * @param warn Warning bound.
 * @param crit Critical bound.
 * @returns A score in [0, 100] (not rounded).
 */
function scoreFromThreshold(value, good, warn, crit) {
    // Linear 40-point drop from `top` while moving from `from` to `to`.
    const ramp = (top, from, to) => top - ((value - from) / (to - from)) * 40;
    return value <= good
        ? 100
        : value <= warn
            ? ramp(100, good, warn)
            : value <= crit
                ? ramp(60, warn, crit)
                : Math.max(0, 20 - ((value - crit) / crit) * 20);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { describe, it } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { calculateHealthScore } from "./health.js";
|
|
4
|
+
const defaults = {
|
|
5
|
+
weights: { errorRate: 0.35, latency: 0.25, cpu: 0.20, logErrors: 0.20 },
|
|
6
|
+
cpu: { good: 50, warn: 80, crit: 95 },
|
|
7
|
+
errorRate: { good: 0.01, warn: 0.1, crit: 0.5 },
|
|
8
|
+
latencyP99: { good: 0.5, warn: 1.0, crit: 3.0 },
|
|
9
|
+
logErrors: { good: 1, warn: 5, crit: 20 },
|
|
10
|
+
statusBoundaries: { healthy: 80, degraded: 50 },
|
|
11
|
+
};
|
|
12
|
+
describe("calculateHealthScore", () => {
|
|
13
|
+
it("returns healthy for all-zero inputs", () => {
|
|
14
|
+
const result = calculateHealthScore({
|
|
15
|
+
cpu: 0, memory: 0, errorRate: 0, latencyP99: 0, logErrorRate: 0,
|
|
16
|
+
}, defaults);
|
|
17
|
+
assert.equal(result.status, "healthy");
|
|
18
|
+
assert.equal(result.score, 100);
|
|
19
|
+
});
|
|
20
|
+
it("returns healthy for normal values", () => {
|
|
21
|
+
const result = calculateHealthScore({
|
|
22
|
+
cpu: 20, memory: 100_000_000, errorRate: 0.005, latencyP99: 0.3, logErrorRate: 0,
|
|
23
|
+
}, defaults);
|
|
24
|
+
assert.equal(result.status, "healthy");
|
|
25
|
+
assert.ok(result.score > 80);
|
|
26
|
+
});
|
|
27
|
+
it("returns degraded for elevated values", () => {
|
|
28
|
+
const result = calculateHealthScore({
|
|
29
|
+
cpu: 65, memory: 200_000_000, errorRate: 0.05, latencyP99: 0.8, logErrorRate: 3,
|
|
30
|
+
}, defaults);
|
|
31
|
+
assert.equal(result.status, "degraded");
|
|
32
|
+
assert.ok(result.score > 50 && result.score <= 80, `Expected degraded score 50-80, got ${result.score}`);
|
|
33
|
+
});
|
|
34
|
+
it("returns critical for extreme values", () => {
|
|
35
|
+
const result = calculateHealthScore({
|
|
36
|
+
cpu: 98, memory: 500_000_000, errorRate: 1.0, latencyP99: 5.0, logErrorRate: 50,
|
|
37
|
+
}, defaults);
|
|
38
|
+
assert.equal(result.status, "critical");
|
|
39
|
+
assert.ok(result.score <= 50);
|
|
40
|
+
});
|
|
41
|
+
it("score is between 0 and 100", () => {
|
|
42
|
+
for (const cpu of [0, 50, 100]) {
|
|
43
|
+
for (const err of [0, 0.5, 2]) {
|
|
44
|
+
const result = calculateHealthScore({
|
|
45
|
+
cpu, memory: 0, errorRate: err, latencyP99: 0, logErrorRate: 0,
|
|
46
|
+
}, defaults);
|
|
47
|
+
assert.ok(result.score >= 0 && result.score <= 100, `Score ${result.score} out of range for cpu=${cpu} err=${err}`);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
});
|
|
51
|
+
it("respects custom thresholds", () => {
|
|
52
|
+
const strict = {
|
|
53
|
+
...defaults,
|
|
54
|
+
cpu: { good: 10, warn: 20, crit: 30 },
|
|
55
|
+
};
|
|
56
|
+
const result = calculateHealthScore({
|
|
57
|
+
cpu: 25, memory: 0, errorRate: 0, latencyP99: 0, logErrorRate: 0,
|
|
58
|
+
}, strict);
|
|
59
|
+
// CPU 25% with strict thresholds should lower the score
|
|
60
|
+
assert.ok(result.score < 100);
|
|
61
|
+
});
|
|
62
|
+
it("includes detail breakdown", () => {
|
|
63
|
+
const result = calculateHealthScore({
|
|
64
|
+
cpu: 60, memory: 0, errorRate: 0, latencyP99: 0, logErrorRate: 0,
|
|
65
|
+
}, defaults);
|
|
66
|
+
assert.ok("cpu" in result.details);
|
|
67
|
+
assert.ok("errorRate" in result.details);
|
|
68
|
+
assert.ok(result.details.cpu.score < 100);
|
|
69
|
+
});
|
|
70
|
+
});
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { Config, GeneralSettings, HealthThresholds } from "../types.js";
|
|
2
|
+
export declare const DEFAULT_SETTINGS: GeneralSettings;
|
|
3
|
+
export declare const DEFAULT_HEALTH_THRESHOLDS: HealthThresholds;
|
|
4
|
+
export declare function loadConfig(): Config;
|
|
5
|
+
export declare function saveConfig(config: Config): void;
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
import yaml from "js-yaml";
|
|
5
|
+
/**
 * Decide which config file to use, in priority order:
 *  1. the `CONFIG_PATH` environment variable, when set and non-empty;
 *  2. `./config/sources.yaml` relative to the current working directory, when it exists;
 *  3. `sources.yaml` inside `.observability-mcp` in the user's home directory
 *     (returned whether or not it exists).
 */
function resolveConfigPath() {
    const fromEnv = process.env.CONFIG_PATH;
    if (fromEnv) {
        return fromEnv;
    }
    const projectLocal = "./config/sources.yaml";
    return existsSync(projectLocal)
        ? projectLocal
        : join(homedir(), ".observability-mcp", "sources.yaml");
}
// Resolved once at module load; later changes to the environment or the
// working directory do not affect it.
const CONFIG_PATH = resolveConfigPath();
|
|
14
|
+
/** General settings used when the config file omits them or cannot be loaded. */
export const DEFAULT_SETTINGS = {
    checkIntervalMs: 30_000,
    defaultSensitivity: "medium",
};
/**
 * Health-scoring defaults: per-signal weights, good/warn/crit bands for each
 * signal, and the score boundaries between statuses.
 */
export const DEFAULT_HEALTH_THRESHOLDS = {
    weights: {
        errorRate: 0.35,
        latency: 0.25,
        cpu: 0.2,
        logErrors: 0.2,
    },
    cpu: {
        good: 50,
        warn: 80,
        crit: 95,
    },
    errorRate: {
        good: 0.01,
        warn: 0.1,
        crit: 0.5,
    },
    latencyP99: {
        good: 0.5,
        warn: 1,
        crit: 3,
    },
    logErrors: {
        good: 1,
        warn: 5,
        crit: 20,
    },
    statusBoundaries: {
        healthy: 80,
        degraded: 50,
    },
};
|
|
26
|
+
/**
 * Load the configuration from the YAML file at `CONFIG_PATH`.
 *
 * Values from the file are layered over the defaults: `settings` is
 * shallow-merged over `DEFAULT_SETTINGS`, `healthThresholds` is deep-merged
 * over `DEFAULT_HEALTH_THRESHOLDS`, and `sources` defaults to an empty list.
 *
 * This function never throws. If the file cannot be read or parsed, a warning
 * is logged and the configuration is built from environment variables and
 * defaults instead. The warning distinguishes a missing file from any other
 * failure so that a broken config is not mistaken for an absent one.
 *
 * @returns The effective configuration.
 */
export function loadConfig() {
    try {
        const raw = readFileSync(CONFIG_PATH, "utf-8");
        // NOTE(review): relies on `yaml.load` using a safe schema (the js-yaml 4.x
        // default); confirm the pinned js-yaml version.
        const parsed = yaml.load(raw);
        return {
            sources: parsed?.sources || [],
            settings: { ...DEFAULT_SETTINGS, ...parsed?.settings },
            healthThresholds: deepMerge(DEFAULT_HEALTH_THRESHOLDS, (parsed?.healthThresholds || {})),
        };
    }
    catch (err) {
        if (err?.code === "ENOENT") {
            console.warn(`Config file not found at ${CONFIG_PATH}, using env vars + defaults`);
        }
        else {
            // Unreadable or malformed file: say so, otherwise the user is told the
            // file is missing while it is actually being ignored.
            const reason = err instanceof Error ? err.message : String(err);
            console.warn(`Failed to load config file at ${CONFIG_PATH} (${reason}), using env vars + defaults`);
        }
        return buildConfigFromEnv();
    }
}
|
|
41
|
+
/**
 * Turn a comma-separated list of URLs (typically from an environment
 * variable) into source definitions of the given type.
 *
 * Entries are trimmed, and blank entries (from a trailing comma, a doubled
 * comma or whitespace) are ignored so they neither create a source with an
 * empty URL nor affect naming. A single URL is named after `type`; several
 * URLs are named `<type>-1`, `<type>-2`, and so on in list order.
 *
 * @param envVar Raw comma-separated value; may be undefined or empty.
 * @param type Source type, also used as the base of the source name.
 * @returns Enabled source definitions, or an empty list when there are no URLs.
 */
function parseUrlList(envVar, type) {
    if (!envVar)
        return [];
    const urls = envVar
        .split(",")
        .map((url) => url.trim())
        .filter((url) => url.length > 0);
    return urls.map((url, i) => ({
        name: urls.length === 1 ? type : `${type}-${i + 1}`,
        type,
        url,
        enabled: true,
    }));
}
|
|
51
|
+
/**
 * Build a configuration without a config file: sources come from the
 * `PROMETHEUS_URL` and `LOKI_URL` environment variables (each may hold a
 * comma-separated list), everything else from the defaults.
 *
 * The returned settings and thresholds are copies, so callers may modify the
 * configuration without altering the exported default objects.
 *
 * @returns A configuration with Prometheus sources first, then Loki sources.
 */
function buildConfigFromEnv() {
    const sources = [
        ...parseUrlList(process.env.PROMETHEUS_URL, "prometheus"),
        ...parseUrlList(process.env.LOKI_URL, "loki"),
    ];
    return {
        sources,
        settings: { ...DEFAULT_SETTINGS },
        // The thresholds are nested plain data, so a JSON round-trip yields an
        // independent deep copy.
        healthThresholds: JSON.parse(JSON.stringify(DEFAULT_HEALTH_THRESHOLDS)),
    };
}
|
|
62
|
+
/**
 * Write the configuration to `CONFIG_PATH` as YAML, creating the containing
 * directory (and any missing parents) first. Any existing file is overwritten.
 *
 * @param config The configuration to serialize.
 */
export function saveConfig(config) {
    mkdirSync(dirname(CONFIG_PATH), { recursive: true });
    const serialized = yaml.dump(config, { indent: 2, lineWidth: 200 });
    writeFileSync(CONFIG_PATH, serialized, "utf-8");
}
|
|
68
|
+
/**
 * Recursively merge `override` into a copy of `base`.
 *
 * Plain objects present on both sides are merged key by key; any other
 * defined override value (including arrays) replaces the base value, and
 * `undefined` override values are ignored. Neither argument is modified, and
 * nested plain objects in the result are fresh copies, so changing the result
 * never changes `base`. Arrays and other non-plain values are carried over by
 * reference.
 *
 * @param base Object providing the default values.
 * @param override Object whose defined values take precedence.
 * @returns A new merged object.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function deepMerge(base, override) {
    const isPlainObject = (v) => v !== null && typeof v === "object" && !Array.isArray(v);
    const result = { ...base };
    // Detach nested objects inherited from `base`; a shallow spread alone
    // would leave them shared with the defaults.
    for (const key of Object.keys(result)) {
        if (isPlainObject(result[key])) {
            result[key] = deepMerge(result[key], {});
        }
    }
    for (const key of Object.keys(override)) {
        const val = override[key];
        if (isPlainObject(val) && isPlainObject(base[key])) {
            result[key] = deepMerge(base[key], val);
        }
        else if (val !== undefined) {
            result[key] = val;
        }
    }
    return result;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|