@contractspec/lib.observability 1.57.0 → 1.59.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/dist/anomaly/alert-manager.d.ts +17 -0
- package/dist/anomaly/alert-manager.js +24 -0
- package/dist/anomaly/anomaly-detector.d.ts +22 -0
- package/dist/anomaly/anomaly-detector.js +102 -0
- package/dist/anomaly/baseline-calculator.d.ts +23 -0
- package/dist/anomaly/baseline-calculator.js +40 -0
- package/dist/anomaly/root-cause-analyzer.d.ts +19 -0
- package/dist/anomaly/root-cause-analyzer.js +32 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.js +1078 -0
- package/dist/intent/aggregator.d.ts +57 -0
- package/dist/intent/aggregator.js +110 -0
- package/dist/intent/detector.d.ts +28 -0
- package/dist/intent/detector.js +133 -0
- package/dist/logging/index.d.ts +17 -0
- package/dist/logging/index.js +42 -0
- package/dist/metrics/index.d.ts +12 -0
- package/dist/metrics/index.js +31 -0
- package/dist/node/anomaly/alert-manager.js +23 -0
- package/dist/node/anomaly/anomaly-detector.js +101 -0
- package/dist/node/anomaly/baseline-calculator.js +39 -0
- package/dist/node/anomaly/root-cause-analyzer.js +31 -0
- package/dist/node/index.js +1077 -0
- package/dist/node/intent/aggregator.js +109 -0
- package/dist/node/intent/detector.js +132 -0
- package/dist/node/logging/index.js +41 -0
- package/dist/node/metrics/index.js +30 -0
- package/dist/node/pipeline/evolution-pipeline.js +299 -0
- package/dist/node/pipeline/lifecycle-pipeline.js +85 -0
- package/dist/node/telemetry/posthog-baseline-reader.js +308 -0
- package/dist/node/telemetry/posthog-telemetry.js +60 -0
- package/dist/node/tracing/index.js +52 -0
- package/dist/node/tracing/middleware.js +150 -0
- package/dist/pipeline/evolution-pipeline.d.ts +36 -0
- package/dist/pipeline/evolution-pipeline.js +300 -0
- package/dist/pipeline/lifecycle-pipeline.d.ts +40 -0
- package/dist/pipeline/lifecycle-pipeline.js +86 -0
- package/dist/telemetry/posthog-baseline-reader.d.ts +27 -0
- package/dist/telemetry/posthog-baseline-reader.js +309 -0
- package/dist/telemetry/posthog-telemetry.d.ts +15 -0
- package/dist/telemetry/posthog-telemetry.js +61 -0
- package/dist/tracing/index.d.ts +5 -0
- package/dist/tracing/index.js +53 -0
- package/dist/tracing/middleware.d.ts +15 -0
- package/dist/tracing/middleware.js +151 -0
- package/package.json +140 -43
- package/dist/anomaly/alert-manager.d.mts +0 -21
- package/dist/anomaly/alert-manager.mjs +0 -23
- package/dist/anomaly/anomaly-detector.d.mts +0 -26
- package/dist/anomaly/anomaly-detector.mjs +0 -58
- package/dist/anomaly/baseline-calculator.d.mts +0 -26
- package/dist/anomaly/baseline-calculator.mjs +0 -37
- package/dist/anomaly/root-cause-analyzer.d.mts +0 -23
- package/dist/anomaly/root-cause-analyzer.mjs +0 -27
- package/dist/index.d.mts +0 -15
- package/dist/index.mjs +0 -16
- package/dist/intent/aggregator.d.mts +0 -60
- package/dist/intent/aggregator.mjs +0 -98
- package/dist/intent/detector.d.mts +0 -32
- package/dist/intent/detector.mjs +0 -122
- package/dist/logging/index.d.mts +0 -20
- package/dist/logging/index.mjs +0 -40
- package/dist/metrics/index.d.mts +0 -17
- package/dist/metrics/index.mjs +0 -26
- package/dist/pipeline/evolution-pipeline.d.mts +0 -40
- package/dist/pipeline/evolution-pipeline.mjs +0 -66
- package/dist/pipeline/lifecycle-pipeline.d.mts +0 -44
- package/dist/pipeline/lifecycle-pipeline.mjs +0 -73
- package/dist/telemetry/posthog-baseline-reader.d.mts +0 -31
- package/dist/telemetry/posthog-baseline-reader.mjs +0 -266
- package/dist/telemetry/posthog-telemetry.d.mts +0 -19
- package/dist/telemetry/posthog-telemetry.mjs +0 -61
- package/dist/tracing/index.d.mts +0 -9
- package/dist/tracing/index.mjs +0 -47
- package/dist/tracing/middleware.d.mts +0 -19
- package/dist/tracing/middleware.mjs +0 -80
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/** One recorded call of an operation; the input unit for `IntentAggregator.add`. */
export interface TelemetrySample {
  /** Identity of the called operation; name and version together form the grouping key. */
  operation: {
    name: string;
    version: string;
  };
  /** Call duration in milliseconds. */
  durationMs: number;
  /** Whether the call succeeded; failed calls count toward the error rate. */
  success: boolean;
  /** When the call happened; decides whether the sample falls inside the flush window. */
  timestamp: Date;
  /** Error identifier of a failed call; tallied into `topErrors`. */
  errorCode?: string;
  /** Tenant of the call; the earliest sample of a trace labels its sequence. */
  tenantId?: string;
  /** Samples sharing a trace id are ordered by timestamp into one call sequence. */
  traceId?: string;
  actorId?: string;
  metadata?: Record<string, unknown>;
}

/** Metrics for one operation (name + version) over the samples of one flush. */
export interface AggregatedOperationMetrics {
  operation: {
    name: string;
    version: string;
  };
  /** Number of samples in the group. */
  totalCalls: number;
  /** Fraction of successful calls, 0..1. */
  successRate: number;
  /** Fraction of failed calls, 0..1. */
  errorRate: number;
  averageLatencyMs: number;
  p95LatencyMs: number;
  p99LatencyMs: number;
  maxLatencyMs: number;
  /** Timestamp of the earliest sample in the group. */
  windowStart: Date;
  /** Timestamp of the latest sample in the group. */
  windowEnd: Date;
  /** Failure count per error code (failed samples without a code are not counted). */
  topErrors: Record<string, number>;
}

/** An ordered chain of operation names observed within one or more traces. */
export interface OperationSequence {
  /** Operation names in timestamp order. */
  steps: string[];
  tenantId?: string;
  /** How many traces produced this exact chain for this tenant. */
  count: number;
}

/** Result of one `IntentAggregator.flush` call. */
export interface IntentAggregatorSnapshot {
  metrics: AggregatedOperationMetrics[];
  sequences: OperationSequence[];
  /** Number of buffered samples that fell inside the window. */
  sampleCount: number;
  /** Earliest sample timestamp in the window; absent when the window is empty. */
  windowStart?: Date;
  /** Latest sample timestamp in the window; absent when the window is empty. */
  windowEnd?: Date;
}

export interface IntentAggregatorOptions {
  /** Look-back window in milliseconds; defaults to 15 minutes. */
  windowMs?: number;
  /** How many of the most recent window samples feed sequence building; defaults to 1000. */
  sequenceSampleSize?: number;
}

/** Buffers telemetry samples and aggregates them on demand. */
export declare class IntentAggregator {
  private readonly windowMs;
  private readonly sequenceSampleSize;
  private readonly samples;
  constructor(options?: IntentAggregatorOptions);
  /** Buffers one sample until the next flush. */
  add(sample: TelemetrySample): void;
  /** Aggregates the buffered samples inside the window ending at `now` and empties the buffer. */
  flush(now?: Date): IntentAggregatorSnapshot;
  private aggregateMetrics;
  private buildSequences;
}
|
|
57
|
+
//# sourceMappingURL=aggregator.d.ts.map
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/intent/aggregator.ts
|
|
3
|
+
var DEFAULT_WINDOW_MS = 15 * 60 * 1000;
|
|
4
|
+
|
|
5
|
+
class IntentAggregator {
|
|
6
|
+
windowMs;
|
|
7
|
+
sequenceSampleSize;
|
|
8
|
+
samples = [];
|
|
9
|
+
constructor(options = {}) {
|
|
10
|
+
this.windowMs = options.windowMs ?? DEFAULT_WINDOW_MS;
|
|
11
|
+
this.sequenceSampleSize = options.sequenceSampleSize ?? 1000;
|
|
12
|
+
}
|
|
13
|
+
add(sample) {
|
|
14
|
+
this.samples.push(sample);
|
|
15
|
+
}
|
|
16
|
+
flush(now = new Date) {
|
|
17
|
+
const minTimestamp = now.getTime() - this.windowMs;
|
|
18
|
+
const windowSamples = this.samples.filter((sample) => sample.timestamp.getTime() >= minTimestamp);
|
|
19
|
+
this.samples.length = 0;
|
|
20
|
+
const metrics = this.aggregateMetrics(windowSamples);
|
|
21
|
+
const sequences = this.buildSequences(windowSamples);
|
|
22
|
+
const timestamps = windowSamples.map((sample) => sample.timestamp.getTime());
|
|
23
|
+
return {
|
|
24
|
+
metrics,
|
|
25
|
+
sequences,
|
|
26
|
+
sampleCount: windowSamples.length,
|
|
27
|
+
windowStart: timestamps.length ? new Date(Math.min(...timestamps)) : undefined,
|
|
28
|
+
windowEnd: timestamps.length ? new Date(Math.max(...timestamps)) : undefined
|
|
29
|
+
};
|
|
30
|
+
}
|
|
31
|
+
aggregateMetrics(samples) {
|
|
32
|
+
if (!samples.length)
|
|
33
|
+
return [];
|
|
34
|
+
const groups = new Map;
|
|
35
|
+
for (const sample of samples) {
|
|
36
|
+
const key = `${sample.operation.name}.v${sample.operation.version}`;
|
|
37
|
+
const arr = groups.get(key) ?? [];
|
|
38
|
+
arr.push(sample);
|
|
39
|
+
groups.set(key, arr);
|
|
40
|
+
}
|
|
41
|
+
return [...groups.values()].map((group) => {
|
|
42
|
+
const first = group[0];
|
|
43
|
+
if (!first)
|
|
44
|
+
throw new Error("Empty group in aggregation");
|
|
45
|
+
const durations = group.map((s) => s.durationMs).sort((a, b) => a - b);
|
|
46
|
+
const errors = group.filter((s) => !s.success);
|
|
47
|
+
const totalCalls = group.length;
|
|
48
|
+
const topErrors = errors.reduce((acc, sample) => {
|
|
49
|
+
if (!sample.errorCode)
|
|
50
|
+
return acc;
|
|
51
|
+
acc[sample.errorCode] = (acc[sample.errorCode] ?? 0) + 1;
|
|
52
|
+
return acc;
|
|
53
|
+
}, {});
|
|
54
|
+
const timestamps = group.map((s) => s.timestamp.getTime());
|
|
55
|
+
return {
|
|
56
|
+
operation: first.operation,
|
|
57
|
+
totalCalls,
|
|
58
|
+
successRate: (totalCalls - errors.length) / totalCalls,
|
|
59
|
+
errorRate: errors.length / totalCalls,
|
|
60
|
+
averageLatencyMs: durations.reduce((sum, value) => sum + value, 0) / totalCalls,
|
|
61
|
+
p95LatencyMs: percentile(durations, 0.95),
|
|
62
|
+
p99LatencyMs: percentile(durations, 0.99),
|
|
63
|
+
maxLatencyMs: Math.max(...durations),
|
|
64
|
+
windowStart: new Date(Math.min(...timestamps)),
|
|
65
|
+
windowEnd: new Date(Math.max(...timestamps)),
|
|
66
|
+
topErrors
|
|
67
|
+
};
|
|
68
|
+
});
|
|
69
|
+
}
|
|
70
|
+
buildSequences(samples) {
|
|
71
|
+
const byTrace = new Map;
|
|
72
|
+
for (const sample of samples.slice(-this.sequenceSampleSize)) {
|
|
73
|
+
if (!sample.traceId)
|
|
74
|
+
continue;
|
|
75
|
+
const arr = byTrace.get(sample.traceId) ?? [];
|
|
76
|
+
arr.push(sample);
|
|
77
|
+
byTrace.set(sample.traceId, arr);
|
|
78
|
+
}
|
|
79
|
+
const sequences = {};
|
|
80
|
+
for (const events of byTrace.values()) {
|
|
81
|
+
const ordered = events.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());
|
|
82
|
+
const steps = ordered.map((event) => event.operation.name);
|
|
83
|
+
if (steps.length < 2)
|
|
84
|
+
continue;
|
|
85
|
+
const key = `${steps.join(">")}@${ordered[0]?.tenantId ?? "global"}`;
|
|
86
|
+
const existing = sequences[key];
|
|
87
|
+
if (existing) {
|
|
88
|
+
existing.count += 1;
|
|
89
|
+
} else {
|
|
90
|
+
sequences[key] = {
|
|
91
|
+
steps,
|
|
92
|
+
tenantId: ordered[0]?.tenantId,
|
|
93
|
+
count: 1
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
return Object.values(sequences).sort((a, b) => b.count - a.count);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
function percentile(values, ratio) {
|
|
101
|
+
if (!values.length)
|
|
102
|
+
return 0;
|
|
103
|
+
if (values.length === 1)
|
|
104
|
+
return values[0] ?? 0;
|
|
105
|
+
const index = Math.min(values.length - 1, Math.floor(ratio * values.length));
|
|
106
|
+
return values[index] ?? 0;
|
|
107
|
+
}
|
|
108
|
+
export {
|
|
109
|
+
IntentAggregator
|
|
110
|
+
};
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { AggregatedOperationMetrics, OperationSequence } from './aggregator';
|
|
2
|
+
/** Kinds of signal the detector can emit. */
export type IntentSignalType =
  | 'latency-regression'
  | 'error-spike'
  | 'throughput-drop'
  | 'missing-workflow-step';

/** One detected condition, with the data that triggered it. */
export interface IntentSignal {
  /** Unique id generated per signal. */
  id: string;
  type: IntentSignalType;
  /** Operation the signal refers to; absent for sequence-based signals. */
  operation?: AggregatedOperationMetrics['operation'];
  confidence: number;
  /** Human-readable summary. */
  description: string;
  metadata?: Record<string, unknown>;
  /** Supporting observations for the signal. */
  evidence: {
    type: 'metric' | 'sequence' | 'anomaly';
    description: string;
    data?: Record<string, unknown>;
  }[];
}

/** Detection thresholds; every field falls back to a built-in default when omitted. */
export interface IntentDetectorOptions {
  /** Error rate (0..1) at or above which an error spike is reported; default 0.05. */
  errorRateThreshold?: number;
  /** P99 latency in ms at or above which a latency regression is reported; default 750. */
  latencyP99ThresholdMs?: number;
  /** Relative drop in total calls versus the previous metrics that is reported; default 0.3. */
  throughputDropThreshold?: number;
  /** Minimum number of steps a sequence needs to be reported; default 3. */
  minSequenceLength?: number;
}

/** Turns aggregated metrics and call sequences into intent signals. */
export declare class IntentDetector {
  private readonly options;
  constructor(options?: IntentDetectorOptions);
  /** Checks each current metric against the thresholds, using `previous` as throughput baseline. */
  detectFromMetrics(current: AggregatedOperationMetrics[], previous?: AggregatedOperationMetrics[]): IntentSignal[];
  /** Reports every sequence that has at least `minSequenceLength` steps. */
  detectSequentialIntents(sequences: OperationSequence[]): IntentSignal[];
}
|
|
28
|
+
//# sourceMappingURL=detector.d.ts.map
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/intent/detector.ts
|
|
3
|
+
import { randomUUID } from "crypto";
|
|
4
|
+
var DEFAULTS = {
|
|
5
|
+
errorRateThreshold: 0.05,
|
|
6
|
+
latencyP99ThresholdMs: 750,
|
|
7
|
+
throughputDropThreshold: 0.3,
|
|
8
|
+
minSequenceLength: 3
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
class IntentDetector {
|
|
12
|
+
options;
|
|
13
|
+
constructor(options = {}) {
|
|
14
|
+
this.options = {
|
|
15
|
+
errorRateThreshold: options.errorRateThreshold ?? DEFAULTS.errorRateThreshold,
|
|
16
|
+
latencyP99ThresholdMs: options.latencyP99ThresholdMs ?? DEFAULTS.latencyP99ThresholdMs,
|
|
17
|
+
throughputDropThreshold: options.throughputDropThreshold ?? DEFAULTS.throughputDropThreshold,
|
|
18
|
+
minSequenceLength: options.minSequenceLength ?? DEFAULTS.minSequenceLength
|
|
19
|
+
};
|
|
20
|
+
}
|
|
21
|
+
detectFromMetrics(current, previous) {
|
|
22
|
+
const signals = [];
|
|
23
|
+
const baseline = new Map((previous ?? []).map((metric) => [
|
|
24
|
+
`${metric.operation.name}.v${metric.operation.version}`,
|
|
25
|
+
metric
|
|
26
|
+
]));
|
|
27
|
+
for (const metric of current) {
|
|
28
|
+
if (metric.errorRate >= this.options.errorRateThreshold) {
|
|
29
|
+
signals.push({
|
|
30
|
+
id: randomUUID(),
|
|
31
|
+
type: "error-spike",
|
|
32
|
+
operation: metric.operation,
|
|
33
|
+
confidence: Math.min(1, metric.errorRate / this.options.errorRateThreshold),
|
|
34
|
+
description: `Error rate ${metric.errorRate.toFixed(2)} exceeded threshold`,
|
|
35
|
+
metadata: {
|
|
36
|
+
errorRate: metric.errorRate,
|
|
37
|
+
topErrors: metric.topErrors
|
|
38
|
+
},
|
|
39
|
+
evidence: [
|
|
40
|
+
{
|
|
41
|
+
type: "metric",
|
|
42
|
+
description: "error-rate",
|
|
43
|
+
data: {
|
|
44
|
+
errorRate: metric.errorRate,
|
|
45
|
+
threshold: this.options.errorRateThreshold
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
]
|
|
49
|
+
});
|
|
50
|
+
continue;
|
|
51
|
+
}
|
|
52
|
+
if (metric.p99LatencyMs >= this.options.latencyP99ThresholdMs) {
|
|
53
|
+
signals.push({
|
|
54
|
+
id: randomUUID(),
|
|
55
|
+
type: "latency-regression",
|
|
56
|
+
operation: metric.operation,
|
|
57
|
+
confidence: Math.min(1, metric.p99LatencyMs / this.options.latencyP99ThresholdMs),
|
|
58
|
+
description: `P99 latency ${metric.p99LatencyMs}ms exceeded threshold`,
|
|
59
|
+
metadata: { p99LatencyMs: metric.p99LatencyMs },
|
|
60
|
+
evidence: [
|
|
61
|
+
{
|
|
62
|
+
type: "metric",
|
|
63
|
+
description: "p99-latency",
|
|
64
|
+
data: {
|
|
65
|
+
p99LatencyMs: metric.p99LatencyMs,
|
|
66
|
+
threshold: this.options.latencyP99ThresholdMs
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
]
|
|
70
|
+
});
|
|
71
|
+
continue;
|
|
72
|
+
}
|
|
73
|
+
const base = baseline.get(`${metric.operation.name}.v${metric.operation.version}`);
|
|
74
|
+
if (base) {
|
|
75
|
+
const drop = (base.totalCalls - metric.totalCalls) / Math.max(base.totalCalls, 1);
|
|
76
|
+
if (drop >= this.options.throughputDropThreshold) {
|
|
77
|
+
signals.push({
|
|
78
|
+
id: randomUUID(),
|
|
79
|
+
type: "throughput-drop",
|
|
80
|
+
operation: metric.operation,
|
|
81
|
+
confidence: Math.min(1, drop / this.options.throughputDropThreshold),
|
|
82
|
+
description: `Throughput dropped ${(drop * 100).toFixed(1)}% vs baseline`,
|
|
83
|
+
metadata: {
|
|
84
|
+
baselineCalls: base.totalCalls,
|
|
85
|
+
currentCalls: metric.totalCalls
|
|
86
|
+
},
|
|
87
|
+
evidence: [
|
|
88
|
+
{
|
|
89
|
+
type: "metric",
|
|
90
|
+
description: "throughput-drop",
|
|
91
|
+
data: {
|
|
92
|
+
baselineCalls: base.totalCalls,
|
|
93
|
+
currentCalls: metric.totalCalls
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
]
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
return signals;
|
|
102
|
+
}
|
|
103
|
+
detectSequentialIntents(sequences) {
|
|
104
|
+
const signals = [];
|
|
105
|
+
for (const sequence of sequences) {
|
|
106
|
+
if (sequence.steps.length < this.options.minSequenceLength)
|
|
107
|
+
continue;
|
|
108
|
+
const description = sequence.steps.join(" \u2192 ");
|
|
109
|
+
signals.push({
|
|
110
|
+
id: randomUUID(),
|
|
111
|
+
type: "missing-workflow-step",
|
|
112
|
+
confidence: 0.6,
|
|
113
|
+
description: `Repeated workflow detected: ${description}`,
|
|
114
|
+
metadata: {
|
|
115
|
+
steps: sequence.steps,
|
|
116
|
+
tenantId: sequence.tenantId,
|
|
117
|
+
occurrences: sequence.count
|
|
118
|
+
},
|
|
119
|
+
evidence: [
|
|
120
|
+
{
|
|
121
|
+
type: "sequence",
|
|
122
|
+
description: "sequential-calls",
|
|
123
|
+
data: { steps: sequence.steps, count: sequence.count }
|
|
124
|
+
}
|
|
125
|
+
]
|
|
126
|
+
});
|
|
127
|
+
}
|
|
128
|
+
return signals;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
export {
|
|
132
|
+
IntentDetector
|
|
133
|
+
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** Severity levels accepted by the logger. */
export type LogLevel = 'debug' | 'info' | 'warn' | 'error';

/** Shape of a log record: level and message plus arbitrary extra fields. */
export interface LogEntry {
  level: LogLevel;
  message: string;
  [key: string]: unknown;
}

/** Structured logger bound to a service name. */
export declare class Logger {
  private readonly serviceName;
  constructor(serviceName: string);
  private log;
  /** Logs `message` at debug level; `meta` fields are merged into the record. */
  debug(message: string, meta?: Record<string, unknown>): void;
  /** Logs `message` at info level; `meta` fields are merged into the record. */
  info(message: string, meta?: Record<string, unknown>): void;
  /** Logs `message` at warn level; `meta` fields are merged into the record. */
  warn(message: string, meta?: Record<string, unknown>): void;
  /** Logs `message` at error level; `meta` fields are merged into the record. */
  error(message: string, meta?: Record<string, unknown>): void;
}

/** Shared logger instance exported by the module. */
export declare const logger: Logger;
|
|
17
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/logging/index.ts
|
|
3
|
+
import { trace, context } from "@opentelemetry/api";
|
|
4
|
+
|
|
5
|
+
/**
 * Structured logger: every call prints one JSON line to stdout containing the
 * timestamp, service name, level, message and the ids of the active
 * OpenTelemetry span (when there is one).
 */
class Logger {
  serviceName;

  /** @param serviceName value written to the `service` field of every record */
  constructor(serviceName) {
    this.serviceName = serviceName;
  }

  /**
   * Builds and prints one record.
   *
   * @param level severity written to the `level` field
   * @param message text written to the `message` field
   * @param meta extra fields; spread last, so they override the built-in
   *   fields when keys collide
   */
  log(level, message, meta = {}) {
    const spanContext = trace.getSpan(context.active())?.spanContext();
    const entry = {
      timestamp: new Date().toISOString(),
      service: this.serviceName,
      level,
      message,
      traceId: spanContext?.traceId,
      spanId: spanContext?.spanId,
      ...meta
    };
    let line;
    try {
      line = JSON.stringify(entry);
    } catch {
      // JSON.stringify throws on circular references and BigInt values.
      // A log call must not crash its caller, so retry with a replacer that
      // stringifies BigInt and cuts references to already-visited objects.
      // (A throwing toJSON/getter inside `meta` can still propagate.)
      const seen = new WeakSet();
      line = JSON.stringify(entry, (_key, value) => {
        if (typeof value === "bigint")
          return value.toString();
        if (typeof value === "object" && value !== null) {
          if (seen.has(value))
            return "[Circular]";
          seen.add(value);
        }
        return value;
      });
    }
    console.log(line);
  }

  /** Logs at debug level. */
  debug(message, meta) {
    this.log("debug", message, meta);
  }

  /** Logs at info level. */
  info(message, meta) {
    this.log("info", message, meta);
  }

  /** Logs at warn level. */
  warn(message, meta) {
    this.log("warn", message, meta);
  }

  /** Logs at error level. */
  error(message, meta) {
    this.log("error", message, meta);
  }
}

/** Shared logger named after OTEL_SERVICE_NAME, or "unknown-service" when that is unset or empty. */
var logger = new Logger(process.env.OTEL_SERVICE_NAME || "unknown-service");
|
|
39
|
+
export {
|
|
40
|
+
logger,
|
|
41
|
+
Logger
|
|
42
|
+
};
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { type Meter, type Counter, type Histogram, type UpDownCounter } from '@opentelemetry/api';
|
|
2
|
+
/** Returns the OpenTelemetry meter for `name`; the package's own meter name is used when omitted. */
export declare function getMeter(name?: string): Meter;

/** Creates a counter called `name` on the meter `meterName` (default meter when omitted). */
export declare function createCounter(name: string, description?: string, meterName?: string): Counter;

/** Creates an up/down counter called `name` on the meter `meterName` (default meter when omitted). */
export declare function createUpDownCounter(name: string, description?: string, meterName?: string): UpDownCounter;

/** Creates a histogram called `name` on the meter `meterName` (default meter when omitted). */
export declare function createHistogram(name: string, description?: string, meterName?: string): Histogram;

/** Instruments created once when the module is loaded. */
export declare const standardMetrics: {
  /** Counter `http_requests_total`. */
  httpRequests: Counter<import("@opentelemetry/api").Attributes>;
  /** Histogram `http_request_duration_seconds`. */
  httpDuration: Histogram<import("@opentelemetry/api").Attributes>;
  /** Counter `operation_errors_total`. */
  operationErrors: Counter<import("@opentelemetry/api").Attributes>;
  /** Histogram `workflow_duration_seconds`. */
  workflowDuration: Histogram<import("@opentelemetry/api").Attributes>;
};
|
|
12
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/metrics/index.ts
|
|
3
|
+
import {
|
|
4
|
+
metrics
|
|
5
|
+
} from "@opentelemetry/api";
|
|
6
|
+
/** Meter name used when a caller does not supply one. */
var DEFAULT_METER_NAME = "@contractspec/lib.observability";

/**
 * Looks up a meter through the OpenTelemetry metrics API.
 *
 * @param name meter name; defaults to `DEFAULT_METER_NAME`
 * @returns whatever `metrics.getMeter` returns for that name
 */
function getMeter(name = DEFAULT_METER_NAME) {
  const meter = metrics.getMeter(name);
  return meter;
}
|
|
10
|
+
/**
 * Creates a counter instrument.
 *
 * @param name instrument name
 * @param description optional description passed to the instrument options
 * @param meterName meter to create it on; `getMeter` applies its default when undefined
 */
function createCounter(name, description, meterName) {
  const meter = getMeter(meterName);
  return meter.createCounter(name, { description });
}
|
|
13
|
+
/**
 * Creates an up/down counter instrument.
 *
 * @param name instrument name
 * @param description optional description passed to the instrument options
 * @param meterName meter to create it on; `getMeter` applies its default when undefined
 */
function createUpDownCounter(name, description, meterName) {
  const meter = getMeter(meterName);
  return meter.createUpDownCounter(name, { description });
}
|
|
16
|
+
/**
 * Creates a histogram instrument.
 *
 * @param name instrument name
 * @param description optional description passed to the instrument options
 * @param meterName meter to create it on; `getMeter` applies its default when undefined
 */
function createHistogram(name, description, meterName) {
  const meter = getMeter(meterName);
  return meter.createHistogram(name, { description });
}
|
|
19
|
+
// Instruments shared across the package, created on the default meter in the
// order listed below when this module is first evaluated.
// NOTE(review): because creation happens at import time, confirm a
// MeterProvider is registered before this module loads — otherwise these
// instruments may be bound to a no-op meter.
var httpRequestsCounter = createCounter("http_requests_total", "Total HTTP requests");
var httpDurationHistogram = createHistogram("http_request_duration_seconds", "HTTP request duration");
var operationErrorsCounter = createCounter("operation_errors_total", "Total operation errors");
var workflowDurationHistogram = createHistogram("workflow_duration_seconds", "Workflow execution duration");
var standardMetrics = {
  httpRequests: httpRequestsCounter,
  httpDuration: httpDurationHistogram,
  operationErrors: operationErrorsCounter,
  workflowDuration: workflowDurationHistogram
};
|
|
25
|
+
export {
|
|
26
|
+
standardMetrics,
|
|
27
|
+
getMeter,
|
|
28
|
+
createUpDownCounter,
|
|
29
|
+
createHistogram,
|
|
30
|
+
createCounter
|
|
31
|
+
};
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
// src/anomaly/alert-manager.ts
|
|
2
|
+
class AlertManager {
|
|
3
|
+
options;
|
|
4
|
+
cooldownMs;
|
|
5
|
+
lastAlert = new Map;
|
|
6
|
+
constructor(options) {
|
|
7
|
+
this.options = options;
|
|
8
|
+
this.cooldownMs = options.cooldownMs ?? 60000;
|
|
9
|
+
}
|
|
10
|
+
async notify(signal, analysis) {
|
|
11
|
+
const key = `${signal.type}:${analysis.culprit?.id ?? "none"}`;
|
|
12
|
+
const now = Date.now();
|
|
13
|
+
const last = this.lastAlert.get(key) ?? 0;
|
|
14
|
+
if (now - last < this.cooldownMs) {
|
|
15
|
+
return;
|
|
16
|
+
}
|
|
17
|
+
await this.options.transport({ signal, analysis });
|
|
18
|
+
this.lastAlert.set(key, now);
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
export {
|
|
22
|
+
AlertManager
|
|
23
|
+
};
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
// src/anomaly/baseline-calculator.ts
|
|
2
|
+
class BaselineCalculator {
|
|
3
|
+
alpha;
|
|
4
|
+
snapshot = {
|
|
5
|
+
latencyP99: 0,
|
|
6
|
+
latencyP95: 0,
|
|
7
|
+
errorRate: 0,
|
|
8
|
+
throughput: 0,
|
|
9
|
+
sampleCount: 0
|
|
10
|
+
};
|
|
11
|
+
constructor(alpha = 0.2) {
|
|
12
|
+
this.alpha = alpha;
|
|
13
|
+
}
|
|
14
|
+
update(point) {
|
|
15
|
+
const { sampleCount } = this.snapshot;
|
|
16
|
+
const nextCount = sampleCount + 1;
|
|
17
|
+
const weight = sampleCount === 0 ? 1 : this.alpha;
|
|
18
|
+
this.snapshot = {
|
|
19
|
+
latencyP99: this.mix(this.snapshot.latencyP99, point.latencyP99, weight),
|
|
20
|
+
latencyP95: this.mix(this.snapshot.latencyP95, point.latencyP95, weight),
|
|
21
|
+
errorRate: this.mix(this.snapshot.errorRate, point.errorRate, weight),
|
|
22
|
+
throughput: this.mix(this.snapshot.throughput, point.throughput, weight),
|
|
23
|
+
sampleCount: nextCount
|
|
24
|
+
};
|
|
25
|
+
return this.snapshot;
|
|
26
|
+
}
|
|
27
|
+
getSnapshot() {
|
|
28
|
+
return this.snapshot;
|
|
29
|
+
}
|
|
30
|
+
mix(current, next, weight) {
|
|
31
|
+
if (this.snapshot.sampleCount === 0) {
|
|
32
|
+
return next;
|
|
33
|
+
}
|
|
34
|
+
return current * (1 - weight) + next * weight;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// src/anomaly/anomaly-detector.ts
|
|
39
|
+
class AnomalyDetector {
|
|
40
|
+
baseline;
|
|
41
|
+
thresholds = {
|
|
42
|
+
errorRateDelta: 0.5,
|
|
43
|
+
latencyDelta: 0.35,
|
|
44
|
+
throughputDrop: 0.4,
|
|
45
|
+
minSamples: 10
|
|
46
|
+
};
|
|
47
|
+
constructor(options = {}) {
|
|
48
|
+
this.baseline = new BaselineCalculator;
|
|
49
|
+
this.thresholds = { ...this.thresholds, ...options };
|
|
50
|
+
}
|
|
51
|
+
evaluate(point) {
|
|
52
|
+
const baselineSnapshot = this.baseline.update(point);
|
|
53
|
+
if (baselineSnapshot.sampleCount < this.thresholds.minSamples) {
|
|
54
|
+
return [];
|
|
55
|
+
}
|
|
56
|
+
const signals = [];
|
|
57
|
+
const errorDelta = this.relativeDelta(point.errorRate, baselineSnapshot.errorRate);
|
|
58
|
+
if (errorDelta > this.thresholds.errorRateDelta) {
|
|
59
|
+
signals.push({
|
|
60
|
+
type: "error_rate_spike",
|
|
61
|
+
delta: errorDelta,
|
|
62
|
+
point,
|
|
63
|
+
baseline: baselineSnapshot
|
|
64
|
+
});
|
|
65
|
+
}
|
|
66
|
+
const latencyDelta = this.relativeDelta(point.latencyP99, baselineSnapshot.latencyP99);
|
|
67
|
+
if (latencyDelta > this.thresholds.latencyDelta) {
|
|
68
|
+
signals.push({
|
|
69
|
+
type: "latency_regression",
|
|
70
|
+
delta: latencyDelta,
|
|
71
|
+
point,
|
|
72
|
+
baseline: baselineSnapshot
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
const throughputDelta = this.relativeDrop(point.throughput, baselineSnapshot.throughput);
|
|
76
|
+
if (throughputDelta > this.thresholds.throughputDrop) {
|
|
77
|
+
signals.push({
|
|
78
|
+
type: "throughput_drop",
|
|
79
|
+
delta: throughputDelta,
|
|
80
|
+
point,
|
|
81
|
+
baseline: baselineSnapshot
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
return signals;
|
|
85
|
+
}
|
|
86
|
+
relativeDelta(value, baseline) {
|
|
87
|
+
if (baseline === 0) {
|
|
88
|
+
return 0;
|
|
89
|
+
}
|
|
90
|
+
return (value - baseline) / baseline;
|
|
91
|
+
}
|
|
92
|
+
relativeDrop(value, baseline) {
|
|
93
|
+
if (baseline === 0) {
|
|
94
|
+
return 0;
|
|
95
|
+
}
|
|
96
|
+
return (baseline - value) / baseline;
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
export {
|
|
100
|
+
AnomalyDetector
|
|
101
|
+
};
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
// src/anomaly/baseline-calculator.ts
|
|
2
|
+
class BaselineCalculator {
|
|
3
|
+
alpha;
|
|
4
|
+
snapshot = {
|
|
5
|
+
latencyP99: 0,
|
|
6
|
+
latencyP95: 0,
|
|
7
|
+
errorRate: 0,
|
|
8
|
+
throughput: 0,
|
|
9
|
+
sampleCount: 0
|
|
10
|
+
};
|
|
11
|
+
constructor(alpha = 0.2) {
|
|
12
|
+
this.alpha = alpha;
|
|
13
|
+
}
|
|
14
|
+
update(point) {
|
|
15
|
+
const { sampleCount } = this.snapshot;
|
|
16
|
+
const nextCount = sampleCount + 1;
|
|
17
|
+
const weight = sampleCount === 0 ? 1 : this.alpha;
|
|
18
|
+
this.snapshot = {
|
|
19
|
+
latencyP99: this.mix(this.snapshot.latencyP99, point.latencyP99, weight),
|
|
20
|
+
latencyP95: this.mix(this.snapshot.latencyP95, point.latencyP95, weight),
|
|
21
|
+
errorRate: this.mix(this.snapshot.errorRate, point.errorRate, weight),
|
|
22
|
+
throughput: this.mix(this.snapshot.throughput, point.throughput, weight),
|
|
23
|
+
sampleCount: nextCount
|
|
24
|
+
};
|
|
25
|
+
return this.snapshot;
|
|
26
|
+
}
|
|
27
|
+
getSnapshot() {
|
|
28
|
+
return this.snapshot;
|
|
29
|
+
}
|
|
30
|
+
mix(current, next, weight) {
|
|
31
|
+
if (this.snapshot.sampleCount === 0) {
|
|
32
|
+
return next;
|
|
33
|
+
}
|
|
34
|
+
return current * (1 - weight) + next * weight;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
export {
|
|
38
|
+
BaselineCalculator
|
|
39
|
+
};
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// src/anomaly/root-cause-analyzer.ts
|
|
2
|
+
class RootCauseAnalyzer {
|
|
3
|
+
lookbackMs;
|
|
4
|
+
constructor(lookbackMs = 15 * 60 * 1000) {
|
|
5
|
+
this.lookbackMs = lookbackMs;
|
|
6
|
+
}
|
|
7
|
+
analyze(signal, deployments) {
|
|
8
|
+
const windowStart = new Date(signal.point.timestamp.getTime() - this.lookbackMs);
|
|
9
|
+
const candidates = deployments.filter((deployment) => deployment.deployedAt >= windowStart).sort((a, b) => b.deployedAt.getTime() - a.deployedAt.getTime());
|
|
10
|
+
const notes = [];
|
|
11
|
+
let culprit;
|
|
12
|
+
if (candidates.length > 0) {
|
|
13
|
+
culprit = candidates[0];
|
|
14
|
+
if (culprit) {
|
|
15
|
+
notes.push(`Closest deployment ${culprit.id} (${culprit.operation}) at ${culprit.deployedAt.toISOString()}`);
|
|
16
|
+
}
|
|
17
|
+
} else {
|
|
18
|
+
notes.push("No deployments found within lookback window.");
|
|
19
|
+
}
|
|
20
|
+
if (signal.type === "latency_regression") {
|
|
21
|
+
notes.push("Verify recent schema changes and external dependency latency.");
|
|
22
|
+
}
|
|
23
|
+
if (signal.type === "error_rate_spike") {
|
|
24
|
+
notes.push("Check SLO monitor for correlated incidents.");
|
|
25
|
+
}
|
|
26
|
+
return { signal, culprit, notes };
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
export {
|
|
30
|
+
RootCauseAnalyzer
|
|
31
|
+
};
|