@contractspec/lib.observability 3.7.17 → 3.7.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/anomaly/alert-manager.js +1 -23
- package/dist/anomaly/anomaly-detector.js +1 -101
- package/dist/anomaly/baseline-calculator.js +1 -39
- package/dist/anomaly/root-cause-analyzer.js +1 -31
- package/dist/index.js +5 -1128
- package/dist/intent/aggregator.js +1 -109
- package/dist/intent/detector.js +1 -132
- package/dist/logging/index.js +1 -41
- package/dist/metrics/index.js +1 -30
- package/dist/node/anomaly/alert-manager.js +1 -23
- package/dist/node/anomaly/anomaly-detector.js +1 -101
- package/dist/node/anomaly/baseline-calculator.js +1 -39
- package/dist/node/anomaly/root-cause-analyzer.js +1 -31
- package/dist/node/index.js +5 -1128
- package/dist/node/intent/aggregator.js +1 -109
- package/dist/node/intent/detector.js +1 -132
- package/dist/node/logging/index.js +1 -41
- package/dist/node/metrics/index.js +1 -30
- package/dist/node/pipeline/evolution-pipeline.js +1 -299
- package/dist/node/pipeline/lifecycle-pipeline.js +1 -85
- package/dist/node/telemetry/model-selection-telemetry.js +1 -30
- package/dist/node/telemetry/posthog-baseline-reader.js +5 -308
- package/dist/node/telemetry/posthog-telemetry.js +1 -60
- package/dist/node/tracing/core.js +1 -52
- package/dist/node/tracing/index.js +1 -75
- package/dist/node/tracing/middleware.js +1 -171
- package/dist/node/tracing/model-selection.span.js +1 -72
- package/dist/pipeline/evolution-pipeline.js +1 -299
- package/dist/pipeline/lifecycle-pipeline.js +1 -85
- package/dist/telemetry/model-selection-telemetry.js +1 -30
- package/dist/telemetry/posthog-baseline-reader.js +5 -308
- package/dist/telemetry/posthog-telemetry.js +1 -60
- package/dist/tracing/core.js +1 -52
- package/dist/tracing/index.js +1 -75
- package/dist/tracing/middleware.js +1 -171
- package/dist/tracing/model-selection.span.js +1 -72
- package/package.json +7 -7
|
@@ -1,110 +1,2 @@
|
|
|
1
1
|
// @bun
|
|
2
|
-
|
|
3
|
-
var DEFAULT_WINDOW_MS = 15 * 60 * 1000;
|
|
4
|
-
|
|
5
|
-
class IntentAggregator {
|
|
6
|
-
windowMs;
|
|
7
|
-
sequenceSampleSize;
|
|
8
|
-
samples = [];
|
|
9
|
-
constructor(options = {}) {
|
|
10
|
-
this.windowMs = options.windowMs ?? DEFAULT_WINDOW_MS;
|
|
11
|
-
this.sequenceSampleSize = options.sequenceSampleSize ?? 1000;
|
|
12
|
-
}
|
|
13
|
-
add(sample) {
|
|
14
|
-
this.samples.push(sample);
|
|
15
|
-
}
|
|
16
|
-
flush(now = new Date) {
|
|
17
|
-
const minTimestamp = now.getTime() - this.windowMs;
|
|
18
|
-
const windowSamples = this.samples.filter((sample) => sample.timestamp.getTime() >= minTimestamp);
|
|
19
|
-
this.samples.length = 0;
|
|
20
|
-
const metrics = this.aggregateMetrics(windowSamples);
|
|
21
|
-
const sequences = this.buildSequences(windowSamples);
|
|
22
|
-
const timestamps = windowSamples.map((sample) => sample.timestamp.getTime());
|
|
23
|
-
return {
|
|
24
|
-
metrics,
|
|
25
|
-
sequences,
|
|
26
|
-
sampleCount: windowSamples.length,
|
|
27
|
-
windowStart: timestamps.length ? new Date(Math.min(...timestamps)) : undefined,
|
|
28
|
-
windowEnd: timestamps.length ? new Date(Math.max(...timestamps)) : undefined
|
|
29
|
-
};
|
|
30
|
-
}
|
|
31
|
-
aggregateMetrics(samples) {
|
|
32
|
-
if (!samples.length)
|
|
33
|
-
return [];
|
|
34
|
-
const groups = new Map;
|
|
35
|
-
for (const sample of samples) {
|
|
36
|
-
const key = `${sample.operation.name}.v${sample.operation.version}`;
|
|
37
|
-
const arr = groups.get(key) ?? [];
|
|
38
|
-
arr.push(sample);
|
|
39
|
-
groups.set(key, arr);
|
|
40
|
-
}
|
|
41
|
-
return [...groups.values()].map((group) => {
|
|
42
|
-
const first = group[0];
|
|
43
|
-
if (!first)
|
|
44
|
-
throw new Error("Empty group in aggregation");
|
|
45
|
-
const durations = group.map((s) => s.durationMs).sort((a, b) => a - b);
|
|
46
|
-
const errors = group.filter((s) => !s.success);
|
|
47
|
-
const totalCalls = group.length;
|
|
48
|
-
const topErrors = errors.reduce((acc, sample) => {
|
|
49
|
-
if (!sample.errorCode)
|
|
50
|
-
return acc;
|
|
51
|
-
acc[sample.errorCode] = (acc[sample.errorCode] ?? 0) + 1;
|
|
52
|
-
return acc;
|
|
53
|
-
}, {});
|
|
54
|
-
const timestamps = group.map((s) => s.timestamp.getTime());
|
|
55
|
-
return {
|
|
56
|
-
operation: first.operation,
|
|
57
|
-
totalCalls,
|
|
58
|
-
successRate: (totalCalls - errors.length) / totalCalls,
|
|
59
|
-
errorRate: errors.length / totalCalls,
|
|
60
|
-
averageLatencyMs: durations.reduce((sum, value) => sum + value, 0) / totalCalls,
|
|
61
|
-
p95LatencyMs: percentile(durations, 0.95),
|
|
62
|
-
p99LatencyMs: percentile(durations, 0.99),
|
|
63
|
-
maxLatencyMs: Math.max(...durations),
|
|
64
|
-
windowStart: new Date(Math.min(...timestamps)),
|
|
65
|
-
windowEnd: new Date(Math.max(...timestamps)),
|
|
66
|
-
topErrors
|
|
67
|
-
};
|
|
68
|
-
});
|
|
69
|
-
}
|
|
70
|
-
buildSequences(samples) {
|
|
71
|
-
const byTrace = new Map;
|
|
72
|
-
for (const sample of samples.slice(-this.sequenceSampleSize)) {
|
|
73
|
-
if (!sample.traceId)
|
|
74
|
-
continue;
|
|
75
|
-
const arr = byTrace.get(sample.traceId) ?? [];
|
|
76
|
-
arr.push(sample);
|
|
77
|
-
byTrace.set(sample.traceId, arr);
|
|
78
|
-
}
|
|
79
|
-
const sequences = {};
|
|
80
|
-
for (const events of byTrace.values()) {
|
|
81
|
-
const ordered = events.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());
|
|
82
|
-
const steps = ordered.map((event) => event.operation.name);
|
|
83
|
-
if (steps.length < 2)
|
|
84
|
-
continue;
|
|
85
|
-
const key = `${steps.join(">")}@${ordered[0]?.tenantId ?? "global"}`;
|
|
86
|
-
const existing = sequences[key];
|
|
87
|
-
if (existing) {
|
|
88
|
-
existing.count += 1;
|
|
89
|
-
} else {
|
|
90
|
-
sequences[key] = {
|
|
91
|
-
steps,
|
|
92
|
-
tenantId: ordered[0]?.tenantId,
|
|
93
|
-
count: 1
|
|
94
|
-
};
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
return Object.values(sequences).sort((a, b) => b.count - a.count);
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
function percentile(values, ratio) {
|
|
101
|
-
if (!values.length)
|
|
102
|
-
return 0;
|
|
103
|
-
if (values.length === 1)
|
|
104
|
-
return values[0] ?? 0;
|
|
105
|
-
const index = Math.min(values.length - 1, Math.floor(ratio * values.length));
|
|
106
|
-
return values[index] ?? 0;
|
|
107
|
-
}
|
|
108
|
-
export {
|
|
109
|
-
IntentAggregator
|
|
110
|
-
};
|
|
2
|
+
/** Default aggregation window: 15 minutes, expressed in milliseconds. */
const DEFAULT_WINDOW_MS = 15 * 60 * 1000;

/**
 * Computes the minimum and maximum of a numeric array in a single pass.
 *
 * Used instead of `Math.min(...values)` / `Math.max(...values)`: spreading a
 * large array into call arguments throws a RangeError once the engine's
 * argument limit is exceeded. For an empty array the result is
 * `{ min: Infinity, max: -Infinity }`, matching the zero-argument behaviour
 * of `Math.min()` / `Math.max()`.
 *
 * @param {number[]} values
 * @returns {{ min: number, max: number }}
 */
function minMax(values) {
  let min = Infinity;
  let max = -Infinity;
  for (const value of values) {
    min = Math.min(min, value);
    max = Math.max(max, value);
  }
  return { min, max };
}

/**
 * Picks the element at index `floor(ratio * length)` (clamped to the last
 * index) from an ascending-sorted array.
 *
 * @param {number[]} values - Values already sorted in ascending order.
 * @param {number} ratio - Fraction of the array length, e.g. 0.95.
 * @returns {number} The selected value, or 0 for an empty array.
 */
function percentile(values, ratio) {
  if (values.length === 0) {
    return 0;
  }
  if (values.length === 1) {
    return values[0] ?? 0;
  }
  const index = Math.min(values.length - 1, Math.floor(ratio * values.length));
  return values[index] ?? 0;
}

/**
 * Buffers operation samples and, on `flush`, turns the samples that fall
 * inside the time window into per-operation metrics and per-trace call
 * sequences.
 */
class IntentAggregator {
  windowMs;
  sequenceSampleSize;
  samples = [];

  /**
   * @param {{ windowMs?: number, sequenceSampleSize?: number }} [options]
   *   `windowMs` defaults to 15 minutes; `sequenceSampleSize` defaults to 1000.
   */
  constructor(options = {}) {
    this.windowMs = options.windowMs ?? DEFAULT_WINDOW_MS;
    this.sequenceSampleSize = options.sequenceSampleSize ?? 1000;
  }

  /** Appends one sample to the internal buffer. */
  add(sample) {
    this.samples.push(sample);
  }

  /**
   * Aggregates every buffered sample whose timestamp is at or after
   * `now - windowMs`, then empties the buffer (older samples are discarded).
   *
   * @param {Date} [now] - Reference time; defaults to the current time.
   * @returns {{ metrics: object[], sequences: object[], sampleCount: number,
   *   windowStart: (Date|undefined), windowEnd: (Date|undefined) }}
   *   `windowStart` / `windowEnd` are the earliest / latest sample timestamps,
   *   or `undefined` when no sample fell inside the window.
   */
  flush(now = new Date()) {
    const cutoff = now.getTime() - this.windowMs;
    const windowSamples = this.samples.filter(
      (sample) => sample.timestamp.getTime() >= cutoff,
    );
    this.samples.length = 0;

    const metrics = this.aggregateMetrics(windowSamples);
    const sequences = this.buildSequences(windowSamples);
    const hasSamples = windowSamples.length > 0;
    const { min, max } = minMax(
      windowSamples.map((sample) => sample.timestamp.getTime()),
    );

    return {
      metrics,
      sequences,
      sampleCount: windowSamples.length,
      windowStart: hasSamples ? new Date(min) : undefined,
      windowEnd: hasSamples ? new Date(max) : undefined,
    };
  }

  /**
   * Groups samples by `operation.name` + `operation.version` and computes
   * call counts, success/error rates, latency statistics, the covered time
   * range and a per-error-code count for each group.
   *
   * @param {object[]} samples
   * @returns {object[]} One metrics record per operation, in first-seen order.
   */
  aggregateMetrics(samples) {
    if (samples.length === 0) {
      return [];
    }

    const groups = new Map();
    for (const sample of samples) {
      const key = `${sample.operation.name}.v${sample.operation.version}`;
      const bucket = groups.get(key);
      if (bucket) {
        bucket.push(sample);
      } else {
        groups.set(key, [sample]);
      }
    }

    return [...groups.values()].map((group) => {
      // Every bucket is created with its first sample, so group[0] exists.
      const { operation } = group[0];
      const totalCalls = group.length;
      const durations = group.map((s) => s.durationMs).sort((a, b) => a - b);
      const failures = group.filter((s) => !s.success);

      // Count on a prototype-less object so codes such as "constructor" or
      // "toString" do not pick up inherited Object.prototype members.
      const errorCounts = Object.create(null);
      for (const failure of failures) {
        if (!failure.errorCode) {
          continue;
        }
        errorCounts[failure.errorCode] = (errorCounts[failure.errorCode] ?? 0) + 1;
      }

      const latency = minMax(durations);
      const range = minMax(group.map((s) => s.timestamp.getTime()));

      return {
        operation,
        totalCalls,
        successRate: (totalCalls - failures.length) / totalCalls,
        errorRate: failures.length / totalCalls,
        averageLatencyMs: durations.reduce((sum, value) => sum + value, 0) / totalCalls,
        p95LatencyMs: percentile(durations, 0.95),
        p99LatencyMs: percentile(durations, 0.99),
        maxLatencyMs: latency.max,
        windowStart: new Date(range.min),
        windowEnd: new Date(range.max),
        // Copy back onto an ordinary object so callers keep receiving a plain record.
        topErrors: { ...errorCounts },
      };
    });
  }

  /**
   * Builds operation-name sequences per trace from the last
   * `sequenceSampleSize` samples. Samples without a `traceId` are ignored and
   * traces with fewer than two steps are dropped. Identical step lists for the
   * same tenant (taken from the earliest event of the trace, "global" when
   * absent) are merged and counted.
   *
   * @param {object[]} samples
   * @returns {{ steps: string[], tenantId: *, count: number }[]}
   *   Sequences sorted by descending count.
   */
  buildSequences(samples) {
    const byTrace = new Map();
    for (const sample of samples.slice(-this.sequenceSampleSize)) {
      if (!sample.traceId) {
        continue;
      }
      const events = byTrace.get(sample.traceId);
      if (events) {
        events.push(sample);
      } else {
        byTrace.set(sample.traceId, [sample]);
      }
    }

    const sequences = new Map();
    for (const events of byTrace.values()) {
      if (events.length < 2) {
        continue;
      }
      events.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());
      const steps = events.map((event) => event.operation.name);
      const tenantId = events[0]?.tenantId;
      const key = `${steps.join(">")}@${tenantId ?? "global"}`;
      const existing = sequences.get(key);
      if (existing) {
        existing.count += 1;
      } else {
        sequences.set(key, { steps, tenantId, count: 1 });
      }
    }

    return [...sequences.values()].sort((a, b) => b.count - a.count);
  }
}

export { IntentAggregator };
|
package/dist/intent/detector.js
CHANGED
|
@@ -1,133 +1,2 @@
|
|
|
1
1
|
// @bun
|
|
2
|
-
|
|
3
|
-
import { randomUUID } from "crypto";
|
|
4
|
-
var DEFAULTS = {
|
|
5
|
-
errorRateThreshold: 0.05,
|
|
6
|
-
latencyP99ThresholdMs: 750,
|
|
7
|
-
throughputDropThreshold: 0.3,
|
|
8
|
-
minSequenceLength: 3
|
|
9
|
-
};
|
|
10
|
-
|
|
11
|
-
class IntentDetector {
|
|
12
|
-
options;
|
|
13
|
-
constructor(options = {}) {
|
|
14
|
-
this.options = {
|
|
15
|
-
errorRateThreshold: options.errorRateThreshold ?? DEFAULTS.errorRateThreshold,
|
|
16
|
-
latencyP99ThresholdMs: options.latencyP99ThresholdMs ?? DEFAULTS.latencyP99ThresholdMs,
|
|
17
|
-
throughputDropThreshold: options.throughputDropThreshold ?? DEFAULTS.throughputDropThreshold,
|
|
18
|
-
minSequenceLength: options.minSequenceLength ?? DEFAULTS.minSequenceLength
|
|
19
|
-
};
|
|
20
|
-
}
|
|
21
|
-
detectFromMetrics(current, previous) {
|
|
22
|
-
const signals = [];
|
|
23
|
-
const baseline = new Map((previous ?? []).map((metric) => [
|
|
24
|
-
`${metric.operation.name}.v${metric.operation.version}`,
|
|
25
|
-
metric
|
|
26
|
-
]));
|
|
27
|
-
for (const metric of current) {
|
|
28
|
-
if (metric.errorRate >= this.options.errorRateThreshold) {
|
|
29
|
-
signals.push({
|
|
30
|
-
id: randomUUID(),
|
|
31
|
-
type: "error-spike",
|
|
32
|
-
operation: metric.operation,
|
|
33
|
-
confidence: Math.min(1, metric.errorRate / this.options.errorRateThreshold),
|
|
34
|
-
description: `Error rate ${metric.errorRate.toFixed(2)} exceeded threshold`,
|
|
35
|
-
metadata: {
|
|
36
|
-
errorRate: metric.errorRate,
|
|
37
|
-
topErrors: metric.topErrors
|
|
38
|
-
},
|
|
39
|
-
evidence: [
|
|
40
|
-
{
|
|
41
|
-
type: "metric",
|
|
42
|
-
description: "error-rate",
|
|
43
|
-
data: {
|
|
44
|
-
errorRate: metric.errorRate,
|
|
45
|
-
threshold: this.options.errorRateThreshold
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
]
|
|
49
|
-
});
|
|
50
|
-
continue;
|
|
51
|
-
}
|
|
52
|
-
if (metric.p99LatencyMs >= this.options.latencyP99ThresholdMs) {
|
|
53
|
-
signals.push({
|
|
54
|
-
id: randomUUID(),
|
|
55
|
-
type: "latency-regression",
|
|
56
|
-
operation: metric.operation,
|
|
57
|
-
confidence: Math.min(1, metric.p99LatencyMs / this.options.latencyP99ThresholdMs),
|
|
58
|
-
description: `P99 latency ${metric.p99LatencyMs}ms exceeded threshold`,
|
|
59
|
-
metadata: { p99LatencyMs: metric.p99LatencyMs },
|
|
60
|
-
evidence: [
|
|
61
|
-
{
|
|
62
|
-
type: "metric",
|
|
63
|
-
description: "p99-latency",
|
|
64
|
-
data: {
|
|
65
|
-
p99LatencyMs: metric.p99LatencyMs,
|
|
66
|
-
threshold: this.options.latencyP99ThresholdMs
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
]
|
|
70
|
-
});
|
|
71
|
-
continue;
|
|
72
|
-
}
|
|
73
|
-
const base = baseline.get(`${metric.operation.name}.v${metric.operation.version}`);
|
|
74
|
-
if (base) {
|
|
75
|
-
const drop = (base.totalCalls - metric.totalCalls) / Math.max(base.totalCalls, 1);
|
|
76
|
-
if (drop >= this.options.throughputDropThreshold) {
|
|
77
|
-
signals.push({
|
|
78
|
-
id: randomUUID(),
|
|
79
|
-
type: "throughput-drop",
|
|
80
|
-
operation: metric.operation,
|
|
81
|
-
confidence: Math.min(1, drop / this.options.throughputDropThreshold),
|
|
82
|
-
description: `Throughput dropped ${(drop * 100).toFixed(1)}% vs baseline`,
|
|
83
|
-
metadata: {
|
|
84
|
-
baselineCalls: base.totalCalls,
|
|
85
|
-
currentCalls: metric.totalCalls
|
|
86
|
-
},
|
|
87
|
-
evidence: [
|
|
88
|
-
{
|
|
89
|
-
type: "metric",
|
|
90
|
-
description: "throughput-drop",
|
|
91
|
-
data: {
|
|
92
|
-
baselineCalls: base.totalCalls,
|
|
93
|
-
currentCalls: metric.totalCalls
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
]
|
|
97
|
-
});
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
}
|
|
101
|
-
return signals;
|
|
102
|
-
}
|
|
103
|
-
detectSequentialIntents(sequences) {
|
|
104
|
-
const signals = [];
|
|
105
|
-
for (const sequence of sequences) {
|
|
106
|
-
if (sequence.steps.length < this.options.minSequenceLength)
|
|
107
|
-
continue;
|
|
108
|
-
const description = sequence.steps.join(" \u2192 ");
|
|
109
|
-
signals.push({
|
|
110
|
-
id: randomUUID(),
|
|
111
|
-
type: "missing-workflow-step",
|
|
112
|
-
confidence: 0.6,
|
|
113
|
-
description: `Repeated workflow detected: ${description}`,
|
|
114
|
-
metadata: {
|
|
115
|
-
steps: sequence.steps,
|
|
116
|
-
tenantId: sequence.tenantId,
|
|
117
|
-
occurrences: sequence.count
|
|
118
|
-
},
|
|
119
|
-
evidence: [
|
|
120
|
-
{
|
|
121
|
-
type: "sequence",
|
|
122
|
-
description: "sequential-calls",
|
|
123
|
-
data: { steps: sequence.steps, count: sequence.count }
|
|
124
|
-
}
|
|
125
|
-
]
|
|
126
|
-
});
|
|
127
|
-
}
|
|
128
|
-
return signals;
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
export {
|
|
132
|
-
IntentDetector
|
|
133
|
-
};
|
|
2
|
+
import { randomUUID } from "crypto";

/** Thresholds applied when the caller does not override them. */
const DEFAULTS = {
  errorRateThreshold: 0.05,
  latencyP99ThresholdMs: 750,
  throughputDropThreshold: 0.3,
  minSequenceLength: 3,
};

/** Builds the `name.vVersion` key used to match current metrics against the baseline. */
function operationKey(operation) {
  return `${operation.name}.v${operation.version}`;
}

/**
 * Turns aggregated operation metrics and call sequences into intent signals.
 */
class IntentDetector {
  options;

  /**
   * @param {{ errorRateThreshold?: number, latencyP99ThresholdMs?: number,
   *   throughputDropThreshold?: number, minSequenceLength?: number }} [options]
   *   Any omitted (null/undefined) value falls back to `DEFAULTS`.
   */
  constructor(options = {}) {
    this.options = {
      errorRateThreshold: options.errorRateThreshold ?? DEFAULTS.errorRateThreshold,
      latencyP99ThresholdMs: options.latencyP99ThresholdMs ?? DEFAULTS.latencyP99ThresholdMs,
      throughputDropThreshold: options.throughputDropThreshold ?? DEFAULTS.throughputDropThreshold,
      minSequenceLength: options.minSequenceLength ?? DEFAULTS.minSequenceLength,
    };
  }

  /**
   * Emits at most one signal per metric, checked in this order: error spike,
   * P99 latency regression, then throughput drop versus the matching entry of
   * `previous`. Each signal gets a fresh random UUID.
   *
   * @param {object[]} current - Metrics of the current window.
   * @param {object[]} [previous] - Metrics of the baseline window; when
   *   omitted, no throughput-drop signal can be produced.
   * @returns {object[]} Detected signals, in the order of `current`.
   */
  detectFromMetrics(current, previous) {
    const { errorRateThreshold, latencyP99ThresholdMs, throughputDropThreshold } = this.options;
    const baseline = new Map(
      (previous ?? []).map((metric) => [operationKey(metric.operation), metric]),
    );
    const signals = [];

    for (const metric of current) {
      if (metric.errorRate >= errorRateThreshold) {
        signals.push({
          id: randomUUID(),
          type: "error-spike",
          operation: metric.operation,
          confidence: Math.min(1, metric.errorRate / errorRateThreshold),
          description: `Error rate ${metric.errorRate.toFixed(2)} exceeded threshold`,
          metadata: { errorRate: metric.errorRate, topErrors: metric.topErrors },
          evidence: [
            {
              type: "metric",
              description: "error-rate",
              data: { errorRate: metric.errorRate, threshold: errorRateThreshold },
            },
          ],
        });
        continue;
      }

      if (metric.p99LatencyMs >= latencyP99ThresholdMs) {
        signals.push({
          id: randomUUID(),
          type: "latency-regression",
          operation: metric.operation,
          confidence: Math.min(1, metric.p99LatencyMs / latencyP99ThresholdMs),
          description: `P99 latency ${metric.p99LatencyMs}ms exceeded threshold`,
          metadata: { p99LatencyMs: metric.p99LatencyMs },
          evidence: [
            {
              type: "metric",
              description: "p99-latency",
              data: { p99LatencyMs: metric.p99LatencyMs, threshold: latencyP99ThresholdMs },
            },
          ],
        });
        continue;
      }

      const base = baseline.get(operationKey(metric.operation));
      if (!base) {
        continue;
      }
      // Divide by at least 1 so a baseline of zero calls cannot divide by zero.
      const drop = (base.totalCalls - metric.totalCalls) / Math.max(base.totalCalls, 1);
      if (drop < throughputDropThreshold) {
        continue;
      }
      signals.push({
        id: randomUUID(),
        type: "throughput-drop",
        operation: metric.operation,
        confidence: Math.min(1, drop / throughputDropThreshold),
        // Kept on one line: a line break inside the template literal would
        // become part of the emitted description.
        description: `Throughput dropped ${(drop * 100).toFixed(1)}% vs baseline`,
        metadata: { baselineCalls: base.totalCalls, currentCalls: metric.totalCalls },
        evidence: [
          {
            type: "metric",
            description: "throughput-drop",
            data: { baselineCalls: base.totalCalls, currentCalls: metric.totalCalls },
          },
        ],
      });
    }

    return signals;
  }

  /**
   * Emits a "missing-workflow-step" signal (fixed confidence 0.6) for every
   * sequence that has at least `minSequenceLength` steps.
   *
   * @param {{ steps: string[], tenantId?: *, count: number }[]} sequences
   * @returns {object[]} One signal per qualifying sequence.
   */
  detectSequentialIntents(sequences) {
    return sequences
      .filter((sequence) => sequence.steps.length >= this.options.minSequenceLength)
      .map((sequence) => ({
        id: randomUUID(),
        type: "missing-workflow-step",
        confidence: 0.6,
        description: `Repeated workflow detected: ${sequence.steps.join(" \u2192 ")}`,
        metadata: {
          steps: sequence.steps,
          tenantId: sequence.tenantId,
          occurrences: sequence.count,
        },
        evidence: [
          {
            type: "sequence",
            description: "sequential-calls",
            data: { steps: sequence.steps, count: sequence.count },
          },
        ],
      }));
  }
}

export { IntentDetector };
|
package/dist/logging/index.js
CHANGED
|
@@ -1,42 +1,2 @@
|
|
|
1
1
|
// @bun
|
|
2
|
-
|
|
3
|
-
import { context, trace } from "@opentelemetry/api";
|
|
4
|
-
|
|
5
|
-
class Logger {
|
|
6
|
-
serviceName;
|
|
7
|
-
constructor(serviceName) {
|
|
8
|
-
this.serviceName = serviceName;
|
|
9
|
-
}
|
|
10
|
-
log(level, message, meta = {}) {
|
|
11
|
-
const span = trace.getSpan(context.active());
|
|
12
|
-
const traceId = span?.spanContext().traceId;
|
|
13
|
-
const spanId = span?.spanContext().spanId;
|
|
14
|
-
const entry = {
|
|
15
|
-
timestamp: new Date().toISOString(),
|
|
16
|
-
service: this.serviceName,
|
|
17
|
-
level,
|
|
18
|
-
message,
|
|
19
|
-
traceId,
|
|
20
|
-
spanId,
|
|
21
|
-
...meta
|
|
22
|
-
};
|
|
23
|
-
console.log(JSON.stringify(entry));
|
|
24
|
-
}
|
|
25
|
-
debug(message, meta) {
|
|
26
|
-
this.log("debug", message, meta);
|
|
27
|
-
}
|
|
28
|
-
info(message, meta) {
|
|
29
|
-
this.log("info", message, meta);
|
|
30
|
-
}
|
|
31
|
-
warn(message, meta) {
|
|
32
|
-
this.log("warn", message, meta);
|
|
33
|
-
}
|
|
34
|
-
error(message, meta) {
|
|
35
|
-
this.log("error", message, meta);
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
var logger = new Logger(process.env.OTEL_SERVICE_NAME || "unknown-service");
|
|
39
|
-
export {
|
|
40
|
-
logger,
|
|
41
|
-
Logger
|
|
42
|
-
};
|
|
2
|
+
import { context, trace } from "@opentelemetry/api";

/**
 * Minimal structured logger: every entry is written to `console.log` as one
 * JSON string, tagged with the service name and, when a span is active in the
 * current OpenTelemetry context, that span's trace and span ids.
 */
class Logger {
  serviceName;

  /** @param {string} serviceName - Value emitted as the `service` field. */
  constructor(serviceName) {
    this.serviceName = serviceName;
  }

  /**
   * Writes one log entry.
   *
   * @param {string} level - Severity label, emitted as-is.
   * @param {string} message - Log message.
   * @param {object} [meta] - Extra fields. They are spread last, so a key that
   *   matches a built-in field (e.g. `timestamp`) replaces that field.
   */
  log(level, message, meta = {}) {
    const activeSpan = trace.getSpan(context.active());
    const traceId = activeSpan?.spanContext().traceId;
    const spanId = activeSpan?.spanContext().spanId;
    const timestamp = new Date().toISOString();

    const record = {
      timestamp,
      service: this.serviceName,
      level,
      message,
      traceId,
      spanId,
      ...meta,
    };
    console.log(JSON.stringify(record));
  }

  /** Logs at "debug" level. */
  debug(message, meta) {
    this.log("debug", message, meta);
  }

  /** Logs at "info" level. */
  info(message, meta) {
    this.log("info", message, meta);
  }

  /** Logs at "warn" level. */
  warn(message, meta) {
    this.log("warn", message, meta);
  }

  /** Logs at "error" level. */
  error(message, meta) {
    this.log("error", message, meta);
  }
}

// Shared instance; `||` (not `??`) so an empty OTEL_SERVICE_NAME also falls back.
const logger = new Logger(process.env.OTEL_SERVICE_NAME || "unknown-service");

export { logger, Logger };
|
package/dist/metrics/index.js
CHANGED
|
@@ -1,31 +1,2 @@
|
|
|
1
1
|
// @bun
|
|
2
|
-
|
|
3
|
-
import {
|
|
4
|
-
metrics
|
|
5
|
-
} from "@opentelemetry/api";
|
|
6
|
-
var DEFAULT_METER_NAME = "@contractspec/lib.observability";
|
|
7
|
-
function getMeter(name = DEFAULT_METER_NAME) {
|
|
8
|
-
return metrics.getMeter(name);
|
|
9
|
-
}
|
|
10
|
-
function createCounter(name, description, meterName) {
|
|
11
|
-
return getMeter(meterName).createCounter(name, { description });
|
|
12
|
-
}
|
|
13
|
-
function createUpDownCounter(name, description, meterName) {
|
|
14
|
-
return getMeter(meterName).createUpDownCounter(name, { description });
|
|
15
|
-
}
|
|
16
|
-
function createHistogram(name, description, meterName) {
|
|
17
|
-
return getMeter(meterName).createHistogram(name, { description });
|
|
18
|
-
}
|
|
19
|
-
var standardMetrics = {
|
|
20
|
-
httpRequests: createCounter("http_requests_total", "Total HTTP requests"),
|
|
21
|
-
httpDuration: createHistogram("http_request_duration_seconds", "HTTP request duration"),
|
|
22
|
-
operationErrors: createCounter("operation_errors_total", "Total operation errors"),
|
|
23
|
-
workflowDuration: createHistogram("workflow_duration_seconds", "Workflow execution duration")
|
|
24
|
-
};
|
|
25
|
-
export {
|
|
26
|
-
standardMetrics,
|
|
27
|
-
getMeter,
|
|
28
|
-
createUpDownCounter,
|
|
29
|
-
createHistogram,
|
|
30
|
-
createCounter
|
|
31
|
-
};
|
|
2
|
+
import { metrics } from "@opentelemetry/api";

/** Meter name used when a caller does not supply one. */
const DEFAULT_METER_NAME = "@contractspec/lib.observability";

/**
 * Looks up a meter through the OpenTelemetry metrics API.
 *
 * @param {string} [name] - Meter name; defaults to this package's name.
 */
function getMeter(name = DEFAULT_METER_NAME) {
  return metrics.getMeter(name);
}

/**
 * Creates a counter instrument on the named (or default) meter.
 *
 * @param {string} name - Instrument name.
 * @param {string} description - Instrument description.
 * @param {string} [meterName] - Meter to use; the default meter when omitted.
 */
function createCounter(name, description, meterName) {
  const meter = getMeter(meterName);
  return meter.createCounter(name, { description });
}

/**
 * Creates an up/down counter instrument on the named (or default) meter.
 *
 * @param {string} name - Instrument name.
 * @param {string} description - Instrument description.
 * @param {string} [meterName] - Meter to use; the default meter when omitted.
 */
function createUpDownCounter(name, description, meterName) {
  const meter = getMeter(meterName);
  return meter.createUpDownCounter(name, { description });
}

/**
 * Creates a histogram instrument on the named (or default) meter.
 *
 * @param {string} name - Instrument name.
 * @param {string} description - Instrument description.
 * @param {string} [meterName] - Meter to use; the default meter when omitted.
 */
function createHistogram(name, description, meterName) {
  const meter = getMeter(meterName);
  return meter.createHistogram(name, { description });
}

// Instruments are created once, when this module is first evaluated.
const httpRequests = createCounter("http_requests_total", "Total HTTP requests");
const httpDuration = createHistogram("http_request_duration_seconds", "HTTP request duration");
const operationErrors = createCounter("operation_errors_total", "Total operation errors");
const workflowDuration = createHistogram("workflow_duration_seconds", "Workflow execution duration");

/** Ready-made instruments registered on the default meter. */
const standardMetrics = {
  httpRequests,
  httpDuration,
  operationErrors,
  workflowDuration,
};

export {
  standardMetrics,
  getMeter,
  createUpDownCounter,
  createHistogram,
  createCounter,
};
|
|
@@ -1,23 +1 @@
|
|
|
1
|
-
|
|
2
|
-
class AlertManager {
|
|
3
|
-
options;
|
|
4
|
-
cooldownMs;
|
|
5
|
-
lastAlert = new Map;
|
|
6
|
-
constructor(options) {
|
|
7
|
-
this.options = options;
|
|
8
|
-
this.cooldownMs = options.cooldownMs ?? 60000;
|
|
9
|
-
}
|
|
10
|
-
async notify(signal, analysis) {
|
|
11
|
-
const key = `${signal.type}:${analysis.culprit?.id ?? "none"}`;
|
|
12
|
-
const now = Date.now();
|
|
13
|
-
const last = this.lastAlert.get(key) ?? 0;
|
|
14
|
-
if (now - last < this.cooldownMs) {
|
|
15
|
-
return;
|
|
16
|
-
}
|
|
17
|
-
await this.options.transport({ signal, analysis });
|
|
18
|
-
this.lastAlert.set(key, now);
|
|
19
|
-
}
|
|
20
|
-
}
|
|
21
|
-
export {
|
|
22
|
-
AlertManager
|
|
23
|
-
};
|
|
1
|
+
/**
 * Forwards anomaly alerts to a transport callback while suppressing repeats
 * of the same alert during a cooldown period.
 */
class AlertManager {
  options;
  cooldownMs;
  lastAlert = new Map();

  /**
   * @param {{ transport: Function, cooldownMs?: number }} options
   *   `transport` receives `{ signal, analysis }`; `cooldownMs` defaults to
   *   60000 when null or undefined.
   */
  constructor(options) {
    this.options = options;
    this.cooldownMs = options.cooldownMs ?? 60000;
  }

  /**
   * Sends the alert unless one with the same key (signal type plus culprit id,
   * or "none" without a culprit) was sent less than `cooldownMs` ago.
   *
   * The send time is recorded only after the transport call resolves, so a
   * rejected transport leaves the cooldown for that key untouched and the
   * rejection propagates to the caller.
   *
   * @param {{ type: string }} signal
   * @param {{ culprit?: { id: * } }} analysis
   * @returns {Promise<void>}
   */
  async notify(signal, analysis) {
    const dedupeKey = `${signal.type}:${analysis.culprit?.id ?? "none"}`;
    const startedAt = Date.now();
    const previousAt = this.lastAlert.get(dedupeKey) ?? 0;
    const elapsed = startedAt - previousAt;

    if (elapsed >= this.cooldownMs) {
      await this.options.transport({ signal, analysis });
      this.lastAlert.set(dedupeKey, startedAt);
    }
  }
}

export { AlertManager };
|
|
@@ -1,101 +1 @@
|
|
|
1
|
-
|
|
2
|
-
class BaselineCalculator {
|
|
3
|
-
alpha;
|
|
4
|
-
snapshot = {
|
|
5
|
-
latencyP99: 0,
|
|
6
|
-
latencyP95: 0,
|
|
7
|
-
errorRate: 0,
|
|
8
|
-
throughput: 0,
|
|
9
|
-
sampleCount: 0
|
|
10
|
-
};
|
|
11
|
-
constructor(alpha = 0.2) {
|
|
12
|
-
this.alpha = alpha;
|
|
13
|
-
}
|
|
14
|
-
update(point) {
|
|
15
|
-
const { sampleCount } = this.snapshot;
|
|
16
|
-
const nextCount = sampleCount + 1;
|
|
17
|
-
const weight = sampleCount === 0 ? 1 : this.alpha;
|
|
18
|
-
this.snapshot = {
|
|
19
|
-
latencyP99: this.mix(this.snapshot.latencyP99, point.latencyP99, weight),
|
|
20
|
-
latencyP95: this.mix(this.snapshot.latencyP95, point.latencyP95, weight),
|
|
21
|
-
errorRate: this.mix(this.snapshot.errorRate, point.errorRate, weight),
|
|
22
|
-
throughput: this.mix(this.snapshot.throughput, point.throughput, weight),
|
|
23
|
-
sampleCount: nextCount
|
|
24
|
-
};
|
|
25
|
-
return this.snapshot;
|
|
26
|
-
}
|
|
27
|
-
getSnapshot() {
|
|
28
|
-
return this.snapshot;
|
|
29
|
-
}
|
|
30
|
-
mix(current, next, weight) {
|
|
31
|
-
if (this.snapshot.sampleCount === 0) {
|
|
32
|
-
return next;
|
|
33
|
-
}
|
|
34
|
-
return current * (1 - weight) + next * weight;
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
// src/anomaly/anomaly-detector.ts
|
|
39
|
-
class AnomalyDetector {
|
|
40
|
-
baseline;
|
|
41
|
-
thresholds = {
|
|
42
|
-
errorRateDelta: 0.5,
|
|
43
|
-
latencyDelta: 0.35,
|
|
44
|
-
throughputDrop: 0.4,
|
|
45
|
-
minSamples: 10
|
|
46
|
-
};
|
|
47
|
-
constructor(options = {}) {
|
|
48
|
-
this.baseline = new BaselineCalculator;
|
|
49
|
-
this.thresholds = { ...this.thresholds, ...options };
|
|
50
|
-
}
|
|
51
|
-
evaluate(point) {
|
|
52
|
-
const baselineSnapshot = this.baseline.update(point);
|
|
53
|
-
if (baselineSnapshot.sampleCount < this.thresholds.minSamples) {
|
|
54
|
-
return [];
|
|
55
|
-
}
|
|
56
|
-
const signals = [];
|
|
57
|
-
const errorDelta = this.relativeDelta(point.errorRate, baselineSnapshot.errorRate);
|
|
58
|
-
if (errorDelta > this.thresholds.errorRateDelta) {
|
|
59
|
-
signals.push({
|
|
60
|
-
type: "error_rate_spike",
|
|
61
|
-
delta: errorDelta,
|
|
62
|
-
point,
|
|
63
|
-
baseline: baselineSnapshot
|
|
64
|
-
});
|
|
65
|
-
}
|
|
66
|
-
const latencyDelta = this.relativeDelta(point.latencyP99, baselineSnapshot.latencyP99);
|
|
67
|
-
if (latencyDelta > this.thresholds.latencyDelta) {
|
|
68
|
-
signals.push({
|
|
69
|
-
type: "latency_regression",
|
|
70
|
-
delta: latencyDelta,
|
|
71
|
-
point,
|
|
72
|
-
baseline: baselineSnapshot
|
|
73
|
-
});
|
|
74
|
-
}
|
|
75
|
-
const throughputDelta = this.relativeDrop(point.throughput, baselineSnapshot.throughput);
|
|
76
|
-
if (throughputDelta > this.thresholds.throughputDrop) {
|
|
77
|
-
signals.push({
|
|
78
|
-
type: "throughput_drop",
|
|
79
|
-
delta: throughputDelta,
|
|
80
|
-
point,
|
|
81
|
-
baseline: baselineSnapshot
|
|
82
|
-
});
|
|
83
|
-
}
|
|
84
|
-
return signals;
|
|
85
|
-
}
|
|
86
|
-
relativeDelta(value, baseline) {
|
|
87
|
-
if (baseline === 0) {
|
|
88
|
-
return 0;
|
|
89
|
-
}
|
|
90
|
-
return (value - baseline) / baseline;
|
|
91
|
-
}
|
|
92
|
-
relativeDrop(value, baseline) {
|
|
93
|
-
if (baseline === 0) {
|
|
94
|
-
return 0;
|
|
95
|
-
}
|
|
96
|
-
return (baseline - value) / baseline;
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
export {
|
|
100
|
-
AnomalyDetector
|
|
101
|
-
};
|
|
1
|
+
/**
 * Internal helper: keeps an exponentially weighted running baseline of the
 * latency, error-rate and throughput values it is fed.
 */
class BaselineCalculator {
  alpha;
  snapshot = {
    latencyP99: 0,
    latencyP95: 0,
    errorRate: 0,
    throughput: 0,
    sampleCount: 0,
  };

  /** @param {number} [alpha] - Weight given to each new point (default 0.2). */
  constructor(alpha = 0.2) {
    this.alpha = alpha;
  }

  /**
   * Blends `point` into the baseline and returns the new snapshot. The first
   * point is adopted as-is.
   */
  update(point) {
    const previous = this.snapshot;
    const weight = previous.sampleCount === 0 ? 1 : this.alpha;
    // `mix` still sees the previous snapshot here: the literal is fully
    // evaluated before it is assigned to `this.snapshot`.
    const next = {
      latencyP99: this.mix(previous.latencyP99, point.latencyP99, weight),
      latencyP95: this.mix(previous.latencyP95, point.latencyP95, weight),
      errorRate: this.mix(previous.errorRate, point.errorRate, weight),
      throughput: this.mix(previous.throughput, point.throughput, weight),
      sampleCount: previous.sampleCount + 1,
    };
    this.snapshot = next;
    return next;
  }

  /** Returns the current baseline snapshot. */
  getSnapshot() {
    return this.snapshot;
  }

  /** Weighted blend of `current` and `next`; returns `next` while no sample has been recorded. */
  mix(current, next, weight) {
    return this.snapshot.sampleCount === 0
      ? next
      : current * (1 - weight) + next * weight;
  }
}

/**
 * Compares each incoming metric point with a running baseline and reports
 * error-rate spikes, P99 latency regressions and throughput drops.
 */
class AnomalyDetector {
  baseline;
  thresholds = {
    errorRateDelta: 0.5,
    latencyDelta: 0.35,
    throughputDrop: 0.4,
    minSamples: 10,
  };

  /**
   * @param {{ errorRateDelta?: number, latencyDelta?: number,
   *   throughputDrop?: number, minSamples?: number }} [options]
   *   Overrides merged over the default thresholds.
   */
  constructor(options = {}) {
    this.baseline = new BaselineCalculator();
    this.thresholds = { ...this.thresholds, ...options };
  }

  /**
   * Folds `point` into the baseline, then compares the point with that
   * updated baseline. Returns no signals until the baseline has seen at least
   * `minSamples` points.
   *
   * @param {{ latencyP99: number, latencyP95: number, errorRate: number,
   *   throughput: number }} point
   * @returns {{ type: string, delta: number, point: object, baseline: object }[]}
   *   Signals in the order error rate, latency, throughput.
   */
  evaluate(point) {
    const baseline = this.baseline.update(point);
    if (baseline.sampleCount < this.thresholds.minSamples) {
      return [];
    }

    const checks = [
      {
        type: "error_rate_spike",
        delta: this.relativeDelta(point.errorRate, baseline.errorRate),
        limit: this.thresholds.errorRateDelta,
      },
      {
        type: "latency_regression",
        delta: this.relativeDelta(point.latencyP99, baseline.latencyP99),
        limit: this.thresholds.latencyDelta,
      },
      {
        type: "throughput_drop",
        delta: this.relativeDrop(point.throughput, baseline.throughput),
        limit: this.thresholds.throughputDrop,
      },
    ];

    return checks
      .filter(({ delta, limit }) => delta > limit)
      .map(({ type, delta }) => ({ type, delta, point, baseline }));
  }

  /** Relative increase of `value` over `baseline`; 0 when the baseline is 0. */
  relativeDelta(value, baseline) {
    return baseline === 0 ? 0 : (value - baseline) / baseline;
  }

  /** Relative decrease of `value` below `baseline`; 0 when the baseline is 0. */
  relativeDrop(value, baseline) {
    return baseline === 0 ? 0 : (baseline - value) / baseline;
  }
}

export { AnomalyDetector };
|
|
@@ -1,39 +1 @@
|
|
|
1
|
-
|
|
2
|
-
class BaselineCalculator {
|
|
3
|
-
alpha;
|
|
4
|
-
snapshot = {
|
|
5
|
-
latencyP99: 0,
|
|
6
|
-
latencyP95: 0,
|
|
7
|
-
errorRate: 0,
|
|
8
|
-
throughput: 0,
|
|
9
|
-
sampleCount: 0
|
|
10
|
-
};
|
|
11
|
-
constructor(alpha = 0.2) {
|
|
12
|
-
this.alpha = alpha;
|
|
13
|
-
}
|
|
14
|
-
update(point) {
|
|
15
|
-
const { sampleCount } = this.snapshot;
|
|
16
|
-
const nextCount = sampleCount + 1;
|
|
17
|
-
const weight = sampleCount === 0 ? 1 : this.alpha;
|
|
18
|
-
this.snapshot = {
|
|
19
|
-
latencyP99: this.mix(this.snapshot.latencyP99, point.latencyP99, weight),
|
|
20
|
-
latencyP95: this.mix(this.snapshot.latencyP95, point.latencyP95, weight),
|
|
21
|
-
errorRate: this.mix(this.snapshot.errorRate, point.errorRate, weight),
|
|
22
|
-
throughput: this.mix(this.snapshot.throughput, point.throughput, weight),
|
|
23
|
-
sampleCount: nextCount
|
|
24
|
-
};
|
|
25
|
-
return this.snapshot;
|
|
26
|
-
}
|
|
27
|
-
getSnapshot() {
|
|
28
|
-
return this.snapshot;
|
|
29
|
-
}
|
|
30
|
-
mix(current, next, weight) {
|
|
31
|
-
if (this.snapshot.sampleCount === 0) {
|
|
32
|
-
return next;
|
|
33
|
-
}
|
|
34
|
-
return current * (1 - weight) + next * weight;
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
export {
|
|
38
|
-
BaselineCalculator
|
|
39
|
-
};
|
|
1
|
+
/** Metric fields blended into the baseline, in snapshot key order. */
const BASELINE_METRICS = ["latencyP99", "latencyP95", "errorRate", "throughput"];

/**
 * Maintains an exponentially weighted running baseline of latency,
 * error-rate and throughput values.
 */
class BaselineCalculator {
  alpha;
  snapshot = {
    latencyP99: 0,
    latencyP95: 0,
    errorRate: 0,
    throughput: 0,
    sampleCount: 0,
  };

  /** @param {number} [alpha] - Weight given to each new point (default 0.2). */
  constructor(alpha = 0.2) {
    this.alpha = alpha;
  }

  /**
   * Blends `point` into the baseline and stores the result as a new snapshot
   * object. The very first point is adopted unchanged.
   *
   * @param {{ latencyP99: number, latencyP95: number, errorRate: number,
   *   throughput: number }} point
   * @returns {object} The updated snapshot, including the new `sampleCount`.
   */
  update(point) {
    const previous = this.snapshot;
    const weight = previous.sampleCount === 0 ? 1 : this.alpha;

    const next = {};
    for (const metric of BASELINE_METRICS) {
      // `this.snapshot` is still the previous snapshot while mixing.
      next[metric] = this.mix(previous[metric], point[metric], weight);
    }
    next.sampleCount = previous.sampleCount + 1;

    this.snapshot = next;
    return next;
  }

  /** Returns the current baseline snapshot. */
  getSnapshot() {
    return this.snapshot;
  }

  /**
   * Weighted blend of `current` and `next`. While no sample has been recorded
   * yet, `next` is returned directly.
   */
  mix(current, next, weight) {
    const isFirstSample = this.snapshot.sampleCount === 0;
    if (isFirstSample) {
      return next;
    }
    return current * (1 - weight) + next * weight;
  }
}

export { BaselineCalculator };
|
|
@@ -1,31 +1 @@
|
|
|
1
|
-
|
|
2
|
-
class RootCauseAnalyzer {
|
|
3
|
-
lookbackMs;
|
|
4
|
-
constructor(lookbackMs = 15 * 60 * 1000) {
|
|
5
|
-
this.lookbackMs = lookbackMs;
|
|
6
|
-
}
|
|
7
|
-
analyze(signal, deployments) {
|
|
8
|
-
const windowStart = new Date(signal.point.timestamp.getTime() - this.lookbackMs);
|
|
9
|
-
const candidates = deployments.filter((deployment) => deployment.deployedAt >= windowStart).sort((a, b) => b.deployedAt.getTime() - a.deployedAt.getTime());
|
|
10
|
-
const notes = [];
|
|
11
|
-
let culprit;
|
|
12
|
-
if (candidates.length > 0) {
|
|
13
|
-
culprit = candidates[0];
|
|
14
|
-
if (culprit) {
|
|
15
|
-
notes.push(`Closest deployment ${culprit.id} (${culprit.operation}) at ${culprit.deployedAt.toISOString()}`);
|
|
16
|
-
}
|
|
17
|
-
} else {
|
|
18
|
-
notes.push("No deployments found within lookback window.");
|
|
19
|
-
}
|
|
20
|
-
if (signal.type === "latency_regression") {
|
|
21
|
-
notes.push("Verify recent schema changes and external dependency latency.");
|
|
22
|
-
}
|
|
23
|
-
if (signal.type === "error_rate_spike") {
|
|
24
|
-
notes.push("Check SLO monitor for correlated incidents.");
|
|
25
|
-
}
|
|
26
|
-
return { signal, culprit, notes };
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
export {
|
|
30
|
-
RootCauseAnalyzer
|
|
31
|
-
};
|
|
1
|
+
/**
 * Suggests a likely cause for an anomaly signal by looking for the most
 * recent deployment made shortly before the anomalous data point.
 */
class RootCauseAnalyzer {
  lookbackMs;

  /**
   * @param {number} [lookbackMs] - How far before the signal's timestamp a
   *   deployment may lie to be considered (default 15 minutes).
   */
  constructor(lookbackMs = 15 * 60 * 1000) {
    this.lookbackMs = lookbackMs;
  }

  /**
   * Picks the latest deployment inside `[signal time - lookbackMs, signal time]`
   * as the culprit and collects human-readable notes.
   *
   * Deployments made after the signal's timestamp are excluded: they cannot
   * have caused an anomaly that was observed before they happened.
   *
   * @param {{ type: string, point: { timestamp: Date } }} signal
   * @param {{ id: *, operation: *, deployedAt: Date }[]} deployments
   *   Not mutated; candidates are sorted on a filtered copy.
   * @returns {{ signal: object, culprit: (object|undefined), notes: string[] }}
   */
  analyze(signal, deployments) {
    const signalTime = signal.point.timestamp.getTime();
    const windowStart = signalTime - this.lookbackMs;

    const [culprit] = deployments
      .filter((deployment) => {
        const deployedTime = deployment.deployedAt.getTime();
        return deployedTime >= windowStart && deployedTime <= signalTime;
      })
      .sort((a, b) => b.deployedAt.getTime() - a.deployedAt.getTime());

    const notes = [];
    if (culprit) {
      notes.push(
        `Closest deployment ${culprit.id} (${culprit.operation}) at ${culprit.deployedAt.toISOString()}`,
      );
    } else {
      notes.push("No deployments found within lookback window.");
    }

    if (signal.type === "latency_regression") {
      notes.push("Verify recent schema changes and external dependency latency.");
    }
    if (signal.type === "error_rate_spike") {
      notes.push("Check SLO monitor for correlated incidents.");
    }

    return { signal, culprit, notes };
  }
}

export { RootCauseAnalyzer };
|