@contractspec/lib.observability 3.7.16 → 3.7.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/dist/anomaly/alert-manager.js +1 -23
- package/dist/anomaly/anomaly-detector.js +1 -101
- package/dist/anomaly/baseline-calculator.js +1 -39
- package/dist/anomaly/root-cause-analyzer.js +1 -31
- package/dist/index.js +5 -1128
- package/dist/intent/aggregator.js +1 -109
- package/dist/intent/detector.js +1 -132
- package/dist/logging/index.js +1 -41
- package/dist/metrics/index.js +1 -30
- package/dist/node/anomaly/alert-manager.js +1 -23
- package/dist/node/anomaly/anomaly-detector.js +1 -101
- package/dist/node/anomaly/baseline-calculator.js +1 -39
- package/dist/node/anomaly/root-cause-analyzer.js +1 -31
- package/dist/node/index.js +5 -1128
- package/dist/node/intent/aggregator.js +1 -109
- package/dist/node/intent/detector.js +1 -132
- package/dist/node/logging/index.js +1 -41
- package/dist/node/metrics/index.js +1 -30
- package/dist/node/pipeline/evolution-pipeline.js +1 -299
- package/dist/node/pipeline/lifecycle-pipeline.js +1 -85
- package/dist/node/telemetry/model-selection-telemetry.js +1 -30
- package/dist/node/telemetry/posthog-baseline-reader.js +5 -308
- package/dist/node/telemetry/posthog-telemetry.js +1 -60
- package/dist/node/tracing/core.js +1 -52
- package/dist/node/tracing/index.js +1 -75
- package/dist/node/tracing/middleware.js +1 -171
- package/dist/node/tracing/model-selection.span.js +1 -72
- package/dist/pipeline/evolution-pipeline.js +1 -299
- package/dist/pipeline/lifecycle-pipeline.js +1 -85
- package/dist/telemetry/model-selection-telemetry.js +1 -30
- package/dist/telemetry/posthog-baseline-reader.js +5 -308
- package/dist/telemetry/posthog-telemetry.js +1 -60
- package/dist/tracing/core.js +1 -52
- package/dist/tracing/index.js +1 -75
- package/dist/tracing/middleware.js +1 -171
- package/dist/tracing/model-selection.span.js +1 -72
- package/package.json +8 -8
|
@@ -1,109 +1 @@
|
|
|
1
|
-
|
|
2
|
-
var DEFAULT_WINDOW_MS = 15 * 60 * 1000;
|
|
3
|
-
|
|
4
|
-
class IntentAggregator {
|
|
5
|
-
windowMs;
|
|
6
|
-
sequenceSampleSize;
|
|
7
|
-
samples = [];
|
|
8
|
-
constructor(options = {}) {
|
|
9
|
-
this.windowMs = options.windowMs ?? DEFAULT_WINDOW_MS;
|
|
10
|
-
this.sequenceSampleSize = options.sequenceSampleSize ?? 1000;
|
|
11
|
-
}
|
|
12
|
-
add(sample) {
|
|
13
|
-
this.samples.push(sample);
|
|
14
|
-
}
|
|
15
|
-
flush(now = new Date) {
|
|
16
|
-
const minTimestamp = now.getTime() - this.windowMs;
|
|
17
|
-
const windowSamples = this.samples.filter((sample) => sample.timestamp.getTime() >= minTimestamp);
|
|
18
|
-
this.samples.length = 0;
|
|
19
|
-
const metrics = this.aggregateMetrics(windowSamples);
|
|
20
|
-
const sequences = this.buildSequences(windowSamples);
|
|
21
|
-
const timestamps = windowSamples.map((sample) => sample.timestamp.getTime());
|
|
22
|
-
return {
|
|
23
|
-
metrics,
|
|
24
|
-
sequences,
|
|
25
|
-
sampleCount: windowSamples.length,
|
|
26
|
-
windowStart: timestamps.length ? new Date(Math.min(...timestamps)) : undefined,
|
|
27
|
-
windowEnd: timestamps.length ? new Date(Math.max(...timestamps)) : undefined
|
|
28
|
-
};
|
|
29
|
-
}
|
|
30
|
-
aggregateMetrics(samples) {
|
|
31
|
-
if (!samples.length)
|
|
32
|
-
return [];
|
|
33
|
-
const groups = new Map;
|
|
34
|
-
for (const sample of samples) {
|
|
35
|
-
const key = `${sample.operation.name}.v${sample.operation.version}`;
|
|
36
|
-
const arr = groups.get(key) ?? [];
|
|
37
|
-
arr.push(sample);
|
|
38
|
-
groups.set(key, arr);
|
|
39
|
-
}
|
|
40
|
-
return [...groups.values()].map((group) => {
|
|
41
|
-
const first = group[0];
|
|
42
|
-
if (!first)
|
|
43
|
-
throw new Error("Empty group in aggregation");
|
|
44
|
-
const durations = group.map((s) => s.durationMs).sort((a, b) => a - b);
|
|
45
|
-
const errors = group.filter((s) => !s.success);
|
|
46
|
-
const totalCalls = group.length;
|
|
47
|
-
const topErrors = errors.reduce((acc, sample) => {
|
|
48
|
-
if (!sample.errorCode)
|
|
49
|
-
return acc;
|
|
50
|
-
acc[sample.errorCode] = (acc[sample.errorCode] ?? 0) + 1;
|
|
51
|
-
return acc;
|
|
52
|
-
}, {});
|
|
53
|
-
const timestamps = group.map((s) => s.timestamp.getTime());
|
|
54
|
-
return {
|
|
55
|
-
operation: first.operation,
|
|
56
|
-
totalCalls,
|
|
57
|
-
successRate: (totalCalls - errors.length) / totalCalls,
|
|
58
|
-
errorRate: errors.length / totalCalls,
|
|
59
|
-
averageLatencyMs: durations.reduce((sum, value) => sum + value, 0) / totalCalls,
|
|
60
|
-
p95LatencyMs: percentile(durations, 0.95),
|
|
61
|
-
p99LatencyMs: percentile(durations, 0.99),
|
|
62
|
-
maxLatencyMs: Math.max(...durations),
|
|
63
|
-
windowStart: new Date(Math.min(...timestamps)),
|
|
64
|
-
windowEnd: new Date(Math.max(...timestamps)),
|
|
65
|
-
topErrors
|
|
66
|
-
};
|
|
67
|
-
});
|
|
68
|
-
}
|
|
69
|
-
buildSequences(samples) {
|
|
70
|
-
const byTrace = new Map;
|
|
71
|
-
for (const sample of samples.slice(-this.sequenceSampleSize)) {
|
|
72
|
-
if (!sample.traceId)
|
|
73
|
-
continue;
|
|
74
|
-
const arr = byTrace.get(sample.traceId) ?? [];
|
|
75
|
-
arr.push(sample);
|
|
76
|
-
byTrace.set(sample.traceId, arr);
|
|
77
|
-
}
|
|
78
|
-
const sequences = {};
|
|
79
|
-
for (const events of byTrace.values()) {
|
|
80
|
-
const ordered = events.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());
|
|
81
|
-
const steps = ordered.map((event) => event.operation.name);
|
|
82
|
-
if (steps.length < 2)
|
|
83
|
-
continue;
|
|
84
|
-
const key = `${steps.join(">")}@${ordered[0]?.tenantId ?? "global"}`;
|
|
85
|
-
const existing = sequences[key];
|
|
86
|
-
if (existing) {
|
|
87
|
-
existing.count += 1;
|
|
88
|
-
} else {
|
|
89
|
-
sequences[key] = {
|
|
90
|
-
steps,
|
|
91
|
-
tenantId: ordered[0]?.tenantId,
|
|
92
|
-
count: 1
|
|
93
|
-
};
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
return Object.values(sequences).sort((a, b) => b.count - a.count);
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
function percentile(values, ratio) {
|
|
100
|
-
if (!values.length)
|
|
101
|
-
return 0;
|
|
102
|
-
if (values.length === 1)
|
|
103
|
-
return values[0] ?? 0;
|
|
104
|
-
const index = Math.min(values.length - 1, Math.floor(ratio * values.length));
|
|
105
|
-
return values[index] ?? 0;
|
|
106
|
-
}
|
|
107
|
-
export {
|
|
108
|
-
IntentAggregator
|
|
109
|
-
};
|
|
1
|
+
class u{windowMs;sequenceSampleSize;samples=[];constructor(t={}){this.windowMs=t.windowMs??900000,this.sequenceSampleSize=t.sequenceSampleSize??1000}add(t){this.samples.push(t)}flush(t=new Date){let i=t.getTime()-this.windowMs,e=this.samples.filter((o)=>o.timestamp.getTime()>=i);this.samples.length=0;let r=this.aggregateMetrics(e),n=this.buildSequences(e),a=e.map((o)=>o.timestamp.getTime());return{metrics:r,sequences:n,sampleCount:e.length,windowStart:a.length?new Date(Math.min(...a)):void 0,windowEnd:a.length?new Date(Math.max(...a)):void 0}}aggregateMetrics(t){if(!t.length)return[];let i=new Map;for(let e of t){let r=`${e.operation.name}.v${e.operation.version}`,n=i.get(r)??[];n.push(e),i.set(r,n)}return[...i.values()].map((e)=>{let r=e[0];if(!r)throw Error("Empty group in aggregation");let n=e.map((s)=>s.durationMs).sort((s,c)=>s-c),a=e.filter((s)=>!s.success),o=e.length,g=a.reduce((s,c)=>{if(!c.errorCode)return s;return s[c.errorCode]=(s[c.errorCode]??0)+1,s},{}),m=e.map((s)=>s.timestamp.getTime());return{operation:r.operation,totalCalls:o,successRate:(o-a.length)/o,errorRate:a.length/o,averageLatencyMs:n.reduce((s,c)=>s+c,0)/o,p95LatencyMs:p(n,0.95),p99LatencyMs:p(n,0.99),maxLatencyMs:Math.max(...n),windowStart:new Date(Math.min(...m)),windowEnd:new Date(Math.max(...m)),topErrors:g}})}buildSequences(t){let i=new Map;for(let r of t.slice(-this.sequenceSampleSize)){if(!r.traceId)continue;let n=i.get(r.traceId)??[];n.push(r),i.set(r.traceId,n)}let e={};for(let r of i.values()){let n=r.sort((m,s)=>m.timestamp.getTime()-s.timestamp.getTime()),a=n.map((m)=>m.operation.name);if(a.length<2)continue;let o=`${a.join(">")}@${n[0]?.tenantId??"global"}`,g=e[o];if(g)g.count+=1;else e[o]={steps:a,tenantId:n[0]?.tenantId,count:1}}return Object.values(e).sort((r,n)=>n.count-r.count)}}function p(t,i){if(!t.length)return 0;if(t.length===1)return t[0]??0;let e=Math.min(t.length-1,Math.floor(i*t.length));return t[e]??0}export{u as IntentAggregator};
|
|
@@ -1,132 +1 @@
|
|
|
1
|
-
|
|
2
|
-
import { randomUUID } from "crypto";
|
|
3
|
-
var DEFAULTS = {
|
|
4
|
-
errorRateThreshold: 0.05,
|
|
5
|
-
latencyP99ThresholdMs: 750,
|
|
6
|
-
throughputDropThreshold: 0.3,
|
|
7
|
-
minSequenceLength: 3
|
|
8
|
-
};
|
|
9
|
-
|
|
10
|
-
class IntentDetector {
|
|
11
|
-
options;
|
|
12
|
-
constructor(options = {}) {
|
|
13
|
-
this.options = {
|
|
14
|
-
errorRateThreshold: options.errorRateThreshold ?? DEFAULTS.errorRateThreshold,
|
|
15
|
-
latencyP99ThresholdMs: options.latencyP99ThresholdMs ?? DEFAULTS.latencyP99ThresholdMs,
|
|
16
|
-
throughputDropThreshold: options.throughputDropThreshold ?? DEFAULTS.throughputDropThreshold,
|
|
17
|
-
minSequenceLength: options.minSequenceLength ?? DEFAULTS.minSequenceLength
|
|
18
|
-
};
|
|
19
|
-
}
|
|
20
|
-
detectFromMetrics(current, previous) {
|
|
21
|
-
const signals = [];
|
|
22
|
-
const baseline = new Map((previous ?? []).map((metric) => [
|
|
23
|
-
`${metric.operation.name}.v${metric.operation.version}`,
|
|
24
|
-
metric
|
|
25
|
-
]));
|
|
26
|
-
for (const metric of current) {
|
|
27
|
-
if (metric.errorRate >= this.options.errorRateThreshold) {
|
|
28
|
-
signals.push({
|
|
29
|
-
id: randomUUID(),
|
|
30
|
-
type: "error-spike",
|
|
31
|
-
operation: metric.operation,
|
|
32
|
-
confidence: Math.min(1, metric.errorRate / this.options.errorRateThreshold),
|
|
33
|
-
description: `Error rate ${metric.errorRate.toFixed(2)} exceeded threshold`,
|
|
34
|
-
metadata: {
|
|
35
|
-
errorRate: metric.errorRate,
|
|
36
|
-
topErrors: metric.topErrors
|
|
37
|
-
},
|
|
38
|
-
evidence: [
|
|
39
|
-
{
|
|
40
|
-
type: "metric",
|
|
41
|
-
description: "error-rate",
|
|
42
|
-
data: {
|
|
43
|
-
errorRate: metric.errorRate,
|
|
44
|
-
threshold: this.options.errorRateThreshold
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
]
|
|
48
|
-
});
|
|
49
|
-
continue;
|
|
50
|
-
}
|
|
51
|
-
if (metric.p99LatencyMs >= this.options.latencyP99ThresholdMs) {
|
|
52
|
-
signals.push({
|
|
53
|
-
id: randomUUID(),
|
|
54
|
-
type: "latency-regression",
|
|
55
|
-
operation: metric.operation,
|
|
56
|
-
confidence: Math.min(1, metric.p99LatencyMs / this.options.latencyP99ThresholdMs),
|
|
57
|
-
description: `P99 latency ${metric.p99LatencyMs}ms exceeded threshold`,
|
|
58
|
-
metadata: { p99LatencyMs: metric.p99LatencyMs },
|
|
59
|
-
evidence: [
|
|
60
|
-
{
|
|
61
|
-
type: "metric",
|
|
62
|
-
description: "p99-latency",
|
|
63
|
-
data: {
|
|
64
|
-
p99LatencyMs: metric.p99LatencyMs,
|
|
65
|
-
threshold: this.options.latencyP99ThresholdMs
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
]
|
|
69
|
-
});
|
|
70
|
-
continue;
|
|
71
|
-
}
|
|
72
|
-
const base = baseline.get(`${metric.operation.name}.v${metric.operation.version}`);
|
|
73
|
-
if (base) {
|
|
74
|
-
const drop = (base.totalCalls - metric.totalCalls) / Math.max(base.totalCalls, 1);
|
|
75
|
-
if (drop >= this.options.throughputDropThreshold) {
|
|
76
|
-
signals.push({
|
|
77
|
-
id: randomUUID(),
|
|
78
|
-
type: "throughput-drop",
|
|
79
|
-
operation: metric.operation,
|
|
80
|
-
confidence: Math.min(1, drop / this.options.throughputDropThreshold),
|
|
81
|
-
description: `Throughput dropped ${(drop * 100).toFixed(1)}% vs baseline`,
|
|
82
|
-
metadata: {
|
|
83
|
-
baselineCalls: base.totalCalls,
|
|
84
|
-
currentCalls: metric.totalCalls
|
|
85
|
-
},
|
|
86
|
-
evidence: [
|
|
87
|
-
{
|
|
88
|
-
type: "metric",
|
|
89
|
-
description: "throughput-drop",
|
|
90
|
-
data: {
|
|
91
|
-
baselineCalls: base.totalCalls,
|
|
92
|
-
currentCalls: metric.totalCalls
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
]
|
|
96
|
-
});
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
return signals;
|
|
101
|
-
}
|
|
102
|
-
detectSequentialIntents(sequences) {
|
|
103
|
-
const signals = [];
|
|
104
|
-
for (const sequence of sequences) {
|
|
105
|
-
if (sequence.steps.length < this.options.minSequenceLength)
|
|
106
|
-
continue;
|
|
107
|
-
const description = sequence.steps.join(" → ");
|
|
108
|
-
signals.push({
|
|
109
|
-
id: randomUUID(),
|
|
110
|
-
type: "missing-workflow-step",
|
|
111
|
-
confidence: 0.6,
|
|
112
|
-
description: `Repeated workflow detected: ${description}`,
|
|
113
|
-
metadata: {
|
|
114
|
-
steps: sequence.steps,
|
|
115
|
-
tenantId: sequence.tenantId,
|
|
116
|
-
occurrences: sequence.count
|
|
117
|
-
},
|
|
118
|
-
evidence: [
|
|
119
|
-
{
|
|
120
|
-
type: "sequence",
|
|
121
|
-
description: "sequential-calls",
|
|
122
|
-
data: { steps: sequence.steps, count: sequence.count }
|
|
123
|
-
}
|
|
124
|
-
]
|
|
125
|
-
});
|
|
126
|
-
}
|
|
127
|
-
return signals;
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
export {
|
|
131
|
-
IntentDetector
|
|
132
|
-
};
|
|
1
|
+
import{randomUUID as s}from"crypto";var a={errorRateThreshold:0.05,latencyP99ThresholdMs:750,throughputDropThreshold:0.3,minSequenceLength:3};class c{options;constructor(r={}){this.options={errorRateThreshold:r.errorRateThreshold??a.errorRateThreshold,latencyP99ThresholdMs:r.latencyP99ThresholdMs??a.latencyP99ThresholdMs,throughputDropThreshold:r.throughputDropThreshold??a.throughputDropThreshold,minSequenceLength:r.minSequenceLength??a.minSequenceLength}}detectFromMetrics(r,n){let t=[],i=new Map((n??[]).map((e)=>[`${e.operation.name}.v${e.operation.version}`,e]));for(let e of r){if(e.errorRate>=this.options.errorRateThreshold){t.push({id:s(),type:"error-spike",operation:e.operation,confidence:Math.min(1,e.errorRate/this.options.errorRateThreshold),description:`Error rate ${e.errorRate.toFixed(2)} exceeded threshold`,metadata:{errorRate:e.errorRate,topErrors:e.topErrors},evidence:[{type:"metric",description:"error-rate",data:{errorRate:e.errorRate,threshold:this.options.errorRateThreshold}}]});continue}if(e.p99LatencyMs>=this.options.latencyP99ThresholdMs){t.push({id:s(),type:"latency-regression",operation:e.operation,confidence:Math.min(1,e.p99LatencyMs/this.options.latencyP99ThresholdMs),description:`P99 latency ${e.p99LatencyMs}ms exceeded threshold`,metadata:{p99LatencyMs:e.p99LatencyMs},evidence:[{type:"metric",description:"p99-latency",data:{p99LatencyMs:e.p99LatencyMs,threshold:this.options.latencyP99ThresholdMs}}]});continue}let o=i.get(`${e.operation.name}.v${e.operation.version}`);if(o){let p=(o.totalCalls-e.totalCalls)/Math.max(o.totalCalls,1);if(p>=this.options.throughputDropThreshold)t.push({id:s(),type:"throughput-drop",operation:e.operation,confidence:Math.min(1,p/this.options.throughputDropThreshold),description:`Throughput dropped ${(p*100).toFixed(1)}% vs 
baseline`,metadata:{baselineCalls:o.totalCalls,currentCalls:e.totalCalls},evidence:[{type:"metric",description:"throughput-drop",data:{baselineCalls:o.totalCalls,currentCalls:e.totalCalls}}]})}}return t}detectSequentialIntents(r){let n=[];for(let t of r){if(t.steps.length<this.options.minSequenceLength)continue;let i=t.steps.join(" → ");n.push({id:s(),type:"missing-workflow-step",confidence:0.6,description:`Repeated workflow detected: ${i}`,metadata:{steps:t.steps,tenantId:t.tenantId,occurrences:t.count},evidence:[{type:"sequence",description:"sequential-calls",data:{steps:t.steps,count:t.count}}]})}return n}}export{c as IntentDetector};
|
|
@@ -1,41 +1 @@
|
|
|
1
|
-
|
|
2
|
-
import { context, trace } from "@opentelemetry/api";
|
|
3
|
-
|
|
4
|
-
class Logger {
|
|
5
|
-
serviceName;
|
|
6
|
-
constructor(serviceName) {
|
|
7
|
-
this.serviceName = serviceName;
|
|
8
|
-
}
|
|
9
|
-
log(level, message, meta = {}) {
|
|
10
|
-
const span = trace.getSpan(context.active());
|
|
11
|
-
const traceId = span?.spanContext().traceId;
|
|
12
|
-
const spanId = span?.spanContext().spanId;
|
|
13
|
-
const entry = {
|
|
14
|
-
timestamp: new Date().toISOString(),
|
|
15
|
-
service: this.serviceName,
|
|
16
|
-
level,
|
|
17
|
-
message,
|
|
18
|
-
traceId,
|
|
19
|
-
spanId,
|
|
20
|
-
...meta
|
|
21
|
-
};
|
|
22
|
-
console.log(JSON.stringify(entry));
|
|
23
|
-
}
|
|
24
|
-
debug(message, meta) {
|
|
25
|
-
this.log("debug", message, meta);
|
|
26
|
-
}
|
|
27
|
-
info(message, meta) {
|
|
28
|
-
this.log("info", message, meta);
|
|
29
|
-
}
|
|
30
|
-
warn(message, meta) {
|
|
31
|
-
this.log("warn", message, meta);
|
|
32
|
-
}
|
|
33
|
-
error(message, meta) {
|
|
34
|
-
this.log("error", message, meta);
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
var logger = new Logger(process.env.OTEL_SERVICE_NAME || "unknown-service");
|
|
38
|
-
export {
|
|
39
|
-
logger,
|
|
40
|
-
Logger
|
|
41
|
-
};
|
|
1
|
+
import{context as c,trace as a}from"@opentelemetry/api";class t{serviceName;constructor(n){this.serviceName=n}log(n,e,o={}){let r=a.getSpan(c.active()),s=r?.spanContext().traceId,i=r?.spanContext().spanId,g={timestamp:new Date().toISOString(),service:this.serviceName,level:n,message:e,traceId:s,spanId:i,...o};console.log(JSON.stringify(g))}debug(n,e){this.log("debug",n,e)}info(n,e){this.log("info",n,e)}warn(n,e){this.log("warn",n,e)}error(n,e){this.log("error",n,e)}}var l=new t(process.env.OTEL_SERVICE_NAME||"unknown-service");export{l as logger,t as Logger};
|
|
@@ -1,30 +1 @@
|
|
|
1
|
-
|
|
2
|
-
import {
|
|
3
|
-
metrics
|
|
4
|
-
} from "@opentelemetry/api";
|
|
5
|
-
var DEFAULT_METER_NAME = "@contractspec/lib.observability";
|
|
6
|
-
function getMeter(name = DEFAULT_METER_NAME) {
|
|
7
|
-
return metrics.getMeter(name);
|
|
8
|
-
}
|
|
9
|
-
function createCounter(name, description, meterName) {
|
|
10
|
-
return getMeter(meterName).createCounter(name, { description });
|
|
11
|
-
}
|
|
12
|
-
function createUpDownCounter(name, description, meterName) {
|
|
13
|
-
return getMeter(meterName).createUpDownCounter(name, { description });
|
|
14
|
-
}
|
|
15
|
-
function createHistogram(name, description, meterName) {
|
|
16
|
-
return getMeter(meterName).createHistogram(name, { description });
|
|
17
|
-
}
|
|
18
|
-
var standardMetrics = {
|
|
19
|
-
httpRequests: createCounter("http_requests_total", "Total HTTP requests"),
|
|
20
|
-
httpDuration: createHistogram("http_request_duration_seconds", "HTTP request duration"),
|
|
21
|
-
operationErrors: createCounter("operation_errors_total", "Total operation errors"),
|
|
22
|
-
workflowDuration: createHistogram("workflow_duration_seconds", "Workflow execution duration")
|
|
23
|
-
};
|
|
24
|
-
export {
|
|
25
|
-
standardMetrics,
|
|
26
|
-
getMeter,
|
|
27
|
-
createUpDownCounter,
|
|
28
|
-
createHistogram,
|
|
29
|
-
createCounter
|
|
30
|
-
};
|
|
1
|
+
import{metrics as i}from"@opentelemetry/api";var u="@contractspec/lib.observability";function o(t=u){return i.getMeter(t)}function n(t,r,e){return o(e).createCounter(t,{description:r})}function p(t,r,e){return o(e).createUpDownCounter(t,{description:r})}function s(t,r,e){return o(e).createHistogram(t,{description:r})}var c={httpRequests:n("http_requests_total","Total HTTP requests"),httpDuration:s("http_request_duration_seconds","HTTP request duration"),operationErrors:n("operation_errors_total","Total operation errors"),workflowDuration:s("workflow_duration_seconds","Workflow execution duration")};export{c as standardMetrics,o as getMeter,p as createUpDownCounter,s as createHistogram,n as createCounter};
|
|
@@ -1,299 +1 @@
|
|
|
1
|
-
|
|
2
|
-
var DEFAULT_WINDOW_MS = 15 * 60 * 1000;
|
|
3
|
-
|
|
4
|
-
class IntentAggregator {
|
|
5
|
-
windowMs;
|
|
6
|
-
sequenceSampleSize;
|
|
7
|
-
samples = [];
|
|
8
|
-
constructor(options = {}) {
|
|
9
|
-
this.windowMs = options.windowMs ?? DEFAULT_WINDOW_MS;
|
|
10
|
-
this.sequenceSampleSize = options.sequenceSampleSize ?? 1000;
|
|
11
|
-
}
|
|
12
|
-
add(sample) {
|
|
13
|
-
this.samples.push(sample);
|
|
14
|
-
}
|
|
15
|
-
flush(now = new Date) {
|
|
16
|
-
const minTimestamp = now.getTime() - this.windowMs;
|
|
17
|
-
const windowSamples = this.samples.filter((sample) => sample.timestamp.getTime() >= minTimestamp);
|
|
18
|
-
this.samples.length = 0;
|
|
19
|
-
const metrics = this.aggregateMetrics(windowSamples);
|
|
20
|
-
const sequences = this.buildSequences(windowSamples);
|
|
21
|
-
const timestamps = windowSamples.map((sample) => sample.timestamp.getTime());
|
|
22
|
-
return {
|
|
23
|
-
metrics,
|
|
24
|
-
sequences,
|
|
25
|
-
sampleCount: windowSamples.length,
|
|
26
|
-
windowStart: timestamps.length ? new Date(Math.min(...timestamps)) : undefined,
|
|
27
|
-
windowEnd: timestamps.length ? new Date(Math.max(...timestamps)) : undefined
|
|
28
|
-
};
|
|
29
|
-
}
|
|
30
|
-
aggregateMetrics(samples) {
|
|
31
|
-
if (!samples.length)
|
|
32
|
-
return [];
|
|
33
|
-
const groups = new Map;
|
|
34
|
-
for (const sample of samples) {
|
|
35
|
-
const key = `${sample.operation.name}.v${sample.operation.version}`;
|
|
36
|
-
const arr = groups.get(key) ?? [];
|
|
37
|
-
arr.push(sample);
|
|
38
|
-
groups.set(key, arr);
|
|
39
|
-
}
|
|
40
|
-
return [...groups.values()].map((group) => {
|
|
41
|
-
const first = group[0];
|
|
42
|
-
if (!first)
|
|
43
|
-
throw new Error("Empty group in aggregation");
|
|
44
|
-
const durations = group.map((s) => s.durationMs).sort((a, b) => a - b);
|
|
45
|
-
const errors = group.filter((s) => !s.success);
|
|
46
|
-
const totalCalls = group.length;
|
|
47
|
-
const topErrors = errors.reduce((acc, sample) => {
|
|
48
|
-
if (!sample.errorCode)
|
|
49
|
-
return acc;
|
|
50
|
-
acc[sample.errorCode] = (acc[sample.errorCode] ?? 0) + 1;
|
|
51
|
-
return acc;
|
|
52
|
-
}, {});
|
|
53
|
-
const timestamps = group.map((s) => s.timestamp.getTime());
|
|
54
|
-
return {
|
|
55
|
-
operation: first.operation,
|
|
56
|
-
totalCalls,
|
|
57
|
-
successRate: (totalCalls - errors.length) / totalCalls,
|
|
58
|
-
errorRate: errors.length / totalCalls,
|
|
59
|
-
averageLatencyMs: durations.reduce((sum, value) => sum + value, 0) / totalCalls,
|
|
60
|
-
p95LatencyMs: percentile(durations, 0.95),
|
|
61
|
-
p99LatencyMs: percentile(durations, 0.99),
|
|
62
|
-
maxLatencyMs: Math.max(...durations),
|
|
63
|
-
windowStart: new Date(Math.min(...timestamps)),
|
|
64
|
-
windowEnd: new Date(Math.max(...timestamps)),
|
|
65
|
-
topErrors
|
|
66
|
-
};
|
|
67
|
-
});
|
|
68
|
-
}
|
|
69
|
-
buildSequences(samples) {
|
|
70
|
-
const byTrace = new Map;
|
|
71
|
-
for (const sample of samples.slice(-this.sequenceSampleSize)) {
|
|
72
|
-
if (!sample.traceId)
|
|
73
|
-
continue;
|
|
74
|
-
const arr = byTrace.get(sample.traceId) ?? [];
|
|
75
|
-
arr.push(sample);
|
|
76
|
-
byTrace.set(sample.traceId, arr);
|
|
77
|
-
}
|
|
78
|
-
const sequences = {};
|
|
79
|
-
for (const events of byTrace.values()) {
|
|
80
|
-
const ordered = events.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());
|
|
81
|
-
const steps = ordered.map((event) => event.operation.name);
|
|
82
|
-
if (steps.length < 2)
|
|
83
|
-
continue;
|
|
84
|
-
const key = `${steps.join(">")}@${ordered[0]?.tenantId ?? "global"}`;
|
|
85
|
-
const existing = sequences[key];
|
|
86
|
-
if (existing) {
|
|
87
|
-
existing.count += 1;
|
|
88
|
-
} else {
|
|
89
|
-
sequences[key] = {
|
|
90
|
-
steps,
|
|
91
|
-
tenantId: ordered[0]?.tenantId,
|
|
92
|
-
count: 1
|
|
93
|
-
};
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
return Object.values(sequences).sort((a, b) => b.count - a.count);
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
function percentile(values, ratio) {
|
|
100
|
-
if (!values.length)
|
|
101
|
-
return 0;
|
|
102
|
-
if (values.length === 1)
|
|
103
|
-
return values[0] ?? 0;
|
|
104
|
-
const index = Math.min(values.length - 1, Math.floor(ratio * values.length));
|
|
105
|
-
return values[index] ?? 0;
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// src/intent/detector.ts
|
|
109
|
-
import { randomUUID } from "crypto";
|
|
110
|
-
var DEFAULTS = {
|
|
111
|
-
errorRateThreshold: 0.05,
|
|
112
|
-
latencyP99ThresholdMs: 750,
|
|
113
|
-
throughputDropThreshold: 0.3,
|
|
114
|
-
minSequenceLength: 3
|
|
115
|
-
};
|
|
116
|
-
|
|
117
|
-
class IntentDetector {
|
|
118
|
-
options;
|
|
119
|
-
constructor(options = {}) {
|
|
120
|
-
this.options = {
|
|
121
|
-
errorRateThreshold: options.errorRateThreshold ?? DEFAULTS.errorRateThreshold,
|
|
122
|
-
latencyP99ThresholdMs: options.latencyP99ThresholdMs ?? DEFAULTS.latencyP99ThresholdMs,
|
|
123
|
-
throughputDropThreshold: options.throughputDropThreshold ?? DEFAULTS.throughputDropThreshold,
|
|
124
|
-
minSequenceLength: options.minSequenceLength ?? DEFAULTS.minSequenceLength
|
|
125
|
-
};
|
|
126
|
-
}
|
|
127
|
-
detectFromMetrics(current, previous) {
|
|
128
|
-
const signals = [];
|
|
129
|
-
const baseline = new Map((previous ?? []).map((metric) => [
|
|
130
|
-
`${metric.operation.name}.v${metric.operation.version}`,
|
|
131
|
-
metric
|
|
132
|
-
]));
|
|
133
|
-
for (const metric of current) {
|
|
134
|
-
if (metric.errorRate >= this.options.errorRateThreshold) {
|
|
135
|
-
signals.push({
|
|
136
|
-
id: randomUUID(),
|
|
137
|
-
type: "error-spike",
|
|
138
|
-
operation: metric.operation,
|
|
139
|
-
confidence: Math.min(1, metric.errorRate / this.options.errorRateThreshold),
|
|
140
|
-
description: `Error rate ${metric.errorRate.toFixed(2)} exceeded threshold`,
|
|
141
|
-
metadata: {
|
|
142
|
-
errorRate: metric.errorRate,
|
|
143
|
-
topErrors: metric.topErrors
|
|
144
|
-
},
|
|
145
|
-
evidence: [
|
|
146
|
-
{
|
|
147
|
-
type: "metric",
|
|
148
|
-
description: "error-rate",
|
|
149
|
-
data: {
|
|
150
|
-
errorRate: metric.errorRate,
|
|
151
|
-
threshold: this.options.errorRateThreshold
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
]
|
|
155
|
-
});
|
|
156
|
-
continue;
|
|
157
|
-
}
|
|
158
|
-
if (metric.p99LatencyMs >= this.options.latencyP99ThresholdMs) {
|
|
159
|
-
signals.push({
|
|
160
|
-
id: randomUUID(),
|
|
161
|
-
type: "latency-regression",
|
|
162
|
-
operation: metric.operation,
|
|
163
|
-
confidence: Math.min(1, metric.p99LatencyMs / this.options.latencyP99ThresholdMs),
|
|
164
|
-
description: `P99 latency ${metric.p99LatencyMs}ms exceeded threshold`,
|
|
165
|
-
metadata: { p99LatencyMs: metric.p99LatencyMs },
|
|
166
|
-
evidence: [
|
|
167
|
-
{
|
|
168
|
-
type: "metric",
|
|
169
|
-
description: "p99-latency",
|
|
170
|
-
data: {
|
|
171
|
-
p99LatencyMs: metric.p99LatencyMs,
|
|
172
|
-
threshold: this.options.latencyP99ThresholdMs
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
]
|
|
176
|
-
});
|
|
177
|
-
continue;
|
|
178
|
-
}
|
|
179
|
-
const base = baseline.get(`${metric.operation.name}.v${metric.operation.version}`);
|
|
180
|
-
if (base) {
|
|
181
|
-
const drop = (base.totalCalls - metric.totalCalls) / Math.max(base.totalCalls, 1);
|
|
182
|
-
if (drop >= this.options.throughputDropThreshold) {
|
|
183
|
-
signals.push({
|
|
184
|
-
id: randomUUID(),
|
|
185
|
-
type: "throughput-drop",
|
|
186
|
-
operation: metric.operation,
|
|
187
|
-
confidence: Math.min(1, drop / this.options.throughputDropThreshold),
|
|
188
|
-
description: `Throughput dropped ${(drop * 100).toFixed(1)}% vs baseline`,
|
|
189
|
-
metadata: {
|
|
190
|
-
baselineCalls: base.totalCalls,
|
|
191
|
-
currentCalls: metric.totalCalls
|
|
192
|
-
},
|
|
193
|
-
evidence: [
|
|
194
|
-
{
|
|
195
|
-
type: "metric",
|
|
196
|
-
description: "throughput-drop",
|
|
197
|
-
data: {
|
|
198
|
-
baselineCalls: base.totalCalls,
|
|
199
|
-
currentCalls: metric.totalCalls
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
]
|
|
203
|
-
});
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
return signals;
|
|
208
|
-
}
|
|
209
|
-
detectSequentialIntents(sequences) {
|
|
210
|
-
const signals = [];
|
|
211
|
-
for (const sequence of sequences) {
|
|
212
|
-
if (sequence.steps.length < this.options.minSequenceLength)
|
|
213
|
-
continue;
|
|
214
|
-
const description = sequence.steps.join(" → ");
|
|
215
|
-
signals.push({
|
|
216
|
-
id: randomUUID(),
|
|
217
|
-
type: "missing-workflow-step",
|
|
218
|
-
confidence: 0.6,
|
|
219
|
-
description: `Repeated workflow detected: ${description}`,
|
|
220
|
-
metadata: {
|
|
221
|
-
steps: sequence.steps,
|
|
222
|
-
tenantId: sequence.tenantId,
|
|
223
|
-
occurrences: sequence.count
|
|
224
|
-
},
|
|
225
|
-
evidence: [
|
|
226
|
-
{
|
|
227
|
-
type: "sequence",
|
|
228
|
-
description: "sequential-calls",
|
|
229
|
-
data: { steps: sequence.steps, count: sequence.count }
|
|
230
|
-
}
|
|
231
|
-
]
|
|
232
|
-
});
|
|
233
|
-
}
|
|
234
|
-
return signals;
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
// src/pipeline/evolution-pipeline.ts
|
|
239
|
-
import { EventEmitter } from "node:events";
|
|
240
|
-
class EvolutionPipeline {
|
|
241
|
-
detector;
|
|
242
|
-
aggregator;
|
|
243
|
-
emitter;
|
|
244
|
-
onIntent;
|
|
245
|
-
onSnapshot;
|
|
246
|
-
timer;
|
|
247
|
-
previousMetrics;
|
|
248
|
-
constructor(options = {}) {
|
|
249
|
-
this.detector = options.detector ?? new IntentDetector;
|
|
250
|
-
this.aggregator = options.aggregator ?? new IntentAggregator;
|
|
251
|
-
this.emitter = options.emitter ?? new EventEmitter;
|
|
252
|
-
this.onIntent = options.onIntent;
|
|
253
|
-
this.onSnapshot = options.onSnapshot;
|
|
254
|
-
}
|
|
255
|
-
ingest(sample) {
|
|
256
|
-
this.aggregator.add(sample);
|
|
257
|
-
}
|
|
258
|
-
on(listener) {
|
|
259
|
-
this.emitter.on("event", listener);
|
|
260
|
-
}
|
|
261
|
-
start(intervalMs = 5 * 60 * 1000) {
|
|
262
|
-
this.stop();
|
|
263
|
-
this.timer = setInterval(() => {
|
|
264
|
-
this.run();
|
|
265
|
-
}, intervalMs);
|
|
266
|
-
}
|
|
267
|
-
stop() {
|
|
268
|
-
if (this.timer) {
|
|
269
|
-
clearInterval(this.timer);
|
|
270
|
-
this.timer = undefined;
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
async run() {
|
|
274
|
-
const snapshot = this.aggregator.flush();
|
|
275
|
-
this.emit({
|
|
276
|
-
type: "telemetry.window",
|
|
277
|
-
payload: { sampleCount: snapshot.sampleCount }
|
|
278
|
-
});
|
|
279
|
-
if (this.onSnapshot)
|
|
280
|
-
await this.onSnapshot(snapshot);
|
|
281
|
-
if (!snapshot.sampleCount)
|
|
282
|
-
return;
|
|
283
|
-
const metricSignals = this.detector.detectFromMetrics(snapshot.metrics, this.previousMetrics);
|
|
284
|
-
const sequenceSignals = this.detector.detectSequentialIntents(snapshot.sequences);
|
|
285
|
-
this.previousMetrics = snapshot.metrics;
|
|
286
|
-
const signals = [...metricSignals, ...sequenceSignals];
|
|
287
|
-
for (const signal of signals) {
|
|
288
|
-
if (this.onIntent)
|
|
289
|
-
await this.onIntent(signal);
|
|
290
|
-
this.emit({ type: "intent.detected", payload: signal });
|
|
291
|
-
}
|
|
292
|
-
}
|
|
293
|
-
emit(event) {
|
|
294
|
-
this.emitter.emit("event", event);
|
|
295
|
-
}
|
|
296
|
-
}
|
|
297
|
-
export {
|
|
298
|
-
EvolutionPipeline
|
|
299
|
-
};
|
|
1
|
+
/**
 * Sliding-window telemetry aggregator (minified name `g`; used below as the
 * EvolutionPipeline's default aggregator). Buffers call samples; flush()
 * turns them into per-operation metric summaries plus repeated call-sequence
 * patterns grouped by trace id.
 */
class g {
  windowMs;
  sequenceSampleSize;
  samples = [];

  constructor(options = {}) {
    // Defaults: 15-minute window; sequence detection looks at the most
    // recent 1000 samples at most.
    this.windowMs = options.windowMs ?? 900000;
    this.sequenceSampleSize = options.sequenceSampleSize ?? 1000;
  }

  /** Buffer one telemetry sample until the next flush(). */
  add(sample) {
    this.samples.push(sample);
  }

  /**
   * Drain the buffer and summarize every sample whose timestamp falls inside
   * the window ending at `now`. Samples outside the window are discarded.
   * @returns {{metrics, sequences, sampleCount, windowStart, windowEnd}}
   */
  flush(now = new Date()) {
    const cutoff = now.getTime() - this.windowMs;
    const inWindow = this.samples.filter((s) => s.timestamp.getTime() >= cutoff);
    this.samples.length = 0; // every buffered sample is either summarized or dropped

    const times = inWindow.map((s) => s.timestamp.getTime());
    return {
      metrics: this.aggregateMetrics(inWindow),
      sequences: this.buildSequences(inWindow),
      sampleCount: inWindow.length,
      windowStart: times.length ? new Date(Math.min(...times)) : undefined,
      windowEnd: times.length ? new Date(Math.max(...times)) : undefined,
    };
  }

  /**
   * Group samples by `<name>.v<version>` and compute success/error rates,
   * latency percentiles, and a per-error-code histogram for each group.
   */
  aggregateMetrics(samples) {
    if (!samples.length) return [];

    const groups = new Map();
    for (const sample of samples) {
      const key = `${sample.operation.name}.v${sample.operation.version}`;
      const bucket = groups.get(key) ?? [];
      bucket.push(sample);
      groups.set(key, bucket);
    }

    return [...groups.values()].map((group) => {
      const first = group[0];
      if (!first) throw Error("Empty group in aggregation");

      const latencies = group.map((s) => s.durationMs).sort((a, b) => a - b);
      const failures = group.filter((s) => !s.success);
      const total = group.length;
      // Histogram of error codes among failed calls; failures without a code are skipped.
      const topErrors = failures.reduce((acc, failure) => {
        if (!failure.errorCode) return acc;
        acc[failure.errorCode] = (acc[failure.errorCode] ?? 0) + 1;
        return acc;
      }, {});
      const times = group.map((s) => s.timestamp.getTime());

      return {
        operation: first.operation,
        totalCalls: total,
        successRate: (total - failures.length) / total,
        errorRate: failures.length / total,
        averageLatencyMs: latencies.reduce((sum, v) => sum + v, 0) / total,
        p95LatencyMs: u(latencies, 0.95),
        p99LatencyMs: u(latencies, 0.99),
        maxLatencyMs: Math.max(...latencies),
        windowStart: new Date(Math.min(...times)),
        windowEnd: new Date(Math.max(...times)),
        topErrors,
      };
    });
  }

  /**
   * Detect repeated multi-step call sequences. Samples (capped at the most
   * recent `sequenceSampleSize`) are grouped by traceId, ordered by time, and
   * keyed as `step1>step2>…@tenant` so identical workflows are counted
   * together. Returns patterns sorted by descending occurrence count.
   */
  buildSequences(samples) {
    const byTrace = new Map();
    for (const sample of samples.slice(-this.sequenceSampleSize)) {
      if (!sample.traceId) continue; // sequences require trace correlation
      const trace = byTrace.get(sample.traceId) ?? [];
      trace.push(sample);
      byTrace.set(sample.traceId, trace);
    }

    const patterns = {};
    for (const trace of byTrace.values()) {
      const ordered = trace.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());
      const steps = ordered.map((s) => s.operation.name);
      if (steps.length < 2) continue; // a single call is not a sequence
      const key = `${steps.join(">")}@${ordered[0]?.tenantId ?? "global"}`;
      const existing = patterns[key];
      if (existing) {
        existing.count += 1;
      } else {
        patterns[key] = { steps, tenantId: ordered[0]?.tenantId, count: 1 };
      }
    }
    return Object.values(patterns).sort((a, b) => b.count - a.count);
  }
}

/**
 * Percentile of an ascending-sorted numeric array: the element at index
 * floor(p * length), clamped to the last element. Returns 0 for empty input.
 */
function u(sorted, p) {
  if (!sorted.length) return 0;
  if (sorted.length === 1) return sorted[0] ?? 0;
  const index = Math.min(sorted.length - 1, Math.floor(p * sorted.length));
  return sorted[index] ?? 0;
}

import { randomUUID as d } from "crypto";

// Default intent-detection thresholds (minified name `h`), used when the
// caller does not override them in the detector constructor.
var h = {
  errorRateThreshold: 0.05,
  latencyP99ThresholdMs: 750,
  throughputDropThreshold: 0.3,
  minSequenceLength: 3,
};

/**
 * Intent detector (minified name `m`). Turns aggregated metrics and call
 * sequences into "intent signals": error spikes, latency regressions,
 * throughput drops, and repeated workflows.
 */
class m {
  options;

  constructor(options = {}) {
    this.options = {
      errorRateThreshold: options.errorRateThreshold ?? h.errorRateThreshold,
      latencyP99ThresholdMs: options.latencyP99ThresholdMs ?? h.latencyP99ThresholdMs,
      throughputDropThreshold: options.throughputDropThreshold ?? h.throughputDropThreshold,
      minSequenceLength: options.minSequenceLength ?? h.minSequenceLength,
    };
  }

  /**
   * Compare current window metrics against the thresholds (and against the
   * previous window, when given, for throughput drops). At most one signal
   * per operation: error spike takes precedence over latency regression,
   * which takes precedence over a throughput drop.
   */
  detectFromMetrics(metrics, previousMetrics) {
    const signals = [];
    const baseline = new Map(
      (previousMetrics ?? []).map((metric) => [
        `${metric.operation.name}.v${metric.operation.version}`,
        metric,
      ]),
    );

    for (const metric of metrics) {
      if (metric.errorRate >= this.options.errorRateThreshold) {
        signals.push({
          id: d(),
          type: "error-spike",
          operation: metric.operation,
          confidence: Math.min(1, metric.errorRate / this.options.errorRateThreshold),
          description: `Error rate ${metric.errorRate.toFixed(2)} exceeded threshold`,
          metadata: { errorRate: metric.errorRate, topErrors: metric.topErrors },
          evidence: [
            {
              type: "metric",
              description: "error-rate",
              data: { errorRate: metric.errorRate, threshold: this.options.errorRateThreshold },
            },
          ],
        });
        continue;
      }
      if (metric.p99LatencyMs >= this.options.latencyP99ThresholdMs) {
        signals.push({
          id: d(),
          type: "latency-regression",
          operation: metric.operation,
          confidence: Math.min(1, metric.p99LatencyMs / this.options.latencyP99ThresholdMs),
          description: `P99 latency ${metric.p99LatencyMs}ms exceeded threshold`,
          metadata: { p99LatencyMs: metric.p99LatencyMs },
          evidence: [
            {
              type: "metric",
              description: "p99-latency",
              data: { p99LatencyMs: metric.p99LatencyMs, threshold: this.options.latencyP99ThresholdMs },
            },
          ],
        });
        continue;
      }
      const previous = baseline.get(`${metric.operation.name}.v${metric.operation.version}`);
      if (previous) {
        // Fractional drop vs. the previous window (guard against divide-by-zero).
        const drop = (previous.totalCalls - metric.totalCalls) / Math.max(previous.totalCalls, 1);
        if (drop >= this.options.throughputDropThreshold) {
          signals.push({
            id: d(),
            type: "throughput-drop",
            operation: metric.operation,
            confidence: Math.min(1, drop / this.options.throughputDropThreshold),
            description: `Throughput dropped ${(drop * 100).toFixed(1)}% vs baseline`,
            metadata: { baselineCalls: previous.totalCalls, currentCalls: metric.totalCalls },
            evidence: [
              {
                type: "metric",
                description: "throughput-drop",
                data: { baselineCalls: previous.totalCalls, currentCalls: metric.totalCalls },
              },
            ],
          });
        }
      }
    }
    return signals;
  }

  /**
   * Flag repeated workflows of at least `minSequenceLength` steps as
   * candidate missing-workflow-step intents (fixed 0.6 confidence).
   */
  detectSequentialIntents(sequences) {
    const signals = [];
    for (const sequence of sequences) {
      if (sequence.steps.length < this.options.minSequenceLength) continue;
      const pretty = sequence.steps.join(" → ");
      signals.push({
        id: d(),
        type: "missing-workflow-step",
        confidence: 0.6,
        description: `Repeated workflow detected: ${pretty}`,
        metadata: { steps: sequence.steps, tenantId: sequence.tenantId, occurrences: sequence.count },
        evidence: [
          {
            type: "sequence",
            description: "sequential-calls",
            data: { steps: sequence.steps, count: sequence.count },
          },
        ],
      });
    }
    return signals;
  }
}

import { EventEmitter as y } from "node:events";

/**
 * EvolutionPipeline (the module's sole export). Periodically flushes the
 * aggregator, runs intent detection, and publishes results via optional
 * callbacks and an "event" EventEmitter channel (`telemetry.window`,
 * `intent.detected`, and — should a scheduled run reject — `pipeline.error`).
 */
class S {
  detector;
  aggregator;
  emitter;
  onIntent;
  onSnapshot;
  timer;
  previousMetrics;

  constructor(options = {}) {
    this.detector = options.detector ?? new m();
    this.aggregator = options.aggregator ?? new g();
    this.emitter = options.emitter ?? new y();
    this.onIntent = options.onIntent;
    this.onSnapshot = options.onSnapshot;
  }

  /** Buffer one telemetry sample for the next scheduled run. */
  ingest(sample) {
    this.aggregator.add(sample);
  }

  /** Subscribe to pipeline events on the shared "event" channel. */
  on(listener) {
    this.emitter.on("event", listener);
  }

  /**
   * Start the periodic run loop (default: every 5 minutes). Restarts the
   * timer if one is already running.
   */
  start(intervalMs = 300000) {
    this.stop();
    this.timer = setInterval(() => {
      // FIX: run() is async; the original invoked it unawaited, so a
      // rejection from onSnapshot/onIntent became an unhandled promise
      // rejection (fatal by default on modern Node). Surface failures on
      // the event channel instead of crashing the process.
      this.run().catch((error) => {
        this.emit({ type: "pipeline.error", payload: { error } });
      });
    }, intervalMs);
  }

  /** Stop the periodic run loop (no-op when not running). */
  stop() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = undefined;
    }
  }

  /**
   * One pipeline pass: flush the window, announce it, notify onSnapshot,
   * then detect and publish intent signals. Detection is skipped entirely
   * when the window held no samples.
   */
  async run() {
    const snapshot = this.aggregator.flush();
    this.emit({ type: "telemetry.window", payload: { sampleCount: snapshot.sampleCount } });
    if (this.onSnapshot) await this.onSnapshot(snapshot);
    if (!snapshot.sampleCount) return;

    const metricSignals = this.detector.detectFromMetrics(snapshot.metrics, this.previousMetrics);
    const sequenceSignals = this.detector.detectSequentialIntents(snapshot.sequences);
    // Current metrics become next run's throughput baseline.
    this.previousMetrics = snapshot.metrics;

    for (const signal of [...metricSignals, ...sequenceSignals]) {
      if (this.onIntent) await this.onIntent(signal);
      this.emit({ type: "intent.detected", payload: signal });
    }
  }

  /** Publish one event object on the shared "event" channel. */
  emit(event) {
    this.emitter.emit("event", event);
  }
}

export { S as EvolutionPipeline };
|