orchestr8 2.5.0 → 2.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.blueprint/agents/AGENT_BA_CASS.md +42 -19
- package/.blueprint/agents/AGENT_DEVELOPER_CODEY.md +42 -38
- package/.blueprint/agents/AGENT_SPECIFICATION_ALEX.md +45 -0
- package/.blueprint/agents/AGENT_TESTER_NIGEL.md +42 -21
- package/.blueprint/features/feature_adaptive-retry/FEATURE_SPEC.md +239 -0
- package/.blueprint/features/feature_adaptive-retry/IMPLEMENTATION_PLAN.md +48 -0
- package/.blueprint/features/feature_adaptive-retry/story-prompt-modification.md +85 -0
- package/.blueprint/features/feature_adaptive-retry/story-retry-config.md +89 -0
- package/.blueprint/features/feature_adaptive-retry/story-should-retry.md +98 -0
- package/.blueprint/features/feature_adaptive-retry/story-strategy-recommendation.md +85 -0
- package/.blueprint/features/feature_agent-guardrails/FEATURE_SPEC.md +328 -0
- package/.blueprint/features/feature_agent-guardrails/IMPLEMENTATION_PLAN.md +90 -0
- package/.blueprint/features/feature_agent-guardrails/story-citation-requirements.md +50 -0
- package/.blueprint/features/feature_agent-guardrails/story-confidentiality.md +50 -0
- package/.blueprint/features/feature_agent-guardrails/story-escalation-protocol.md +55 -0
- package/.blueprint/features/feature_agent-guardrails/story-source-restrictions.md +50 -0
- package/.blueprint/features/feature_feedback-loop/FEATURE_SPEC.md +347 -0
- package/.blueprint/features/feature_feedback-loop/IMPLEMENTATION_PLAN.md +71 -0
- package/.blueprint/features/feature_feedback-loop/story-feedback-collection.md +63 -0
- package/.blueprint/features/feature_feedback-loop/story-feedback-config.md +61 -0
- package/.blueprint/features/feature_feedback-loop/story-feedback-insights.md +63 -0
- package/.blueprint/features/feature_feedback-loop/story-quality-gates.md +57 -0
- package/.blueprint/features/feature_pipeline-history/FEATURE_SPEC.md +239 -0
- package/.blueprint/features/feature_pipeline-history/IMPLEMENTATION_PLAN.md +71 -0
- package/.blueprint/features/feature_pipeline-history/story-clear-history.md +73 -0
- package/.blueprint/features/feature_pipeline-history/story-display-history.md +75 -0
- package/.blueprint/features/feature_pipeline-history/story-record-execution.md +76 -0
- package/.blueprint/features/feature_pipeline-history/story-show-statistics.md +85 -0
- package/.blueprint/features/feature_pipeline-insights/FEATURE_SPEC.md +288 -0
- package/.blueprint/features/feature_pipeline-insights/IMPLEMENTATION_PLAN.md +65 -0
- package/.blueprint/features/feature_pipeline-insights/story-anomaly-detection.md +71 -0
- package/.blueprint/features/feature_pipeline-insights/story-bottleneck-analysis.md +75 -0
- package/.blueprint/features/feature_pipeline-insights/story-failure-patterns.md +75 -0
- package/.blueprint/features/feature_pipeline-insights/story-json-output.md +75 -0
- package/.blueprint/features/feature_pipeline-insights/story-trend-analysis.md +78 -0
- package/.blueprint/features/feature_validate-command/FEATURE_SPEC.md +209 -0
- package/.blueprint/features/feature_validate-command/IMPLEMENTATION_PLAN.md +59 -0
- package/.blueprint/features/feature_validate-command/story-failure-output.md +61 -0
- package/.blueprint/features/feature_validate-command/story-node-version-check.md +52 -0
- package/.blueprint/features/feature_validate-command/story-run-validation.md +59 -0
- package/.blueprint/features/feature_validate-command/story-success-output.md +50 -0
- package/.blueprint/system_specification/SYSTEM_SPEC.md +248 -0
- package/README.md +182 -38
- package/SKILL.md +333 -23
- package/bin/cli.js +128 -20
- package/package.json +2 -2
- package/src/feedback.js +171 -0
- package/src/history.js +306 -0
- package/src/index.js +57 -2
- package/src/init.js +2 -6
- package/src/insights.js +504 -0
- package/src/retry.js +274 -0
- package/src/validate.js +172 -0
- package/src/skills.js +0 -93
package/src/insights.js
ADDED
|
@@ -0,0 +1,504 @@
|
|
|
1
|
+
const { readHistoryFile, formatDuration } = require('./history');
|
|
2
|
+
|
|
3
|
+
// Stage identifiers examined by the per-stage analyses in this module, in the
// order they are iterated (presumably pipeline execution order — confirm).
const STAGES = ['alex', 'cass', 'nigel', 'codey-plan', 'codey-implement'];
|
|
4
|
+
|
|
5
|
+
/**
 * Arithmetic mean of a list of numbers.
 * @param {number[]} values - Numbers to average
 * @returns {number} The mean, or 0 when the list is empty
 */
function calculateMean(values) {
  const count = values.length;
  if (count === 0) {
    return 0;
  }
  const total = values.reduce((runningSum, value) => runningSum + value, 0);
  return total / count;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Population standard deviation (divides by N, not N - 1) around a given mean.
 * @param {number[]} values - Sample values
 * @param {number} mean - Mean of the values, supplied by the caller
 * @returns {number} The standard deviation, or 0 when the list is empty
 */
function calculateStdDev(values, mean) {
  const count = values.length;
  if (count === 0) {
    return 0;
  }
  const squaredDeviationSum = values.reduce(
    (runningSum, value) => runningSum + (value - mean) ** 2,
    0
  );
  return Math.sqrt(squaredDeviationSum / count);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Finds the stage with the highest average duration across successful runs.
 *
 * Only entries with status 'success' and a `stages` object are considered, and
 * at least three of them are required. A stage sample is used only when its
 * `durationMs` is truthy.
 *
 * @param {Array} history - History entries
 * @returns {object} Either `{ insufficientData, message }`, or the per-stage
 *   averages plus the slowest stage, its share of the summed averages
 *   (rounded to one decimal), a bottleneck flag (> 35%) and an optional
 *   recommendation (> 40%)
 */
function analyzeBottlenecks(history) {
  const completedRuns = history.filter(run => run.status === 'success' && run.stages);
  if (completedRuns.length < 3) {
    return { insufficientData: true, message: 'Insufficient data for bottleneck analysis (need 3+ runs)' };
  }

  const stageAvgs = {};
  let summedAverages = 0;
  let slowestStage = null;
  let slowestAvg = 0;

  for (const stage of STAGES) {
    const samples = [];
    for (const run of completedRuns) {
      const record = run.stages[stage];
      if (record && record.durationMs) {
        samples.push(record.durationMs);
      }
    }

    const stageAvg = calculateMean(samples);
    stageAvgs[stage] = stageAvg;
    summedAverages += stageAvg;

    // Strict comparison: the earliest stage keeps the lead on ties.
    if (stageAvg > slowestAvg) {
      slowestAvg = stageAvg;
      slowestStage = stage;
    }
  }

  let percentage = 0;
  if (summedAverages > 0) {
    percentage = (slowestAvg / summedAverages) * 100;
  }

  let recommendation = null;
  if (percentage > 40) {
    recommendation = `Consider optimizing ${slowestStage} stage to improve pipeline throughput`;
  }

  return {
    stages: stageAvgs,
    bottleneckStage: slowestStage,
    avgDurationMs: slowestAvg,
    percentage: Math.round(percentage * 10) / 10,
    isBottleneck: percentage > 35,
    recommendation
  };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Summarizes failed runs: failure counts per stage and per feature slug, the
 * most common failure stage and the overall failure rate.
 *
 * @param {Array} history - History entries
 * @returns {object} `{ noFailures, message }` when nothing failed; otherwise
 *   `failuresByStage`, `mostCommonStage` (null when no failed run recorded a
 *   stage), `failureCount`, `repeatedFeatures` (slugs that failed more than
 *   once), `failureRate` (percent of all entries, one decimal),
 *   `isHighFailureRate` (> 15%) and an optional `recommendation` (> 20%)
 */
function analyzeFailures(history) {
  const failedRuns = history.filter(e => e.status === 'failed');
  if (failedRuns.length === 0) {
    return { noFailures: true, message: 'No failures recorded' };
  }

  const failuresByStage = {};
  const featureFailures = {};

  for (const entry of failedRuns) {
    if (entry.failedStage) {
      failuresByStage[entry.failedStage] = (failuresByStage[entry.failedStage] || 0) + 1;
    }
    if (entry.slug) {
      featureFailures[entry.slug] = (featureFailures[entry.slug] || 0) + 1;
    }
  }

  // Known stages are checked first, in STAGES order, followed by any other
  // stage names that were recorded, so a failure in a stage outside STAGES is
  // not silently dropped. The strict comparison keeps the earliest candidate
  // on ties.
  const otherStages = Object.keys(failuresByStage).filter(stage => !STAGES.includes(stage));
  let mostCommonStage = null;
  let maxCount = 0;
  for (const stage of [...STAGES, ...otherStages]) {
    const count = failuresByStage[stage] || 0;
    if (count > maxCount) {
      maxCount = count;
      mostCommonStage = stage;
    }
  }

  const repeatedFeatures = Object.entries(featureFailures)
    .filter(([, count]) => count > 1)
    .map(([slug, count]) => ({ slug, count }));

  const totalRuns = history.length;
  const failureRate = (failedRuns.length / totalRuns) * 100;
  const isHighFailureRate = failureRate > 15;

  // Avoid emitting "Review null stage ..." when no failed run named a stage.
  let recommendation = null;
  if (failureRate > 20) {
    recommendation = mostCommonStage
      ? `High failure rate detected. Review ${mostCommonStage} stage for common issues`
      : 'High failure rate detected. Review recent failed runs for common issues';
  }

  return {
    failuresByStage,
    mostCommonStage,
    failureCount: maxCount,
    repeatedFeatures,
    failureRate: Math.round(failureRate * 10) / 10,
    isHighFailureRate,
    recommendation
  };
}
|
|
117
|
+
|
|
118
|
+
/**
 * Flags stage durations in the ten most recent runs that exceed the stage's
 * mean by more than two standard deviations.
 *
 * The mean and standard deviation for a stage are computed over every entry
 * that has a `stages` object and a truthy `durationMs` for that stage; only
 * the last ten such entries are then checked against the threshold.
 *
 * @param {Array} history - History entries
 * @returns {object} `{ insufficientData, message }` with fewer than three
 *   usable runs, `{ noAnomalies, message }` when nothing is flagged, otherwise
 *   `{ anomalies, recommendation }` where each anomaly carries `slug`, `stage`,
 *   `actual`, `expected` (rounded mean) and `deviation` (in standard
 *   deviations, one decimal)
 */
function detectAnomalies(history) {
  const runsWithStages = history.filter(e => e.stages);
  if (runsWithStages.length < 3) {
    return { insufficientData: true, message: 'Insufficient data for anomaly detection' };
  }

  const recentRuns = runsWithStages.slice(-10);
  const anomalies = [];

  for (const stage of STAGES) {
    const durations = [];
    for (const entry of runsWithStages) {
      const durationMs = entry.stages[stage]?.durationMs;
      if (durationMs) {
        durations.push(durationMs);
      }
    }

    const mean = calculateMean(durations);
    const stddev = calculateStdDev(durations, mean);

    // With no spread there is no meaningful deviation to report (and dividing
    // by a zero stddev below would be undefined), so the stage is skipped.
    if (!(stddev > 0)) {
      continue;
    }

    const threshold = mean + 2 * stddev;
    for (const entry of recentRuns) {
      const actual = entry.stages[stage]?.durationMs;
      if (!(actual > threshold)) {
        continue;
      }
      anomalies.push({
        slug: entry.slug,
        stage,
        actual,
        expected: Math.round(mean),
        deviation: Math.round(((actual - mean) / stddev) * 10) / 10
      });
    }
  }

  if (anomalies.length === 0) {
    return { noAnomalies: true, message: 'No anomalies detected in recent runs' };
  }

  return {
    anomalies,
    recommendation: 'Review flagged runs for unusual conditions or environment issues'
  };
}
|
|
175
|
+
|
|
176
|
+
/**
 * Compares the earlier half of the history with the later half to classify
 * success-rate and duration trends.
 *
 * A change of more than 10 percentage points (success rate) or 10 percent
 * (average `totalDurationMs`) in either direction is reported as 'improving'
 * or 'degrading'; anything else is 'stable'. At least six entries are needed.
 * The split assumes `history` is ordered oldest-first — confirm against the
 * history writer.
 *
 * @param {Array} history - History entries
 * @returns {object} `{ insufficientData, message }`, or `successRate` and
 *   `duration` summaries (`trend`, `change`, `first`, `second`) plus an
 *   optional `recommendation`
 */
function analyzeTrends(history) {
  if (history.length < 6) {
    return { insufficientData: true, message: 'Insufficient data for trend analysis (need 6+ runs)' };
  }

  const midpoint = Math.floor(history.length / 2);
  const firstHalf = history.slice(0, midpoint);
  const secondHalf = history.slice(midpoint);

  // Success rate trend
  const successRateOf = runs => runs.filter(e => e.status === 'success').length / runs.length * 100;
  const firstSuccessRate = successRateOf(firstHalf);
  const secondSuccessRate = successRateOf(secondHalf);
  const successRateChange = secondSuccessRate - firstSuccessRate;

  let successTrend = 'stable';
  if (successRateChange > 10) successTrend = 'improving';
  else if (successRateChange < -10) successTrend = 'degrading';

  // Duration trend
  const durationsOf = runs => runs.filter(e => e.totalDurationMs).map(e => e.totalDurationMs);
  const firstDurations = durationsOf(firstHalf);
  const secondDurations = durationsOf(secondHalf);

  const firstAvgDuration = calculateMean(firstDurations);
  const secondAvgDuration = calculateMean(secondDurations);

  // Both halves need duration samples. Without this guard a later half with
  // no recorded durations averages to 0 and shows up as a -100% "improvement".
  const canCompareDurations =
    firstDurations.length > 0 && secondDurations.length > 0 && firstAvgDuration > 0;
  const durationChange = canCompareDurations
    ? ((secondAvgDuration - firstAvgDuration) / firstAvgDuration) * 100
    : 0;

  // Shorter runs are the improvement here, so the signs are inverted
  // relative to the success-rate trend.
  let durationTrend = 'stable';
  if (durationChange < -10) durationTrend = 'improving';
  else if (durationChange > 10) durationTrend = 'degrading';

  // A declining success rate takes precedence over a slowdown.
  let recommendation = null;
  if (successTrend === 'degrading') {
    recommendation = 'Pipeline success rate is declining. Review recent changes for regressions';
  } else if (durationTrend === 'degrading') {
    recommendation = 'Pipeline duration is increasing. Consider performance optimization';
  }

  return {
    successRate: {
      trend: successTrend,
      change: Math.round(successRateChange * 10) / 10,
      first: Math.round(firstSuccessRate * 10) / 10,
      second: Math.round(secondSuccessRate * 10) / 10
    },
    duration: {
      trend: durationTrend,
      change: Math.round(durationChange * 10) / 10,
      first: Math.round(firstAvgDuration),
      second: Math.round(secondAvgDuration)
    },
    recommendation
  };
}
|
|
231
|
+
|
|
232
|
+
/**
 * Renders the analysis results as a plain-text report.
 *
 * @param {object} analysis - Results keyed by `bottlenecks`, `failures`,
 *   `anomalies` and `trends`; only the requested sections are read
 * @param {string[]} sections - Section names to include; an empty list means
 *   every section
 * @returns {string} Report lines joined with newlines
 */
function formatTextOutput(analysis, sections) {
  const includeEverything = sections.length === 0;
  const isRequested = (name) => includeEverything || sections.includes(name);
  // Positive changes get an explicit '+'; negative numbers already carry '-'.
  const signed = (value) => `${value > 0 ? '+' : ''}${value}`;
  const report = ['\nPipeline Insights\n'];

  if (isRequested('bottlenecks')) {
    const bottlenecks = analysis.bottlenecks;
    report.push('BOTTLENECK ANALYSIS');
    if (bottlenecks.insufficientData) {
      report.push(` ${bottlenecks.message}`);
    } else {
      report.push(` Slowest stage: ${bottlenecks.bottleneckStage} (${bottlenecks.percentage}% of pipeline)`);
      report.push(` Average duration: ${formatDuration(bottlenecks.avgDurationMs)}`);
      if (bottlenecks.isBottleneck) {
        report.push(' Status: BOTTLENECK DETECTED');
      }
      if (bottlenecks.recommendation) {
        report.push(` Recommendation: ${bottlenecks.recommendation}`);
      }
    }
    report.push('');
  }

  if (isRequested('failures')) {
    const failures = analysis.failures;
    report.push('FAILURE PATTERNS');
    if (failures.noFailures) {
      report.push(` ${failures.message}`);
    } else {
      report.push(` Most common failure stage: ${failures.mostCommonStage} (${failures.failureCount} failures)`);
      report.push(` Overall failure rate: ${failures.failureRate}%`);
      if (failures.repeatedFeatures.length > 0) {
        report.push(' Features with repeated failures:');
        failures.repeatedFeatures.forEach((feature) => {
          report.push(` - ${feature.slug} (${feature.count} failures)`);
        });
      }
      if (failures.recommendation) {
        report.push(` Recommendation: ${failures.recommendation}`);
      }
    }
    report.push('');
  }

  if (isRequested('anomalies')) {
    const anomalies = analysis.anomalies;
    report.push('ANOMALY DETECTION');
    if (anomalies.insufficientData || anomalies.noAnomalies) {
      // Both outcomes carry their own explanatory message.
      report.push(` ${anomalies.message}`);
    } else {
      report.push(' Anomalous runs detected:');
      anomalies.anomalies.forEach((anomaly) => {
        const actualText = formatDuration(anomaly.actual);
        const expectedText = formatDuration(anomaly.expected);
        report.push(` - ${anomaly.slug}/${anomaly.stage}: ${actualText} (expected ~${expectedText}, ${anomaly.deviation}x stddev)`);
      });
      if (anomalies.recommendation) {
        report.push(` Recommendation: ${anomalies.recommendation}`);
      }
    }
    report.push('');
  }

  if (isRequested('trends')) {
    const trends = analysis.trends;
    report.push('TREND ANALYSIS');
    if (trends.insufficientData) {
      report.push(` ${trends.message}`);
    } else {
      const { successRate, duration } = trends;
      report.push(` Success rate: ${successRate.trend} (${signed(successRate.change)}%)`);
      report.push(` Duration: ${duration.trend} (${signed(duration.change)}%)`);
      if (trends.recommendation) {
        report.push(` Recommendation: ${trends.recommendation}`);
      }
    }
    report.push('');
  }

  return report.join('\n');
}
|
|
314
|
+
|
|
315
|
+
/**
 * Serializes the requested analysis sections as pretty-printed JSON.
 *
 * @param {object} analysis - Results keyed by section name
 * @param {string[]} sections - Section names to include; an empty list means
 *   every section
 * @returns {string} JSON text indented by two spaces
 */
function formatJsonOutput(analysis, sections) {
  const includeEverything = sections.length === 0;
  const payload = {};

  // Fixed order keeps the key order of the output independent of `sections`.
  for (const name of ['bottlenecks', 'failures', 'anomalies', 'trends']) {
    if (includeEverything || sections.includes(name)) {
      payload[name] = analysis[name];
    }
  }

  return JSON.stringify(payload, null, 2);
}
|
|
334
|
+
|
|
335
|
+
/**
 * Reads the pipeline history, runs every analysis and prints the result to
 * the console as text or JSON.
 *
 * @param {object} [options] - Display options
 * @param {boolean} [options.bottlenecks] - Include the bottleneck section
 * @param {boolean} [options.failures] - Include the failure section
 * @param {boolean} [options.anomalies] - Include the anomaly section
 * @param {boolean} [options.trends] - Include the trend section
 * @param {boolean} [options.json] - Print JSON instead of text
 *
 * When no section flag is set, every section is shown.
 */
function displayInsights(options = {}) {
  const history = readHistoryFile();

  // Optional chaining so that a missing history (null/undefined) reaches the
  // "No pipeline history found." branch below instead of throwing here.
  if (history?.error === 'corrupted') {
    console.log("Warning: History file is corrupted. Run 'orchestr8 history clear' to reset.");
    return;
  }

  if (!history || history.length === 0) {
    console.log('No pipeline history found.');
    return;
  }

  const analysis = {
    bottlenecks: analyzeBottlenecks(history),
    failures: analyzeFailures(history),
    anomalies: detectAnomalies(history),
    trends: analyzeTrends(history)
  };

  // Every section name the formatters understand can be requested; an empty
  // list tells the formatters to show everything.
  const sections = ['bottlenecks', 'failures', 'anomalies', 'trends']
    .filter(name => options[name]);

  if (options.json) {
    console.log(formatJsonOutput(analysis, sections));
  } else {
    console.log(formatTextOutput(analysis, sections));
  }
}
|
|
365
|
+
|
|
366
|
+
/**
 * Calculates agent calibration score based on prediction accuracy.
 * Per FEATURE_SPEC.md:Rule 4.
 *
 * A rating of 3 or higher counts as a prediction that the run succeeds; the
 * score is the fraction of rated entries whose prediction matched the run's
 * actual status. Feedback without a usable numeric rating is ignored rather
 * than being treated as a failure prediction.
 *
 * @param {string} agent - Agent name (alex, cass, nigel)
 * @param {Array} history - History entries
 * @returns {number|null} Calibration score 0-1, or null if fewer than 10
 *   rated entries exist for the agent
 */
function calculateCalibration(agent, history) {
  const predictions = [];

  for (const entry of history) {
    const rawRating = entry.stages?.[agent]?.feedback?.rating;
    if (rawRating == null || rawRating === '') continue;

    // Coerce so that numeric strings keep comparing the way `>=` treated them.
    const rating = Number(rawRating);
    if (Number.isNaN(rating)) continue;

    predictions.push({
      predictedSuccess: rating >= 3,
      succeeded: entry.status === 'success'
    });
  }

  if (predictions.length < 10) return null;

  const matches = predictions
    .filter(({ predictedSuccess, succeeded }) => predictedSuccess === succeeded)
    .length;
  return matches / predictions.length;
}
|
|
386
|
+
|
|
387
|
+
/**
 * Correlates issue codes with failure rates.
 *
 * Every occurrence of an issue code in any stage's `feedback.issues` list is
 * counted; the correlation is the share of those occurrences that belong to
 * runs with status 'failed'. Stages without feedback, or whose `issues` value
 * is not an array, are skipped.
 *
 * @param {Array} history - History entries
 * @returns {object} Map of issue code to failure correlation (0-1)
 */
function correlateIssues(history) {
  // A Map keeps the counters clear of names inherited from Object.prototype
  // (an issue code such as 'toString' would otherwise start from a function).
  const tallies = new Map();

  for (const entry of history) {
    const runFailed = entry.status === 'failed';

    for (const stage of Object.values(entry.stages || {})) {
      const issues = stage?.feedback?.issues;
      if (!Array.isArray(issues)) continue;

      for (const issue of issues) {
        const code = String(issue);
        const tally = tallies.get(code) ?? { total: 0, failed: 0 };
        tally.total += 1;
        if (runFailed) tally.failed += 1;
        tallies.set(code, tally);
      }
    }
  }

  const correlations = {};
  for (const [code, tally] of tallies) {
    correlations[code] = tally.failed / tally.total;
  }
  return correlations;
}
|
|
415
|
+
|
|
416
|
+
/**
 * Suggests a minimum rating threshold from historical data.
 *
 * Each candidate threshold (2, 2.5, 3, 3.5, 4) is scored by how many entries
 * it classifies correctly, where "cass rating >= threshold" is read as a
 * success prediction and entries without a cass rating count as rating 3.
 * Candidates are tried in ascending order and the last one that classifies
 * more than 70% of the entries correctly is returned, i.e. the highest
 * qualifying candidate rather than the most accurate one.
 *
 * @param {Array} history - History entries
 * @returns {number} Recommended threshold value (3.0 when no candidate qualifies)
 */
function recommendThreshold(history) {
  const candidates = [2, 2.5, 3, 3.5, 4];
  const requiredHits = history.length * 0.7;
  let best = 3.0;

  candidates.forEach((threshold) => {
    let hits = 0;
    for (const entry of history) {
      const rating = entry.stages?.cass?.feedback?.rating || 3;
      const predictedSuccess = rating >= threshold;
      const succeeded = entry.status === 'success';
      if (predictedSuccess === succeeded) {
        hits += 1;
      }
    }
    if (hits > requiredHits) {
      best = threshold;
    }
  });

  return best;
}
|
|
433
|
+
|
|
434
|
+
/**
 * Displays feedback-specific insights.
 *
 * Prints, in order: the calibration score per agent (alex, cass, nigel), the
 * failure correlation per issue code (highest first, only when any issue was
 * recorded), and a suggested minimum rating threshold (only when at least ten
 * history entries carry feedback on some stage).
 *
 * @param {object} options - Display options (currently not read)
 */
function displayFeedbackInsights(options = {}) {
  const history = readHistoryFile();

  // Optional chaining so that a missing history (null/undefined) reaches the
  // "No pipeline history found." branch below instead of throwing here.
  if (history?.error === 'corrupted') {
    console.log("Warning: History file is corrupted.");
    return;
  }

  if (!history || history.length === 0) {
    console.log('No pipeline history found.');
    return;
  }

  console.log('\nFeedback Insights\n');

  // Agent calibration
  console.log('AGENT CALIBRATION');
  for (const agent of ['alex', 'cass', 'nigel']) {
    const calibration = calculateCalibration(agent, history);
    if (calibration === null) {
      console.log(` ${agent.padEnd(8)}: Insufficient data (<10 runs)`);
    } else {
      const pct = Math.round(calibration * 100);
      console.log(` ${agent.padEnd(8)}: ${pct}% accuracy`);
    }
  }
  console.log('');

  // Issue correlations
  const correlations = correlateIssues(history);
  if (Object.keys(correlations).length > 0) {
    console.log('ISSUE CORRELATIONS');
    const sorted = Object.entries(correlations)
      .sort(([, a], [, b]) => b - a);
    for (const [issue, corr] of sorted) {
      const pct = Math.round(corr * 100);
      console.log(` ${issue.padEnd(24)}: ${pct}% failure rate`);
    }
    console.log('');
  }

  // Threshold recommendation
  // `s?.feedback` tolerates stage slots that are null or undefined.
  const entriesWithFeedback = history.filter(e =>
    Object.values(e.stages || {}).some(s => s?.feedback)
  );
  if (entriesWithFeedback.length >= 10) {
    const recommended = recommendThreshold(history);
    console.log('RECOMMENDATIONS');
    console.log(` Suggested minRatingThreshold: ${recommended}`);
    console.log('');
  }
}
|
|
490
|
+
|
|
491
|
+
// Public API of this module. The text/JSON formatters are internal and are
// not exported.
module.exports = {
  // Pipeline insights: console display, the individual analyses, and the
  // statistics helpers they rely on
  displayInsights,
  analyzeBottlenecks,
  analyzeFailures,
  detectAnomalies,
  analyzeTrends,
  calculateMean,
  calculateStdDev,
  // Feedback analysis exports
  calculateCalibration,
  correlateIssues,
  recommendThreshold,
  displayFeedbackInsights
};
|