@resolveio/server-lib 22.3.123 → 22.3.125
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/collections/ai-run.collection.d.ts +3 -0
- package/collections/ai-run.collection.js +170 -0
- package/collections/ai-run.collection.js.map +1 -0
- package/managers/ai-run-evidence.manager.d.ts +36 -0
- package/managers/ai-run-evidence.manager.js +377 -0
- package/managers/ai-run-evidence.manager.js.map +1 -0
- package/managers/openai-usage-ledger.manager.d.ts +1 -0
- package/managers/openai-usage-ledger.manager.js +5 -56
- package/managers/openai-usage-ledger.manager.js.map +1 -1
- package/models/ai-run.model.d.ts +19 -0
- package/models/ai-run.model.js +4 -0
- package/models/ai-run.model.js.map +1 -0
- package/package.json +3 -1
- package/public_api.d.ts +7 -0
- package/public_api.js +7 -0
- package/public_api.js.map +1 -1
- package/util/ai-run-evidence-adapters.d.ts +33 -0
- package/util/ai-run-evidence-adapters.js +660 -0
- package/util/ai-run-evidence-adapters.js.map +1 -0
- package/util/ai-run-evidence-dashboard.d.ts +67 -0
- package/util/ai-run-evidence-dashboard.js +309 -0
- package/util/ai-run-evidence-dashboard.js.map +1 -0
- package/util/ai-run-evidence-eval.d.ts +86 -0
- package/util/ai-run-evidence-eval.js +854 -0
- package/util/ai-run-evidence-eval.js.map +1 -0
- package/util/ai-run-evidence.d.ts +212 -0
- package/util/ai-run-evidence.js +649 -0
- package/util/ai-run-evidence.js.map +1 -0
- package/util/ai-runner-qa-tools.d.ts +1 -0
- package/util/ai-runner-qa-tools.js +150 -16
- package/util/ai-runner-qa-tools.js.map +1 -1
- package/util/openai-usage-cost.d.ts +6 -0
- package/util/openai-usage-cost.js +92 -0
- package/util/openai-usage-cost.js.map +1 -0
|
@@ -0,0 +1,854 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Down-level iteration helper: returns an iterator for `source`.
 *
 * Reuses an already-installed `this.__values` when one exists. Otherwise it
 * prefers the object's own Symbol.iterator method and falls back to walking
 * any object that exposes a numeric `length`.
 *
 * @throws {TypeError} when `source` is neither iterable nor array-like.
 */
var __values = (this && this.__values) || function (source) {
    var iteratorKey = typeof Symbol === "function" && Symbol.iterator;
    var factory = iteratorKey && source[iteratorKey];
    var cursor = 0;
    if (factory) {
        return factory.call(source);
    }
    if (source && typeof source.length === "number") {
        return {
            next: function () {
                // Dropping the reference once exhausted makes every later call report done.
                if (source && cursor >= source.length) {
                    source = void 0;
                }
                return { value: source && source[cursor++], done: !source };
            }
        };
    }
    throw new TypeError(iteratorKey ? "Object is not iterable." : "Symbol.iterator is not defined.");
};
|
|
13
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
14
|
+
exports.AI_RUN_SEED_EVAL_FIXTURES = void 0;
|
|
15
|
+
exports.classifyAIRunFailureClass = classifyAIRunFailureClass;
|
|
16
|
+
exports.evaluateAIRunFixture = evaluateAIRunFixture;
|
|
17
|
+
exports.evaluateAIRunFixtures = evaluateAIRunFixtures;
|
|
18
|
+
exports.buildAIRunTrainingExampleFromRun = buildAIRunTrainingExampleFromRun;
|
|
19
|
+
exports.buildAIRunDatasetExport = buildAIRunDatasetExport;
|
|
20
|
+
exports.decideAIRunRetry = decideAIRunRetry;
|
|
21
|
+
var ai_run_evidence_1 = require("./ai-run-evidence");
|
|
22
|
+
/**
 * Turns any value into trimmed single-line text.
 *
 * Falsy values become the empty string, every run of whitespace collapses to
 * one space, and the result is cut to at most `max` characters.
 *
 * @param {*} value - Value to stringify.
 * @param {number} [max=1000] - Maximum length of the returned text.
 * @returns {string} The normalized text.
 */
function cleanText(value, max) {
    var limit = max === undefined ? 1000 : max;
    var text = String(value || '');
    var collapsed = text.replace(/\s+/g, ' ').trim();
    return collapsed.slice(0, limit);
}
|
|
26
|
+
/**
 * Counts how often each value occurs.
 *
 * Counting happens in a prototype-less map so that values which collide with
 * Object.prototype members ("constructor", "toString", ...) start from zero
 * instead of picking up the inherited member. The totals are then copied into
 * a plain object so callers keep receiving an ordinary object.
 *
 * @param {Iterable<string>|ArrayLike<string>} values - Values to tally; each is used as a property key.
 * @returns {Object<string, number>} Map of value to occurrence count.
 */
function countBy(values) {
    var tally = Object.create(null);
    Array.from(values).forEach(function (value) {
        tally[value] = (tally[value] || 0) + 1;
    });
    var result = {};
    // for...in is safe here: the tally has no prototype, so only counted keys are visited.
    for (var key in tally) {
        result[key] = tally[key];
    }
    return result;
}
|
|
44
|
+
/**
 * Resolves a timestamp to an ISO-8601 string.
 *
 * A Date instance is used directly, any other truthy value is passed to the
 * Date constructor, and a falsy value means "now". Whenever the resulting
 * date is invalid the current time is used instead. This includes an invalid
 * Date instance, which would otherwise make toISOString() throw a RangeError.
 *
 * @param {Date|string|number} [value] - Timestamp to format.
 * @returns {string} ISO-8601 timestamp.
 */
function isoNow(value) {
    var candidate;
    if (value instanceof Date) {
        candidate = value;
    }
    else if (value) {
        candidate = new Date(value);
    }
    else {
        candidate = new Date();
    }
    if (Number.isFinite(candidate.getTime())) {
        return candidate.toISOString();
    }
    return new Date().toISOString();
}
|
|
54
|
+
/**
 * Appends the normalized form of `value` to `values` unless it is empty or
 * already present. Mutates `values` in place and returns nothing.
 *
 * @param {string[]} values - Collection to append to.
 * @param {*} value - Candidate entry; normalized with cleanText first.
 * @param {number} [max=1000] - Length cap forwarded to cleanText.
 */
function addUnique(values, value, max) {
    var limit = max === undefined ? 1000 : max;
    var normalized = cleanText(value, limit);
    if (!normalized) {
        return;
    }
    if (values.indexOf(normalized) === -1) {
        values.push(normalized);
    }
}
|
|
61
|
+
/**
 * Maps a run onto a failure class.
 *
 * Precedence, first match wins:
 *   1. Run outcomes with a fixed class (accepted, qa_infra_failed, ...).
 *   2. Rejected or QA-incomplete runs from the `ai_assistant` source.
 *   3. The QA outcome, when the run carries a `qa` object.
 *   4. Any remaining rejected or QA-incomplete run counts as `business`.
 *
 * @param {object} run - Run to classify; reads `outcome`, `source` and `qa.outcome`.
 * @returns {string} One of none, infra, compile, release, false_pass, manual,
 *   stopped, assistant_correctness, route, business or unknown.
 */
function classifyAIRunFailureClass(run) {
    switch (run.outcome) {
        case 'accepted':
            return 'none';
        case 'qa_infra_failed':
            return 'infra';
        case 'build_failed':
            return 'compile';
        case 'release_blocked':
            return 'release';
        case 'false_pass':
            return 'false_pass';
        case 'manual_handoff':
            return 'manual';
        case 'stopped':
            return 'stopped';
        default:
            break;
    }
    var unresolved = run.outcome === 'rejected' || run.outcome === 'qa_incomplete';
    if (unresolved && run.source === 'ai_assistant') {
        return 'assistant_correctness';
    }
    var qa = run.qa;
    var qaOutcome = (qa === null || qa === undefined) ? undefined : qa.outcome;
    switch (qaOutcome) {
        case 'compile_failed':
            return 'compile';
        case 'route_failed':
            return 'route';
        case 'route_only_pass':
        case 'incomplete':
        case 'business_assertion_failed':
            return 'business';
        default:
            break;
    }
    return unresolved ? 'business' : 'unknown';
}
|
|
107
|
+
/**
 * Checks one fixture's run against its expectations.
 *
 * Compared, in order: the run outcome, the failure class (only when one is
 * expected), a case-insensitive substring of the next action (only when one
 * is expected), and every expected gate (presence, plus status when given).
 *
 * @param {object} fixture - Fixture carrying `id`, `title`, `source`, `tags`,
 *   the `run` under test and the `expected` values.
 * @returns {object} Result with `passed`, the actual/expected outcome and
 *   failure class, the list of human-readable `failures`, and the fixture's
 *   identifying fields.
 */
function evaluateAIRunFixture(fixture) {
    var run = fixture.run;
    var expected = fixture.expected;
    var failures = [];
    var actualFailureClass = classifyAIRunFailureClass(run);
    if (run.outcome !== expected.outcome) {
        failures.push("Expected outcome ".concat(expected.outcome, ", got ").concat(run.outcome, "."));
    }
    if (expected.failureClass && actualFailureClass !== expected.failureClass) {
        failures.push("Expected failure class ".concat(expected.failureClass, ", got ").concat(actualFailureClass, "."));
    }
    if (expected.nextActionIncludes) {
        var expectedText = expected.nextActionIncludes.toLowerCase();
        var actualText = cleanText(run.nextAction, 2000).toLowerCase();
        if (!actualText.includes(expectedText)) {
            failures.push("Expected next action to include \"".concat(expected.nextActionIncludes, "\", got \"").concat(run.nextAction || '', "\"."));
        }
    }
    // Prototype-less map: a gate key such as "constructor" must not look
    // present just because Object.prototype defines a member of that name.
    var gatesByKey = Object.create(null);
    Array.from(run.gates || []).forEach(function (gate) {
        gatesByKey[gate.key] = gate.status;
    });
    Array.from(expected.gates || []).forEach(function (expectedGate) {
        var actualStatus = gatesByKey[expectedGate.key];
        // A gate recorded without a status is reported the same way as a missing gate.
        if (!actualStatus) {
            failures.push("Expected gate ".concat(expectedGate.key, " to be present."));
        }
        else if (expectedGate.status && actualStatus !== expectedGate.status) {
            failures.push("Expected gate ".concat(expectedGate.key, " status ").concat(expectedGate.status, ", got ").concat(actualStatus, "."));
        }
    });
    return {
        id: fixture.id,
        title: fixture.title,
        source: fixture.source,
        passed: failures.length === 0,
        actualOutcome: run.outcome,
        expectedOutcome: expected.outcome,
        actualFailureClass: actualFailureClass,
        expectedFailureClass: expected.failureClass,
        failures: failures,
        tags: fixture.tags
    };
}
|
|
169
|
+
/**
 * Evaluates a list of fixtures and aggregates the results.
 *
 * @param {object[]} [fixtures] - Fixtures to evaluate; defaults to the
 *   exported AI_RUN_SEED_EVAL_FIXTURES.
 * @returns {object} Summary with the overall `passed` flag, `total`,
 *   `passedCount`, `failedCount`, per-fixture `results`, failure counts per
 *   tag (`failuresByTag`) and counts per actual outcome (`outcomes`).
 */
function evaluateAIRunFixtures(fixtures) {
    if (fixtures === undefined) {
        fixtures = exports.AI_RUN_SEED_EVAL_FIXTURES;
    }
    var results = fixtures.map(function (fixture) { return evaluateAIRunFixture(fixture); });
    var failuresByTag = {};
    for (var index = 0; index < results.length; index++) {
        var result = results[index];
        if (result.passed) {
            continue;
        }
        // A failing fixture without tags must not abort the whole evaluation.
        Array.from(result.tags || []).forEach(function (tag) {
            failuresByTag[tag] = (failuresByTag[tag] || 0) + 1;
        });
    }
    var outcomes = countBy(results.map(function (entry) { return entry.actualOutcome; }));
    var passedCount = results.filter(function (entry) { return entry.passed; }).length;
    return {
        passed: passedCount === results.length,
        total: results.length,
        passedCount: passedCount,
        failedCount: results.length - passedCount,
        results: results,
        failuresByTag: failuresByTag,
        outcomes: outcomes
    };
}
|
|
214
|
+
/**
 * Condenses a run into a training example.
 *
 * Evidence lines are collected from every gate, then from events, then from
 * QA business assertions. Duplicates are dropped, and collection from events
 * and assertions stops once `maxEvidenceItems` lines exist. The final list is
 * always cut to `maxEvidenceItems`.
 *
 * @param {object} run - Run to summarize.
 * @param {number} [maxEvidenceItems=30] - Upper bound on evidence lines.
 * @returns {object} Example with `runId`, `source`, `inputSummary`,
 *   `actionSummary`, `outcome`, `evidence`, `cost` and `metadata`.
 */
function buildAIRunTrainingExampleFromRun(run, maxEvidenceItems) {
    if (maxEvidenceItems === undefined) {
        maxEvidenceItems = 30;
    }
    var qa = run.qa;
    var hasQa = qa !== null && qa !== undefined;
    // Guarded once so both the id fallback chain and the summary tolerate a run without sourceIds.
    var sourceIds = run.sourceIds || {};
    var evidence = [];
    Array.from(run.gates || []).forEach(function (gate) {
        addUnique(evidence, "".concat(gate.key, ":").concat(gate.status, ": ").concat(gate.reason), 1600);
    });
    var events = Array.from(run.events || []);
    for (var eventIndex = 0; eventIndex < events.length; eventIndex++) {
        var runEvent = events[eventIndex];
        addUnique(evidence, "".concat(runEvent.type, ":").concat(runEvent.category || '', ": ").concat(runEvent.message || ''), 1400);
        if (evidence.length >= maxEvidenceItems) {
            break;
        }
    }
    if (evidence.length < maxEvidenceItems) {
        var assertions = Array.from((hasQa ? qa.businessAssertions : undefined) || []);
        for (var assertionIndex = 0; assertionIndex < assertions.length; assertionIndex++) {
            var assertion = assertions[assertionIndex];
            addUnique(evidence, "assertion:".concat(assertion.status, ": ").concat(assertion.assertion, " observed=").concat(assertion.observed || assertion.dataProof || assertion.message || ''), 1600);
            if (evidence.length >= maxEvidenceItems) {
                break;
            }
        }
    }
    return {
        runId: run.id || sourceIds.jobId || sourceIds.ticketId || sourceIds.appId || sourceIds.conversationId || "".concat(run.source, ":unknown"),
        source: run.source,
        inputSummary: [
            "source=".concat(run.source),
            "title=".concat(run.title || ''),
            "status=".concat(run.status || ''),
            "phase=".concat(run.phase || ''),
            "ids=".concat(JSON.stringify(sourceIds))
        ].join(' | '),
        actionSummary: [
            "outcome=".concat(run.outcome),
            "failureClass=".concat(classifyAIRunFailureClass(run)),
            "nextAction=".concat(run.nextAction || '')
        ].join(' | '),
        outcome: run.outcome,
        // Gates are not capped while collecting, so the cut is enforced here.
        evidence: evidence.slice(0, maxEvidenceItems),
        cost: run.cost,
        metadata: {
            warnings: run.warnings || [],
            gateCount: (run.gates || []).length,
            eventCount: (run.events || []).length,
            qaOutcome: hasQa ? qa.outcome : undefined
        }
    };
}
|
|
292
|
+
/**
 * Builds a dataset export for a list of runs.
 *
 * Every run becomes a training example, passed through
 * redactAIRunTrainingExample unless `options.redact` is exactly `false`.
 * The summary counts runs by source, outcome and failure class and totals
 * the cost fields, treating missing or non-numeric values as 0.
 *
 * @param {object[]} runs - Runs to export.
 * @param {object} [options] - Reads `now`, `maxEvidenceItems` (falsy values
 *   fall back to 30), `redact` and `includeRuns`.
 * @returns {object} Export with `generatedAt`, `totalRuns`, `summary`, `runs`
 *   and `trainingExamples`.
 */
function buildAIRunDatasetExport(runs, options) {
    if (options === undefined) {
        options = {};
    }
    var generatedAt = isoNow(options.now);
    var trainingExamples = runs.map(function (run) {
        var example = buildAIRunTrainingExampleFromRun(run, options.maxEvidenceItems || 30);
        if (options.redact === false) {
            return example;
        }
        return (0, ai_run_evidence_1.redactAIRunTrainingExample)(example);
    });
    // Adds up one numeric field of run.cost across all runs.
    var sumCostField = function (field) {
        return runs.reduce(function (sum, run) {
            var cost = run.cost;
            var raw = (cost === null || cost === undefined) ? undefined : cost[field];
            return sum + (Number(raw) || 0);
        }, 0);
    };
    var estimatedUsd = sumCostField('estimatedUsd');
    var totalTokens = sumCostField('totalTokens');
    var summary = {
        bySource: countBy(runs.map(function (run) { return run.source; })),
        byOutcome: countBy(runs.map(function (run) { return run.outcome; })),
        byFailureClass: countBy(runs.map(function (run) { return classifyAIRunFailureClass(run); })),
        // Rounded to six decimals to keep floating-point noise out of the total.
        estimatedUsd: Number(estimatedUsd.toFixed(6)),
        totalTokens: totalTokens
    };
    return {
        generatedAt: generatedAt,
        totalRuns: runs.length,
        summary: summary,
        // NOTE(review): the raw runs are returned as-is even when redaction is on;
        // only trainingExamples are redacted. Confirm this is intended.
        runs: options.includeRuns === false ? [] : runs,
        trainingExamples: trainingExamples
    };
}
|
|
315
|
+
/**
 * Chooses the follow-up action for a run.
 *
 * Decision order, first match wins:
 *   1. Accepted run: no retry.
 *   2. Failure class `manual`: manual handoff.
 *   3. Failure class `stopped`: stop.
 *   4. Repeated failure: stop. A failure is repeated when the previous run
 *      has the same failure class and either no new evidence was reported or
 *      `sameFailureCount` reached `unchangedFailureLimit` (falsy limit = 2).
 *   5. Retry expected value below `minRetryExpectedValue` (default 0): stop.
 *      The value is `historicalAcceptanceRate - estimatedNextRetryCostUsd`,
 *      rounded to six decimals, and only exists when both inputs are defined.
 *   6. A repair lane picked by failure class, else manual handoff.
 *
 * Except for the accepted case, the run's own `nextAction` takes precedence
 * over the lane's default text.
 *
 * @param {object} input - Reads `currentRun`, `previousRun`,
 *   `sameFailureCount`, `unchangedFailureLimit`, `newEvidenceCount`,
 *   `historicalAcceptanceRate`, `estimatedNextRetryCostUsd` and
 *   `minRetryExpectedValue`.
 * @returns {object} Decision with `action`, `failureClass`, `repeatedFailure`,
 *   `reason`, `nextAction` and `retryExpectedValue`.
 */
function decideAIRunRetry(input) {
    var currentRun = input.currentRun;
    var failureClass = classifyAIRunFailureClass(currentRun);
    var previousFailureClass = input.previousRun ? classifyAIRunFailureClass(input.previousRun) : undefined;
    var sameFailureCount = Number(input.sameFailureCount || 0);
    var unchangedFailureLimit = Number(input.unchangedFailureLimit || 2);
    var noNewEvidence = Number(input.newEvidenceCount || 0) <= 0;
    var repeatedFailure = previousFailureClass === failureClass && (noNewEvidence || sameFailureCount >= unchangedFailureLimit);
    var retryExpectedValue;
    if (input.historicalAcceptanceRate !== undefined && input.estimatedNextRetryCostUsd !== undefined) {
        var rawExpectedValue = Number(input.historicalAcceptanceRate) - Number(input.estimatedNextRetryCostUsd);
        retryExpectedValue = Number(rawExpectedValue.toFixed(6));
    }
    // Shared shape of every non-accepted decision.
    var decide = function (action, reason, fallbackNextAction) {
        return {
            action: action,
            failureClass: failureClass,
            repeatedFailure: repeatedFailure,
            reason: reason,
            nextAction: currentRun.nextAction || fallbackNextAction,
            retryExpectedValue: retryExpectedValue
        };
    };
    if (currentRun.outcome === 'accepted') {
        return {
            action: 'none',
            failureClass: failureClass,
            repeatedFailure: false,
            reason: 'Run is already accepted.',
            nextAction: 'No retry required.',
            retryExpectedValue: retryExpectedValue
        };
    }
    if (failureClass === 'manual') {
        return decide('manual_handoff', 'Run is marked for manual handoff.', 'Ask an operator to resolve the run.');
    }
    if (failureClass === 'stopped') {
        return decide('stop', 'Run was stopped before accepted evidence was recorded.', 'Do not spend another model cycle unless the request is resumed.');
    }
    if (repeatedFailure) {
        return decide('stop', 'The same failure class repeated with no new evidence. Stop model loops until the evidence changes.', 'Inspect the failing gate before retrying.');
    }
    if (retryExpectedValue !== undefined && retryExpectedValue < Number(input.minRetryExpectedValue || 0)) {
        return decide('stop', 'Historical retry expected value is below the configured threshold.', 'Do not retry without new evidence or a cheaper targeted repair.');
    }
    switch (failureClass) {
        case 'infra':
            return decide('retry_infra_repair', 'QA infrastructure failed before business validation.', 'Repair browser, settings, Mongo, ports, or startup before model repair.');
        case 'compile':
            return decide('retry_build_repair', 'Compile/build evidence failed or was stale.', 'Repair build before browser QA.');
        case 'route':
            return decide('retry_route_repair', 'Route probe failed before business proof.', 'Repair route/auth/startup and rerun the exact probe.');
        case 'release':
            return decide('retry_release_repair', 'Business proof exists but deploy, publish, or sample-data gates block release.', 'Repair release gates without another broad model planning loop.');
        case 'business':
        case 'false_pass':
        case 'assistant_correctness':
            return decide('retry_business_repair', 'Business or assistant correctness proof failed or is missing.', 'Repair the exact failing workflow/query and rerun the deterministic assertion.');
        default:
            return decide('manual_handoff', 'Run lacks enough normalized evidence to choose a safe retry lane.', 'Ingest more run evidence before retrying.');
    }
}
|
|
434
|
+
// Seed run: support ticket 004131 with passing compile, route probe and
// business assertion, flagged as explicitly accepted and terminal.
var supportAccepted004131 = (0, ai_run_evidence_1.buildAIRun)({
    id: 'support-004131',
    source: 'support_ticket',
    sourceIds: {
        ticketNumber: '004131',
        ticketId: 'ticket-004131'
    },
    title: '004131 accepted support fix',
    status: 'Resolved',
    phase: 'COMPLETE',
    events: [
        {
            type: 'git_commit',
            message: 'Fix support workflow and verified before/action/after proof.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'pass',
            artifactPath: '.build-output/build-server.log'
        },
        routeProbes: [
            {
                route: '/support/004131',
                status: 'pass',
                screenshot: 'qa-artifacts/004131-after.jpg'
            }
        ],
        businessAssertions: [
            {
                assertion: 'Before/action/after workflow proves the ticket fix.',
                status: 'pass',
                action: 'Run the ticket workflow against seeded data.',
                expected: 'The corrected state is visible.',
                observed: 'The corrected state is visible after the action.',
                artifactPaths: ['qa-artifacts/004131-proof.json']
            }
        ]
    }),
    explicitAccepted: true,
    terminal: true
});
|
|
457
|
+
// Seed run: support ticket 004333 whose QA compile step reports 'fail'.
var supportBuildFailed004333 = (0, ai_run_evidence_1.buildAIRun)({
    id: 'support-004333',
    source: 'support_ticket',
    sourceIds: {
        ticketNumber: '004333',
        ticketId: 'ticket-004333'
    },
    title: '004333 compile failure',
    status: 'Running',
    phase: 'QA',
    events: [
        {
            type: 'log',
            message: 'Angular build failed before browser QA.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'fail',
            artifactPath: '.build-output/build-client.log',
            message: 'TypeScript compile failed.'
        }
    })
});
|
|
469
|
+
// Seed run: support ticket 004341 whose only QA evidence is a blocked
// 'puppeteer_require' infrastructure check.
var supportInfraFailed004341 = (0, ai_run_evidence_1.buildAIRun)({
    id: 'support-004341',
    source: 'support_ticket',
    sourceIds: {
        ticketNumber: '004341',
        ticketId: 'ticket-004341'
    },
    title: '004341 missing Puppeteer',
    status: 'Running',
    phase: 'QA',
    events: [
        {
            type: 'qa_infra',
            message: 'Puppeteer module could not be loaded.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        infraChecks: [
            {
                name: 'puppeteer_require',
                status: 'blocked',
                message: 'Cannot find module puppeteer.'
            }
        ]
    })
});
|
|
481
|
+
// Seed run: support ticket 004421 with a passed scorecard, compile and route
// probe but no business assertions.
var supportFalsePass004421 = (0, ai_run_evidence_1.buildAIRun)({
    id: 'support-004421',
    source: 'support_ticket',
    sourceIds: {
        ticketNumber: '004421',
        ticketId: 'ticket-004421'
    },
    title: '004421 route-only support QA',
    status: 'Passed',
    phase: 'QA',
    events: [
        {
            type: 'scorecard',
            message: 'Scorecard passed after route loaded.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        infraChecks: [
            {
                name: 'chrome_launch',
                status: 'pass'
            }
        ],
        compile: {
            status: 'pass'
        },
        routeProbes: [
            {
                route: '/support/004421',
                status: 'pass',
                screenshot: 'qa-artifacts/004421-route.jpg'
            }
        ]
    }),
    scorecardPassed: true
});
|
|
496
|
+
// Seed run: support ticket 004423 where compile and route probe pass but the
// business assertion has status 'fail'.
var supportBusinessFailed004423 = (0, ai_run_evidence_1.buildAIRun)({
    id: 'support-004423',
    source: 'support_ticket',
    sourceIds: {
        ticketNumber: '004423',
        ticketId: 'ticket-004423'
    },
    title: '004423 business assertion failed',
    status: 'Running',
    phase: 'QA',
    events: [
        {
            type: 'qa_business_assertion',
            message: 'The expected workflow state did not change.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'pass'
        },
        routeProbes: [
            {
                route: '/support/004423',
                status: 'pass'
            }
        ],
        businessAssertions: [
            {
                assertion: 'Ticket workflow changes the requested field.',
                status: 'fail',
                expected: 'Field updates after save.',
                observed: 'Field stayed unchanged.'
            }
        ]
    })
});
|
|
515
|
+
// Seed run: support ticket 004430 whose compile evidence is marked stale.
var supportStaleBuild004430 = (0, ai_run_evidence_1.buildAIRun)({
    id: 'support-004430',
    source: 'support_ticket',
    sourceIds: {
        ticketNumber: '004430',
        ticketId: 'ticket-004430'
    },
    title: '004430 stale build evidence',
    status: 'Running',
    phase: 'QA',
    events: [
        {
            type: 'qa_compile',
            message: 'Build log predates the final code diff.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'stale',
            staleEvidence: true,
            artifactPath: '.build-output/build-client.log'
        }
    })
});
|
|
527
|
+
// Seed run: AI-coder app "Crypto Portfolio Tracker V6 Golden" with passing QA
// evidence and passing deploy, publish and sample-data statuses.
var cryptoPortfolioAccepted = (0, ai_run_evidence_1.buildAIRun)({
    id: 'aicoder-crypto-v6-golden',
    source: 'aicoder_app',
    sourceIds: {
        appId: 'crypto-v6-golden',
        jobId: 'crypto-job-golden'
    },
    title: 'Crypto Portfolio Tracker V6 Golden',
    status: 'Passed',
    phase: 'COMPLETE',
    events: [
        {
            type: 'scorecard',
            message: 'Quality scorecard passed with workflow proof.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'pass'
        },
        routeProbes: [
            {
                route: '/portfolio',
                status: 'pass',
                screenshot: 'qa-artifacts/crypto-dashboard.jpg'
            }
        ],
        businessAssertions: [
            {
                assertion: 'Portfolio dashboard calculates seeded crypto holdings.',
                status: 'pass',
                dataProof: 'Seeded BTC and ETH holdings render totals, allocation, and change.'
            }
        ]
    }),
    scorecardPassed: true,
    deployStatus: 'pass',
    publishStatus: 'published',
    sampleDataStatus: 'pass',
    terminal: true
});
|
|
550
|
+
// Seed run: AI-coder app "Layflat Pressure Optimizer" with passing QA evidence
// but a failed publish status and empty sample data.
var layflatReleaseBlocked = (0, ai_run_evidence_1.buildAIRun)({
    id: 'aicoder-layflat-pressure-optimizer',
    source: 'aicoder_app',
    sourceIds: {
        appId: 'layflat-pressure-optimizer',
        jobId: 'layflat-job'
    },
    title: 'Layflat Pressure Optimizer',
    status: 'Passed',
    phase: 'COMPLETE',
    events: [
        {
            type: 'scorecard',
            message: 'Scorecard passed but sample data/publish gate failed.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'pass'
        },
        routeProbes: [
            {
                route: '/optimizer',
                status: 'pass',
                screenshot: 'qa-artifacts/layflat-route.jpg'
            }
        ],
        businessAssertions: [
            {
                assertion: 'Pressure optimizer computes a layflat hose result.',
                status: 'pass',
                dataProof: 'A seeded hose diameter and flow rate produce PSI loss.'
            }
        ]
    }),
    scorecardPassed: true,
    deployStatus: 'pass',
    publishStatus: 'publish failed',
    sampleDataStatus: 'empty seed collections',
    terminal: true
});
|
|
573
|
+
// Seed run: assistant conversation marked rejected, with a failing business
// assertion about the date window used in the answer.
var assistantWrongDateWindow = (0, ai_run_evidence_1.buildAIRun)({
    id: 'assistant-wrong-month-window',
    source: 'ai_assistant',
    sourceIds: {
        conversationId: 'assistant-date-window'
    },
    title: 'Assistant wrong month/date-window answer',
    status: 'Closed',
    phase: 'ANSWERED',
    events: [
        {
            type: 'assistant_message',
            message: 'Answered with the wrong month for today-sensitive invoice data.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        businessAssertions: [
            {
                assertion: 'Assistant date window uses the actual current date.',
                status: 'fail',
                expected: 'Use June 2026 for current-month windows.',
                observed: 'Used a stale month.'
            }
        ]
    }),
    rejected: true,
    terminal: true
});
|
|
592
|
+
// Seed run: assistant conversation marked rejected, with a failing business
// assertion about an illegal Mongo positional projection.
var assistantIllegalProjection = (0, ai_run_evidence_1.buildAIRun)({
    id: 'assistant-illegal-mongo-projection',
    source: 'ai_assistant',
    sourceIds: {
        conversationId: 'assistant-positional-projection'
    },
    title: 'Assistant illegal Mongo positional projection',
    status: 'Closed',
    phase: 'ANSWERED',
    events: [
        {
            type: 'assistant_message',
            message: 'Mongo query used an illegal positional projection shape.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        businessAssertions: [
            {
                assertion: 'Assistant avoids illegal Mongo positional projection.',
                status: 'fail',
                expected: 'Use legal aggregation or field projection shape.',
                observed: 'Used invalid positional projection.'
            }
        ]
    }),
    rejected: true,
    terminal: true
});
|
|
611
|
+
// Seed run: QA runner in PREFLIGHT with a blocked 'puppeteer_require'
// infrastructure check.
var qaMissingPuppeteer = (0, ai_run_evidence_1.buildAIRun)({
    id: 'qa-missing-puppeteer',
    source: 'qa_runner',
    sourceIds: {
        jobId: 'qa-missing-puppeteer'
    },
    title: 'QA runner missing Puppeteer',
    status: 'Running',
    phase: 'PREFLIGHT',
    events: [
        {
            type: 'qa_infra',
            message: 'Puppeteer failed to require.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        infraChecks: [
            {
                name: 'puppeteer_require',
                status: 'blocked',
                message: 'Cannot find module puppeteer.'
            }
        ]
    })
});
|
|
623
|
+
// Seed run: QA runner in PREFLIGHT with a failing 'chrome_executable'
// infrastructure check.
var qaBadChromePath = (0, ai_run_evidence_1.buildAIRun)({
    id: 'qa-bad-chrome-path',
    source: 'qa_runner',
    sourceIds: {
        jobId: 'qa-bad-chrome-path'
    },
    title: 'QA runner bad Chrome path',
    status: 'Running',
    phase: 'PREFLIGHT',
    events: [
        {
            type: 'qa_infra',
            message: 'Chrome executable path was missing.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        infraChecks: [
            {
                name: 'chrome_executable',
                status: 'fail',
                message: 'Chrome executable not found.',
                path: '/bad/chrome'
            }
        ]
    })
});
|
|
635
|
+
// Fixture run: a 'qa_runner' job in the COMPILE phase whose QA evidence records
// a failed compile step ('npm run build') with a build-log artifact path.
var qaCompileFailure = (0, ai_run_evidence_1.buildAIRun)({
    id: 'qa-compile-failure',
    source: 'qa_runner',
    sourceIds: {
        jobId: 'qa-compile-failure'
    },
    title: 'QA runner compile failure',
    status: 'Running',
    phase: 'COMPILE',
    events: [
        {
            type: 'qa_compile',
            message: 'Compile failed before browser QA.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'fail',
            command: 'npm run build',
            artifactPath: '.build-output/build.log'
        }
    })
});
|
|
647
|
+
// Fixture run: a 'qa_runner' job reported as 'Passed' with scorecardPassed set,
// but whose QA evidence holds only a passing compile and one passing route
// probe -- no business assertions are supplied.
var qaRouteOnlyPass = (0, ai_run_evidence_1.buildAIRun)({
    id: 'qa-route-only-pass',
    source: 'qa_runner',
    sourceIds: {
        jobId: 'qa-route-only-pass'
    },
    title: 'QA runner route-only pass',
    status: 'Passed',
    phase: 'BROWSER_QA',
    events: [
        {
            type: 'qa_route_probe',
            message: 'Route loaded but no business proof exists.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'pass'
        },
        routeProbes: [
            {
                route: '/dashboard',
                status: 'pass',
                screenshot: 'qa-artifacts/dashboard.jpg'
            }
        ]
    }),
    scorecardPassed: true
});
|
|
661
|
+
// Fixture run: a 'qa_runner' job still 'Running' in BROWSER_QA whose single
// route probe passed but is flagged shellOnly.
var qaShellOnlyPage = (0, ai_run_evidence_1.buildAIRun)({
    id: 'qa-shell-only-page',
    source: 'qa_runner',
    sourceIds: {
        jobId: 'qa-shell-only-page'
    },
    title: 'QA runner shell-only page',
    status: 'Running',
    phase: 'BROWSER_QA',
    events: [
        {
            type: 'qa_route_probe',
            message: 'Shell-only route loaded.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'pass'
        },
        routeProbes: [
            {
                route: '/dashboard',
                status: 'pass',
                shellOnly: true,
                screenshot: 'qa-artifacts/shell.jpg'
            }
        ]
    })
});
|
|
674
|
+
// Fixture run: a terminal, 'Failed' 'qa_runner' job in BROWSER_QA. Compile and
// the '/invoices' route probe pass, but the business assertion about non-empty
// invoice data fails (observed: 'Empty table.').
var qaEmptyData = (0, ai_run_evidence_1.buildAIRun)({
    id: 'qa-empty-data',
    source: 'qa_runner',
    sourceIds: {
        jobId: 'qa-empty-data'
    },
    title: 'QA runner empty data',
    status: 'Failed',
    phase: 'BROWSER_QA',
    events: [
        {
            type: 'qa_business_assertion',
            message: 'Workflow rendered empty data.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'pass'
        },
        routeProbes: [
            {
                route: '/invoices',
                status: 'pass'
            }
        ],
        businessAssertions: [
            {
                assertion: 'Invoice dashboard renders non-empty meaningful data.',
                status: 'fail',
                expected: 'At least one meaningful invoice row.',
                observed: 'Empty table.'
            }
        ]
    }),
    terminal: true
});
|
|
694
|
+
// Fixture run: a 'qa_runner' job in the RELEASE phase where compile, the route
// probe and the business assertion all pass and scorecardPassed is true, yet
// publishStatus is 'publish failed'.
// NOTE(review): sourceIds.jobId ('qa-publish-failure') differs from id here,
// unlike the sibling QA fixtures -- confirm this is intentional.
var qaPublishFailureAfterScorecard = (0, ai_run_evidence_1.buildAIRun)({
    id: 'qa-scorecard-pass-publish-failure',
    source: 'qa_runner',
    sourceIds: {
        jobId: 'qa-publish-failure'
    },
    title: 'QA scorecard pass followed by publish failure',
    status: 'Passed',
    phase: 'RELEASE',
    events: [
        {
            type: 'publish',
            message: 'Scorecard passed but publish failed.'
        }
    ],
    qa: (0, ai_run_evidence_1.buildAIQaRun)({
        compile: {
            status: 'pass'
        },
        routeProbes: [
            {
                route: '/app',
                status: 'pass'
            }
        ],
        businessAssertions: [
            {
                assertion: 'Primary app workflow runs.',
                status: 'pass',
                dataProof: 'Workflow action returned expected state.'
            }
        ]
    }),
    scorecardPassed: true,
    publishStatus: 'publish failed',
    sampleDataStatus: 'pass'
});
|
|
715
|
+
exports.AI_RUN_SEED_EVAL_FIXTURES = [
|
|
716
|
+
{
|
|
717
|
+
id: 'support-004131',
|
|
718
|
+
title: 'Support 004131 accepted proof',
|
|
719
|
+
source: 'support_ticket',
|
|
720
|
+
run: supportAccepted004131,
|
|
721
|
+
expected: { outcome: 'accepted', failureClass: 'none', gates: [{ key: 'qa_business_assertion', status: 'pass' }] },
|
|
722
|
+
tags: ['support', 'accepted', 'business_assertion']
|
|
723
|
+
},
|
|
724
|
+
{
|
|
725
|
+
id: 'support-004333',
|
|
726
|
+
title: 'Support 004333 compile failure',
|
|
727
|
+
source: 'support_ticket',
|
|
728
|
+
run: supportBuildFailed004333,
|
|
729
|
+
expected: { outcome: 'build_failed', failureClass: 'compile', nextActionIncludes: 'compile' },
|
|
730
|
+
tags: ['support', 'compile']
|
|
731
|
+
},
|
|
732
|
+
{
|
|
733
|
+
id: 'support-004341',
|
|
734
|
+
title: 'Support 004341 Puppeteer infra failure',
|
|
735
|
+
source: 'support_ticket',
|
|
736
|
+
run: supportInfraFailed004341,
|
|
737
|
+
expected: { outcome: 'qa_infra_failed', failureClass: 'infra', nextActionIncludes: 'browser' },
|
|
738
|
+
tags: ['support', 'infra', 'puppeteer']
|
|
739
|
+
},
|
|
740
|
+
{
|
|
741
|
+
id: 'support-004421',
|
|
742
|
+
title: 'Support 004421 route-only false pass',
|
|
743
|
+
source: 'support_ticket',
|
|
744
|
+
run: supportFalsePass004421,
|
|
745
|
+
expected: { outcome: 'false_pass', failureClass: 'false_pass', gates: [{ key: 'qa_business_assertion', status: 'blocked' }] },
|
|
746
|
+
tags: ['support', 'route_only', 'false_pass']
|
|
747
|
+
},
|
|
748
|
+
{
|
|
749
|
+
id: 'support-004423',
|
|
750
|
+
title: 'Support 004423 business assertion failed',
|
|
751
|
+
source: 'support_ticket',
|
|
752
|
+
run: supportBusinessFailed004423,
|
|
753
|
+
expected: { outcome: 'qa_incomplete', failureClass: 'business', nextActionIncludes: 'assertion' },
|
|
754
|
+
tags: ['support', 'business_assertion']
|
|
755
|
+
},
|
|
756
|
+
{
|
|
757
|
+
id: 'support-004430',
|
|
758
|
+
title: 'Support 004430 stale build evidence',
|
|
759
|
+
source: 'support_ticket',
|
|
760
|
+
run: supportStaleBuild004430,
|
|
761
|
+
expected: { outcome: 'build_failed', failureClass: 'compile', nextActionIncludes: 'build' },
|
|
762
|
+
tags: ['support', 'compile', 'stale_build']
|
|
763
|
+
},
|
|
764
|
+
{
|
|
765
|
+
id: 'aicoder-crypto-v6-golden',
|
|
766
|
+
title: 'AICoder Crypto Portfolio Tracker V6 Golden',
|
|
767
|
+
source: 'aicoder_app',
|
|
768
|
+
run: cryptoPortfolioAccepted,
|
|
769
|
+
expected: { outcome: 'accepted', failureClass: 'none', gates: [{ key: 'qa_business_assertion', status: 'pass' }] },
|
|
770
|
+
tags: ['aicoder', 'crypto', 'accepted']
|
|
771
|
+
},
|
|
772
|
+
{
|
|
773
|
+
id: 'aicoder-layflat-pressure-optimizer',
|
|
774
|
+
title: 'AICoder Layflat Pressure Optimizer release blocked',
|
|
775
|
+
source: 'aicoder_app',
|
|
776
|
+
run: layflatReleaseBlocked,
|
|
777
|
+
expected: { outcome: 'release_blocked', failureClass: 'release', gates: [{ key: 'release', status: 'fail' }] },
|
|
778
|
+
tags: ['aicoder', 'layflat', 'release_blocked', 'sample_data']
|
|
779
|
+
},
|
|
780
|
+
{
|
|
781
|
+
id: 'assistant-wrong-month-window',
|
|
782
|
+
title: 'Assistant wrong month/date-window answer',
|
|
783
|
+
source: 'ai_assistant',
|
|
784
|
+
run: assistantWrongDateWindow,
|
|
785
|
+
expected: { outcome: 'rejected', failureClass: 'assistant_correctness' },
|
|
786
|
+
tags: ['assistant', 'date_window']
|
|
787
|
+
},
|
|
788
|
+
{
|
|
789
|
+
id: 'assistant-illegal-mongo-projection',
|
|
790
|
+
title: 'Assistant illegal Mongo positional projection',
|
|
791
|
+
source: 'ai_assistant',
|
|
792
|
+
run: assistantIllegalProjection,
|
|
793
|
+
expected: { outcome: 'rejected', failureClass: 'assistant_correctness' },
|
|
794
|
+
tags: ['assistant', 'mongo_query']
|
|
795
|
+
},
|
|
796
|
+
{
|
|
797
|
+
id: 'qa-missing-puppeteer',
|
|
798
|
+
title: 'QA missing Puppeteer',
|
|
799
|
+
source: 'qa_runner',
|
|
800
|
+
run: qaMissingPuppeteer,
|
|
801
|
+
expected: { outcome: 'qa_infra_failed', failureClass: 'infra' },
|
|
802
|
+
tags: ['qa_runner', 'infra', 'puppeteer']
|
|
803
|
+
},
|
|
804
|
+
{
|
|
805
|
+
id: 'qa-bad-chrome-path',
|
|
806
|
+
title: 'QA bad Chrome path',
|
|
807
|
+
source: 'qa_runner',
|
|
808
|
+
run: qaBadChromePath,
|
|
809
|
+
expected: { outcome: 'qa_infra_failed', failureClass: 'infra' },
|
|
810
|
+
tags: ['qa_runner', 'infra', 'chrome']
|
|
811
|
+
},
|
|
812
|
+
{
|
|
813
|
+
id: 'qa-compile-failure',
|
|
814
|
+
title: 'QA compile failure',
|
|
815
|
+
source: 'qa_runner',
|
|
816
|
+
run: qaCompileFailure,
|
|
817
|
+
expected: { outcome: 'build_failed', failureClass: 'compile' },
|
|
818
|
+
tags: ['qa_runner', 'compile']
|
|
819
|
+
},
|
|
820
|
+
{
|
|
821
|
+
id: 'qa-route-only-pass',
|
|
822
|
+
title: 'QA route-only pass',
|
|
823
|
+
source: 'qa_runner',
|
|
824
|
+
run: qaRouteOnlyPass,
|
|
825
|
+
expected: { outcome: 'false_pass', failureClass: 'false_pass' },
|
|
826
|
+
tags: ['qa_runner', 'route_only', 'false_pass']
|
|
827
|
+
},
|
|
828
|
+
{
|
|
829
|
+
id: 'qa-shell-only-page',
|
|
830
|
+
title: 'QA shell-only page',
|
|
831
|
+
source: 'qa_runner',
|
|
832
|
+
run: qaShellOnlyPage,
|
|
833
|
+
expected: { outcome: 'qa_incomplete', failureClass: 'route' },
|
|
834
|
+
tags: ['qa_runner', 'route', 'shell_only']
|
|
835
|
+
},
|
|
836
|
+
{
|
|
837
|
+
id: 'qa-empty-data',
|
|
838
|
+
title: 'QA empty data',
|
|
839
|
+
source: 'qa_runner',
|
|
840
|
+
run: qaEmptyData,
|
|
841
|
+
expected: { outcome: 'rejected', failureClass: 'business' },
|
|
842
|
+
tags: ['qa_runner', 'empty_data', 'business_assertion']
|
|
843
|
+
},
|
|
844
|
+
{
|
|
845
|
+
id: 'qa-scorecard-pass-publish-failure',
|
|
846
|
+
title: 'QA scorecard pass followed by publish failure',
|
|
847
|
+
source: 'qa_runner',
|
|
848
|
+
run: qaPublishFailureAfterScorecard,
|
|
849
|
+
expected: { outcome: 'release_blocked', failureClass: 'release' },
|
|
850
|
+
tags: ['qa_runner', 'release_blocked', 'publish']
|
|
851
|
+
}
|
|
852
|
+
];
|
|
853
|
+
|
|
854
|
+
//# sourceMappingURL=ai-run-evidence-eval.js.map
|