@zhixuan92/multi-model-agent-core 3.10.3 → 3.10.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/executors/_shared/findings-schema.d.ts +55 -32
- package/dist/executors/_shared/findings-schema.d.ts.map +1 -1
- package/dist/executors/_shared/findings-schema.js +60 -22
- package/dist/executors/_shared/findings-schema.js.map +1 -1
- package/dist/executors/audit.d.ts.map +1 -1
- package/dist/executors/audit.js +1 -1
- package/dist/executors/audit.js.map +1 -1
- package/dist/executors/debug.d.ts.map +1 -1
- package/dist/executors/debug.js +1 -1
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/review.d.ts.map +1 -1
- package/dist/executors/review.js +1 -1
- package/dist/executors/review.js.map +1 -1
- package/dist/executors/verify.d.ts.map +1 -1
- package/dist/executors/verify.js +1 -1
- package/dist/executors/verify.js.map +1 -1
- package/dist/intake/compilers/audit.d.ts.map +1 -1
- package/dist/intake/compilers/audit.js +2 -2
- package/dist/intake/compilers/audit.js.map +1 -1
- package/dist/intake/compilers/investigate.d.ts.map +1 -1
- package/dist/intake/compilers/investigate.js +1 -2
- package/dist/intake/compilers/investigate.js.map +1 -1
- package/dist/intake/resolve.d.ts +10 -0
- package/dist/intake/resolve.d.ts.map +1 -1
- package/dist/intake/resolve.js +10 -19
- package/dist/intake/resolve.js.map +1 -1
- package/dist/review/fallback-extraction.d.ts +17 -0
- package/dist/review/fallback-extraction.d.ts.map +1 -0
- package/dist/review/fallback-extraction.js +140 -0
- package/dist/review/fallback-extraction.js.map +1 -0
- package/dist/review/parse-reviewer-findings.d.ts +26 -0
- package/dist/review/parse-reviewer-findings.d.ts.map +1 -0
- package/dist/review/parse-reviewer-findings.js +73 -0
- package/dist/review/parse-reviewer-findings.js.map +1 -0
- package/dist/review/quality-only-prompts.d.ts +18 -11
- package/dist/review/quality-only-prompts.d.ts.map +1 -1
- package/dist/review/quality-only-prompts.js +79 -128
- package/dist/review/quality-only-prompts.js.map +1 -1
- package/dist/review/quality-reviewer.d.ts +3 -26
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +54 -141
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/run-tasks/index.d.ts +0 -1
- package/dist/run-tasks/index.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +89 -12
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/runners/base/result-builders.d.ts +1 -1
- package/dist/runners/base/result-builders.d.ts.map +1 -1
- package/dist/telemetry/event-builder.js +2 -2
- package/dist/telemetry/event-builder.js.map +1 -1
- package/dist/telemetry/types.d.ts +7 -0
- package/dist/telemetry/types.d.ts.map +1 -1
- package/dist/telemetry/types.js +6 -5
- package/dist/telemetry/types.js.map +1 -1
- package/dist/types.d.ts +2 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import type { Provider } from '../types.js';
|
|
2
2
|
import type { ParsedStructuredReport } from '../reporting/structured-report.js';
|
|
3
|
-
import
|
|
3
|
+
import type { AnnotatedFinding } from '../executors/_shared/findings-schema.js';
|
|
4
|
+
export { parseReviewerFindings } from './parse-reviewer-findings.js';
|
|
5
|
+
export { fallbackExtractFindings } from './fallback-extraction.js';
|
|
4
6
|
/**
|
|
5
7
|
* Result of the read-only annotation review pass.
|
|
6
8
|
* - 'annotated' — reviewer ran, every worker finding has reviewerConfidence (and optionally reviewerSeverity).
|
|
@@ -40,29 +42,6 @@ export interface QualityReviewMetrics {
|
|
|
40
42
|
}
|
|
41
43
|
/** Backward-compat alias kept until reviewed-lifecycle is migrated to the new shape (Task 6). */
|
|
42
44
|
export type LegacyQualityReviewResult = QualityReviewResult;
|
|
43
|
-
/**
|
|
44
|
-
* Parse worker findings from the worker's raw output. Looks for a ```json block whose
|
|
45
|
-
* content is an array passing workerFindingsSchema. Returns null if absent or invalid.
|
|
46
|
-
*/
|
|
47
|
-
export declare function extractWorkerFindings(workerOutput: string): WorkerFinding[] | null;
|
|
48
|
-
interface AnnotationParseOk {
|
|
49
|
-
ok: true;
|
|
50
|
-
annotated: AnnotatedFinding[];
|
|
51
|
-
}
|
|
52
|
-
interface AnnotationParseErr {
|
|
53
|
-
ok: false;
|
|
54
|
-
reason: string;
|
|
55
|
-
}
|
|
56
|
-
/**
|
|
57
|
-
* Parse the reviewer's response, validate against the worker's findings,
|
|
58
|
-
* and merge to produce AnnotatedFinding[].
|
|
59
|
-
*
|
|
60
|
-
* Validation:
|
|
61
|
-
* - Reviewer output must contain exactly one ```json fenced block (we take the first).
|
|
62
|
-
* - Block content must be a JSON array passing annotationsArraySchema.
|
|
63
|
-
* - Annotation ids must be a permutation of worker ids: no missing, no duplicate, no extra.
|
|
64
|
-
*/
|
|
65
|
-
export declare function parseAndMergeAnnotations(reviewerOutput: string, workerFindings: WorkerFinding[]): AnnotationParseOk | AnnotationParseErr;
|
|
66
45
|
export declare function runQualityReview(reviewerProvider: Provider, packet: {
|
|
67
46
|
prompt: string;
|
|
68
47
|
scope: string[];
|
|
@@ -70,7 +49,5 @@ export declare function runQualityReview(reviewerProvider: Provider, packet: {
|
|
|
70
49
|
}, implReport: ParsedStructuredReport, fileContents: Record<string, string>, toolCallLog: string[], filesWritten: string[], evidenceBlock?: string, qualityReviewPromptBuilder?: (ctx: {
|
|
71
50
|
workerOutput: string;
|
|
72
51
|
brief: string;
|
|
73
|
-
workerFindings: WorkerFinding[];
|
|
74
52
|
}) => string, workerOutput?: string, taskDeadlineMs?: number, abortSignal?: AbortSignal, onProgress?: (e: import('../runners/types.js').InternalRunnerEvent) => void): Promise<QualityReviewResult>;
|
|
75
|
-
export {};
|
|
76
53
|
//# sourceMappingURL=quality-reviewer.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"quality-reviewer.d.ts","sourceRoot":"","sources":["../../src/review/quality-reviewer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"quality-reviewer.d.ts","sourceRoot":"","sources":["../../src/review/quality-reviewer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAG5C,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,mCAAmC,CAAC;AAEhF,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yCAAyC,CAAC;AAIhF,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AAEnE;;;;;;;;GAQG;AACH;;;;;;;;;;GAUG;AACH,MAAM,WAAW,mBAAmB;IAClC,MAAM,EAAE,UAAU,GAAG,kBAAkB,GAAG,WAAW,GAAG,OAAO,GAAG,WAAW,GAAG,eAAe,GAAG,SAAS,GAAG,aAAa,GAAG,SAAS,CAAC;IACxI,iBAAiB,CAAC,EAAE,gBAAgB,EAAE,CAAC;IACvC,MAAM,CAAC,EAAE,sBAAsB,CAAC;IAChC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,iEAAiE;IACjE,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC;AAED,MAAM,WAAW,oBAAoB;IACnC,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;CACjB;AAsBD,iGAAiG;AACjG,MAAM,MAAM,yBAAyB,GAAG,mBAAmB,CAAC;AAE5D,wBAAsB,gBAAgB,CACpC,gBAAgB,EAAE,QAAQ,EAC1B,MAAM,EAAE;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,aAAa,EAAE,MAAM,CAAA;CAAE,EAClE,UAAU,EAAE,sBAAsB,EAClC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EACpC,WAAW,EAAE,MAAM,EAAE,EACrB,YAAY,EAAE,MAAM,EAAE,EACtB,aAAa,CAAC,EAAE,MAAM,EACtB,0BAA0B,CAAC,EAAE,CAAC,GAAG,EAAE;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,KAAK,MAAM,EACrF,YAAY,CAAC,EAAE,MAAM,EACrB,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,WAAW,EACzB,UAAU,CAAC,EAAE,CAAC,CAAC,EAAE,OAAO,qBAAqB,EAAE,mBAAmB,KAAK,IAAI,GAC1E,OAAO,CAAC,mBAAmB,CAAC,CAkE9B"}
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
1
|
import { delegateWithEscalation } from '../delegate-with-escalation.js';
|
|
3
2
|
import { buildQualityReviewPrompt } from './reviewer-prompt.js';
|
|
4
3
|
import { parseStructuredReport } from '../reporting/structured-report.js';
|
|
5
|
-
import {
|
|
4
|
+
import { parseReviewerFindings } from './parse-reviewer-findings.js';
|
|
5
|
+
import { fallbackExtractFindings } from './fallback-extraction.js';
|
|
6
|
+
export { parseReviewerFindings } from './parse-reviewer-findings.js';
|
|
7
|
+
export { fallbackExtractFindings } from './fallback-extraction.js';
|
|
6
8
|
function extractMetrics(r) {
|
|
7
9
|
return {
|
|
8
10
|
inputTokens: r.usage?.inputTokens ?? 0,
|
|
@@ -21,98 +23,6 @@ function addMetrics(a, b) {
|
|
|
21
23
|
costUSD: a.costUSD + b.costUSD,
|
|
22
24
|
};
|
|
23
25
|
}
|
|
24
|
-
const annotationItemSchema = z.object({
|
|
25
|
-
id: z.string().min(1),
|
|
26
|
-
reviewerConfidence: z.number().int().min(0).max(100),
|
|
27
|
-
reviewerSeverity: z.enum(['high', 'medium', 'low']).optional(),
|
|
28
|
-
}).strict();
|
|
29
|
-
const annotationsArraySchema = z.array(annotationItemSchema);
|
|
30
|
-
/**
|
|
31
|
-
* Extract the first ```json fenced code block from a string, or `null` if none found.
|
|
32
|
-
*/
|
|
33
|
-
function extractJsonBlock(output) {
|
|
34
|
-
const match = output.match(/```json\s*\n([\s\S]*?)\n```/);
|
|
35
|
-
return match ? match[1] : null;
|
|
36
|
-
}
|
|
37
|
-
/**
|
|
38
|
-
* Parse worker findings from the worker's raw output. Looks for a ```json block whose
|
|
39
|
-
* content is an array passing workerFindingsSchema. Returns null if absent or invalid.
|
|
40
|
-
*/
|
|
41
|
-
export function extractWorkerFindings(workerOutput) {
|
|
42
|
-
// Try the first json block; if absent, also try matching multiple blocks for
|
|
43
|
-
// resilience against workers that emit example JSON before the real findings.
|
|
44
|
-
const blocks = [...workerOutput.matchAll(/```json\s*\n([\s\S]*?)\n```/g)].map(m => m[1]);
|
|
45
|
-
for (const raw of blocks) {
|
|
46
|
-
try {
|
|
47
|
-
const parsed = JSON.parse(raw);
|
|
48
|
-
if (!Array.isArray(parsed))
|
|
49
|
-
continue;
|
|
50
|
-
const validated = workerFindingsSchema.safeParse(parsed);
|
|
51
|
-
if (validated.success)
|
|
52
|
-
return validated.data;
|
|
53
|
-
}
|
|
54
|
-
catch { /* try next block */ }
|
|
55
|
-
}
|
|
56
|
-
return null;
|
|
57
|
-
}
|
|
58
|
-
/**
|
|
59
|
-
* Parse the reviewer's response, validate against the worker's findings,
|
|
60
|
-
* and merge to produce AnnotatedFinding[].
|
|
61
|
-
*
|
|
62
|
-
* Validation:
|
|
63
|
-
* - Reviewer output must contain exactly one ```json fenced block (we take the first).
|
|
64
|
-
* - Block content must be a JSON array passing annotationsArraySchema.
|
|
65
|
-
* - Annotation ids must be a permutation of worker ids: no missing, no duplicate, no extra.
|
|
66
|
-
*/
|
|
67
|
-
export function parseAndMergeAnnotations(reviewerOutput, workerFindings) {
|
|
68
|
-
const block = extractJsonBlock(reviewerOutput);
|
|
69
|
-
if (block === null) {
|
|
70
|
-
return { ok: false, reason: 'reviewer output missing ```json fenced block' };
|
|
71
|
-
}
|
|
72
|
-
let parsed;
|
|
73
|
-
try {
|
|
74
|
-
parsed = JSON.parse(block);
|
|
75
|
-
}
|
|
76
|
-
catch (err) {
|
|
77
|
-
return { ok: false, reason: `reviewer JSON parse failed: ${err instanceof Error ? err.message : String(err)}` };
|
|
78
|
-
}
|
|
79
|
-
const validated = annotationsArraySchema.safeParse(parsed);
|
|
80
|
-
if (!validated.success) {
|
|
81
|
-
return { ok: false, reason: `annotation array validation failed: ${validated.error.message}` };
|
|
82
|
-
}
|
|
83
|
-
const annotations = validated.data;
|
|
84
|
-
const workerIds = new Set(workerFindings.map(f => f.id));
|
|
85
|
-
const reviewerIds = annotations.map(a => a.id);
|
|
86
|
-
const reviewerIdSet = new Set(reviewerIds);
|
|
87
|
-
if (reviewerIds.length !== reviewerIdSet.size) {
|
|
88
|
-
return { ok: false, reason: 'duplicate id in reviewer annotations' };
|
|
89
|
-
}
|
|
90
|
-
if (reviewerIdSet.size !== workerIds.size) {
|
|
91
|
-
return { ok: false, reason: `annotation count ${reviewerIdSet.size} does not match worker findings count ${workerIds.size}` };
|
|
92
|
-
}
|
|
93
|
-
for (const id of reviewerIdSet) {
|
|
94
|
-
if (!workerIds.has(id)) {
|
|
95
|
-
return { ok: false, reason: `reviewer annotated unknown id: ${id}` };
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
for (const id of workerIds) {
|
|
99
|
-
if (!reviewerIdSet.has(id)) {
|
|
100
|
-
return { ok: false, reason: `reviewer missing annotation for worker id: ${id}` };
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
const byId = new Map(annotations.map(a => [a.id, a]));
|
|
104
|
-
const merged = workerFindings.map(wf => {
|
|
105
|
-
const ann = byId.get(wf.id);
|
|
106
|
-
const out = {
|
|
107
|
-
...wf,
|
|
108
|
-
reviewerConfidence: ann.reviewerConfidence,
|
|
109
|
-
};
|
|
110
|
-
if (ann.reviewerSeverity !== undefined)
|
|
111
|
-
out.reviewerSeverity = ann.reviewerSeverity;
|
|
112
|
-
return out;
|
|
113
|
-
});
|
|
114
|
-
return { ok: true, annotated: merged };
|
|
115
|
-
}
|
|
116
26
|
export async function runQualityReview(reviewerProvider, packet, implReport, fileContents, toolCallLog, filesWritten, evidenceBlock, qualityReviewPromptBuilder, workerOutput, taskDeadlineMs, abortSignal, onProgress) {
|
|
117
27
|
// Read-only annotation path: triggered when caller passed a prompt builder
|
|
118
28
|
// (these are the per-route quality_only prompts in quality-only-prompts.ts).
|
|
@@ -169,60 +79,63 @@ export async function runQualityReview(reviewerProvider, packet, implReport, fil
|
|
|
169
79
|
return { status: 'approved', report, findings: [], metrics };
|
|
170
80
|
}
|
|
171
81
|
async function runAnnotationReview(reviewerProvider, packet, workerOutput, qualityReviewPromptBuilder, taskDeadlineMs, abortSignal, onProgress) {
|
|
172
|
-
// Step 1: extract worker findings from worker output.
|
|
173
|
-
const workerFindings = extractWorkerFindings(workerOutput);
|
|
174
|
-
if (workerFindings === null) {
|
|
175
|
-
return {
|
|
176
|
-
status: 'error',
|
|
177
|
-
findings: [],
|
|
178
|
-
errorReason: 'worker output missing or invalid findings[] JSON block',
|
|
179
|
-
};
|
|
180
|
-
}
|
|
181
|
-
// Step 2: short-circuit when worker found nothing — nothing to annotate.
|
|
182
|
-
if (workerFindings.length === 0) {
|
|
183
|
-
return {
|
|
184
|
-
status: 'annotated',
|
|
185
|
-
annotatedFindings: [],
|
|
186
|
-
findings: [],
|
|
187
|
-
};
|
|
188
|
-
}
|
|
189
|
-
// Step 3: build the route-specific prompt and call the reviewer.
|
|
190
|
-
const prompt = qualityReviewPromptBuilder({ workerOutput, brief: packet.prompt, workerFindings });
|
|
191
82
|
const reviewerSlot = reviewerProvider.name === 'standard' ? 'standard' : 'complex';
|
|
192
|
-
|
|
83
|
+
const basePrompt = qualityReviewPromptBuilder({ workerOutput, brief: packet.prompt });
|
|
193
84
|
let metrics = { inputTokens: 0, outputTokens: 0, turnCount: 0, toolCallCount: 0, costUSD: 0 };
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
85
|
+
// Attempt 1
|
|
86
|
+
const attempt1 = await callReviewer(basePrompt);
|
|
87
|
+
if (attempt1.kind === 'transport') {
|
|
88
|
+
return { status: attempt1.status, findings: [], errorReason: attempt1.errorReason, metrics: attempt1.metrics };
|
|
89
|
+
}
|
|
90
|
+
metrics = addMetrics(metrics, attempt1.metrics);
|
|
91
|
+
const parsed1 = parseReviewerFindings(attempt1.output, workerOutput);
|
|
92
|
+
if (parsed1.ok) {
|
|
93
|
+
return successResult(parsed1.findings, metrics);
|
|
94
|
+
}
|
|
95
|
+
// Attempt 2 — strict reminder
|
|
96
|
+
const reminderPrompt = `${basePrompt}\n\nIMPORTANT: Your previous response was not parseable (${parsed1.reason}). Emit ONLY the findings JSON array now, in a single \`\`\`json fenced code block as the LAST block in your response. No surrounding prose required.`;
|
|
97
|
+
const attempt2 = await callReviewer(reminderPrompt);
|
|
98
|
+
// Round-2 finding #1: transport failure on retry MUST propagate as error, not
|
|
99
|
+
// silently fall back. Fallback is for parse failure of a real response,
|
|
100
|
+
// never for infrastructure failure. Otherwise telemetry hides outages.
|
|
101
|
+
if (attempt2.kind === 'transport') {
|
|
102
|
+
metrics = addMetrics(metrics, attempt2.metrics);
|
|
103
|
+
return { status: attempt2.status, findings: [], errorReason: attempt2.errorReason, metrics };
|
|
104
|
+
}
|
|
105
|
+
metrics = addMetrics(metrics, attempt2.metrics);
|
|
106
|
+
const parsed2 = parseReviewerFindings(attempt2.output, workerOutput);
|
|
107
|
+
if (parsed2.ok) {
|
|
108
|
+
return successResult(parsed2.findings, metrics);
|
|
109
|
+
}
|
|
110
|
+
// Both LLM attempts failed parse — deterministic fallback. Verdict stays
|
|
111
|
+
// 'annotated' so telemetry never sees 'error' from a parseable worker output.
|
|
112
|
+
const fallback = fallbackExtractFindings(workerOutput);
|
|
113
|
+
return successResult(fallback, metrics);
|
|
114
|
+
// ── helpers ────────────────────────────────────────────────────────────
|
|
115
|
+
function successResult(findings, m) {
|
|
199
116
|
return {
|
|
200
|
-
status: '
|
|
117
|
+
status: 'annotated',
|
|
118
|
+
annotatedFindings: findings,
|
|
201
119
|
findings: [],
|
|
202
|
-
|
|
120
|
+
metrics: m,
|
|
203
121
|
};
|
|
204
122
|
}
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
123
|
+
async function callReviewer(prompt) {
|
|
124
|
+
let result;
|
|
125
|
+
try {
|
|
126
|
+
result = await delegateWithEscalation({ prompt, agentType: reviewerSlot, briefQualityPolicy: 'off', timeoutMs: 120_000 }, [reviewerProvider], { explicitlyPinned: true, taskDeadlineMs, abortSignal, onProgress });
|
|
208
127
|
}
|
|
209
|
-
|
|
210
|
-
status: 'error',
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
return {
|
|
128
|
+
catch (err) {
|
|
129
|
+
return { kind: 'transport', status: 'error', errorReason: `review agent threw: ${err instanceof Error ? err.message : String(err)}`, metrics: { inputTokens: 0, outputTokens: 0, turnCount: 0, toolCallCount: 0, costUSD: 0 } };
|
|
130
|
+
}
|
|
131
|
+
const m = extractMetrics(result);
|
|
132
|
+
if (result.status !== 'ok') {
|
|
133
|
+
if (result.status === 'api_error' || result.status === 'network_error' || result.status === 'timeout' || result.status === 'api_aborted') {
|
|
134
|
+
return { kind: 'transport', status: result.status, errorReason: `review agent returned status: ${result.status}`, metrics: m };
|
|
135
|
+
}
|
|
136
|
+
return { kind: 'transport', status: 'error', errorReason: `review agent returned status: ${result.status}`, metrics: m };
|
|
137
|
+
}
|
|
138
|
+
return { kind: 'ok', output: result.output, metrics: m };
|
|
220
139
|
}
|
|
221
|
-
return {
|
|
222
|
-
status: 'annotated',
|
|
223
|
-
annotatedFindings: merged.annotated,
|
|
224
|
-
findings: [],
|
|
225
|
-
metrics,
|
|
226
|
-
};
|
|
227
140
|
}
|
|
228
141
|
//# sourceMappingURL=quality-reviewer.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"quality-reviewer.js","sourceRoot":"","sources":["../../src/review/quality-reviewer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"quality-reviewer.js","sourceRoot":"","sources":["../../src/review/quality-reviewer.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,sBAAsB,EAAE,MAAM,gCAAgC,CAAC;AACxE,OAAO,EAAE,wBAAwB,EAAE,MAAM,sBAAsB,CAAC;AAEhE,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAE1E,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AAEnE,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AAyCnE,SAAS,cAAc,CAAC,CAA8H;IACpJ,OAAO;QACL,WAAW,EAAE,CAAC,CAAC,KAAK,EAAE,WAAW,IAAI,CAAC;QACtC,YAAY,EAAE,CAAC,CAAC,KAAK,EAAE,YAAY,IAAI,CAAC;QACxC,SAAS,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC;QACvB,aAAa,EAAE,CAAC,CAAC,SAAS,EAAE,MAAM,IAAI,CAAC;QACvC,OAAO,EAAE,CAAC,CAAC,KAAK,EAAE,OAAO,IAAI,CAAC;KAC/B,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAuB,EAAE,CAAuB;IAClE,OAAO;QACL,WAAW,EAAE,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC,WAAW;QAC1C,YAAY,EAAE,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,YAAY;QAC7C,SAAS,EAAE,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS;QACpC,aAAa,EAAE,CAAC,CAAC,aAAa,GAAG,CAAC,CAAC,aAAa;QAChD,OAAO,EAAE,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO;KAC/B,CAAC;AACJ,CAAC;AAKD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,gBAA0B,EAC1B,MAAkE,EAClE,UAAkC,EAClC,YAAoC,EACpC,WAAqB,EACrB,YAAsB,EACtB,aAAsB,EACtB,0BAAqF,EACrF,YAAqB,EACrB,cAAuB,EACvB,WAAyB,EACzB,UAA2E;IAE3E,2EAA2E;IAC3E,6EAA6E;IAC7E,IAAI,0BAA0B,IAAI,YAAY,KAAK,SAAS,EAAE,CAAC;QAC7D,OAAO,mBAAmB,CAAC,gBAAgB,EAAE,MAAM,EAAE,YAAY,EAAE,0BAA0B,EAAE,cAAc,EAAE,WAAW,EAAE,UAAU,CAAC,CAAC;IAC1I,CAAC;IAED,6DAA6D;IAC7D,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,EAAE,WAAW,EAAE,iCAAiC,EAAE,CAAC;IAC7F,CAAC;IAED,MAAM,UAAU,GAAG,wBAAwB,CAAC,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,WAAW,CAAC,CAAC;IAC3F,MAAM,MAAM,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,aAAa,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC;IAC1E,MAAM,YAAY,GAChB,gBAAgB,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC;IAChE,IAAI,OAAO,GAAyB,EAAE,WAAW,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;IACpH,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,sBAAsB,CACnC,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,kBAAkB,EAAE,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,EAClF,CAAC,gBAAgB,CAAC,EAClB,EAAE,gBAAgB,EAAE,IAAI,EAAE,cAAc,EAAE,WAAW,EAAE,UAAU,EAAE,CACpE,CAAC;QACF,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;IACnC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,WAAW,EAAE,uBAAuB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;IACnI,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC3B,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,IAAI,MAAM,CAAC,MAAM,KAAK,eAAe,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,MAAM,KAAK,aAAa,EAAE,CAAC;YACzI,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,WAAW,EAAE,iCAAiC,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QAChH,CAAC;QACD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,WAAW,EAAE,iCAAiC,MAAM,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC;IACnH,CAAC;IAED,IAAI,MAAM,GAAG,qBAAqB,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAClD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,MAAM,sBAAsB,CAC9C;gBACE,MAAM,EAAE,MAAM,GAAG,mJAAmJ;gBACpK,SAAS,EAAE,YAAY,EAAE,kBAAkB,EAAE,KAAK,EAAE,SAAS,EAAE,OAAO;aACvE,EACD,CAAC,gBAAgB,CAAC,EAClB,EAAE,gBAAgB,EAAE,IAAI,EAAE,cAAc,EAAE,WAAW,EAAE,UAAU,EAAE,CACpE,CAAC;YACF,OAAO,GAAG,UAAU,CAAC,OAAO,EAAE,cAAc,CAAC,WAAW,CAAC,CAAC,CAAC;YAC3D,IAAI,WAAW,CAAC,MAAM,KAAK,IAAI;gBAAE,MAAM,GAAG,qBAAqB,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QACtF,CAAC;QAAC,MAAM,CAAC,CAAC,kBAAkB,CAAC,CAAC;QAE9B,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,WAAW,EAAE,0DAA0D,EAAE,OAAO,EAAE,CAAC;QAC7H,CAAC;IACH,CAAC;IAED,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;IAClD,IAAI,YAAY,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;QAC9C,OAAO;YACL,MAAM,EAAE,kBAAkB;YAC1B,MAAM;YACN,QAAQ,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,mBAAmB,IAAI,EAAE,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC;YAC/E,OAAO;SACR,CAAC;IACJ,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC;AAC/D,CAAC;AAED,KAAK,UAAU,mBAAmB,CAChC,gBAA0B,EAC1B,MAAkE,EAClE,YAAoB,EACpB,0BAAoF,EACpF,cAAuB,EACvB,WAAyB,EACzB,UAA2E;IAE3E,MAAM,YAAY,GAChB,gBAAgB,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC;IAEhE,MAAM,UAAU,GAAG,0BAA0B,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACtF,IAAI,OAAO,GAAyB,EAAE,WAAW,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;IAEpH,YAAY;IACZ,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,UAAU,CAAC,CAAC;IAChD,IAAI,QAAQ,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;QAClC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,WAAW,EAAE,QAAQ,CAAC,WAAW,EAAE,OAAO,EAAE,QAAQ,CAAC,OAAO,EAAE,CAAC;IACjH,CAAC;IACD,OAAO,GAAG,UAAU,CAAC,OAAO,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,qBAAqB,CAAC,QAAQ,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IACrE,IAAI,OAAO,CAAC,EAAE,EAAE,CAAC;QACf,OAAO,aAAa,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAClD,CAAC;IAED,8BAA8B;IAC9B,MAAM,cAAc,GAAG,GAAG,UAAU,4DAA4D,OAAO,CAAC,MAAM,uJAAuJ,CAAC;IACtQ,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,cAAc,CAAC,CAAC;IACpD,8EAA8E;IAC9E,wEAAwE;IACxE,uEAAuE;IACvE,IAAI,QAAQ,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;QAClC,OAAO,GAAG,UAAU,CAAC,OAAO,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;QAChD,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,WAAW,EAAE,QAAQ,CAAC,WAAW,EAAE,OAAO,EAAE,CAAC;IAC/F,CAAC;IACD,OAAO,GAAG,UAAU,CAAC,OAAO,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,qBAAqB,CAAC,QAAQ,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IACrE,IAAI,OAAO,CAAC,EAAE,EAAE,CAAC;QACf,OAAO,aAAa,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAClD,CAAC;IAED,yEAAyE;IACzE,8EAA8E;IAC9E,MAAM,QAAQ,GAAG,uBAAuB,CAAC,YAAY,CAAC,CAAC;IACvD,OAAO,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAExC,0EAA0E;IAE1E,SAAS,aAAa,CAAC,QAA4B,EAAE,CAAuB;QAC1E,OAAO;YACL,MAAM,EAAE,WAAW;YACnB,iBAAiB,EAAE,QAAQ;YAC3B,QAAQ,EAAE,EAAE;YACZ,OAAO,EAAE,CAAC;SACX,CAAC;IACJ,CAAC;IAKD,KAAK,UAAU,YAAY,CAAC,MAAc;QACxC,IAAI,MAAM,CAAC;QACX,IAAI,CAAC;YACH,MAAM,GAAG,MAAM,sBAAsB,CACnC,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,kBAAkB,EAAE,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,EAClF,CAAC,gBAAgB,CAAC,EAClB,EAAE,gBAAgB,EAAE,IAAI,EAAE,cAAc,EAAE,WAAW,EAAE,UAAU,EAAE,CACpE,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,uBAAuB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;QAClO,CAAC;QACD,MAAM,CAAC,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;QACjC,IAAI,MAAM,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;YAC3B,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,IAAI,MAAM,CAAC,MAAM,KAAK,eAAe,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,MAAM,KAAK,aAAa,EAAE,CAAC;gBACzI,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,iCAAiC,MAAM,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;YACjI,CAAC;YACD,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,iCAAiC,MAAM,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;QAC3H,CAAC;QACD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;IAC3D,CAAC;AACH,CAAC"}
|
|
@@ -50,7 +50,6 @@ export interface RunTasksOptions {
|
|
|
50
50
|
qualityReviewPromptBuilder?: (ctx: {
|
|
51
51
|
workerOutput: string;
|
|
52
52
|
brief: string;
|
|
53
|
-
workerFindings: import('../executors/_shared/findings-schema.js').WorkerFinding[];
|
|
54
53
|
}) => string;
|
|
55
54
|
}
|
|
56
55
|
export declare function runTasks(tasks: TaskSpec[], config: MultiModelConfig, options?: RunTasksOptions): Promise<RunResult[]>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/run-tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,SAAS,EACT,QAAQ,EACR,gBAAgB,EAGjB,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACzD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,mCAAmC,CAAC;AACvE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC;AAUxD,MAAM,MAAM,wBAAwB,GAAG,CACrC,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,aAAa,KACjB,IAAI,CAAC;AAEV,MAAM,WAAW,eAAe;IAC9B,UAAU,CAAC,EAAE,wBAAwB,CAAC;IACtC,OAAO,CAAC,EAAE,eAAe,CAAC;IAC1B,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,oEAAoE;IACpE,eAAe,CAAC,EAAE,CAAC,IAAI,EAAE,iBAAiB,KAAK,IAAI,CAAC;IACpD;;;;;OAKG;IACH,MAAM,CAAC,EAAE,aAAa,CAAC;IACvB;;;;;OAKG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,+EAA+E;IAC/E,aAAa,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACvC,2EAA2E;IAC3E,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,CAAC,GAAG,EAAE;YACzB,KAAK,EAAE,MAAM,CAAC;YACd,QAAQ,EAAE,QAAQ,CAAC;YACnB,SAAS,EAAE,SAAS,CAAC;YACrB,MAAM,EAAE,MAAM,CAAC;YACf,eAAe,EAAE,MAAM,CAAC;YACxB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;SAC5B,KAAK,IAAI,CAAC;KACZ,CAAC;IACF,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,sEAAsE;IACtE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,sEAAsE;IACtE,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,oDAAoD;IACpD,GAAG,CAAC,EAAE,QAAQ,CAAC;IACf,+EAA+E;IAC/E,0BAA0B,CAAC,EAAE,CAAC,GAAG,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/run-tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,SAAS,EACT,QAAQ,EACR,gBAAgB,EAGjB,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACzD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,mCAAmC,CAAC;AACvE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC;AAUxD,MAAM,MAAM,wBAAwB,GAAG,CACrC,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,aAAa,KACjB,IAAI,CAAC;AAEV,MAAM,WAAW,eAAe;IAC9B,UAAU,CAAC,EAAE,wBAAwB,CAAC;IACtC,OAAO,CAAC,EAAE,eAAe,CAAC;IAC1B,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,oEAAoE;IACpE,eAAe,CAAC,EAAE,CAAC,IAAI,EAAE,iBAAiB,KAAK,IAAI,CAAC;IACpD;;;;;OAKG;IACH,MAAM,CAAC,EAAE,aAAa,CAAC;IACvB;;;;;OAKG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,+EAA+E;IAC/E,aAAa,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACvC,2EAA2E;IAC3E,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,CAAC,GAAG,EAAE;YACzB,KAAK,EAAE,MAAM,CAAC;YACd,QAAQ,EAAE,QAAQ,CAAC;YACnB,SAAS,EAAE,SAAS,CAAC;YACrB,MAAM,EAAE,MAAM,CAAC;YACf,eAAe,EAAE,MAAM,CAAC;YACxB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;SAC5B,KAAK,IAAI,CAAC;KACZ,CAAC;IACF,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,sEAAsE;IACtE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,sEAAsE;IACtE,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,oDAAoD;IACpD,GAAG,CAAC,EAAE,QAAQ,CAAC;IACf,+EAA+E;IAC/E,0BAA0B,CAAC,EAAE,CAAC,GAAG,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,KAAK,MAAM,CAAC;CACvF;AAED,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,QAAQ,EAAE,EACjB,MAAM,EAAE,gBAAgB,EACxB,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,SAAS,EAAE,CAAC,CAsHtB;AAED,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -36,6 +36,7 @@ export declare function endReviewStage(stats: StageStatsMap, name: 'spec_review'
|
|
|
36
36
|
filesReadCount?: number;
|
|
37
37
|
filesWrittenCount?: number;
|
|
38
38
|
costUSD?: number;
|
|
39
|
+
durationMs?: number;
|
|
39
40
|
}): void;
|
|
40
41
|
export interface ReworkAccumulator {
|
|
41
42
|
occurred: boolean;
|
|
@@ -106,6 +107,5 @@ export declare function executeReviewedLifecycle(task: TaskSpec, resolved: {
|
|
|
106
107
|
}, _route?: string, _client?: string, _triggeringSkill?: string, bus?: import('../observability/bus.js').EventBus, qualityReviewPromptBuilder?: (ctx: {
|
|
107
108
|
workerOutput: string;
|
|
108
109
|
brief: string;
|
|
109
|
-
workerFindings: import('../executors/_shared/findings-schema.js').WorkerFinding[];
|
|
110
110
|
}) => string): Promise<RunResult>;
|
|
111
111
|
//# sourceMappingURL=reviewed-lifecycle.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reviewed-lifecycle.d.ts","sourceRoot":"","sources":["../../src/run-tasks/reviewed-lifecycle.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,QAAQ,EACR,SAAS,EACT,QAAQ,EACR,gBAAgB,EAChB,SAAS,EAGT,aAAa,EACb,aAAa,EACb,aAAa,EACb,gBAAgB,EACjB,MAAM,aAAa,CAAC;AAoCrB,OAAO,KAAK,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAqB3D,wBAAgB,UAAU,IAAI,aAAa,CAW1C;AAMD,wBAAgB,YAAY,CAC1B,KAAK,EAAE,aAAa,EACpB,IAAI,EAAE,cAAc,GAAG,aAAa,GAAG,gBAAgB,GAAG,YAAY,EACtE,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GAAG,IAAI,EACjB,KAAK,EAAE;IAAE,IAAI,EAAE,UAAU,GAAG,SAAS,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EACtD,YAAY,EAAE,MAAM,GAAG,IAAI,EAC3B,IAAI,EAAE;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,EAC/E,OAAO,CAAC,EAAE;IAAE,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAAC,eAAe,CAAC,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IAAC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5N,IAAI,CAyBN;AAED,wBAAgB,cAAc,CAC5B,KAAK,EAAE,aAAa,EACpB,IAAI,EAAE,aAAa,GAAG,gBAAgB,GAAG,aAAa,EACtD,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GAAG,IAAI,EACjB,KAAK,EAAE;IAAE,IAAI,EAAE,UAAU,GAAG,SAAS,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EACtD,YAAY,EAAE,MAAM,GAAG,IAAI,EAC3B,IAAI,EAAE;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,EAC/E,OAAO,EAAE,aAAa,EACtB,UAAU,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"reviewed-lifecycle.d.ts","sourceRoot":"","sources":["../../src/run-tasks/reviewed-lifecycle.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,QAAQ,EACR,SAAS,EACT,QAAQ,EACR,gBAAgB,EAChB,SAAS,EAGT,aAAa,EACb,aAAa,EACb,aAAa,EACb,gBAAgB,EACjB,MAAM,aAAa,CAAC;AAoCrB,OAAO,KAAK,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAqB3D,wBAAgB,UAAU,IAAI,aAAa,CAW1C;AAMD,wBAAgB,YAAY,CAC1B,KAAK,EAAE,aAAa,EACpB,IAAI,EAAE,cAAc,GAAG,aAAa,GAAG,gBAAgB,GAAG,YAAY,EACtE,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GAAG,IAAI,EACjB,KAAK,EAAE;IAAE,IAAI,EAAE,UAAU,GAAG,SAAS,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EACtD,YAAY,EAAE,MAAM,GAAG,IAAI,EAC3B,IAAI,EAAE;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,EAC/E,OAAO,CAAC,EAAE;IAAE,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAAC,eAAe,CAAC,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IAAC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5N,IAAI,CAyBN;AAED,wBAAgB,cAAc,CAC5B,KAAK,EAAE,aAAa,EACpB,IAAI,EAAE,aAAa,GAAG,gBAAgB,GAAG,aAAa,EACtD,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GAAG,IAAI,EACjB,KAAK,EAAE;IAAE,IAAI,EAAE,UAAU,GAAG,SAAS,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EACtD,YAAY,EAAE,MAAM,GAAG,IAAI,EAC3B,IAAI,EAAE;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,EAC/E,OAAO,EAAE,aAAa,EACtB,UAAU,EAAE,MAAM,EAMlB,OAAO,CAAC,EAAE;IAAE,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAAC,eAAe,CAAC,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IAAC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GACjP,IAAI,CAwBN;AAOD,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,OAAO,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,wBAAgB,cAAc,IAAI,iBAAiB,CAQlD;AAED,wBAAgB,yBAAyB,CACvC,GAAG,EAAE,iBAAiB,EACtB,MAAM,EAAE;IAAE,KAAK,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAAG,IAAI,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,OAAO,EAAE,CAAC;IAAC,SAAS,CAAC,EAAE,OAAO,EAAE,CAAC;IAAC,YAAY,CAAC,EAAE,OAAO,EAAE,CAAA;CAAE,EACxQ,cAAc,EAAE,MAAM,EACtB,IAAI,EAAE;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,GAC9E,IAAI,CAiBN;AAED,wBAAgB,iBAAiB,CAC/B,KAAK,EAAE,aAAa,EACpB,IAAI,EAAE,aAAa,GAAG,gBAAgB,EACtC,GAAG,EAAE,iBAAiB,EACtB,KAAK,EAAE;IAAE,IAAI,EAAE,UAAU,GAAG,SAAS,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GACrD,IAAI,CAsBN;AAED,wBAAgB,cAAc,CAC5B,KAAK,EAAE,aAAa,EACpB,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GAAG,IAAI,EACjB,KAAK,EAAE;IAAE,IAAI,EAAE,UAAU,GAAG,SAAS,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EACtD,YAAY,EAAE,MAAM,GAAG,IAAI,EAC3B,IAAI,EAAE;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,cAAc,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,EAC/E,OAAO,EAAE,aAAa,EACtB,UAAU,EAAE,gBAAgB,GAAG,IAAI,GAClC,IAAI,CAuBN;AAED,wBAAsB,wBAAwB,CAC5C,IAAI,EAAE,QAAQ,EACd,QAAQ,EAAE;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,QAAQ,EAAE,QAAQ,CAAC;IAAC,kBAAkB,EAAE,OAAO,CAAA;CAAE,EAC9E,MAAM,EAAE,gBAAgB,EACxB,SAAS,EAAE,MAAM,EACjB,UAAU,CAAC,EAAE,wBAAwB,EACrC,eAAe,CAAC,EAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,eAAe,CAAC,EAAE,CAAC,IAAI,EAAE,OAAO,iBAAiB,EAAE,iBAAiB,KAAK,IAAI,CAAA;CAAE,EACrH,WAAW,CAAC,EAAE;IACZ,MAAM,CAAC,EAAE,OAAO,mCAAmC,EAAE,aAAa,CAAC;IACnE,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,aAAa,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;CACxC,EACD,QAAQ,CAAC,EAAE;IACT,mBAAmB,EAAE,CAAC,GAAG,EAAE;QACzB,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,QAAQ,CAAC;QACnB,SAAS,EAAE,SAAS,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC;QACf,eAAe,EAAE,MAAM,CAAC;QACxB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;KAC5B,KAAK,IAAI,CAAC;CACZ,EACD,MAAM,CAAC,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,MAAM,EAChB,gBAAgB,CAAC,EAAE,MAAM,EACzB,GAAG,CAAC,EAAE,OAAO,yBAAyB,EAAE,QAAQ,EAChD,0BAA0B,CAAC,EAAE,CAAC,GAAG,EAAE;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,KAAK,MAAM,GACpF,OAAO,CAAC,SAAS,CAAC,CA+6CpB"}
|
|
@@ -71,11 +71,17 @@ export function endBaseStage(stats, name, t0, c0, agent, finalCostUSD, idle, met
|
|
|
71
71
|
filesWrittenCount: metrics?.filesWrittenCount ?? null,
|
|
72
72
|
};
|
|
73
73
|
}
|
|
74
|
-
export function endReviewStage(stats, name, t0, c0, agent, finalCostUSD, idle, verdict, roundsUsed,
|
|
74
|
+
export function endReviewStage(stats, name, t0, c0, agent, finalCostUSD, idle, verdict, roundsUsed,
|
|
75
|
+
// metrics.durationMs OVERRIDES the t0-based fallback. Use this when the
|
|
76
|
+
// stage runs in multiple discrete invocations (initial + rework re-reviews
|
|
77
|
+
// for spec_review and quality_review) — the caller accumulates per-call
|
|
78
|
+
// wall time and passes the sum, instead of `Date.now() - t0` which would
|
|
79
|
+
// span the entire review block including subsequent stages.
|
|
80
|
+
metrics) {
|
|
75
81
|
stats[name] = {
|
|
76
82
|
stage: name,
|
|
77
83
|
entered: true,
|
|
78
|
-
durationMs: Date.now() - t0,
|
|
84
|
+
durationMs: metrics?.durationMs !== undefined ? metrics.durationMs : Date.now() - t0,
|
|
79
85
|
costUSD: metrics?.costUSD !== undefined ? metrics.costUSD
|
|
80
86
|
: finalCostUSD !== null && c0 !== null ? finalCostUSD - c0 : null,
|
|
81
87
|
agentTier: agent.tier,
|
|
@@ -467,6 +473,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
467
473
|
family: modelFamily(resolvedModel),
|
|
468
474
|
model: resolvedModel,
|
|
469
475
|
};
|
|
476
|
+
// Build agent info for a specific reviewer tier. Used so review-stage
|
|
477
|
+
// entries record the ACTUAL reviewer's model, not the implementer's
|
|
478
|
+
// — V3 R3 (review.model != implementerModel) requires this to be
|
|
479
|
+
// the cross-model invariant we claim. Pre-3.10.4 every endReviewStage
|
|
480
|
+
// call hardcoded implementerAgentInfo, so R3 always fired by
|
|
481
|
+
// construction regardless of config.
|
|
482
|
+
const reviewerAgentInfoFor = (tier) => {
|
|
483
|
+
const provider = providerFor(tier);
|
|
484
|
+
const model = provider?.config.model ?? config.agents[tier]?.model ?? resolvedModel;
|
|
485
|
+
return { tier, family: modelFamily(model), model };
|
|
486
|
+
};
|
|
470
487
|
const runningCostUSD = () => taskCostUSD();
|
|
471
488
|
const policyEscalated = { spec: false, quality: false, diff: false };
|
|
472
489
|
const emitFallback = (p) => {
|
|
@@ -1050,7 +1067,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1050
1067
|
: 'skipped',
|
|
1051
1068
|
round: 1,
|
|
1052
1069
|
});
|
|
1053
|
-
endReviewStage(stats, 'diff_review', diffReviewT0_commit, diffReviewC0_commit,
|
|
1070
|
+
endReviewStage(stats, 'diff_review', diffReviewT0_commit, diffReviewC0_commit, reviewerAgentInfoFor((diffCall.usedTier ?? diffReviewerTier)), runningCostUSD(), snapshotIdle(stageIdle),
|
|
1054
1071
|
// Diff review uses 'approve' | 'concerns' | 'reject' | 'transport_failure' (DiffReviewVerdict),
|
|
1055
1072
|
// distinct from spec/quality verdicts. Map to the telemetry verdict enum here.
|
|
1056
1073
|
'kind' in verdict
|
|
@@ -1080,11 +1097,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1080
1097
|
let specReviewReason;
|
|
1081
1098
|
let specReviewT0 = 0;
|
|
1082
1099
|
let specReviewC0 = null;
|
|
1100
|
+
// Delta-only timing: accumulate per-call wall durations across the
|
|
1101
|
+
// initial spec_review + every spec_rework round's re-review. This
|
|
1102
|
+
// replaces the `Date.now() - specReviewT0` fallback at endReviewStage,
|
|
1103
|
+
// which over-counts because endReviewStage runs AFTER spec_rework,
|
|
1104
|
+
// quality_review, AND quality_rework all complete. No absolute
|
|
1105
|
+
// timestamps go on the wire — Date.now() is used only as a local
|
|
1106
|
+
// delta source. Privacy.md guarantees ms-deltas only.
|
|
1107
|
+
let specReviewDurationMs = 0;
|
|
1083
1108
|
if (reviewPolicy !== 'quality_only') {
|
|
1084
1109
|
transitionStage('verifying', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows }, null);
|
|
1085
1110
|
const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
1086
1111
|
specReviewT0 = Date.now();
|
|
1087
1112
|
specReviewC0 = runningCostUSD();
|
|
1113
|
+
const initialSpecReviewIterStart = Date.now();
|
|
1088
1114
|
const initialSpecReview = await runWithFallback({
|
|
1089
1115
|
assigned: initialReviewerTier,
|
|
1090
1116
|
providerFor,
|
|
@@ -1094,6 +1120,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1094
1120
|
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
1095
1121
|
call: (provider) => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress),
|
|
1096
1122
|
});
|
|
1123
|
+
specReviewDurationMs += Date.now() - initialSpecReviewIterStart;
|
|
1097
1124
|
if (initialSpecReview.bothUnavailable) {
|
|
1098
1125
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
|
|
1099
1126
|
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
|
|
@@ -1152,7 +1179,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1152
1179
|
accumulateReworkIteration(specReworkAcc, finalImplResult, Date.now() - specReworkIterStart, snapshotIdle(stageIdle));
|
|
1153
1180
|
commitReworkStage(stats, 'spec_rework', specReworkAcc, implementerAgentInfo);
|
|
1154
1181
|
transitionStage('spec_rework', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows }, null);
|
|
1182
|
+
const reReviewIterStart = Date.now();
|
|
1155
1183
|
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress) });
|
|
1184
|
+
specReviewDurationMs += Date.now() - reReviewIterStart;
|
|
1156
1185
|
if (reviewCall.bothUnavailable) {
|
|
1157
1186
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
1158
1187
|
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
@@ -1194,13 +1223,19 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1194
1223
|
// `entered: false` default — endReviewStage is never called.
|
|
1195
1224
|
let qualityReviewT0 = 0;
|
|
1196
1225
|
let qualityReviewC0 = null;
|
|
1226
|
+
// Same delta-only timing pattern as spec_review — accumulate per-call
|
|
1227
|
+
// wall durations across initial + each rework round's re-review. No
|
|
1228
|
+
// raw timestamps cross the wire.
|
|
1229
|
+
let qualityReviewDurationMs = 0;
|
|
1197
1230
|
if (reviewPolicy === 'full' || reviewPolicy === 'quality_only') {
|
|
1198
1231
|
qualityUnavailable = new Map();
|
|
1199
1232
|
const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
|
|
1200
1233
|
transitionStage(currentStage, 'quality_review', { stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows }, null);
|
|
1201
1234
|
qualityReviewT0 = Date.now();
|
|
1202
1235
|
qualityReviewC0 = runningCostUSD();
|
|
1236
|
+
const initialQualityIterStart = Date.now();
|
|
1203
1237
|
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress) });
|
|
1238
|
+
qualityReviewDurationMs += Date.now() - initialQualityIterStart;
|
|
1204
1239
|
if (initialQuality.bothUnavailable) {
|
|
1205
1240
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
|
|
1206
1241
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
|
|
@@ -1221,10 +1256,31 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1221
1256
|
// Annotation model: emit one quality event per pass with severity-correction
|
|
1222
1257
|
// and mean-confidence summary fields. Then we are done — no rework loop.
|
|
1223
1258
|
const annotated = qualityResult.annotatedFindings ?? [];
|
|
1224
|
-
|
|
1225
|
-
const
|
|
1226
|
-
|
|
1259
|
+
// meanConfidence skips null entries (fallback path); null when ALL are null.
|
|
1260
|
+
const numericConfidences = annotated
|
|
1261
|
+
.map(f => f.reviewerConfidence)
|
|
1262
|
+
.filter((c) => c !== null);
|
|
1263
|
+
const meanConfidence = numericConfidences.length > 0
|
|
1264
|
+
? Math.round((numericConfidences.reduce((s, c) => s + c, 0) / numericConfidences.length) * 100) / 100
|
|
1227
1265
|
: null;
|
|
1266
|
+
// STEP A: Funnel annotated findings into concerns[] so V3
|
|
1267
|
+
// findingsBySeverity (built later in event-builder.ts:buildReviewStage)
|
|
1268
|
+
// rolls them up. MUST happen before any path that records the task,
|
|
1269
|
+
// and before emitTaskEvent below since downstream consumers may
|
|
1270
|
+
// observe finalImplResult during emit.
|
|
1271
|
+
if (annotated.length > 0) {
|
|
1272
|
+
const findingsAsConcerns = annotated.map((f) => ({
|
|
1273
|
+
source: 'quality_review',
|
|
1274
|
+
severity: f.severity,
|
|
1275
|
+
message: `[${f.id}] ${f.claim}`,
|
|
1276
|
+
}));
|
|
1277
|
+
finalImplResult = {
|
|
1278
|
+
...finalImplResult,
|
|
1279
|
+
concerns: [...(finalImplResult.concerns ?? []), ...findingsAsConcerns],
|
|
1280
|
+
annotatedFindings: annotated,
|
|
1281
|
+
};
|
|
1282
|
+
}
|
|
1283
|
+
// STEP B: Emit per-pass annotation event (no rework loop in quality_only).
|
|
1228
1284
|
emitTaskEvent('read_only_review.quality', {
|
|
1229
1285
|
route: routeKey,
|
|
1230
1286
|
verdict: qualityResult.status === 'annotated' ? 'annotated'
|
|
@@ -1232,8 +1288,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1232
1288
|
: 'error',
|
|
1233
1289
|
iterationIndex: 1,
|
|
1234
1290
|
findingsReviewed: annotated.length,
|
|
1235
|
-
findingsFlagged:
|
|
1236
|
-
severityCorrections,
|
|
1291
|
+
findingsFlagged: 0, // legacy field — severity correction tracked elsewhere now
|
|
1292
|
+
severityCorrections: 0, // reviewerSeverity field removed in 3.10.5
|
|
1237
1293
|
meanConfidence,
|
|
1238
1294
|
durationMs: Date.now() - qualityReviewT0,
|
|
1239
1295
|
costUSD: runningCostUSD() !== null && qualityReviewC0 !== null ? runningCostUSD() - qualityReviewC0 : null,
|
|
@@ -1278,7 +1334,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1278
1334
|
accumulateReworkIteration(qualityReworkAcc, finalImplResult, Date.now() - qualityReworkIterStart, snapshotIdle(stageIdle));
|
|
1279
1335
|
commitReworkStage(stats, 'quality_rework', qualityReworkAcc, implementerAgentInfo);
|
|
1280
1336
|
transitionStage('quality_rework', 'quality_review', { stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows }, null);
|
|
1337
|
+
const qReReviewIterStart = Date.now();
|
|
1281
1338
|
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress) });
|
|
1339
|
+
qualityReviewDurationMs += Date.now() - qReReviewIterStart;
|
|
1282
1340
|
if (reviewCall.bothUnavailable) {
|
|
1283
1341
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
1284
1342
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
@@ -1327,19 +1385,38 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1327
1385
|
const specAggregateStatus = reviewPolicy === 'quality_only'
|
|
1328
1386
|
? 'skipped'
|
|
1329
1387
|
: (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
|
|
1388
|
+
// R3 invariant: review-stage entries must record the actual REVIEWER's
|
|
1389
|
+
// model, not the implementer's. The last-used reviewer tier is the one
|
|
1390
|
+
// that produced the final verdict (after any escalation during rework).
|
|
1391
|
+
// Fall back to the implementer's tier only when no reviewer ever ran
|
|
1392
|
+
// (skipped path), which is fine because the schema R3 then doesn't apply.
|
|
1393
|
+
const lastSpecReviewerEntry = specReviewerHistory[specReviewerHistory.length - 1];
|
|
1394
|
+
const lastQualityReviewerEntry = qualityReviewerHistory[qualityReviewerHistory.length - 1];
|
|
1395
|
+
const specReviewAgent = lastSpecReviewerEntry === undefined || lastSpecReviewerEntry === 'skipped'
|
|
1396
|
+
? implementerAgentInfo
|
|
1397
|
+
: reviewerAgentInfoFor(lastSpecReviewerEntry);
|
|
1398
|
+
const qualityReviewAgent = lastQualityReviewerEntry === undefined || lastQualityReviewerEntry === 'skipped'
|
|
1399
|
+
? implementerAgentInfo
|
|
1400
|
+
: reviewerAgentInfoFor(lastQualityReviewerEntry);
|
|
1401
|
+
// Merge accumulated review-stage wall durations into the metrics
|
|
1402
|
+
// override. endReviewStage uses the override when present and falls
|
|
1403
|
+
// back to `Date.now() - t0` otherwise (which over-counts review-block
|
|
1404
|
+
// span across rework + later stages).
|
|
1405
|
+
const specMetrics = { ...(specResult.metrics ?? {}), durationMs: specReviewDurationMs };
|
|
1406
|
+
const qualityMetrics = { ...(qualityResult.metrics ?? {}), durationMs: qualityReviewDurationMs };
|
|
1330
1407
|
if (reviewPolicy !== 'quality_only') {
|
|
1331
|
-
endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0,
|
|
1408
|
+
endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, specReviewAgent, runningCostUSD(), snapshotIdle(stageIdle), specStatus === 'approved' ? 'approved'
|
|
1332
1409
|
: specStatus === 'changes_required' ? 'changes_required'
|
|
1333
1410
|
: specStatus === 'skipped' ? 'skipped'
|
|
1334
1411
|
: specStatus === 'not_applicable' ? 'not_applicable'
|
|
1335
|
-
: 'error', specAttemptIndex,
|
|
1412
|
+
: 'error', specAttemptIndex, specMetrics);
|
|
1336
1413
|
}
|
|
1337
1414
|
const qualityAggregateStatus = qualityResult.status;
|
|
1338
|
-
endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0,
|
|
1415
|
+
endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, qualityReviewAgent, runningCostUSD(), snapshotIdle(stageIdle), qualityResult.status === 'approved' ? 'approved'
|
|
1339
1416
|
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
1340
1417
|
: qualityResult.status === 'annotated' ? 'annotated'
|
|
1341
1418
|
: qualityResult.status === 'skipped' ? 'skipped'
|
|
1342
|
-
: 'error', qualityAttemptIndex,
|
|
1419
|
+
: 'error', qualityAttemptIndex, qualityMetrics);
|
|
1343
1420
|
const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
|
|
1344
1421
|
// File artifact verification: check whether output targets exist on disk after all work.
|
|
1345
1422
|
// Only applies when status is ok; non-ok statuses skip verification entirely.
|