@zhixuan92/multi-model-agent-core 3.10.4 → 3.10.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/executors/_shared/findings-schema.d.ts +55 -32
- package/dist/executors/_shared/findings-schema.d.ts.map +1 -1
- package/dist/executors/_shared/findings-schema.js +60 -22
- package/dist/executors/_shared/findings-schema.js.map +1 -1
- package/dist/executors/audit.d.ts.map +1 -1
- package/dist/executors/audit.js +1 -1
- package/dist/executors/audit.js.map +1 -1
- package/dist/executors/debug.d.ts.map +1 -1
- package/dist/executors/debug.js +1 -1
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/review.d.ts.map +1 -1
- package/dist/executors/review.js +1 -1
- package/dist/executors/review.js.map +1 -1
- package/dist/executors/verify.d.ts.map +1 -1
- package/dist/executors/verify.js +1 -1
- package/dist/executors/verify.js.map +1 -1
- package/dist/intake/compilers/audit.d.ts.map +1 -1
- package/dist/intake/compilers/audit.js +2 -2
- package/dist/intake/compilers/audit.js.map +1 -1
- package/dist/intake/compilers/investigate.d.ts.map +1 -1
- package/dist/intake/compilers/investigate.js +1 -2
- package/dist/intake/compilers/investigate.js.map +1 -1
- package/dist/intake/resolve.d.ts +10 -0
- package/dist/intake/resolve.d.ts.map +1 -1
- package/dist/intake/resolve.js +10 -19
- package/dist/intake/resolve.js.map +1 -1
- package/dist/review/fallback-extraction.d.ts +17 -0
- package/dist/review/fallback-extraction.d.ts.map +1 -0
- package/dist/review/fallback-extraction.js +140 -0
- package/dist/review/fallback-extraction.js.map +1 -0
- package/dist/review/parse-reviewer-findings.d.ts +26 -0
- package/dist/review/parse-reviewer-findings.d.ts.map +1 -0
- package/dist/review/parse-reviewer-findings.js +73 -0
- package/dist/review/parse-reviewer-findings.js.map +1 -0
- package/dist/review/quality-only-prompts.d.ts +18 -11
- package/dist/review/quality-only-prompts.d.ts.map +1 -1
- package/dist/review/quality-only-prompts.js +79 -128
- package/dist/review/quality-only-prompts.js.map +1 -1
- package/dist/review/quality-reviewer.d.ts +3 -26
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +54 -141
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/run-tasks/index.d.ts +0 -1
- package/dist/run-tasks/index.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +62 -9
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/runners/base/result-builders.d.ts +1 -1
- package/dist/runners/base/result-builders.d.ts.map +1 -1
- package/dist/telemetry/event-builder.js +2 -2
- package/dist/telemetry/event-builder.js.map +1 -1
- package/dist/telemetry/types.d.ts +7 -0
- package/dist/telemetry/types.d.ts.map +1 -1
- package/dist/telemetry/types.js +6 -5
- package/dist/telemetry/types.js.map +1 -1
- package/dist/types.d.ts +2 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
package/dist/intake/resolve.js
CHANGED
|
@@ -1,25 +1,16 @@
|
|
|
1
1
|
import { DEFAULT_TASK_TIMEOUT_MS } from '../config/schema.js';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
'- `claim` (string, what is wrong / what is true)',
|
|
13
|
-
'- `evidence` (string, REQUIRED, at least 20 characters): embed `file:line` as prose plus a one-sentence explanation of what the cited code or text actually shows. For project-level findings, describe what was searched/checked instead.',
|
|
14
|
-
'- `suggestion?` (string, optional): a fix, follow-up step, or recommendation',
|
|
15
|
-
].join('\n');
|
|
2
|
+
/**
|
|
3
|
+
* Worker output contract per route.
|
|
4
|
+
*
|
|
5
|
+
* The 5 read-only routes (audit / review / verify / debug / investigate) no
|
|
6
|
+
* longer carry a structured-output contract — the quality reviewer extracts
|
|
7
|
+
* findings from the worker's free-form narrative in one pass. See
|
|
8
|
+
* packages/core/src/review/quality-only-prompts.ts.
|
|
9
|
+
*
|
|
10
|
+
* The artifact route `execute_plan` keeps its narrative contract.
|
|
11
|
+
*/
|
|
16
12
|
export const OUTPUT_CONTRACT_CLAUSES = {
|
|
17
|
-
review_code: `${FINDINGS_BASE}\nEach finding should describe a code-level concern (correctness, security, performance, style as applicable to the focus). Embed the file:line in evidence; the reader will jump to the source from your prose.`,
|
|
18
|
-
debug_task: `${FINDINGS_BASE}\nUse hypothesis-driven debugging: each finding should identify a root cause and propose a fix in \`suggestion\`. Evidence should quote the relevant trace, log line, or code path.`,
|
|
19
|
-
verify_work: `${FINDINGS_BASE}\nMap each checklist item from the brief to a finding: pass (low severity, evidence shows the criterion was met) or fail (high/medium severity, evidence shows what is missing). One finding per checklist item.`,
|
|
20
|
-
audit_document: `${FINDINGS_BASE}\nEach finding should describe an issue discovered in the audited document. Severity reflects impact if the issue stands.`,
|
|
21
13
|
execute_plan: 'Implement the task fully. Report: which task heading you matched, what files were created or modified, and any issues encountered. If no unique matching task was found, report that explicitly and do not implement anything.',
|
|
22
|
-
investigate_codebase: `${FINDINGS_BASE}\nFor an investigation, \`suggestion\` is optional and may be a follow-up question or angle to explore rather than a code fix. Evidence may be a file:line citation or a description of what was searched (e.g., "Searched src/middleware/, src/auth/ — no auth middleware found").`,
|
|
23
14
|
};
|
|
24
15
|
export const ROUTE_DEFAULTS = {
|
|
25
16
|
delegate_tasks: {},
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolve.js","sourceRoot":"","sources":["../../src/intake/resolve.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAE9D
|
|
1
|
+
{"version":3,"file":"resolve.js","sourceRoot":"","sources":["../../src/intake/resolve.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAE9D;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAyC;IAC3E,YAAY,EAAE,gOAAgO;CAC/O,CAAC;AAEF,MAAM,CAAC,MAAM,cAAc,GAA2C;IACpE,cAAc,EAAE,EAAE;IAClB,WAAW,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IACnE,UAAU,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IAClE,WAAW,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IACnE,cAAc,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IACtE,YAAY,EAAE,EAAE,SAAS,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE;IAC7D,oBAAoB,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;CAC7E,CAAC;AAEF,MAAM,UAAU,YAAY,CAC1B,KAAgB,EAChB,MAAwB;IAExB,MAAM,aAAa,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAEzD,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,aAAa,CAAC,SAAS,IAAI,UAAU,CAAC;IAE3E,MAAM,cAAc,GAAG,uBAAuB,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACnE,MAAM,MAAM,GAAG,cAAc,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAC;QACrE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,OAAO,cAAc,EAAE;QACxC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;IAEjB,OAAO;QACL,MAAM;QACN,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,SAAS,EAAE,SAAmC;QAC9C,YAAY,EAAE,KAAK,CAAC,YAAY,IAAI,aAAa,CAAC,YAAY;QAC9D,KAAK,EAAE,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;QACvC,SAAS,EAAE,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;QAChE,UAAU,EAAE,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;QAC7C,aAAa,EAAE,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;QAC3D,kBAAkB,EAAE,KAAK;QACzB,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;QAClB,uBAAuB,EAAE,KAAK,CAAC,uBAAuB;KACvD,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { type AnnotatedFinding } from '../executors/_shared/findings-schema.js';
|
|
2
|
+
/**
|
|
3
|
+
* Deterministic regex extractor — runs when the LLM reviewer's JSON output
|
|
4
|
+
* fails parse twice. Synthesizes AnnotatedFinding[] so telemetry always has
|
|
5
|
+
* something to count.
|
|
6
|
+
*
|
|
7
|
+
* Confidence is null. Ids are always sequential `F${i+1}` (never use the
|
|
8
|
+
* worker's number — duplicates would violate annotatedFindingsSchema).
|
|
9
|
+
* evidenceGrounded reflects the actual substring check on the normalized
|
|
10
|
+
* worker output.
|
|
11
|
+
*
|
|
12
|
+
* If the worker output has zero parseable numbered sections and no explicit
|
|
13
|
+
* "no findings" language, emits a single catch-all finding so downstream
|
|
14
|
+
* telemetry is not left empty; explicit "no findings" output returns [].
|
|
15
|
+
*/
|
|
16
|
+
export declare function fallbackExtractFindings(workerOutput: string): AnnotatedFinding[];
|
|
17
|
+
//# sourceMappingURL=fallback-extraction.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fallback-extraction.d.ts","sourceRoot":"","sources":["../../src/review/fallback-extraction.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,gBAAgB,EACtB,MAAM,yCAAyC,CAAC;AA+FjD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,uBAAuB,CAAC,YAAY,EAAE,MAAM,GAAG,gBAAgB,EAAE,CAyChF"}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import { normalizeWhitespace, } from '../executors/_shared/findings-schema.js';
|
|
2
|
+
/**
 * Normalize a worker-supplied severity label to one of the four canonical
 * tiers: 'critical' | 'high' | 'medium' | 'low'.
 *
 * Matching ignores surrounding whitespace and letter case. "mid" is treated
 * as "medium", and any label that is not recognized also resolves to
 * 'medium'.
 *
 * @param {string} raw - Severity text as written by the worker.
 * @returns {'critical' | 'high' | 'medium' | 'low'} Canonical severity.
 */
function mapSeverity(raw) {
  switch (raw.trim().toLowerCase()) {
    case 'critical':
      return 'critical';
    case 'high':
      return 'high';
    case 'low':
      return 'low';
    default:
      // Covers 'medium', its alias 'mid', and every unknown label.
      return 'medium';
  }
}
|
|
18
|
+
/**
 * Heading pattern for finding-shaped Markdown sections. It accepts h2-h6
 * headings that carry a number, for example:
 *   "## 1. Title"            h2 with number
 *   "### 2. Title"           h3 with number
 *   "#### 3: Title"          h4 with number + colon
 *   "### [4] Title"          bracketed number
 *   "### Finding 5 — Title"  "Finding N" form
 *
 * Un-numbered headings such as "### Summary" do not match, so structural
 * sections are not turned into findings.
 *
 * Groups: 1 = bracketed number, 2 = bare number (exactly one of the two is
 * set on a match), 3 = heading title.
 */
const SECTION_RE = /^#{2,6}\s+(?:Finding\s+)?(?:\[(\d+)\]|(\d+))\s*[\.\:\)\-\—\–]?\s+(.+)$/gim;
/** A line consisting solely of "Severity: <tier>" (case-insensitive). */
const SEVERITY_RE = /^Severity:\s*(critical|high|medium|mid|low)\s*$/gim;

/**
 * Locate every numbered heading in the worker output and return one record
 * per section.
 *
 * @param {string} workerOutput - Raw worker narrative.
 * @returns {Array<{startIdx: number, workerNumber: string, title: string, endIdx: number}>}
 *   Sections in document order. `startIdx` is the offset of the heading and
 *   `endIdx` is the offset of the next heading (or the end of the text for
 *   the last section).
 */
function findSections(workerOutput) {
  // The regex is shared and global, so rewind it before scanning.
  SECTION_RE.lastIndex = 0;
  const headings = [];
  for (
    let hit = SECTION_RE.exec(workerOutput);
    hit !== null;
    hit = SECTION_RE.exec(workerOutput)
  ) {
    headings.push({
      startIdx: hit.index,
      // Bracketed number wins when present, otherwise the bare number.
      workerNumber: hit[1] ?? hit[2] ?? '',
      title: (hit[3] ?? '').trim(),
    });
  }
  // A section runs until the next heading starts, or to the end of the text.
  return headings.map((heading, i) => ({
    ...heading,
    endIdx: i + 1 < headings.length ? headings[i + 1].startIdx : workerOutput.length,
  }));
}
|
|
58
|
+
/**
|
|
59
|
+
* Detect explicit "no findings" language so fallback returns [] instead of
|
|
60
|
+
* a synthetic catch-all when a clean codebase produces a clean narrative
|
|
61
|
+
* but the reviewer happens to fail JSON parse (Round-2 #6).
|
|
62
|
+
*/
|
|
63
|
+
const NO_FINDINGS_RE = /\b(?:no\s+(?:findings|issues|problems)\s+(?:found|detected|reported)?|nothing\s+to\s+report|0\s+findings|zero\s+findings)\b/i;
|
|
64
|
+
function severityFromSection(section) {
  // Read the first "Severity: <tier>" line of the section text; sections
  // without such a line are reported as 'medium'.
  SEVERITY_RE.lastIndex = 0; // shared global regex: rewind before use
  const hit = SEVERITY_RE.exec(section);
  return hit ? mapSeverity(hit[1]) : 'medium';
}
|
|
71
|
+
/**
 * Build the evidence string for a fallback-extracted finding.
 *
 * - When the section body (everything after the heading line, trimmed) has
 *   at least 20 characters, the evidence is its first 240 characters.
 * - Otherwise a synthetic sentence is built from the worker's own finding
 *   number and the heading title. The worker number lives in this prose
 *   only; the finding id is assigned by the caller.
 *
 * @param {string} sectionText - Section text starting at its heading line.
 * @param {string} title - Heading title of the section.
 * @param {string} workerNumber - Number the worker gave the finding.
 * @returns {string} Evidence text, never shorter than 20 characters.
 */
function buildEvidence(sectionText, title, workerNumber) {
  const MIN_BODY_LENGTH = 20;
  const MAX_EVIDENCE_LENGTH = 240;
  const body = sectionText.split('\n').slice(1).join('\n').trim();
  if (body.length < MIN_BODY_LENGTH) {
    // The fixed wording alone exceeds the minimum length, so no padding
    // step is needed.
    return `Worker finding #${workerNumber} (${title}): no detailed body provided in implementer report.`;
  }
  // Do not cut between the two halves of a surrogate pair: a lone high
  // surrogate is not valid text and would not survive re-encoding.
  const lastKept = body.charCodeAt(MAX_EVIDENCE_LENGTH - 1);
  const splitsPair =
    body.length > MAX_EVIDENCE_LENGTH && lastKept >= 0xd800 && lastKept <= 0xdbff;
  return body.slice(0, splitsPair ? MAX_EVIDENCE_LENGTH - 1 : MAX_EVIDENCE_LENGTH);
}
|
|
87
|
+
/**
|
|
88
|
+
* Deterministic regex extractor — runs when the LLM reviewer's JSON output
|
|
89
|
+
* fails parse twice. Synthesizes AnnotatedFinding[] so telemetry always has
|
|
90
|
+
* something to count.
|
|
91
|
+
*
|
|
92
|
+
* Confidence is null. Ids are always sequential `F${i+1}` (never use the
|
|
93
|
+
* worker's number — duplicates would violate annotatedFindingsSchema).
|
|
94
|
+
* evidenceGrounded reflects the actual substring check on the normalized
|
|
95
|
+
* worker output.
|
|
96
|
+
*
|
|
97
|
+
* If the worker output has zero parseable numbered sections and no explicit
|
|
98
|
+
* "no findings" language, emits a single catch-all finding so downstream
|
|
99
|
+
* telemetry never sees an empty list.
|
|
100
|
+
*/
|
|
101
|
+
export function fallbackExtractFindings(workerOutput) {
|
|
102
|
+
const normalizedWorker = normalizeWhitespace(workerOutput);
|
|
103
|
+
const sections = findSections(workerOutput);
|
|
104
|
+
// Round-2 #6: respect explicit "no findings" worker output.
|
|
105
|
+
if (sections.length === 0 && NO_FINDINGS_RE.test(workerOutput)) {
|
|
106
|
+
return [];
|
|
107
|
+
}
|
|
108
|
+
if (sections.length === 0) {
|
|
109
|
+
const trimmed = workerOutput.trim();
|
|
110
|
+
// Use real worker text when long enough — preserves evidenceGrounded=true.
|
|
111
|
+
// Otherwise fall back to a meaningful synthetic sentence (knowingly ungrounded).
|
|
112
|
+
const evidence = trimmed.length >= 20
|
|
113
|
+
? trimmed.slice(0, 240)
|
|
114
|
+
: `Worker output had no parseable findings (length ${trimmed.length}). Fallback emitted catch-all so telemetry has at least one entry.`;
|
|
115
|
+
const eNorm = normalizeWhitespace(evidence);
|
|
116
|
+
return [{
|
|
117
|
+
id: 'F1',
|
|
118
|
+
severity: 'medium',
|
|
119
|
+
claim: 'reviewer parse failed; deterministic fallback emitted single catch-all from worker output',
|
|
120
|
+
evidence,
|
|
121
|
+
reviewerConfidence: null,
|
|
122
|
+
evidenceGrounded: eNorm.length >= 20 && normalizedWorker.includes(eNorm),
|
|
123
|
+
}];
|
|
124
|
+
}
|
|
125
|
+
return sections.map((section, i) => {
|
|
126
|
+
const sectionText = workerOutput.slice(section.startIdx, section.endIdx);
|
|
127
|
+
const severity = severityFromSection(sectionText);
|
|
128
|
+
const evidence = buildEvidence(sectionText, section.title, section.workerNumber);
|
|
129
|
+
const eNorm = normalizeWhitespace(evidence);
|
|
130
|
+
return {
|
|
131
|
+
id: `F${i + 1}`,
|
|
132
|
+
severity,
|
|
133
|
+
claim: section.title || `Finding ${i + 1}`,
|
|
134
|
+
evidence,
|
|
135
|
+
reviewerConfidence: null,
|
|
136
|
+
evidenceGrounded: eNorm.length >= 20 && normalizedWorker.includes(eNorm),
|
|
137
|
+
};
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
//# sourceMappingURL=fallback-extraction.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fallback-extraction.js","sourceRoot":"","sources":["../../src/review/fallback-extraction.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,GAEpB,MAAM,yCAAyC,CAAC;AAIjD;;;GAGG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,MAAM,CAAC,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACnC,IAAI,CAAC,KAAK,UAAU;QAAE,OAAO,UAAU,CAAC;IACxC,IAAI,CAAC,KAAK,MAAM;QAAE,OAAO,MAAM,CAAC;IAChC,IAAI,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,KAAK;QAAE,OAAO,QAAQ,CAAC;IACnD,IAAI,CAAC,KAAK,KAAK;QAAE,OAAO,KAAK,CAAC;IAC9B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,GAAG,2EAA2E,CAAC;AAC/F,MAAM,WAAW,GAAG,oDAAoD,CAAC;AASzE,kDAAkD;AAClD,SAAS,YAAY,CAAC,YAAoB;IACxC,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,UAAU,CAAC,SAAS,GAAG,CAAC,CAAC;IACzB,IAAI,IAAI,GAAqE,IAAI,CAAC;IAClF,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC;QAC7B,IAAI,IAAI,EAAE,CAAC;YACT,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC;QAC/C,CAAC;QACD,mFAAmF;QACnF,MAAM,YAAY,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAChD,IAAI,GAAG;YACL,QAAQ;YACR,YAAY;YACZ,KAAK,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE;SAC/B,CAAC;IACJ,CAAC;IACD,IAAI,IAAI;QAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,MAAM,EAAE,YAAY,CAAC,MAAM,EAAE,CAAC,CAAC;IAClE,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,cAAc,GAAG,8HAA8H,CAAC;AAEtJ,SAAS,mBAAmB,CAAC,OAAe;IAC1C,WAAW,CAAC,SAAS,GAAG,CAAC,CAAC;IAC1B,MAAM,CAAC,GAAG,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpC,IAAI,CAAC,CAAC;QAAE,OAAO,QAAQ,CAAC;IACxB,OAAO,WAAW,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;AAC5B,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,aAAa,CAAC,WAAmB,EAAE,KAAa,EAAE,YAAoB;IAC7E,MAAM,IAAI,GAAG,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;IAChE,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACjD,MAAM,KAAK,GAAG,mBAAmB,YAAY,KAAK,KAAK,qDAAqD,CAAC;IAC7G,OAAO,KAAK,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,C
AAC,KAAK,CAAC,CAAC,CAAC,GAAG,KAAK,wBAAwB,CAAC;AACvE,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,uBAAuB,CAAC,YAAoB;IAC1D,MAAM,gBAAgB,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;IAE5C,4DAA4D;IAC5D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC;QAC/D,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC;QACpC,2EAA2E;QAC3E,iFAAiF;QACjF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,IAAI,EAAE;YACnC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;YACvB,CAAC,CAAC,mDAAmD,OAAO,CAAC,MAAM,oEAAoE,CAAC;QAC1I,MAAM,KAAK,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAC5C,OAAO,CAAC;gBACN,EAAE,EAAE,IAAI;gBACR,QAAQ,EAAE,QAAQ;gBAClB,KAAK,EAAE,2FAA2F;gBAClG,QAAQ;gBACR,kBAAkB,EAAE,IAAI;gBACxB,gBAAgB,EAAE,KAAK,CAAC,MAAM,IAAI,EAAE,IAAI,gBAAgB,CAAC,QAAQ,CAAC,KAAK,CAAC;aACzE,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,EAAE,EAAE;QACjC,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;QACzE,MAAM,QAAQ,GAAG,mBAAmB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,QAAQ,GAAG,aAAa,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;QACjF,MAAM,KAAK,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAC5C,OAAO;YACL,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE;YACf,QAAQ;YACR,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE;YAC1C,QAAQ;YACR,kBAAkB,EAAE,IAAI;YACxB,gBAAgB,EAAE,KAAK,CAAC,MAAM,IAAI,EAAE,IAAI,gBAAgB,CAAC,QAAQ,CAAC,KAAK,CAAC;SACzE,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { type AnnotatedFinding } from '../executors/_shared/findings-schema.js';
|
|
2
|
+
interface ParseOk {
|
|
3
|
+
ok: true;
|
|
4
|
+
findings: AnnotatedFinding[];
|
|
5
|
+
/** Findings whose evidence was NOT a substring of worker output — kept but flagged. */
|
|
6
|
+
ungroundedCount: number;
|
|
7
|
+
}
|
|
8
|
+
interface ParseErr {
|
|
9
|
+
ok: false;
|
|
10
|
+
reason: string;
|
|
11
|
+
}
|
|
12
|
+
export type ParseReviewerFindingsResult = ParseOk | ParseErr;
|
|
13
|
+
/**
|
|
14
|
+
* Parse the reviewer's structured output and annotate each finding with
|
|
15
|
+
* `evidenceGrounded`.
|
|
16
|
+
*
|
|
17
|
+
* Pipeline:
|
|
18
|
+
* 1. Extract the final `` ```json `` block (permissive fence).
|
|
19
|
+
* 2. JSON.parse + Zod-validate against reviewerEmittedFindingsSchema.
|
|
20
|
+
* 3. Normalize worker output ONCE; for each finding set
|
|
21
|
+
* `evidenceGrounded` against the normalized worker.
|
|
22
|
+
* 4. Return ALL findings — never drop. `ungroundedCount` is informational only.
|
|
23
|
+
*/
|
|
24
|
+
export declare function parseReviewerFindings(reviewerOutput: string, workerOutput: string): ParseReviewerFindingsResult;
|
|
25
|
+
export {};
|
|
26
|
+
//# sourceMappingURL=parse-reviewer-findings.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-reviewer-findings.d.ts","sourceRoot":"","sources":["../../src/review/parse-reviewer-findings.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,KAAK,gBAAgB,EACtB,MAAM,yCAAyC,CAAC;AAEjD,UAAU,OAAO;IACf,EAAE,EAAE,IAAI,CAAC;IACT,QAAQ,EAAE,gBAAgB,EAAE,CAAC;IAC7B,uFAAuF;IACvF,eAAe,EAAE,MAAM,CAAC;CACzB;AACD,UAAU,QAAQ;IAChB,EAAE,EAAE,KAAK,CAAC;IACV,MAAM,EAAE,MAAM,CAAC;CAChB;AACD,MAAM,MAAM,2BAA2B,GAAG,OAAO,GAAG,QAAQ,CAAC;AAmC7D;;;;;;;;;;GAUG;AACH,wBAAgB,qBAAqB,CACnC,cAAc,EAAE,MAAM,EACtB,YAAY,EAAE,MAAM,GACnB,2BAA2B,CA0B7B"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { reviewerEmittedFindingsSchema, normalizeWhitespace, } from '../executors/_shared/findings-schema.js';
|
|
2
|
+
// Fence pattern for a ```json block. It is deliberately permissive:
//   - the `json` tag is matched case-insensitively;
//   - whitespace between the opening fence and the content is optional;
//   - whitespace between the content and the closing fence is optional;
// so compact forms like "[]```" or single-line blocks are accepted.
// Group 1 is the block content; callers trim it before parsing.
const JSON_BLOCK_RE = /```json[ \t]*\r?\n?([\s\S]*?)\s*```[ \t]*/gi;

/**
 * Return the content of the LAST ```json fenced block in `output`.
 *
 * Earlier blocks are skipped on purpose: reviewers often show an example or
 * format block first and put the real findings array at the end. Matches are
 * visited one at a time; only the most recent capture is retained.
 *
 * @param {string} output - Full reviewer response text.
 * @returns {string | null} Content of the final block, or null when the
 *   text contains no ```json block.
 */
function extractFinalJsonBlock(output) {
  // Shared global regex: rewind so every call scans from the start.
  JSON_BLOCK_RE.lastIndex = 0;
  let captured = null;
  for (
    let hit = JSON_BLOCK_RE.exec(output);
    hit !== null;
    hit = JSON_BLOCK_RE.exec(output)
  ) {
    captured = hit[1] ?? null;
  }
  return captured;
}
|
|
23
|
+
/**
 * Decide whether a finding's evidence is grounded in the worker output.
 *
 * The evidence is whitespace-normalized here; the worker output must already
 * be normalized by the caller, so it is normalized once per parse rather
 * than once per finding.
 *
 * @param {string} evidence - Evidence text from a single finding.
 * @param {string} normalizedWorker - Worker output after normalizeWhitespace.
 * @returns {boolean} False when the normalized evidence is shorter than 20
 *   characters; otherwise whether it occurs as a substring of
 *   `normalizedWorker`.
 */
function evidenceIsGroundedAgainst(evidence, normalizedWorker) {
  const needle = normalizeWhitespace(evidence);
  return needle.length < 20 ? false : normalizedWorker.includes(needle);
}
|
|
33
|
+
/**
 * Parse the reviewer's structured output and tag every finding with
 * `evidenceGrounded`.
 *
 * Steps:
 *   1. Take the final ```json fenced block from the reviewer output.
 *   2. JSON.parse it, then validate with reviewerEmittedFindingsSchema.
 *   3. Normalize the worker output once and check each finding's evidence
 *      against it.
 *   4. Return every validated finding; ungrounded ones are kept and only
 *      counted in `ungroundedCount`.
 *
 * @param {string} reviewerOutput - Full reviewer response text.
 * @param {string} workerOutput - Worker narrative the evidence should quote.
 * @returns {{ok: true, findings: object[], ungroundedCount: number} | {ok: false, reason: string}}
 *   Success with annotated findings, or a failure carrying the reason
 *   (missing block, JSON syntax error, or schema validation error).
 */
export function parseReviewerFindings(reviewerOutput, workerOutput) {
  const fail = (reason) => ({ ok: false, reason });

  const block = extractFinalJsonBlock(reviewerOutput);
  if (block === null) {
    return fail('reviewer output missing ```json fenced block');
  }

  let parsed;
  try {
    // The permissive fence regex may leave surrounding whitespace in place.
    parsed = JSON.parse(block.trim());
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    return fail(`reviewer JSON parse failed: ${detail}`);
  }

  const validated = reviewerEmittedFindingsSchema.safeParse(parsed);
  if (!validated.success) {
    return fail(`findings array validation failed: ${validated.error.message}`);
  }

  // Annotate and count in the same pass over the findings.
  const normalizedWorker = normalizeWhitespace(workerOutput);
  let ungroundedCount = 0;
  const findings = validated.data.map((finding) => {
    const evidenceGrounded = evidenceIsGroundedAgainst(finding.evidence, normalizedWorker);
    if (!evidenceGrounded) {
      ungroundedCount += 1;
    }
    return { ...finding, evidenceGrounded };
  });
  return { ok: true, findings, ungroundedCount };
}
|
|
73
|
+
//# sourceMappingURL=parse-reviewer-findings.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-reviewer-findings.js","sourceRoot":"","sources":["../../src/review/parse-reviewer-findings.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,6BAA6B,EAC7B,mBAAmB,GAEpB,MAAM,yCAAyC,CAAC;AAcjD,uEAAuE;AACvE,wEAAwE;AACxE,wEAAwE;AACxE,oEAAoE;AACpE,mDAAmD;AACnD,MAAM,aAAa,GAAG,6CAA6C,CAAC;AAEpE;;;;GAIG;AACH,SAAS,qBAAqB,CAAC,MAAc;IAC3C,IAAI,IAAI,GAA2B,IAAI,CAAC;IACxC,IAAI,KAA6B,CAAC;IAClC,+DAA+D;IAC/D,aAAa,CAAC,SAAS,GAAG,CAAC,CAAC;IAC5B,OAAO,CAAC,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,IAAI,GAAG,KAAK,CAAC;IACf,CAAC;IACD,OAAO,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;AACvC,CAAC;AAED;;;GAGG;AACH,SAAS,yBAAyB,CAAC,QAAgB,EAAE,gBAAwB;IAC3E,MAAM,CAAC,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,CAAC,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,KAAK,CAAC;IAChC,OAAO,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;AACtC,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,qBAAqB,CACnC,cAAsB,EACtB,YAAoB;IAEpB,MAAM,KAAK,GAAG,qBAAqB,CAAC,cAAc,CAAC,CAAC;IACpD,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QACnB,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,8CAA8C,EAAE,CAAC;IAC/E,CAAC;IACD,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,uEAAuE;QACvE,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;IACpC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,+BAA+B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;IAClH,CAAC;IACD,MAAM,SAAS,GAAG,6BAA6B,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAClE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC;QACvB,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,qCAAqC,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;IAC/F,CAAC;IACD,yDAAyD;IACzD,MAAM,gBAAgB,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;IAC3D,MAAM,SAAS,GAAuB,EAAE,CAAC;IACzC,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,KAAK,MAAM,CAAC,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,yBAAyB,CAAC,CAAC,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;QACzE,IAAI,CAAC,QAAQ;YAAE,eAAe,EAAE,CAAC;QACjC,SAAS,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,gBAAgB,EAAE,QAAQ,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,OAAO,EAAE,EA
AE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,eAAe,EAAE,CAAC;AAC5D,CAAC"}
|
|
@@ -1,21 +1,28 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Quality-only review prompts for the 5 read-only mma-* routes (3.
|
|
2
|
+
* Quality-only review prompts for the 5 read-only mma-* routes (3.10.5+).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
4
|
+
* The reviewer receives ONLY the implementer's free-form markdown narrative
|
|
5
|
+
* and the original brief. It must:
|
|
6
|
+
* 1. Read the worker's narrative.
|
|
7
|
+
* 2. Identify every distinct issue/finding/checklist-item the worker raised.
|
|
8
|
+
* 3. Assign sequential ids (F1, F2, ...) — even if the worker numbered them,
|
|
9
|
+
* the reviewer re-numbers from 1 to ensure uniqueness.
|
|
10
|
+
* 4. Set `severity` to its OWN final 4-tier judgment {critical, high, medium,
|
|
11
|
+
* low}. The reviewer is authoritative — there is no separate
|
|
12
|
+
* `reviewerSeverity` field. Map worker-stated "mid" -> "medium". When
|
|
13
|
+
* the worker did not state a severity, judge from impact.
|
|
14
|
+
* 5. Score each finding's reviewerConfidence (0-100) — how confident YOU
|
|
15
|
+
* would be defending the finding's correctness if challenged.
|
|
16
|
+
* 6. Quote evidence VERBATIM (≥20 chars) from the worker's output. The
|
|
17
|
+
* downstream parser flags non-substring quotes via
|
|
18
|
+
* `evidenceGrounded:false` but never drops findings.
|
|
19
|
+
* 7. Emit ONE fenced JSON code block as the LAST block in your response.
|
|
9
20
|
*
|
|
10
|
-
*
|
|
11
|
-
* of {id, reviewerConfidence, reviewerSeverity?} objects, one per worker
|
|
12
|
-
* finding (matched by id). NO verdict, NO gate, NO rework signal.
|
|
21
|
+
* If the worker raised zero issues, emit `[]` and stop.
|
|
13
22
|
*/
|
|
14
|
-
import type { WorkerFinding } from '../executors/_shared/findings-schema.js';
|
|
15
23
|
interface PromptContext {
|
|
16
24
|
workerOutput: string;
|
|
17
25
|
brief: string;
|
|
18
|
-
workerFindings: WorkerFinding[];
|
|
19
26
|
}
|
|
20
27
|
export declare function buildAuditQualityPrompt(ctx: PromptContext): string;
|
|
21
28
|
export declare function buildReviewQualityPrompt(ctx: PromptContext): string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"quality-only-prompts.d.ts","sourceRoot":"","sources":["../../src/review/quality-only-prompts.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"quality-only-prompts.d.ts","sourceRoot":"","sources":["../../src/review/quality-only-prompts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,UAAU,aAAa;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,CAAC;CACf;AAwED,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAMlE;AAED,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAMnE;AAED,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAMnE;AAED,wBAAgB,6BAA6B,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAMxE;AAED,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAMlE"}
|
|
@@ -1,155 +1,106 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Quality-only review prompts for the 5 read-only mma-* routes (3.
|
|
2
|
+
* Quality-only review prompts for the 5 read-only mma-* routes (3.10.5+).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
4
|
+
* The reviewer receives ONLY the implementer's free-form markdown narrative
|
|
5
|
+
* and the original brief. It must:
|
|
6
|
+
* 1. Read the worker's narrative.
|
|
7
|
+
* 2. Identify every distinct issue/finding/checklist-item the worker raised.
|
|
8
|
+
* 3. Assign sequential ids (F1, F2, ...) — even if the worker numbered them,
|
|
9
|
+
* the reviewer re-numbers from 1 to ensure uniqueness.
|
|
10
|
+
* 4. Set `severity` to its OWN final 4-tier judgment {critical, high, medium,
|
|
11
|
+
* low}. The reviewer is authoritative — there is no separate
|
|
12
|
+
* `reviewerSeverity` field. Map worker-stated "mid" -> "medium". When
|
|
13
|
+
* the worker did not state a severity, judge from impact.
|
|
14
|
+
* 5. Score each finding's reviewerConfidence (0-100) — how confident YOU
|
|
15
|
+
* would be defending the finding's correctness if challenged.
|
|
16
|
+
* 6. Quote evidence VERBATIM (≥20 chars) from the worker's output. The
|
|
17
|
+
* downstream parser flags non-substring quotes via
|
|
18
|
+
* `evidenceGrounded:false` but never drops findings.
|
|
19
|
+
* 7. Emit ONE fenced JSON code block as the LAST block in your response.
|
|
9
20
|
*
|
|
10
|
-
*
|
|
11
|
-
* of {id, reviewerConfidence, reviewerSeverity?} objects, one per worker
|
|
12
|
-
* finding (matched by id). NO verdict, NO gate, NO rework signal.
|
|
21
|
+
* If the worker raised zero issues, emit `[]` and stop.
|
|
13
22
|
*/
|
|
14
|
-
const
|
|
15
|
-
## How to score \`reviewerConfidence\` (integer 0-100)
|
|
16
|
-
|
|
17
|
-
You are scoring whether YOU would defend this finding if pushed. Not severity.
|
|
18
|
-
Not the worker's self-confidence.
|
|
19
|
-
|
|
20
|
-
80-100: evidence directly supports the claim, on-brief, defend without hesitation
|
|
21
|
-
60-79: evidence supports claim with minor gaps, on-brief, plausible
|
|
22
|
-
40-59: claim plausible but evidence thin, partial, or requires inference
|
|
23
|
-
20-39: claim weak, evidence does not back it up, OR off-brief
|
|
24
|
-
0-19: unsupported, contradicted, fabricated, OR completely off-brief
|
|
25
|
-
|
|
26
|
-
## How to use \`reviewerSeverity\` (optional)
|
|
27
|
-
|
|
28
|
-
Only set when you DISAGREE with the worker's \`severity\`. Workers tend to
|
|
29
|
-
inflate ("everything is high"); use \`reviewerSeverity\` to dial down. Omit
|
|
30
|
-
when you agree.
|
|
31
|
-
|
|
23
|
+
const RUBRIC_TEMPLATE = String.raw `
|
|
32
24
|
## Output format (REQUIRED)
|
|
33
25
|
|
|
34
|
-
Respond with exactly one fenced JSON code block
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
[
|
|
39
|
-
{
|
|
40
|
-
|
|
41
|
-
|
|
26
|
+
Respond with exactly one fenced JSON code block AS THE LAST BLOCK in your
|
|
27
|
+
response. The block contains a JSON array of finding objects, in the order
|
|
28
|
+
the worker presented them. Example:
|
|
29
|
+
|
|
30
|
+
` + '```json\n' + `[
|
|
31
|
+
{
|
|
32
|
+
"id": "F1",
|
|
33
|
+
"severity": "critical",
|
|
34
|
+
"claim": "Remote code execution via unsanitized input in src/handler.ts:42",
|
|
35
|
+
"evidence": "user input is passed directly into shellExec() without escaping",
|
|
36
|
+
"suggestion": "Use a parameterized API or escape input",
|
|
37
|
+
"reviewerConfidence": 90
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"id": "F2",
|
|
41
|
+
"severity": "medium",
|
|
42
|
+
"claim": "Auth check missing on /admin endpoint",
|
|
43
|
+
"evidence": "router.get('/admin', adminHandler) — no auth middleware applied",
|
|
44
|
+
"reviewerConfidence": 60
|
|
45
|
+
}
|
|
42
46
|
]
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
+
` + '```' + `
|
|
48
|
+
|
|
49
|
+
Field rules:
|
|
50
|
+
- ` + '`id`' + `: assign sequentially F1, F2, F3, ... (your choice; must be unique).
|
|
51
|
+
- ` + '`severity`' + `: one of "critical" | "high" | "medium" | "low" — YOUR
|
|
52
|
+
final judgment, not the worker's. The worker's value is a hint; you may
|
|
53
|
+
dial it up or down based on actual impact (workers tend to inflate).
|
|
54
|
+
- critical: must fix before any other work (RCE, auth bypass, data loss)
|
|
55
|
+
- high: serious bug / security issue, blocks release
|
|
56
|
+
- medium: real issue, should fix soon
|
|
57
|
+
- low: minor issue, nice to fix
|
|
58
|
+
Map worker-said "mid" -> "medium". When the worker omitted severity, judge.
|
|
59
|
+
- ` + '`claim`' + `: one-sentence summary.
|
|
60
|
+
- ` + '`evidence`' + `: REQUIRED, ≥20 chars, MUST be a verbatim quote from the
|
|
61
|
+
worker's output. The parser flags non-substring quotes — quote precisely.
|
|
62
|
+
- ` + '`suggestion`' + `: optional; quote or paraphrase the worker's recommended fix.
|
|
63
|
+
- ` + '`reviewerConfidence`' + `: integer 0-100. How confident YOU (reviewer) are
|
|
64
|
+
that the finding is correct, on-brief, and well-grounded:
|
|
65
|
+
80-100: defend without hesitation
|
|
66
|
+
60-79: plausible, minor gaps
|
|
67
|
+
40-59: thin evidence
|
|
68
|
+
20-39: weak / off-brief
|
|
69
|
+
0-19: unsupported / fabricated
|
|
70
|
+
|
|
71
|
+
If the worker raised NO issues, return ` + '`[]`' + `. Surrounding prose is allowed
|
|
72
|
+
but ignored by the parser — only the LAST ` + '```json' + ` block is read.
|
|
47
73
|
`.trim();
|
|
48
|
-
function
|
|
49
|
-
return
|
|
50
|
-
}
|
|
51
|
-
export function buildAuditQualityPrompt(ctx) {
|
|
52
|
-
return `You are reviewing an audit produced by a worker.
|
|
74
|
+
function buildPrompt(role, onBriefCheck, ctx) {
|
|
75
|
+
return `You are reviewing a ${role} produced by a worker.
|
|
53
76
|
|
|
54
|
-
The user requested
|
|
77
|
+
The user requested a ${role}. The brief was:
|
|
55
78
|
|
|
56
79
|
${ctx.brief}
|
|
57
80
|
|
|
58
81
|
## On-brief check (per finding)
|
|
59
82
|
|
|
60
|
-
|
|
61
|
-
A security audit should produce security findings, not style nits.
|
|
83
|
+
${onBriefCheck}
|
|
62
84
|
|
|
63
|
-
## Worker
|
|
85
|
+
## Worker output to extract findings from
|
|
64
86
|
|
|
65
|
-
|
|
66
|
-
${renderFindings(ctx.workerFindings)}
|
|
67
|
-
\`\`\`
|
|
87
|
+
${ctx.workerOutput}
|
|
68
88
|
|
|
69
|
-
${
|
|
89
|
+
${RUBRIC_TEMPLATE}`;
|
|
90
|
+
}
|
|
91
|
+
export function buildAuditQualityPrompt(ctx) {
|
|
92
|
+
return buildPrompt('audit', 'For each finding, ask: is this the kind of issue the audit asked for? A security audit should produce security findings, not style nits.', ctx);
|
|
70
93
|
}
|
|
71
94
|
export function buildReviewQualityPrompt(ctx) {
|
|
72
|
-
return
|
|
73
|
-
|
|
74
|
-
The user requested a code review. The brief was:
|
|
75
|
-
|
|
76
|
-
${ctx.brief}
|
|
77
|
-
|
|
78
|
-
## On-brief check (per finding)
|
|
79
|
-
|
|
80
|
-
For each worker finding, ask: is this within the requested focus area?
|
|
81
|
-
A security review should produce security findings, not formatting nits.
|
|
82
|
-
|
|
83
|
-
## Worker findings to annotate
|
|
84
|
-
|
|
85
|
-
\`\`\`json
|
|
86
|
-
${renderFindings(ctx.workerFindings)}
|
|
87
|
-
\`\`\`
|
|
88
|
-
|
|
89
|
-
${RUBRIC}`;
|
|
95
|
+
return buildPrompt('code review', 'For each finding, ask: is this within the requested focus area? A security review should produce security findings, not formatting nits.', ctx);
|
|
90
96
|
}
|
|
91
97
|
export function buildVerifyQualityPrompt(ctx) {
|
|
92
|
-
return
|
|
93
|
-
|
|
94
|
-
The user provided a checklist of acceptance criteria. The brief was:
|
|
95
|
-
|
|
96
|
-
${ctx.brief}
|
|
97
|
-
|
|
98
|
-
## On-brief check (per finding)
|
|
99
|
-
|
|
100
|
-
Each finding should map to one checklist item with evidence the criterion was
|
|
101
|
-
met or unmet. Flag findings that don't correspond to any checklist item, or
|
|
102
|
-
whose evidence doesn't actually demonstrate the claimed pass/fail status.
|
|
103
|
-
|
|
104
|
-
## Worker findings to annotate
|
|
105
|
-
|
|
106
|
-
\`\`\`json
|
|
107
|
-
${renderFindings(ctx.workerFindings)}
|
|
108
|
-
\`\`\`
|
|
109
|
-
|
|
110
|
-
${RUBRIC}`;
|
|
98
|
+
return buildPrompt('verification report', 'Each finding should map to one checklist item with evidence the criterion was met or unmet. Flag findings that do not correspond to any checklist item, or whose evidence does not actually demonstrate the claimed pass/fail status.', ctx);
|
|
111
99
|
}
|
|
112
100
|
export function buildInvestigateQualityPrompt(ctx) {
|
|
113
|
-
return
|
|
114
|
-
|
|
115
|
-
The user asked a question. The brief was:
|
|
116
|
-
|
|
117
|
-
${ctx.brief}
|
|
118
|
-
|
|
119
|
-
## On-brief check (per finding)
|
|
120
|
-
|
|
121
|
-
Each finding should be relevant to the question. Findings may be code-level
|
|
122
|
-
(file:line cited in evidence) or project-level synthesis (what was searched,
|
|
123
|
-
what was not found). Flag findings whose evidence does not support the claim
|
|
124
|
-
or whose claim drifts from the question.
|
|
125
|
-
|
|
126
|
-
## Worker findings to annotate
|
|
127
|
-
|
|
128
|
-
\`\`\`json
|
|
129
|
-
${renderFindings(ctx.workerFindings)}
|
|
130
|
-
\`\`\`
|
|
131
|
-
|
|
132
|
-
${RUBRIC}`;
|
|
101
|
+
return buildPrompt('codebase investigation', 'Each finding should be relevant to the question. Findings may be code-level (file:line cited in evidence) or project-level synthesis (what was searched, what was not found). Flag findings whose evidence does not support the claim or whose claim drifts from the question.', ctx);
|
|
133
102
|
}
|
|
134
103
|
export function buildDebugQualityPrompt(ctx) {
|
|
135
|
-
return
|
|
136
|
-
|
|
137
|
-
The user reported a failure. The brief was:
|
|
138
|
-
|
|
139
|
-
${ctx.brief}
|
|
140
|
-
|
|
141
|
-
## On-brief check (per finding)
|
|
142
|
-
|
|
143
|
-
Each finding should be a hypothesis, root-cause claim, or evidence
|
|
144
|
-
(reproducer, error pattern, code path). Flag findings that don't logically
|
|
145
|
-
follow from cited evidence or that exceed what the trace actually shows.
|
|
146
|
-
|
|
147
|
-
## Worker findings to annotate
|
|
148
|
-
|
|
149
|
-
\`\`\`json
|
|
150
|
-
${renderFindings(ctx.workerFindings)}
|
|
151
|
-
\`\`\`
|
|
152
|
-
|
|
153
|
-
${RUBRIC}`;
|
|
104
|
+
return buildPrompt('debugging hypothesis', 'Each finding should be a hypothesis, root-cause claim, or evidence (reproducer, error pattern, code path). Flag findings that do not logically follow from cited evidence or that exceed what the trace actually shows.', ctx);
|
|
154
105
|
}
|
|
155
106
|
//# sourceMappingURL=quality-only-prompts.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"quality-only-prompts.js","sourceRoot":"","sources":["../../src/review/quality-only-prompts.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"quality-only-prompts.js","sourceRoot":"","sources":["../../src/review/quality-only-prompts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAOH,MAAM,eAAe,GAAG,MAAM,CAAC,GAAG,CAAA;;;;;;;CAOjC,GAAG,WAAW,GAAG;;;;;;;;;;;;;;;;;CAiBjB,GAAG,KAAK,GAAG;;;GAGT,GAAG,MAAM,GAAG;GACZ,GAAG,YAAY,GAAG;;;;;;;;GAQlB,GAAG,SAAS,GAAG;GACf,GAAG,YAAY,GAAG;;GAElB,GAAG,cAAc,GAAG;GACpB,GAAG,sBAAsB,GAAG;;;;;;;;wCAQS,GAAG,MAAM,GAAG;2CACT,GAAG,SAAS,GAAG;CACzD,CAAC,IAAI,EAAE,CAAC;AAET,SAAS,WAAW,CAAC,IAAY,EAAE,YAAoB,EAAE,GAAkB;IACzE,OAAO,uBAAuB,IAAI;;uBAEb,IAAI;;EAEzB,GAAG,CAAC,KAAK;;;;EAIT,YAAY;;;;EAIZ,GAAG,CAAC,YAAY;;EAEhB,eAAe,EAAE,CAAC;AACpB,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,GAAkB;IACxD,OAAO,WAAW,CAChB,OAAO,EACP,0IAA0I,EAC1I,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,GAAkB;IACzD,OAAO,WAAW,CAChB,aAAa,EACb,0IAA0I,EAC1I,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,GAAkB;IACzD,OAAO,WAAW,CAChB,qBAAqB,EACrB,uOAAuO,EACvO,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,6BAA6B,CAAC,GAAkB;IAC9D,OAAO,WAAW,CAChB,wBAAwB,EACxB,gRAAgR,EAChR,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,GAAkB;IACxD,OAAO,WAAW,CAChB,sBAAsB,EACtB,yNAAyN,EACzN,GAAG,CACJ,CAAC;AACJ,CAAC"}
|