@zhixuan92/multi-model-agent-core 3.10.4 → 3.10.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +1 -1
  2. package/dist/executors/_shared/findings-schema.d.ts +55 -32
  3. package/dist/executors/_shared/findings-schema.d.ts.map +1 -1
  4. package/dist/executors/_shared/findings-schema.js +60 -22
  5. package/dist/executors/_shared/findings-schema.js.map +1 -1
  6. package/dist/executors/audit.d.ts.map +1 -1
  7. package/dist/executors/audit.js +1 -1
  8. package/dist/executors/audit.js.map +1 -1
  9. package/dist/executors/debug.d.ts.map +1 -1
  10. package/dist/executors/debug.js +1 -1
  11. package/dist/executors/debug.js.map +1 -1
  12. package/dist/executors/review.d.ts.map +1 -1
  13. package/dist/executors/review.js +1 -1
  14. package/dist/executors/review.js.map +1 -1
  15. package/dist/executors/verify.d.ts.map +1 -1
  16. package/dist/executors/verify.js +1 -1
  17. package/dist/executors/verify.js.map +1 -1
  18. package/dist/intake/compilers/audit.d.ts.map +1 -1
  19. package/dist/intake/compilers/audit.js +2 -2
  20. package/dist/intake/compilers/audit.js.map +1 -1
  21. package/dist/intake/compilers/investigate.d.ts.map +1 -1
  22. package/dist/intake/compilers/investigate.js +1 -2
  23. package/dist/intake/compilers/investigate.js.map +1 -1
  24. package/dist/intake/resolve.d.ts +10 -0
  25. package/dist/intake/resolve.d.ts.map +1 -1
  26. package/dist/intake/resolve.js +10 -19
  27. package/dist/intake/resolve.js.map +1 -1
  28. package/dist/review/fallback-extraction.d.ts +17 -0
  29. package/dist/review/fallback-extraction.d.ts.map +1 -0
  30. package/dist/review/fallback-extraction.js +140 -0
  31. package/dist/review/fallback-extraction.js.map +1 -0
  32. package/dist/review/parse-reviewer-findings.d.ts +26 -0
  33. package/dist/review/parse-reviewer-findings.d.ts.map +1 -0
  34. package/dist/review/parse-reviewer-findings.js +73 -0
  35. package/dist/review/parse-reviewer-findings.js.map +1 -0
  36. package/dist/review/quality-only-prompts.d.ts +18 -11
  37. package/dist/review/quality-only-prompts.d.ts.map +1 -1
  38. package/dist/review/quality-only-prompts.js +79 -128
  39. package/dist/review/quality-only-prompts.js.map +1 -1
  40. package/dist/review/quality-reviewer.d.ts +3 -26
  41. package/dist/review/quality-reviewer.d.ts.map +1 -1
  42. package/dist/review/quality-reviewer.js +54 -141
  43. package/dist/review/quality-reviewer.js.map +1 -1
  44. package/dist/run-tasks/index.d.ts +0 -1
  45. package/dist/run-tasks/index.d.ts.map +1 -1
  46. package/dist/run-tasks/reviewed-lifecycle.d.ts +1 -1
  47. package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
  48. package/dist/run-tasks/reviewed-lifecycle.js +62 -9
  49. package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
  50. package/dist/runners/base/result-builders.d.ts +1 -1
  51. package/dist/runners/base/result-builders.d.ts.map +1 -1
  52. package/dist/telemetry/event-builder.js +2 -2
  53. package/dist/telemetry/event-builder.js.map +1 -1
  54. package/dist/telemetry/types.d.ts +7 -0
  55. package/dist/telemetry/types.d.ts.map +1 -1
  56. package/dist/telemetry/types.js +6 -5
  57. package/dist/telemetry/types.js.map +1 -1
  58. package/dist/types.d.ts +2 -1
  59. package/dist/types.d.ts.map +1 -1
  60. package/dist/types.js.map +1 -1
  61. package/package.json +1 -1
@@ -1,25 +1,16 @@
1
1
  import { DEFAULT_TASK_TIMEOUT_MS } from '../config/schema.js';
2
- // 3.8.1 worker contract: each finding object has fields {id, severity, claim, evidence, suggestion?}.
3
- // `evidence` is REQUIRED and must be ≥20 chars — embed file:line as prose plus a
4
- // one-sentence explanation of what the cited code shows. Reviewer-emitted fields
5
- // (reviewerConfidence, reviewerSeverity) are added in the annotation pass; the
6
- // worker MUST NOT include them.
7
- const FINDINGS_BASE = [
8
- 'Your output MUST include a single ```json fenced code block containing a `findings[]` array.',
9
- 'Each finding object has these fields:',
10
- '- `id` (string, unique within the array)',
11
- '- `severity` (\'high\' | \'medium\' | \'low\')',
12
- '- `claim` (string, what is wrong / what is true)',
13
- '- `evidence` (string, REQUIRED, at least 20 characters): embed `file:line` as prose plus a one-sentence explanation of what the cited code or text actually shows. For project-level findings, describe what was searched/checked instead.',
14
- '- `suggestion?` (string, optional): a fix, follow-up step, or recommendation',
15
- ].join('\n');
2
+ /**
3
+ * Worker output contract per route.
4
+ *
5
+ * The 5 read-only routes (audit / review / verify / debug / investigate) no
6
+ * longer carry a structured-output contract — the quality reviewer extracts
7
+ * findings from the worker's free-form narrative in one pass. See
8
+ * packages/core/src/review/quality-only-prompts.ts.
9
+ *
10
+ * The artifact route `execute_plan` keeps its narrative contract.
11
+ */
16
12
  export const OUTPUT_CONTRACT_CLAUSES = {
17
- review_code: `${FINDINGS_BASE}\nEach finding should describe a code-level concern (correctness, security, performance, style as applicable to the focus). Embed the file:line in evidence; the reader will jump to the source from your prose.`,
18
- debug_task: `${FINDINGS_BASE}\nUse hypothesis-driven debugging: each finding should identify a root cause and propose a fix in \`suggestion\`. Evidence should quote the relevant trace, log line, or code path.`,
19
- verify_work: `${FINDINGS_BASE}\nMap each checklist item from the brief to a finding: pass (low severity, evidence shows the criterion was met) or fail (high/medium severity, evidence shows what is missing). One finding per checklist item.`,
20
- audit_document: `${FINDINGS_BASE}\nEach finding should describe an issue discovered in the audited document. Severity reflects impact if the issue stands.`,
21
13
  execute_plan: 'Implement the task fully. Report: which task heading you matched, what files were created or modified, and any issues encountered. If no unique matching task was found, report that explicitly and do not implement anything.',
22
- investigate_codebase: `${FINDINGS_BASE}\nFor an investigation, \`suggestion\` is optional and may be a follow-up question or angle to explore rather than a code fix. Evidence may be a file:line citation or a description of what was searched (e.g., "Searched src/middleware/, src/auth/ — no auth middleware found").`,
23
14
  };
24
15
  export const ROUTE_DEFAULTS = {
25
16
  delegate_tasks: {},
@@ -1 +1 @@
1
- {"version":3,"file":"resolve.js","sourceRoot":"","sources":["../../src/intake/resolve.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAE9D,sGAAsG;AACtG,iFAAiF;AACjF,iFAAiF;AACjF,+EAA+E;AAC/E,gCAAgC;AAChC,MAAM,aAAa,GAAG;IACpB,8FAA8F;IAC9F,uCAAuC;IACvC,0CAA0C;IAC1C,gDAAgD;IAChD,kDAAkD;IAClD,4OAA4O;IAC5O,8EAA8E;CAC/E,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,uBAAuB,GAAyC;IAC3E,WAAW,EAAE,GAAG,aAAa,kNAAkN;IAC/O,UAAU,EAAE,GAAG,aAAa,qLAAqL;IACjN,WAAW,EAAE,GAAG,aAAa,kNAAkN;IAC/O,cAAc,EAAE,GAAG,aAAa,2HAA2H;IAC3J,YAAY,EAAE,gOAAgO;IAC9O,oBAAoB,EAAE,GAAG,aAAa,qRAAqR;CAC5T,CAAC;AAEF,MAAM,CAAC,MAAM,cAAc,GAA2C;IACpE,cAAc,EAAE,EAAE;IAClB,WAAW,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IACnE,UAAU,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IAClE,WAAW,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IACnE,cAAc,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IACtE,YAAY,EAAE,EAAE,SAAS,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE;IAC7D,oBAAoB,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;CAC7E,CAAC;AAEF,MAAM,UAAU,YAAY,CAC1B,KAAgB,EAChB,MAAwB;IAExB,MAAM,aAAa,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAEzD,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,aAAa,CAAC,SAAS,IAAI,UAAU,CAAC;IAE3E,MAAM,cAAc,GAAG,uBAAuB,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACnE,MAAM,MAAM,GAAG,cAAc,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAC;QACrE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,OAAO,cAAc,EAAE;QACxC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;IAEjB,OAAO;QACL,MAAM;QACN,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,SAAS,EAAE,SAAmC;QAC9C,YAAY,EAAE,KAAK,CAAC,YAAY,IAAI,aAAa,CAAC,YAAY;QAC9D,KAAK,EAAE,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;QACvC,SAAS,EAAE,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;QAChE,UAAU,EAAE,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;QAC7C,aAAa,EAAE,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;QAC3D,kBAAkB,EAAE,KAAK;QACzB,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;QAClB,uBAAuB,EAAE,KAAK,CAAC,uBAAuB;KACvD,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"resolve.js","sourceRoot":"","sources":["../../src/intake/resolve.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAE9D;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAyC;IAC3E,YAAY,EAAE,gOAAgO;CAC/O,CAAC;AAEF,MAAM,CAAC,MAAM,cAAc,GAA2C;IACpE,cAAc,EAAE,EAAE;IAClB,WAAW,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IACnE,UAAU,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IAClE,WAAW,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IACnE,cAAc,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;IACtE,YAAY,EAAE,EAAE,SAAS,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE;IAC7D,oBAAoB,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,cAAc,EAAE;CAC7E,CAAC;AAEF,MAAM,UAAU,YAAY,CAC1B,KAAgB,EAChB,MAAwB;IAExB,MAAM,aAAa,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAEzD,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,aAAa,CAAC,SAAS,IAAI,UAAU,CAAC;IAE3E,MAAM,cAAc,GAAG,uBAAuB,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACnE,MAAM,MAAM,GAAG,cAAc,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAC;QACrE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,OAAO,cAAc,EAAE;QACxC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;IAEjB,OAAO;QACL,MAAM;QACN,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,SAAS,EAAE,SAAmC;QAC9C,YAAY,EAAE,KAAK,CAAC,YAAY,IAAI,aAAa,CAAC,YAAY;QAC9D,KAAK,EAAE,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;QACvC,SAAS,EAAE,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;QAChE,UAAU,EAAE,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;QAC7C,aAAa,EAAE,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;QAC3D,kBAAkB,EAAE,KAAK;QACzB,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;QAClB,uBAAuB,EAAE,KAAK,CAAC,uBAAuB;KACvD,CAAC;AACJ,CAAC"}
@@ -0,0 +1,17 @@
1
+ import { type AnnotatedFinding } from '../executors/_shared/findings-schema.js';
2
+ /**
3
+ * Deterministic regex extractor — runs when the LLM reviewer's JSON output
4
+ * fails parse twice. Synthesizes AnnotatedFinding[] so telemetry always has
5
+ * something to count.
6
+ *
7
+ * Confidence is null. Ids are always sequential `F${i+1}` (never use the
8
+ * worker's number — duplicates would violate annotatedFindingsSchema).
9
+ * evidenceGrounded reflects the actual substring check on the normalized
10
+ * worker output.
11
+ *
12
+ * If the worker output has zero parseable numbered sections and no explicit
13
+ * "no findings" language, emits a single catch-all finding so downstream
14
+ * telemetry never sees an empty list.
15
+ */
16
+ export declare function fallbackExtractFindings(workerOutput: string): AnnotatedFinding[];
17
+ //# sourceMappingURL=fallback-extraction.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fallback-extraction.d.ts","sourceRoot":"","sources":["../../src/review/fallback-extraction.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,gBAAgB,EACtB,MAAM,yCAAyC,CAAC;AA+FjD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,uBAAuB,CAAC,YAAY,EAAE,MAAM,GAAG,gBAAgB,EAAE,CAyChF"}
@@ -0,0 +1,140 @@
1
+ import { normalizeWhitespace, } from '../executors/_shared/findings-schema.js';
2
+ /**
3
+ * Map a worker-emitted severity string (case-insensitive, may be "mid")
4
+ * to the canonical 4-tier value. Default 'medium' on unknown.
5
+ */
6
+ function mapSeverity(raw) {
7
+ const s = raw.trim().toLowerCase();
8
+ if (s === 'critical')
9
+ return 'critical';
10
+ if (s === 'high')
11
+ return 'high';
12
+ if (s === 'medium' || s === 'mid')
13
+ return 'medium';
14
+ if (s === 'low')
15
+ return 'low';
16
+ return 'medium';
17
+ }
18
+ /**
19
+ * Match numbered/finding-shaped headings — broad enough to catch the most
20
+ * common Markdown patterns workers actually produce (Round-2 #5):
21
+ * "## 1. Title" — h2 with number
22
+ * "### 2. Title" — h3 with number
23
+ * "#### 3: Title" — h4 with number+colon
24
+ * "### [4] Title" — bracketed number
25
+ * "### Finding 5 — Title" — "Finding N" form
26
+ *
27
+ * Plain `### Summary` / `### Performance Notes` are ignored on purpose
28
+ * so the fallback does not invent findings out of structural sections.
29
+ *
30
+ * Capture group 1 is the bracketed number (when [N] form), 2 is the bare
31
+ * number (otherwise). At least one is always set when this regex matches.
32
+ */
33
+ const SECTION_RE = /^#{2,6}\s+(?:Finding\s+)?(?:\[(\d+)\]|(\d+))\s*[\.\:\)\-\—\–]?\s+(.+)$/gim;
34
+ const SEVERITY_RE = /^Severity:\s*(critical|high|medium|mid|low)\s*$/gim;
35
+ /** Single-pass section iteration (Round-1 P3). */
36
+ function findSections(workerOutput) {
37
+ const sections = [];
38
+ SECTION_RE.lastIndex = 0;
39
+ let prev = null;
40
+ let match;
41
+ while ((match = SECTION_RE.exec(workerOutput)) !== null) {
42
+ const startIdx = match.index;
43
+ if (prev) {
44
+ sections.push({ ...prev, endIdx: startIdx });
45
+ }
46
+ // capture[1] = [N] bracketed number; capture[2] = bare number; capture[3] = title.
47
+ const workerNumber = match[1] ?? match[2] ?? '';
48
+ prev = {
49
+ startIdx,
50
+ workerNumber,
51
+ title: (match[3] ?? '').trim(),
52
+ };
53
+ }
54
+ if (prev)
55
+ sections.push({ ...prev, endIdx: workerOutput.length });
56
+ return sections;
57
+ }
58
+ /**
59
+ * Detect explicit "no findings" language so fallback returns [] instead of
60
+ * a synthetic catch-all when a clean codebase produces a clean narrative
61
+ * but the reviewer happens to fail JSON parse (Round-2 #6).
62
+ */
63
+ const NO_FINDINGS_RE = /\b(?:no\s+(?:findings|issues|problems)\s+(?:found|detected|reported)?|nothing\s+to\s+report|0\s+findings|zero\s+findings)\b/i;
64
+ function severityFromSection(section) {
65
+ SEVERITY_RE.lastIndex = 0;
66
+ const m = SEVERITY_RE.exec(section);
67
+ if (!m)
68
+ return 'medium';
69
+ return mapSeverity(m[1]);
70
+ }
71
+ /**
72
+ * Build meaningful synthetic evidence (Round-1 #8):
73
+ * - Prefer the section body (first 240 chars after the heading).
74
+ * - If the body is too short, build a meaningful sentence from the title,
75
+ * not a space-padded string.
76
+ *
77
+ * Worker-stated number is preserved in the evidence prose, not in the id
78
+ * (Round-1 #2 — ids must always be unique sequential).
79
+ */
80
+ function buildEvidence(sectionText, title, workerNumber) {
81
+ const body = sectionText.split('\n').slice(1).join('\n').trim();
82
+ if (body.length >= 20)
83
+ return body.slice(0, 240);
84
+ const synth = `Worker finding #${workerNumber} (${title}): no detailed body provided in implementer report.`;
85
+ return synth.length >= 20 ? synth : `${synth} fallback-synthesized.`;
86
+ }
87
+ /**
88
+ * Deterministic regex extractor — runs when the LLM reviewer's JSON output
89
+ * fails parse twice. Synthesizes AnnotatedFinding[] so telemetry always has
90
+ * something to count.
91
+ *
92
+ * Confidence is null. Ids are always sequential `F${i+1}` (never use the
93
+ * worker's number — duplicates would violate annotatedFindingsSchema).
94
+ * evidenceGrounded reflects the actual substring check on the normalized
95
+ * worker output.
96
+ *
97
+ * If the worker output has zero parseable numbered sections and no explicit
98
+ * "no findings" language, emits a single catch-all finding so downstream
99
+ * telemetry never sees an empty list.
100
+ */
101
+ export function fallbackExtractFindings(workerOutput) {
102
+ const normalizedWorker = normalizeWhitespace(workerOutput);
103
+ const sections = findSections(workerOutput);
104
+ // Round-2 #6: respect explicit "no findings" worker output.
105
+ if (sections.length === 0 && NO_FINDINGS_RE.test(workerOutput)) {
106
+ return [];
107
+ }
108
+ if (sections.length === 0) {
109
+ const trimmed = workerOutput.trim();
110
+ // Use real worker text when long enough — preserves evidenceGrounded=true.
111
+ // Otherwise fall back to a meaningful synthetic sentence (knowingly ungrounded).
112
+ const evidence = trimmed.length >= 20
113
+ ? trimmed.slice(0, 240)
114
+ : `Worker output had no parseable findings (length ${trimmed.length}). Fallback emitted catch-all so telemetry has at least one entry.`;
115
+ const eNorm = normalizeWhitespace(evidence);
116
+ return [{
117
+ id: 'F1',
118
+ severity: 'medium',
119
+ claim: 'reviewer parse failed; deterministic fallback emitted single catch-all from worker output',
120
+ evidence,
121
+ reviewerConfidence: null,
122
+ evidenceGrounded: eNorm.length >= 20 && normalizedWorker.includes(eNorm),
123
+ }];
124
+ }
125
+ return sections.map((section, i) => {
126
+ const sectionText = workerOutput.slice(section.startIdx, section.endIdx);
127
+ const severity = severityFromSection(sectionText);
128
+ const evidence = buildEvidence(sectionText, section.title, section.workerNumber);
129
+ const eNorm = normalizeWhitespace(evidence);
130
+ return {
131
+ id: `F${i + 1}`,
132
+ severity,
133
+ claim: section.title || `Finding ${i + 1}`,
134
+ evidence,
135
+ reviewerConfidence: null,
136
+ evidenceGrounded: eNorm.length >= 20 && normalizedWorker.includes(eNorm),
137
+ };
138
+ });
139
+ }
140
+ //# sourceMappingURL=fallback-extraction.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fallback-extraction.js","sourceRoot":"","sources":["../../src/review/fallback-extraction.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,GAEpB,MAAM,yCAAyC,CAAC;AAIjD;;;GAGG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,MAAM,CAAC,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACnC,IAAI,CAAC,KAAK,UAAU;QAAE,OAAO,UAAU,CAAC;IACxC,IAAI,CAAC,KAAK,MAAM;QAAE,OAAO,MAAM,CAAC;IAChC,IAAI,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,KAAK;QAAE,OAAO,QAAQ,CAAC;IACnD,IAAI,CAAC,KAAK,KAAK;QAAE,OAAO,KAAK,CAAC;IAC9B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,GAAG,2EAA2E,CAAC;AAC/F,MAAM,WAAW,GAAG,oDAAoD,CAAC;AASzE,kDAAkD;AAClD,SAAS,YAAY,CAAC,YAAoB;IACxC,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,UAAU,CAAC,SAAS,GAAG,CAAC,CAAC;IACzB,IAAI,IAAI,GAAqE,IAAI,CAAC;IAClF,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC;QAC7B,IAAI,IAAI,EAAE,CAAC;YACT,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC;QAC/C,CAAC;QACD,mFAAmF;QACnF,MAAM,YAAY,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAChD,IAAI,GAAG;YACL,QAAQ;YACR,YAAY;YACZ,KAAK,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE;SAC/B,CAAC;IACJ,CAAC;IACD,IAAI,IAAI;QAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,MAAM,EAAE,YAAY,CAAC,MAAM,EAAE,CAAC,CAAC;IAClE,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,cAAc,GAAG,8HAA8H,CAAC;AAEtJ,SAAS,mBAAmB,CAAC,OAAe;IAC1C,WAAW,CAAC,SAAS,GAAG,CAAC,CAAC;IAC1B,MAAM,CAAC,GAAG,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpC,IAAI,CAAC,CAAC;QAAE,OAAO,QAAQ,CAAC;IACxB,OAAO,WAAW,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;AAC5B,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,aAAa,CAAC,WAAmB,EAAE,KAAa,EAAE,YAAoB;IAC7E,MAAM,IAAI,GAAG,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;IAChE,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACjD,MAAM,KAAK,GAAG,mBAAmB,YAAY,KAAK,KAAK,qDAAqD,CAAC;IAC7G,OAAO,KAAK,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC
,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,KAAK,wBAAwB,CAAC;AACvE,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,uBAAuB,CAAC,YAAoB;IAC1D,MAAM,gBAAgB,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;IAE5C,4DAA4D;IAC5D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC;QAC/D,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC;QACpC,2EAA2E;QAC3E,iFAAiF;QACjF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,IAAI,EAAE;YACnC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;YACvB,CAAC,CAAC,mDAAmD,OAAO,CAAC,MAAM,oEAAoE,CAAC;QAC1I,MAAM,KAAK,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAC5C,OAAO,CAAC;gBACN,EAAE,EAAE,IAAI;gBACR,QAAQ,EAAE,QAAQ;gBAClB,KAAK,EAAE,2FAA2F;gBAClG,QAAQ;gBACR,kBAAkB,EAAE,IAAI;gBACxB,gBAAgB,EAAE,KAAK,CAAC,MAAM,IAAI,EAAE,IAAI,gBAAgB,CAAC,QAAQ,CAAC,KAAK,CAAC;aACzE,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,EAAE,EAAE;QACjC,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;QACzE,MAAM,QAAQ,GAAG,mBAAmB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,QAAQ,GAAG,aAAa,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;QACjF,MAAM,KAAK,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAC5C,OAAO;YACL,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE;YACf,QAAQ;YACR,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE;YAC1C,QAAQ;YACR,kBAAkB,EAAE,IAAI;YACxB,gBAAgB,EAAE,KAAK,CAAC,MAAM,IAAI,EAAE,IAAI,gBAAgB,CAAC,QAAQ,CAAC,KAAK,CAAC;SACzE,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,26 @@
1
+ import { type AnnotatedFinding } from '../executors/_shared/findings-schema.js';
2
+ interface ParseOk {
3
+ ok: true;
4
+ findings: AnnotatedFinding[];
5
+ /** Findings whose evidence was NOT a substring of worker output — kept but flagged. */
6
+ ungroundedCount: number;
7
+ }
8
+ interface ParseErr {
9
+ ok: false;
10
+ reason: string;
11
+ }
12
+ export type ParseReviewerFindingsResult = ParseOk | ParseErr;
13
+ /**
14
+ * Parse the reviewer's structured output and annotate each finding with
15
+ * `evidenceGrounded`.
16
+ *
17
+ * Pipeline:
18
+ * 1. Extract the final `` ```json `` block (permissive fence).
19
+ * 2. JSON.parse + Zod-validate against reviewerEmittedFindingsSchema.
20
+ * 3. Normalize worker output ONCE; for each finding set
21
+ * `evidenceGrounded` against the normalized worker.
22
+ * 4. Return ALL findings — never drop. `ungroundedCount` is informational only.
23
+ */
24
+ export declare function parseReviewerFindings(reviewerOutput: string, workerOutput: string): ParseReviewerFindingsResult;
25
+ export {};
26
+ //# sourceMappingURL=parse-reviewer-findings.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parse-reviewer-findings.d.ts","sourceRoot":"","sources":["../../src/review/parse-reviewer-findings.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,KAAK,gBAAgB,EACtB,MAAM,yCAAyC,CAAC;AAEjD,UAAU,OAAO;IACf,EAAE,EAAE,IAAI,CAAC;IACT,QAAQ,EAAE,gBAAgB,EAAE,CAAC;IAC7B,uFAAuF;IACvF,eAAe,EAAE,MAAM,CAAC;CACzB;AACD,UAAU,QAAQ;IAChB,EAAE,EAAE,KAAK,CAAC;IACV,MAAM,EAAE,MAAM,CAAC;CAChB;AACD,MAAM,MAAM,2BAA2B,GAAG,OAAO,GAAG,QAAQ,CAAC;AAmC7D;;;;;;;;;;GAUG;AACH,wBAAgB,qBAAqB,CACnC,cAAc,EAAE,MAAM,EACtB,YAAY,EAAE,MAAM,GACnB,2BAA2B,CA0B7B"}
@@ -0,0 +1,73 @@
1
+ import { reviewerEmittedFindingsSchema, normalizeWhitespace, } from '../executors/_shared/findings-schema.js';
2
+ // Permissive fence regex (Round-3 #2): case-insensitive `json`, allow:
3
+ // - any whitespace (including none) between opening fence and content
4
+ // - any whitespace (including none) between content and closing fence
5
+ // so compact "[]```" or content-on-one-line forms are accepted.
6
+ // The captured group is JSON-trimmed before parse.
7
+ const JSON_BLOCK_RE = /```json[ \t]*\r?\n?([\s\S]*?)\s*```[ \t]*/gi;
8
+ /**
9
+ * Extract the LAST `` ```json `` fenced code block from the reviewer output.
10
+ * Reviewers often emit example/format JSON earlier; the real findings array
11
+ * is conventionally last. Single-pass — does not materialize all matches.
12
+ */
13
+ function extractFinalJsonBlock(output) {
14
+ let last = null;
15
+ let match;
16
+ // Reset lastIndex on the shared regex to make this re-entrant.
17
+ JSON_BLOCK_RE.lastIndex = 0;
18
+ while ((match = JSON_BLOCK_RE.exec(output)) !== null) {
19
+ last = match;
20
+ }
21
+ return last ? last[1] ?? null : null;
22
+ }
23
+ /**
24
+ * Substring check against a pre-normalized worker output. Avoids re-normalizing
25
+ * the (potentially large) worker output for every finding (Round-1 P2).
26
+ */
27
+ function evidenceIsGroundedAgainst(evidence, normalizedWorker) {
28
+ const e = normalizeWhitespace(evidence);
29
+ if (e.length < 20)
30
+ return false;
31
+ return normalizedWorker.includes(e);
32
+ }
33
+ /**
34
+ * Parse the reviewer's structured output and annotate each finding with
35
+ * `evidenceGrounded`.
36
+ *
37
+ * Pipeline:
38
+ * 1. Extract the final `` ```json `` block (permissive fence).
39
+ * 2. JSON.parse + Zod-validate against reviewerEmittedFindingsSchema.
40
+ * 3. Normalize worker output ONCE; for each finding set
41
+ * `evidenceGrounded` against the normalized worker.
42
+ * 4. Return ALL findings — never drop. `ungroundedCount` is informational only.
43
+ */
44
+ export function parseReviewerFindings(reviewerOutput, workerOutput) {
45
+ const block = extractFinalJsonBlock(reviewerOutput);
46
+ if (block === null) {
47
+ return { ok: false, reason: 'reviewer output missing ```json fenced block' };
48
+ }
49
+ let parsed;
50
+ try {
51
+ // Trim — the permissive regex can capture leading/trailing whitespace.
52
+ parsed = JSON.parse(block.trim());
53
+ }
54
+ catch (err) {
55
+ return { ok: false, reason: `reviewer JSON parse failed: ${err instanceof Error ? err.message : String(err)}` };
56
+ }
57
+ const validated = reviewerEmittedFindingsSchema.safeParse(parsed);
58
+ if (!validated.success) {
59
+ return { ok: false, reason: `findings array validation failed: ${validated.error.message}` };
60
+ }
61
+ // Round-2 P2: fuse map + count to avoid the second pass.
62
+ const normalizedWorker = normalizeWhitespace(workerOutput);
63
+ const annotated = [];
64
+ let ungroundedCount = 0;
65
+ for (const f of validated.data) {
66
+ const grounded = evidenceIsGroundedAgainst(f.evidence, normalizedWorker);
67
+ if (!grounded)
68
+ ungroundedCount++;
69
+ annotated.push({ ...f, evidenceGrounded: grounded });
70
+ }
71
+ return { ok: true, findings: annotated, ungroundedCount };
72
+ }
73
+ //# sourceMappingURL=parse-reviewer-findings.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parse-reviewer-findings.js","sourceRoot":"","sources":["../../src/review/parse-reviewer-findings.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,6BAA6B,EAC7B,mBAAmB,GAEpB,MAAM,yCAAyC,CAAC;AAcjD,uEAAuE;AACvE,wEAAwE;AACxE,wEAAwE;AACxE,oEAAoE;AACpE,mDAAmD;AACnD,MAAM,aAAa,GAAG,6CAA6C,CAAC;AAEpE;;;;GAIG;AACH,SAAS,qBAAqB,CAAC,MAAc;IAC3C,IAAI,IAAI,GAA2B,IAAI,CAAC;IACxC,IAAI,KAA6B,CAAC;IAClC,+DAA+D;IAC/D,aAAa,CAAC,SAAS,GAAG,CAAC,CAAC;IAC5B,OAAO,CAAC,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,IAAI,GAAG,KAAK,CAAC;IACf,CAAC;IACD,OAAO,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;AACvC,CAAC;AAED;;;GAGG;AACH,SAAS,yBAAyB,CAAC,QAAgB,EAAE,gBAAwB;IAC3E,MAAM,CAAC,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,CAAC,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,KAAK,CAAC;IAChC,OAAO,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;AACtC,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,qBAAqB,CACnC,cAAsB,EACtB,YAAoB;IAEpB,MAAM,KAAK,GAAG,qBAAqB,CAAC,cAAc,CAAC,CAAC;IACpD,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QACnB,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,8CAA8C,EAAE,CAAC;IAC/E,CAAC;IACD,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,uEAAuE;QACvE,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;IACpC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,+BAA+B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;IAClH,CAAC;IACD,MAAM,SAAS,GAAG,6BAA6B,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAClE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC;QACvB,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,qCAAqC,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;IAC/F,CAAC;IACD,yDAAyD;IACzD,MAAM,gBAAgB,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;IAC3D,MAAM,SAAS,GAAuB,EAAE,CAAC;IACzC,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,KAAK,MAAM,CAAC,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,yBAAyB,CAAC,CAAC,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;QACzE,IAAI,CAAC,QAAQ;YAAE,eAAe,EAAE,CAAC;QACjC,SAAS,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,gBAAgB,EAAE,QAAQ,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,OAAO,EAAE,
EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,eAAe,EAAE,CAAC;AAC5D,CAAC"}
@@ -1,21 +1,28 @@
1
1
  /**
2
- * Quality-only review prompts for the 5 read-only mma-* routes (3.8.1+).
2
+ * Quality-only review prompts for the 5 read-only mma-* routes (3.10.5+).
3
3
  *
4
- * Each prompt asks the reviewer to ANNOTATE every worker finding with:
5
- * - reviewerConfidence: integer 0–100, how confident YOU (reviewer) are that
6
- * this finding is correct, on-brief, and well-grounded in the evidence.
7
- * - reviewerSeverity (optional): only set when you disagree with the worker's
8
- * severity. Workers tend to inflate; use this to dial down.
4
+ * The reviewer receives ONLY the implementer's free-form markdown narrative
5
+ * and the original brief. It must:
6
+ * 1. Read the worker's narrative.
7
+ * 2. Identify every distinct issue/finding/checklist-item the worker raised.
8
+ * 3. Assign sequential ids (F1, F2, ...) even if the worker numbered them,
9
+ * the reviewer re-numbers from 1 to ensure uniqueness.
10
+ * 4. Set `severity` to its OWN final 4-tier judgment {critical, high, medium,
11
+ * low}. The reviewer is authoritative — there is no separate
12
+ * `reviewerSeverity` field. Map worker-stated "mid" -> "medium". When
13
+ * the worker did not state a severity, judge from impact.
14
+ * 5. Score each finding's reviewerConfidence (0-100) — how confident YOU
15
+ * would be defending the finding's correctness if challenged.
16
+ * 6. Quote evidence VERBATIM (≥20 chars) from the worker's output. The
17
+ * downstream parser flags non-substring quotes via
18
+ * `evidenceGrounded:false` but never drops findings.
19
+ * 7. Emit ONE fenced JSON code block as the LAST block in your response.
9
20
  *
10
- * The reviewer returns a single ```json fenced block containing a JSON array
11
- * of {id, reviewerConfidence, reviewerSeverity?} objects, one per worker
12
- * finding (matched by id). NO verdict, NO gate, NO rework signal.
21
+ * If the worker raised zero issues, emit `[]` and stop.
13
22
  */
14
- import type { WorkerFinding } from '../executors/_shared/findings-schema.js';
15
23
  interface PromptContext {
16
24
  workerOutput: string;
17
25
  brief: string;
18
- workerFindings: WorkerFinding[];
19
26
  }
20
27
  export declare function buildAuditQualityPrompt(ctx: PromptContext): string;
21
28
  export declare function buildReviewQualityPrompt(ctx: PromptContext): string;
@@ -1 +1 @@
1
- {"version":3,"file":"quality-only-prompts.d.ts","sourceRoot":"","sources":["../../src/review/quality-only-prompts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,yCAAyC,CAAC;AAE7E,UAAU,aAAa;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,EAAE,aAAa,EAAE,CAAC;CACjC;AAyCD,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAmBlE;AAED,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAmBnE;AAED,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAoBnE;AAED,wBAAgB,6BAA6B,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAqBxE;AAED,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAoBlE"}
1
+ {"version":3,"file":"quality-only-prompts.d.ts","sourceRoot":"","sources":["../../src/review/quality-only-prompts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,UAAU,aAAa;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,CAAC;CACf;AAwED,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAMlE;AAED,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAMnE;AAED,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAMnE;AAED,wBAAgB,6BAA6B,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAMxE;AAED,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,aAAa,GAAG,MAAM,CAMlE"}
@@ -1,155 +1,106 @@
1
1
  /**
2
- * Quality-only review prompts for the 5 read-only mma-* routes (3.8.1+).
2
+ * Quality-only review prompts for the 5 read-only mma-* routes (3.10.5+).
3
3
  *
4
- * Each prompt asks the reviewer to ANNOTATE every worker finding with:
5
- * - reviewerConfidence: integer 0–100, how confident YOU (reviewer) are that
6
- * this finding is correct, on-brief, and well-grounded in the evidence.
7
- * - reviewerSeverity (optional): only set when you disagree with the worker's
8
- * severity. Workers tend to inflate; use this to dial down.
4
+ * The reviewer receives ONLY the implementer's free-form markdown narrative
5
+ * and the original brief. It must:
6
+ * 1. Read the worker's narrative.
7
+ * 2. Identify every distinct issue/finding/checklist-item the worker raised.
8
+ * 3. Assign sequential ids (F1, F2, ...); even if the worker numbered them,
9
+ * the reviewer re-numbers from 1 to ensure uniqueness.
10
+ * 4. Set `severity` to its OWN final 4-tier judgment {critical, high, medium,
11
+ * low}. The reviewer is authoritative — there is no separate
12
+ * `reviewerSeverity` field. Map worker-stated "mid" -> "medium". When
13
+ * the worker did not state a severity, judge from impact.
14
+ * 5. Score each finding's reviewerConfidence (0-100) — how confident YOU
15
+ * would be defending the finding's correctness if challenged.
16
+ * 6. Quote evidence VERBATIM (≥20 chars) from the worker's output. The
17
+ * downstream parser flags non-substring quotes via
18
+ * `evidenceGrounded:false` but never drops findings.
19
+ * 7. Emit ONE fenced JSON code block as the LAST block in your response.
9
20
  *
10
- * The reviewer returns a single ```json fenced block containing a JSON array
11
- * of {id, reviewerConfidence, reviewerSeverity?} objects, one per worker
12
- * finding (matched by id). NO verdict, NO gate, NO rework signal.
21
+ * If the worker raised zero issues, emit `[]` and stop.
13
22
  */
14
- const RUBRIC = `
15
- ## How to score \`reviewerConfidence\` (integer 0-100)
16
-
17
- You are scoring whether YOU would defend this finding if pushed. Not severity.
18
- Not the worker's self-confidence.
19
-
20
- 80-100: evidence directly supports the claim, on-brief, defend without hesitation
21
- 60-79: evidence supports claim with minor gaps, on-brief, plausible
22
- 40-59: claim plausible but evidence thin, partial, or requires inference
23
- 20-39: claim weak, evidence does not back it up, OR off-brief
24
- 0-19: unsupported, contradicted, fabricated, OR completely off-brief
25
-
26
- ## How to use \`reviewerSeverity\` (optional)
27
-
28
- Only set when you DISAGREE with the worker's \`severity\`. Workers tend to
29
- inflate ("everything is high"); use \`reviewerSeverity\` to dial down. Omit
30
- when you agree.
31
-
23
+ const RUBRIC_TEMPLATE = String.raw `
32
24
  ## Output format (REQUIRED)
33
25
 
34
- Respond with exactly one fenced JSON code block. The block must contain a
35
- JSON array of objects, one entry per worker finding (matched by \`id\`). Example:
36
-
37
- \`\`\`json
38
- [
39
- { "id": "F1", "reviewerConfidence": 85 },
40
- { "id": "F2", "reviewerConfidence": 35, "reviewerSeverity": "low" },
41
- { "id": "F3", "reviewerConfidence": 70 }
26
+ Respond with exactly one fenced JSON code block AS THE LAST BLOCK in your
27
+ response. The block contains a JSON array of finding objects, in the order
28
+ the worker presented them. Example:
29
+
30
+ ` + '```json\n' + `[
31
+ {
32
+ "id": "F1",
33
+ "severity": "critical",
34
+ "claim": "Remote code execution via unsanitized input in src/handler.ts:42",
35
+ "evidence": "user input is passed directly into shellExec() without escaping",
36
+ "suggestion": "Use a parameterized API or escape input",
37
+ "reviewerConfidence": 90
38
+ },
39
+ {
40
+ "id": "F2",
41
+ "severity": "medium",
42
+ "claim": "Auth check missing on /admin endpoint",
43
+ "evidence": "router.get('/admin', adminHandler) — no auth middleware applied",
44
+ "reviewerConfidence": 60
45
+ }
42
46
  ]
43
- \`\`\`
44
-
45
- Every worker finding id must appear exactly once. No extra ids. No missing
46
- ids. Surrounding prose is allowed but ignored by the parser.
47
+ ` + '```' + `
48
+
49
+ Field rules:
50
+ - ` + '`id`' + `: assign sequentially F1, F2, F3, ... (your choice; must be unique).
51
+ - ` + '`severity`' + `: one of "critical" | "high" | "medium" | "low" — YOUR
52
+ final judgment, not the worker's. The worker's value is a hint; you may
53
+ dial it up or down based on actual impact (workers tend to inflate).
54
+ - critical: must fix before any other work (RCE, auth bypass, data loss)
55
+ - high: serious bug / security issue, blocks release
56
+ - medium: real issue, should fix soon
57
+ - low: minor issue, nice to fix
58
+ Map worker-said "mid" -> "medium". When the worker omitted severity, judge.
59
+ - ` + '`claim`' + `: one-sentence summary.
60
+ - ` + '`evidence`' + `: REQUIRED, ≥20 chars, MUST be a verbatim quote from the
61
+ worker's output. The parser flags non-substring quotes — quote precisely.
62
+ - ` + '`suggestion`' + `: optional; quote or paraphrase the worker's recommended fix.
63
+ - ` + '`reviewerConfidence`' + `: integer 0-100. How confident YOU (reviewer) are
64
+ that the finding is correct, on-brief, and well-grounded:
65
+ 80-100: defend without hesitation
66
+ 60-79: plausible, minor gaps
67
+ 40-59: thin evidence
68
+ 20-39: weak / off-brief
69
+ 0-19: unsupported / fabricated
70
+
71
+ If the worker raised NO issues, return ` + '`[]`' + `. Surrounding prose is allowed
72
+ but ignored by the parser — only the LAST ` + '```json' + ` block is read.
47
73
  `.trim();
48
- function renderFindings(findings) {
49
- return JSON.stringify(findings, null, 2);
50
- }
51
- export function buildAuditQualityPrompt(ctx) {
52
- return `You are reviewing an audit produced by a worker.
74
+ function buildPrompt(role, onBriefCheck, ctx) {
75
+ return `You are reviewing a ${role} produced by a worker.
53
76
 
54
- The user requested an audit. The brief was:
77
+ The user requested a ${role}. The brief was:
55
78
 
56
79
  ${ctx.brief}
57
80
 
58
81
  ## On-brief check (per finding)
59
82
 
60
- For each worker finding, ask: is this the kind of issue the audit asked for?
61
- A security audit should produce security findings, not style nits.
83
+ ${onBriefCheck}
62
84
 
63
- ## Worker findings to annotate
85
+ ## Worker output to extract findings from
64
86
 
65
- \`\`\`json
66
- ${renderFindings(ctx.workerFindings)}
67
- \`\`\`
87
+ ${ctx.workerOutput}
68
88
 
69
- ${RUBRIC}`;
89
+ ${RUBRIC_TEMPLATE}`;
90
+ }
91
+ export function buildAuditQualityPrompt(ctx) {
92
+ return buildPrompt('audit', 'For each finding, ask: is this the kind of issue the audit asked for? A security audit should produce security findings, not style nits.', ctx);
70
93
  }
71
94
  export function buildReviewQualityPrompt(ctx) {
72
- return `You are reviewing a code review produced by a worker.
73
-
74
- The user requested a code review. The brief was:
75
-
76
- ${ctx.brief}
77
-
78
- ## On-brief check (per finding)
79
-
80
- For each worker finding, ask: is this within the requested focus area?
81
- A security review should produce security findings, not formatting nits.
82
-
83
- ## Worker findings to annotate
84
-
85
- \`\`\`json
86
- ${renderFindings(ctx.workerFindings)}
87
- \`\`\`
88
-
89
- ${RUBRIC}`;
95
+ return buildPrompt('code review', 'For each finding, ask: is this within the requested focus area? A security review should produce security findings, not formatting nits.', ctx);
90
96
  }
91
97
  export function buildVerifyQualityPrompt(ctx) {
92
- return `You are reviewing a verification report produced by a worker.
93
-
94
- The user provided a checklist of acceptance criteria. The brief was:
95
-
96
- ${ctx.brief}
97
-
98
- ## On-brief check (per finding)
99
-
100
- Each finding should map to one checklist item with evidence the criterion was
101
- met or unmet. Flag findings that don't correspond to any checklist item, or
102
- whose evidence doesn't actually demonstrate the claimed pass/fail status.
103
-
104
- ## Worker findings to annotate
105
-
106
- \`\`\`json
107
- ${renderFindings(ctx.workerFindings)}
108
- \`\`\`
109
-
110
- ${RUBRIC}`;
98
+ return buildPrompt('verification report', 'Each finding should map to one checklist item with evidence the criterion was met or unmet. Flag findings that do not correspond to any checklist item, or whose evidence does not actually demonstrate the claimed pass/fail status.', ctx);
111
99
  }
112
100
  export function buildInvestigateQualityPrompt(ctx) {
113
- return `You are reviewing a codebase investigation produced by a worker.
114
-
115
- The user asked a question. The brief was:
116
-
117
- ${ctx.brief}
118
-
119
- ## On-brief check (per finding)
120
-
121
- Each finding should be relevant to the question. Findings may be code-level
122
- (file:line cited in evidence) or project-level synthesis (what was searched,
123
- what was not found). Flag findings whose evidence does not support the claim
124
- or whose claim drifts from the question.
125
-
126
- ## Worker findings to annotate
127
-
128
- \`\`\`json
129
- ${renderFindings(ctx.workerFindings)}
130
- \`\`\`
131
-
132
- ${RUBRIC}`;
101
+ return buildPrompt('codebase investigation', 'Each finding should be relevant to the question. Findings may be code-level (file:line cited in evidence) or project-level synthesis (what was searched, what was not found). Flag findings whose evidence does not support the claim or whose claim drifts from the question.', ctx);
133
102
  }
134
103
  export function buildDebugQualityPrompt(ctx) {
135
- return `You are reviewing a debugging hypothesis produced by a worker.
136
-
137
- The user reported a failure. The brief was:
138
-
139
- ${ctx.brief}
140
-
141
- ## On-brief check (per finding)
142
-
143
- Each finding should be a hypothesis, root-cause claim, or evidence
144
- (reproducer, error pattern, code path). Flag findings that don't logically
145
- follow from cited evidence or that exceed what the trace actually shows.
146
-
147
- ## Worker findings to annotate
148
-
149
- \`\`\`json
150
- ${renderFindings(ctx.workerFindings)}
151
- \`\`\`
152
-
153
- ${RUBRIC}`;
104
+ return buildPrompt('debugging hypothesis', 'Each finding should be a hypothesis, root-cause claim, or evidence (reproducer, error pattern, code path). Flag findings that do not logically follow from cited evidence or that exceed what the trace actually shows.', ctx);
154
105
  }
155
106
  //# sourceMappingURL=quality-only-prompts.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"quality-only-prompts.js","sourceRoot":"","sources":["../../src/review/quality-only-prompts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAUH,MAAM,MAAM,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiCd,CAAC,IAAI,EAAE,CAAC;AAET,SAAS,cAAc,CAAC,QAAyB;IAC/C,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,GAAkB;IACxD,OAAO;;;;EAIP,GAAG,CAAC,KAAK;;;;;;;;;;EAUT,cAAc,CAAC,GAAG,CAAC,cAAc,CAAC;;;EAGlC,MAAM,EAAE,CAAC;AACX,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,GAAkB;IACzD,OAAO;;;;EAIP,GAAG,CAAC,KAAK;;;;;;;;;;EAUT,cAAc,CAAC,GAAG,CAAC,cAAc,CAAC;;;EAGlC,MAAM,EAAE,CAAC;AACX,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,GAAkB;IACzD,OAAO;;;;EAIP,GAAG,CAAC,KAAK;;;;;;;;;;;EAWT,cAAc,CAAC,GAAG,CAAC,cAAc,CAAC;;;EAGlC,MAAM,EAAE,CAAC;AACX,CAAC;AAED,MAAM,UAAU,6BAA6B,CAAC,GAAkB;IAC9D,OAAO;;;;EAIP,GAAG,CAAC,KAAK;;;;;;;;;;;;EAYT,cAAc,CAAC,GAAG,CAAC,cAAc,CAAC;;;EAGlC,MAAM,EAAE,CAAC;AACX,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,GAAkB;IACxD,OAAO;;;;EAIP,GAAG,CAAC,KAAK;;;;;;;;;;;EAWT,cAAc,CAAC,GAAG,CAAC,cAAc,CAAC;;;EAGlC,MAAM,EAAE,CAAC;AACX,CAAC"}
1
+ {"version":3,"file":"quality-only-prompts.js","sourceRoot":"","sources":["../../src/review/quality-only-prompts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAOH,MAAM,eAAe,GAAG,MAAM,CAAC,GAAG,CAAA;;;;;;;CAOjC,GAAG,WAAW,GAAG;;;;;;;;;;;;;;;;;CAiBjB,GAAG,KAAK,GAAG;;;GAGT,GAAG,MAAM,GAAG;GACZ,GAAG,YAAY,GAAG;;;;;;;;GAQlB,GAAG,SAAS,GAAG;GACf,GAAG,YAAY,GAAG;;GAElB,GAAG,cAAc,GAAG;GACpB,GAAG,sBAAsB,GAAG;;;;;;;;wCAQS,GAAG,MAAM,GAAG;2CACT,GAAG,SAAS,GAAG;CACzD,CAAC,IAAI,EAAE,CAAC;AAET,SAAS,WAAW,CAAC,IAAY,EAAE,YAAoB,EAAE,GAAkB;IACzE,OAAO,uBAAuB,IAAI;;uBAEb,IAAI;;EAEzB,GAAG,CAAC,KAAK;;;;EAIT,YAAY;;;;EAIZ,GAAG,CAAC,YAAY;;EAEhB,eAAe,EAAE,CAAC;AACpB,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,GAAkB;IACxD,OAAO,WAAW,CAChB,OAAO,EACP,0IAA0I,EAC1I,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,GAAkB;IACzD,OAAO,WAAW,CAChB,aAAa,EACb,0IAA0I,EAC1I,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,GAAkB;IACzD,OAAO,WAAW,CAChB,qBAAqB,EACrB,uOAAuO,EACvO,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,6BAA6B,CAAC,GAAkB;IAC9D,OAAO,WAAW,CAChB,wBAAwB,EACxB,gRAAgR,EAChR,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,GAAkB;IACxD,OAAO,WAAW,CAChB,sBAAsB,EACtB,yNAAyN,EACzN,GAAG,CACJ,CAAC;AACJ,CAAC"}