@pi-agents/orchid 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/CHANGELOG.md +41 -0
  2. package/LICENSE +21 -0
  3. package/README.md +246 -0
  4. package/agents/AGENTS-MANIFEST.md +42 -0
  5. package/agents/brain.md +42 -0
  6. package/agents/context-builder.md +46 -0
  7. package/agents/delegate.md +12 -0
  8. package/agents/dev-1.md +42 -0
  9. package/agents/oracle.md +73 -0
  10. package/agents/planner.md +55 -0
  11. package/agents/researcher.md +52 -0
  12. package/agents/reviewer.md +79 -0
  13. package/agents/scout.md +50 -0
  14. package/agents/tester.md +45 -0
  15. package/agents/worker.md +55 -0
  16. package/extensions/ralph.ts +1 -0
  17. package/extensions/reviewer-extension.ts +125 -0
  18. package/extensions/task-orchestrator.ts +28 -0
  19. package/package.json +63 -0
  20. package/prompts/gather-context-and-clarify.md +13 -0
  21. package/prompts/parallel-cleanup.md +59 -0
  22. package/prompts/parallel-context-build.md +53 -0
  23. package/prompts/parallel-handoff-plan.md +59 -0
  24. package/prompts/parallel-research.md +50 -0
  25. package/prompts/parallel-review.md +54 -0
  26. package/prompts/review-loop.md +41 -0
  27. package/skills/orchid/SKILL.md +214 -0
  28. package/skills/orchid/orchid-cleanup/SKILL.md +122 -0
  29. package/skills/orchid/orchid-converge/SKILL.md +124 -0
  30. package/skills/orchid/orchid-decompose/SKILL.md +201 -0
  31. package/skills/orchid/orchid-doctor/SKILL.md +162 -0
  32. package/skills/orchid/orchid-investigate/SKILL.md +102 -0
  33. package/skills/orchid/orchid-launch/SKILL.md +147 -0
  34. package/skills/ralph/SKILL.md +73 -0
  35. package/skills/subagents/pi-subagents/SKILL.md +813 -0
  36. package/src/index.ts +7 -0
  37. package/src/orchestrator/abort.ts +534 -0
  38. package/src/orchestrator/agent-bridge-extension.ts +1020 -0
  39. package/src/orchestrator/agent-host.ts +954 -0
  40. package/src/orchestrator/cleanup.ts +776 -0
  41. package/src/orchestrator/config-loader.ts +1412 -0
  42. package/src/orchestrator/config-schema.ts +690 -0
  43. package/src/orchestrator/config.ts +81 -0
  44. package/src/orchestrator/context-window.ts +66 -0
  45. package/src/orchestrator/diagnostic-reports.ts +475 -0
  46. package/src/orchestrator/diagnostics.ts +394 -0
  47. package/src/orchestrator/discovery.ts +1833 -0
  48. package/src/orchestrator/engine-worker.ts +415 -0
  49. package/src/orchestrator/engine.ts +5940 -0
  50. package/src/orchestrator/execution.ts +3104 -0
  51. package/src/orchestrator/extension.ts +5934 -0
  52. package/src/orchestrator/formatting.ts +785 -0
  53. package/src/orchestrator/git.ts +88 -0
  54. package/src/orchestrator/index.ts +28 -0
  55. package/src/orchestrator/lane-runner.ts +1787 -0
  56. package/src/orchestrator/mailbox.ts +780 -0
  57. package/src/orchestrator/merge.ts +3414 -0
  58. package/src/orchestrator/messages.ts +1062 -0
  59. package/src/orchestrator/migrations.ts +278 -0
  60. package/src/orchestrator/naming.ts +117 -0
  61. package/src/orchestrator/path-resolver.ts +275 -0
  62. package/src/orchestrator/persistence.ts +2625 -0
  63. package/src/orchestrator/process-registry.ts +452 -0
  64. package/src/orchestrator/quality-gate.ts +1085 -0
  65. package/src/orchestrator/resume.ts +3488 -0
  66. package/src/orchestrator/sessions.ts +57 -0
  67. package/src/orchestrator/settings-loader.ts +136 -0
  68. package/src/orchestrator/settings-tui.ts +2208 -0
  69. package/src/orchestrator/sidecar-telemetry.ts +267 -0
  70. package/src/orchestrator/supervisor.ts +4548 -0
  71. package/src/orchestrator/task-executor-core.ts +675 -0
  72. package/src/orchestrator/tmux-compat.ts +37 -0
  73. package/src/orchestrator/tool-allowlist-constants.ts +37 -0
  74. package/src/orchestrator/types.ts +4465 -0
  75. package/src/orchestrator/verification.ts +547 -0
  76. package/src/orchestrator/waves.ts +1564 -0
  77. package/src/orchestrator/workspace.ts +707 -0
  78. package/src/orchestrator/worktree.ts +2725 -0
  79. package/src/ralph/index.ts +825 -0
  80. package/src/subagents/agents/agent-management.ts +648 -0
  81. package/src/subagents/agents/agent-scope.ts +6 -0
  82. package/src/subagents/agents/agent-selection.ts +23 -0
  83. package/src/subagents/agents/agent-serializer.ts +86 -0
  84. package/src/subagents/agents/agents.ts +832 -0
  85. package/src/subagents/agents/chain-serializer.ts +137 -0
  86. package/src/subagents/agents/frontmatter.ts +29 -0
  87. package/src/subagents/agents/identity.ts +30 -0
  88. package/src/subagents/agents/skills.ts +632 -0
  89. package/src/subagents/extension/config.ts +16 -0
  90. package/src/subagents/extension/control-notices.ts +92 -0
  91. package/src/subagents/extension/doctor.ts +199 -0
  92. package/src/subagents/extension/fanout-child.ts +170 -0
  93. package/src/subagents/extension/index.ts +573 -0
  94. package/src/subagents/extension/schemas.ts +168 -0
  95. package/src/subagents/intercom/intercom-bridge.ts +379 -0
  96. package/src/subagents/intercom/result-intercom.ts +377 -0
  97. package/src/subagents/runs/background/async-execution.ts +712 -0
  98. package/src/subagents/runs/background/async-job-tracker.ts +310 -0
  99. package/src/subagents/runs/background/async-resume.ts +345 -0
  100. package/src/subagents/runs/background/async-status.ts +325 -0
  101. package/src/subagents/runs/background/completion-dedupe.ts +63 -0
  102. package/src/subagents/runs/background/notify.ts +108 -0
  103. package/src/subagents/runs/background/parallel-groups.ts +45 -0
  104. package/src/subagents/runs/background/result-watcher.ts +307 -0
  105. package/src/subagents/runs/background/run-id-resolver.ts +83 -0
  106. package/src/subagents/runs/background/run-status.ts +269 -0
  107. package/src/subagents/runs/background/stale-run-reconciler.ts +336 -0
  108. package/src/subagents/runs/background/subagent-runner.ts +1808 -0
  109. package/src/subagents/runs/background/top-level-async.ts +13 -0
  110. package/src/subagents/runs/foreground/chain-clarify.ts +1333 -0
  111. package/src/subagents/runs/foreground/chain-execution.ts +938 -0
  112. package/src/subagents/runs/foreground/execution.ts +918 -0
  113. package/src/subagents/runs/foreground/subagent-executor.ts +2527 -0
  114. package/src/subagents/runs/shared/completion-guard.ts +147 -0
  115. package/src/subagents/runs/shared/long-running-guard.ts +175 -0
  116. package/src/subagents/runs/shared/mcp-direct-tool-allowlist.ts +365 -0
  117. package/src/subagents/runs/shared/model-fallback.ts +103 -0
  118. package/src/subagents/runs/shared/nested-events.ts +819 -0
  119. package/src/subagents/runs/shared/nested-path.ts +52 -0
  120. package/src/subagents/runs/shared/nested-render.ts +115 -0
  121. package/src/subagents/runs/shared/parallel-utils.ts +109 -0
  122. package/src/subagents/runs/shared/pi-args.ts +220 -0
  123. package/src/subagents/runs/shared/pi-spawn.ts +115 -0
  124. package/src/subagents/runs/shared/run-history.ts +60 -0
  125. package/src/subagents/runs/shared/single-output.ts +164 -0
  126. package/src/subagents/runs/shared/subagent-control.ts +226 -0
  127. package/src/subagents/runs/shared/subagent-prompt-runtime.ts +170 -0
  128. package/src/subagents/runs/shared/worktree.ts +577 -0
  129. package/src/subagents/shared/artifacts.ts +98 -0
  130. package/src/subagents/shared/atomic-json.ts +16 -0
  131. package/src/subagents/shared/file-coalescer.ts +40 -0
  132. package/src/subagents/shared/fork-context.ts +76 -0
  133. package/src/subagents/shared/formatters.ts +133 -0
  134. package/src/subagents/shared/jsonl-writer.ts +81 -0
  135. package/src/subagents/shared/model-info.ts +78 -0
  136. package/src/subagents/shared/post-exit-stdio-guard.ts +85 -0
  137. package/src/subagents/shared/session-identity.ts +10 -0
  138. package/src/subagents/shared/session-tokens.ts +44 -0
  139. package/src/subagents/shared/settings.ts +397 -0
  140. package/src/subagents/shared/status-format.ts +49 -0
  141. package/src/subagents/shared/types.ts +822 -0
  142. package/src/subagents/shared/utils.ts +450 -0
  143. package/src/subagents/slash/prompt-template-bridge.ts +397 -0
  144. package/src/subagents/slash/slash-bridge.ts +174 -0
  145. package/src/subagents/slash/slash-commands.ts +528 -0
  146. package/src/subagents/slash/slash-live-state.ts +292 -0
  147. package/src/subagents/tui/render-helpers.ts +80 -0
  148. package/src/subagents/tui/render.ts +1358 -0
  149. package/templates/agents/local/supervisor.md +33 -0
  150. package/templates/agents/local/task-merger.md +27 -0
  151. package/templates/agents/local/task-reviewer.md +30 -0
  152. package/templates/agents/local/task-worker.md +34 -0
  153. package/templates/agents/supervisor-routing.md +92 -0
  154. package/templates/agents/supervisor.md +229 -0
  155. package/templates/agents/task-merger.md +214 -0
  156. package/templates/agents/task-reviewer.md +260 -0
  157. package/templates/agents/task-worker-segment.md +44 -0
  158. package/templates/agents/task-worker.md +557 -0
  159. package/templates/tasks/CONTEXT.md +30 -0
  160. package/templates/tasks/EXAMPLE-001-hello-world/PROMPT.md +98 -0
  161. package/templates/tasks/EXAMPLE-001-hello-world/STATUS.md +73 -0
  162. package/templates/tasks/EXAMPLE-002-parallel-smoke/PROMPT.md +97 -0
  163. package/templates/tasks/EXAMPLE-002-parallel-smoke/STATUS.md +73 -0
@@ -0,0 +1,1085 @@
1
+ /**
2
+ * Quality Gate — structured post-completion review types and verdict evaluation.
3
+ *
4
+ * This module defines the interfaces for quality gate review verdicts and
5
+ * implements the verdict evaluation logic used by the task-runner to decide
6
+ * whether a task passes or needs fixes before `.DONE` creation.
7
+ *
8
+ * Verdict rules (from roadmap Phase 5a):
9
+ * - Any `critical` finding → NEEDS_FIXES
10
+ * - 3+ `important` findings → NEEDS_FIXES (under the `no_important` threshold)
11
+ * - Only `suggestion` findings → PASS (except under the `all_clear` threshold)
12
+ * - Any `status_mismatch` category → NEEDS_FIXES
13
+ *
14
+ * Fail-open behavior: malformed or missing verdict JSON → PASS
15
+ * (prevents quality gate bugs from blocking task completion)
16
+ *
17
+ * @module quality-gate
18
+ */
19
+
20
+ import type { PassThreshold } from "./config-schema.ts";
21
+ import { readFileSync, writeFileSync, existsSync } from "fs";
22
+ import { join } from "path";
23
+ import { spawnSync } from "child_process";
24
+
25
+ // ── Verdict Interfaces ───────────────────────────────────────────────
26
+
27
/**
 * Severity assigned to a single review finding.
 * Members are listed from most severe (`critical`) to least severe (`suggestion`).
 */
export type FindingSeverity = "critical" | "important" | "suggestion";
29
+
30
/**
 * Classification of the kind of problem a review finding describes.
 *
 * `status_mismatch` is special-cased by `applyVerdictRules()`: any finding in
 * that category fails the gate, whatever its severity or the pass threshold.
 */
export type FindingCategory =
  | "missing_requirement"
  | "incorrect_implementation"
  | "incomplete_work"
  | "status_mismatch";
36
+
37
/**
 * One issue reported by the quality gate review agent.
 */
export interface ReviewFinding {
  /** How serious the issue is. */
  severity: FindingSeverity;
  /** What kind of issue this is. */
  category: FindingCategory;
  /** Human-readable explanation of the problem. */
  description: string;
  /** Path of the file the finding relates to; an empty string when none was given. */
  file: string;
  /** Concrete fix instruction intended for the remediation agent. */
  remediation: string;
}
50
+
51
/**
 * The reviewer's assessment of a single STATUS.md checkbox.
 */
export interface StatusReconciliation {
  /** Checkbox text as it appears in STATUS.md. */
  checkbox: string;
  /** State of the work as established by the review. */
  actualState: "done" | "not_done" | "partial";
  /** Evidence the reviewer gives for that state. */
  evidence: string;
}
60
+
61
/**
 * Complete verdict produced by the quality gate review agent.
 */
export interface ReviewVerdict {
  /** The reviewer's own pass/fail call. */
  verdict: "PASS" | "NEEDS_FIXES";
  /** How confident the reviewer is in that call. */
  confidence: "high" | "medium" | "low";
  /** Short overall assessment. */
  summary: string;
  /** Every individual finding the reviewer reported. */
  findings: ReviewFinding[];
  /** Per-checkbox reconciliation of STATUS.md against the actual work. */
  statusReconciliation: StatusReconciliation[];
}
74
+
75
+ // ── Verdict Evaluation ───────────────────────────────────────────────
76
+
77
/**
 * One reason the gate evaluated a verdict as NEEDS_FIXES.
 */
export interface VerdictFailReason {
  /**
   * Identifier of the rule that fired.
   * `important_threshold` is also the rule reported when `suggestion`-only
   * findings fail the `all_clear` threshold.
   */
  rule: "critical_finding" | "important_threshold" | "status_mismatch" | "verdict_says_needs_fixes";
  /** Human-readable explanation of why the rule fired. */
  detail: string;
}
84
+
85
/**
 * Outcome of running the verdict rules over a parsed `ReviewVerdict`.
 */
export interface VerdictEvaluation {
  /** True when the task clears the quality gate. */
  pass: boolean;
  /** Every rule that fired; empty when `pass` is true. */
  failReasons: VerdictFailReason[];
}
92
+
93
/**
 * Decide whether a parsed review verdict clears the quality gate.
 *
 * Checks run in a fixed order; each one that fires appends a single entry to
 * `failReasons`:
 * 1. Any finding categorised as `status_mismatch` fails the gate.
 * 2. Any finding with severity `critical` fails the gate.
 * 3. Threshold check on `important` findings:
 *    - `no_important`: fails at three or more.
 *    - `all_clear`: fails at one or more.
 *    - `no_critical`: `important` findings are not counted.
 * 4. A reviewer verdict of `NEEDS_FIXES` is honoured, but only recorded when
 *    no earlier check has already failed.
 * 5. Under `all_clear`, `suggestion` findings fail the gate when nothing else
 *    has (reported under the `important_threshold` rule).
 *
 * @param verdict - Parsed review verdict
 * @param threshold - Configured pass threshold
 * @returns Evaluation whose `pass` is true exactly when `failReasons` is empty
 */
export function applyVerdictRules(
  verdict: ReviewVerdict,
  threshold: PassThreshold,
): VerdictEvaluation {
  const reasons: VerdictFailReason[] = [];
  const fail = (rule: VerdictFailReason["rule"], detail: string): void => {
    reasons.push({ rule, detail });
  };

  // Tally everything the rules need in one pass over the findings.
  let mismatchCount = 0;
  let criticalCount = 0;
  let importantCount = 0;
  let suggestionCount = 0;
  for (const finding of verdict.findings) {
    if (finding.category === "status_mismatch") {
      mismatchCount += 1;
    }
    switch (finding.severity) {
      case "critical":
        criticalCount += 1;
        break;
      case "important":
        importantCount += 1;
        break;
      case "suggestion":
        suggestionCount += 1;
        break;
    }
  }

  // Checks 1 and 2 apply under every threshold.
  if (mismatchCount > 0) {
    fail(
      "status_mismatch",
      `${mismatchCount} status mismatch(es) found — checked boxes don't match actual work`,
    );
  }
  if (criticalCount > 0) {
    fail("critical_finding", `${criticalCount} critical finding(s)`);
  }

  // Check 3: how many `important` findings are tolerated depends on the threshold.
  switch (threshold) {
    case "no_important":
      if (importantCount >= 3) {
        fail(
          "important_threshold",
          `${importantCount} important findings (threshold: fewer than 3 required for pass)`,
        );
      }
      break;
    case "all_clear":
      if (importantCount > 0) {
        fail(
          "important_threshold",
          `${importantCount} important finding(s) (all_clear threshold: zero findings required)`,
        );
      }
      break;
  }

  // Check 4: respect the reviewer's own call when no rule has fired yet.
  if (reasons.length === 0 && verdict.verdict === "NEEDS_FIXES") {
    fail("verdict_says_needs_fixes", `Review agent verdict: NEEDS_FIXES — ${verdict.summary}`);
  }

  // Check 5: `all_clear` tolerates nothing, not even suggestions.
  if (threshold === "all_clear" && reasons.length === 0 && suggestionCount > 0) {
    fail(
      "important_threshold",
      `${suggestionCount} suggestion(s) found (all_clear threshold: zero findings required)`,
    );
  }

  return { pass: reasons.length === 0, failReasons: reasons };
}
181
+
182
+ // ── Verdict Parsing ──────────────────────────────────────────────────
183
+
184
/**
 * Template for the synthetic PASS verdict used whenever the real verdict
 * cannot be obtained or parsed (fail-open policy).
 */
const FAIL_OPEN_VERDICT: ReviewVerdict = {
  verdict: "PASS",
  confidence: "low",
  summary: "Verdict could not be parsed — fail-open policy applied",
  findings: [],
  statusReconciliation: [],
};
192
+
193
/**
 * Parse the review agent's raw JSON output into a `ReviewVerdict`.
 *
 * Fail-open: when the input is missing, blank, not valid JSON, not a JSON
 * object, or carries an unknown `verdict` value, a synthetic PASS verdict is
 * returned so that a broken review cannot block task completion. Each
 * fail-open result gets its own `findings` / `statusReconciliation` arrays, so
 * callers may mutate a result without affecting the shared sentinel.
 *
 * Normalisation applied to otherwise valid input:
 * - surrounding whitespace (including a leading byte-order mark) is trimmed
 *   before parsing
 * - `confidence` falls back to `"medium"` when missing or unrecognised
 * - `summary` falls back to `""` when it is not a string
 * - `findings` and `statusReconciliation` fall back to `[]` when they are not
 *   arrays, and malformed entries inside them are dropped
 *
 * @param jsonString - Raw JSON string from review agent output
 * @returns Parsed and validated ReviewVerdict (never throws on string, null or undefined input)
 */
export function parseVerdict(jsonString: string | undefined | null): ReviewVerdict {
  // Build each fail-open verdict with fresh arrays instead of leaking the
  // sentinel's own (mutable) array instances to callers.
  const failOpen = (summary: string): ReviewVerdict => ({
    ...FAIL_OPEN_VERDICT,
    summary,
    findings: [],
    statusReconciliation: [],
  });

  // `trim()` also removes U+FEFF, which `JSON.parse` would otherwise reject.
  const text = jsonString ? jsonString.trim() : "";
  if (text === "") {
    return failOpen("No verdict provided — fail-open policy applied");
  }

  let raw: unknown;
  try {
    raw = JSON.parse(text);
  } catch {
    return failOpen("Malformed JSON in verdict — fail-open policy applied");
  }

  if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
    return failOpen("Verdict is not a JSON object — fail-open policy applied");
  }

  const obj = raw as Record<string, unknown>;

  // The verdict field is mandatory; anything unrecognised fails open.
  const verdict = obj.verdict;
  if (verdict !== "PASS" && verdict !== "NEEDS_FIXES") {
    return failOpen(`Invalid verdict value "${String(verdict)}" — fail-open policy applied`);
  }

  // Confidence is optional metadata; default rather than reject.
  const validConfidence = ["high", "medium", "low"];
  const confidence = validConfidence.includes(obj.confidence as string)
    ? (obj.confidence as "high" | "medium" | "low")
    : "medium";

  const summary = typeof obj.summary === "string" ? obj.summary : "";

  return {
    verdict,
    confidence,
    summary,
    findings: validateFindings(obj.findings),
    statusReconciliation: validateReconciliations(obj.statusReconciliation),
  };
}
261
+
262
+ // ── Internal Validation Helpers ──────────────────────────────────────
263
+
264
/** Severity values accepted when validating a finding. */
const VALID_SEVERITIES: FindingSeverity[] = ["critical", "important", "suggestion"];

/** Category values accepted when validating a finding. */
const VALID_CATEGORIES: FindingCategory[] = [
  "missing_requirement",
  "incorrect_implementation",
  "incomplete_work",
  "status_mismatch",
];

/** `actualState` values accepted when validating a reconciliation entry. */
const VALID_STATES = ["done", "not_done", "partial"];
272
+
273
/**
 * Turn an untrusted `findings` value into a clean `ReviewFinding[]`.
 *
 * A non-array input yields `[]`. An entry is kept only when it is an object
 * with a recognised `severity`, a recognised `category` and a non-blank string
 * `description`; `file` and `remediation` default to `""` when they are not
 * strings. Entries failing those checks are silently dropped.
 */
function validateFindings(raw: unknown): ReviewFinding[] {
  if (!Array.isArray(raw)) return [];

  // Returns the normalised finding, or null when the entry must be dropped.
  const normalize = (entry: unknown): ReviewFinding | null => {
    if (entry === null || typeof entry !== "object") return null;
    const { severity, category, description, file, remediation } = entry as Record<string, unknown>;

    if (!VALID_SEVERITIES.includes(severity as FindingSeverity)) return null;
    if (!VALID_CATEGORIES.includes(category as FindingCategory)) return null;
    if (typeof description !== "string" || description.trim() === "") return null;

    return {
      severity: severity as FindingSeverity,
      category: category as FindingCategory,
      description,
      file: typeof file === "string" ? file : "",
      remediation: typeof remediation === "string" ? remediation : "",
    };
  };

  const accepted: ReviewFinding[] = [];
  for (const entry of raw) {
    const finding = normalize(entry);
    if (finding !== null) {
      accepted.push(finding);
    }
  }
  return accepted;
}
305
+
306
/**
 * Turn an untrusted `statusReconciliation` value into a clean
 * `StatusReconciliation[]`.
 *
 * A non-array input yields `[]`. An entry is kept only when it is an object
 * with a non-blank string `checkbox` and a recognised `actualState`;
 * `evidence` defaults to `""` when it is not a string. Other entries are
 * silently dropped.
 */
function validateReconciliations(raw: unknown): StatusReconciliation[] {
  if (!Array.isArray(raw)) return [];

  // Returns the normalised entry, or null when the entry must be dropped.
  const normalize = (entry: unknown): StatusReconciliation | null => {
    if (entry === null || typeof entry !== "object") return null;
    const { checkbox, actualState, evidence } = entry as Record<string, unknown>;

    if (typeof checkbox !== "string" || checkbox.trim() === "") return null;
    if (!VALID_STATES.includes(actualState as string)) return null;

    return {
      checkbox,
      actualState: actualState as "done" | "not_done" | "partial",
      evidence: typeof evidence === "string" ? evidence : "",
    };
  };

  const accepted: StatusReconciliation[] = [];
  for (const entry of raw) {
    const reconciliation = normalize(entry);
    if (reconciliation !== null) {
      accepted.push(reconciliation);
    }
  }
  return accepted;
}
330
+
331
+ // ── Quality Gate Review Prompt ───────────────────────────────────────
332
+
333
/**
 * Inputs required to assemble the evidence package for a quality gate review.
 */
export interface QualityGateContext {
  /** Absolute path of the task folder; STATUS.md and the verdict file are resolved inside it. */
  taskFolder: string;
  /** Absolute path of the task's PROMPT.md. */
  promptPath: string;
  /** Task identifier, e.g. "TP-034". */
  taskId: string;
  /** Project name taken from config. */
  projectName: string;
  /** Pass threshold taken from config. */
  passThreshold: PassThreshold;
}
346
+
347
/**
 * File name, resolved inside the task folder, of the JSON verdict that the
 * review agent is instructed to write.
 */
export const VERDICT_FILENAME = "REVIEW_VERDICT.json";
349
+
350
/**
 * Pick the git revision that the task's changes should be diffed against.
 *
 * Candidates are tried in order and the first that works wins:
 * 1. `git merge-base HEAD <ref>` for `main`, `origin/main`, `master` and
 *    `origin/master` — the point where HEAD forked from a default branch.
 * 2. `HEAD~N` with N = min(first-parent commit count − 1, 50) — a bounded
 *    fallback for repositories without any of those refs or with little
 *    history.
 * 3. The empty string, signalling that no base could be determined.
 *
 * The fallback counts commits with `--first-parent` because `HEAD~N` walks
 * first parents only; counting every reachable commit can overshoot in
 * histories that contain merges and name a revision that does not exist.
 *
 * @param cwd - Directory in which the git commands are run
 * @returns A commit hash, a `HEAD~N` expression, or `""` when no base was found
 */
function computeDiffBase(cwd: string): string {
  const opts = { encoding: "utf-8" as const, cwd, timeout: 15000 };

  // Preferred: fork point from one of the conventional default branches.
  for (const ref of ["main", "origin/main", "master", "origin/master"]) {
    const result = spawnSync("git", ["merge-base", "HEAD", ref], opts);
    if (result.status === 0 && result.stdout.trim()) {
      return result.stdout.trim();
    }
  }

  // Fallback: go back along the first-parent chain, at most 50 commits.
  const countResult = spawnSync("git", ["rev-list", "--count", "--first-parent", "HEAD"], opts);
  if (countResult.status === 0) {
    const count = Number.parseInt(countResult.stdout.trim(), 10);
    // A NaN count fails this comparison and falls through to "".
    if (count > 1) {
      const n = Math.min(count - 1, 50);
      return `HEAD~${n}`;
    }
  }

  return "";
}
383
+
384
/**
 * Collect the git evidence for the whole task: the list of changed files and
 * the diff itself, both taken over `<base>..HEAD` where the base comes from
 * `computeDiffBase()`.
 *
 * Degrades to placeholder text instead of failing:
 * - no base could be determined → both fields are placeholders
 * - `git diff --name-only` fails → `fileList` is `""`
 * - `git diff` fails → `diff` is `"(git diff unavailable)"`
 * - an unexpected exception → both fields are placeholders
 *
 * The diff is captured with a 200KB buffer. When git produces more than that,
 * the child is terminated and reports a failure; the output captured up to the
 * limit is still returned, followed by a truncation marker, rather than being
 * discarded.
 *
 * @param cwd - Directory in which the git commands are run
 * @returns The diff text and a newline-separated list of changed files
 */
function buildGitDiff(cwd: string): { diff: string; fileList: string } {
  try {
    const base = computeDiffBase(cwd);
    if (!base) {
      return {
        diff: "(git diff unavailable — could not determine base)",
        fileList: "(file list unavailable)",
      };
    }

    const range = `${base}..HEAD`;

    // Names of the files touched in the range.
    const fileListResult = spawnSync("git", ["diff", "--name-only", range], {
      encoding: "utf-8",
      cwd,
      timeout: 30000,
    });
    const fileList = fileListResult.status === 0 ? fileListResult.stdout.trim() : "";

    // Full diff, capped so a huge change set cannot blow up the prompt.
    const diffResult = spawnSync("git", ["diff", range], {
      encoding: "utf-8",
      cwd,
      timeout: 30000,
      maxBuffer: 200 * 1024, // 200KB max
    });

    let diff = "(git diff unavailable)";
    if (diffResult.status === 0) {
      diff = diffResult.stdout.trim();
    } else {
      // Exceeding maxBuffer terminates git and surfaces as ENOBUFS; the output
      // captured so far is still usable evidence.
      const errorCode = (diffResult.error as (Error & { code?: string }) | undefined)?.code;
      const partial = typeof diffResult.stdout === "string" ? diffResult.stdout.trim() : "";
      if (errorCode === "ENOBUFS" && partial !== "") {
        diff = `${partial}\n\n... (diff truncated at 200KB) ...`;
      }
    }

    return { diff, fileList };
  } catch {
    return { diff: "(git diff failed)", fileList: "(file list unavailable)" };
  }
}
425
+
426
+ /**
427
+ * Generate the quality gate review prompt that instructs the review agent
428
+ * to produce a structured JSON verdict.
429
+ *
430
+ * The prompt includes:
431
+ * - PROMPT.md content (task requirements)
432
+ * - STATUS.md content (declared progress)
433
+ * - Git diff of all task changes
434
+ * - File change list
435
+ * - JSON schema for the verdict
436
+ * - Instructions for fail criteria
437
+ *
438
+ * @param context - Task context for evidence building
439
+ * @param cwd - Working directory for git commands
440
+ * @returns Review prompt string
441
+ */
442
/**
 * Produce the markdown bullet lines that tell the reviewer when to answer
 * PASS or NEEDS_FIXES for the given threshold.
 *
 * The wording follows what `applyVerdictRules()` enforces at runtime, so the
 * reviewer does not emit NEEDS_FIXES for findings the configured threshold
 * would tolerate.
 *
 * @param threshold - Configured pass threshold
 * @returns Rule lines followed by one empty string (a trailing blank line)
 */
function buildThresholdRules(threshold: PassThreshold): string[] {
  // Lines that differ per threshold; an unrecognised threshold contributes none.
  let thresholdSpecific: string[] = [];
  if (threshold === "no_critical") {
    thresholdSpecific = [
      `- **PASS** even if there are \`important\` or \`suggestion\` findings (threshold: \`no_critical\`)`,
    ];
  } else if (threshold === "no_important") {
    thresholdSpecific = [
      `- **NEEDS_FIXES** if 3 or more findings have severity \`important\``,
      `- **PASS** if only \`suggestion\`-level findings remain`,
    ];
  } else if (threshold === "all_clear") {
    thresholdSpecific = [
      `- **NEEDS_FIXES** if ANY findings exist (including \`suggestion\`-level)`,
    ];
  }

  return [
    // Rules that hold under every threshold come first.
    `- **NEEDS_FIXES** if any finding has category \`status_mismatch\` (checkbox claims work is done but it isn't)`,
    `- **NEEDS_FIXES** if any finding has severity \`critical\``,
    ...thresholdSpecific,
    `- **PASS** if no findings at all`,
    ``,
  ];
}
479
+
480
/**
 * Assemble the markdown prompt handed to the quality gate review agent.
 *
 * The prompt embeds, in this order: the PROMPT.md text, the STATUS.md text
 * (read from the task folder), the list of changed files, the git diff (cut
 * to its first 100 * 1024 characters), review instructions, the verdict rules
 * for the configured threshold, and the JSON shape the agent must write to
 * the verdict file. An evidence file that is missing or unreadable is
 * replaced by a "(… not found)" placeholder rather than raising.
 *
 * @param context - Task context for evidence building
 * @param cwd - Working directory for git commands
 * @returns Review prompt string
 */
export function generateQualityGatePrompt(context: QualityGateContext, cwd: string): string {
  const FENCE = "```";

  // A fenced code block: opening fence (with optional info string), body, closing fence.
  const fenced = (info: string, body: string): string[] => [`${FENCE}${info}`, body, FENCE];

  // A level-2 section: heading, blank line, body lines, blank line.
  const section = (heading: string, ...body: string[]): string[] => [heading, ``, ...body, ``];

  // Fail-open file read: any problem yields the placeholder text.
  const readEvidence = (filePath: string, placeholder: string): string => {
    try {
      return existsSync(filePath) ? readFileSync(filePath, "utf-8") : placeholder;
    } catch {
      return placeholder;
    }
  };

  const statusPath = join(context.taskFolder, "STATUS.md");
  const verdictPath = join(context.taskFolder, VERDICT_FILENAME);

  const promptContent = readEvidence(context.promptPath, "(PROMPT.md not found)");
  const statusContent = readEvidence(statusPath, "(STATUS.md not found)");

  const { diff, fileList } = buildGitDiff(cwd);

  // Keep only the head of an oversized diff so the prompt stays bounded.
  const diffLimit = 100 * 1024;
  let shownDiff = diff;
  if (diff.length > diffLimit) {
    shownDiff = diff.slice(0, diffLimit) + "\n\n... (diff truncated at 100KB) ...";
  }

  const verdictSchema = [
    `{`,
    ` "verdict": "PASS" | "NEEDS_FIXES",`,
    ` "confidence": "high" | "medium" | "low",`,
    ` "summary": "Brief overall assessment",`,
    ` "findings": [`,
    ` {`,
    ` "severity": "critical" | "important" | "suggestion",`,
    ` "category": "missing_requirement" | "incorrect_implementation" | "incomplete_work" | "status_mismatch",`,
    ` "description": "What is wrong",`,
    ` "file": "path/to/file.ts",`,
    ` "remediation": "Specific fix instruction"`,
    ` }`,
    ` ],`,
    ` "statusReconciliation": [`,
    ` {`,
    ` "checkbox": "Original checkbox text",`,
    ` "actualState": "done" | "not_done" | "partial",`,
    ` "evidence": "How you verified"`,
    ` }`,
    ` ]`,
    `}`,
  ].join("\n");

  const lines: string[] = [
    `# Quality Gate Review`,
    ``,
    `You are performing a structured post-completion quality gate review for task **${context.taskId}** in project **${context.projectName}**.`,
    ``,
    `Your job is to verify that the task was completed correctly by comparing the PROMPT requirements against the actual code changes and STATUS.md progress claims.`,
    ``,
    ...section(`## Task Requirements (PROMPT.md)`, ...fenced("markdown", promptContent)),
    ...section(`## Declared Progress (STATUS.md)`, ...fenced("markdown", statusContent)),
    ...section(`## Changed Files`, ...fenced("", fileList)),
    ...section(`## Git Diff`, ...fenced("diff", shownDiff)),
    ...section(
      `## Instructions`,
      `1. **Read the PROMPT.md requirements** carefully — identify every deliverable and acceptance criterion.`,
      `2. **Cross-check STATUS.md checkboxes** — verify each checked item actually has corresponding code/test changes in the diff.`,
      `3. **Review the git diff** — look for missing implementations, incorrect logic, incomplete work.`,
      `4. **Use tools** to read actual source files if the diff is unclear.`,
      `5. **Produce your verdict** as a JSON object written to the file specified below.`,
    ),
    ...section(
      `## Verdict Rules`,
      `Report ALL findings you discover with accurate severities. The runtime will`,
      `apply the configured pass threshold (\`${context.passThreshold}\`) to decide pass/fail.`,
      ``,
      `Use these rules to determine your verdict:`,
      ...buildThresholdRules(context.passThreshold),
    ),
    ...section(
      `## Output Format`,
      `Write a JSON file to: \`${verdictPath}\``,
      ``,
      `The JSON must conform to this schema:`,
      ``,
      ...fenced("json", verdictSchema),
      ``,
      `**IMPORTANT:** Write ONLY valid JSON to the verdict file. No markdown, no explanation — just the JSON object.`,
    ),
  ];

  return lines.join("\n");
}
593
+
594
+ // ── Quality Gate Result ──────────────────────────────────────────────
595
+
596
/**
 * Outcome of one quality gate review cycle.
 */
export interface QualityGateResult {
  /** True when the task cleared the gate. */
  passed: boolean;
  /** Verdict parsed from the review agent; the fail-open sentinel when parsing failed. */
  verdict: ReviewVerdict;
  /** Result of applying the verdict rules under the configured threshold. */
  evaluation: VerdictEvaluation;
  /** How many review cycles have been consumed so far. */
  cyclesUsed: number;
  /** True when the gate was skipped because it is disabled. */
  skipped: boolean;
}
609
+
/**
 * Load the verdict file from a task folder and evaluate it against the
 * configured pass threshold.
 *
 * The raw file text is passed to `parseVerdict`; when the file is missing or
 * cannot be read, `null` is passed instead. The fail-open behaviour (missing
 * file, malformed JSON, or invalid structure all yielding a synthetic PASS)
 * is delegated to `parseVerdict`, which is defined elsewhere in this module.
 *
 * @param taskFolder - Absolute path to task folder
 * @param passThreshold - Configured pass threshold
 * @returns The parsed verdict together with its threshold evaluation
 */
export function readAndEvaluateVerdict(
  taskFolder: string,
  passThreshold: PassThreshold,
): { verdict: ReviewVerdict; evaluation: VerdictEvaluation } {
  const verdictPath = join(taskFolder, VERDICT_FILENAME);

  // Any failure while probing or reading the file degrades to "no content".
  const loadRawVerdict = (): string | null => {
    try {
      return existsSync(verdictPath) ? readFileSync(verdictPath, "utf-8") : null;
    } catch {
      return null;
    }
  };

  const verdict = parseVerdict(loadRawVerdict());
  return { verdict, evaluation: applyVerdictRules(verdict, passThreshold) };
}
642
+
643
+ // ── STATUS.md Reconciliation ─────────────────────────────────────────
644
+
/**
 * Summary returned after reconciling STATUS.md checkboxes with a verdict.
 */
export interface ReconciliationResult {
  /** Count of checkboxes whose state was modified. */
  changed: number;

  /** Count of entries that matched a checkbox already in the right state. */
  alreadyCorrect: number;

  /** Count of entries for which no checkbox could be matched. */
  unmatched: number;

  /** One record per reconciliation entry describing what was done. */
  actions: ReconciliationAction[];
}
656
+
/**
 * Record of what happened to one reconciliation entry (applied or skipped).
 */
export interface ReconciliationAction {
  /** Checkbox text exactly as given in the reconciliation entry. */
  checkbox: string;

  /** Outcome of processing the entry. */
  outcome: "checked" | "unchecked" | "no_change" | "unmatched";

  /** Explanation of the outcome, intended for humans. */
  reason: string;
}
666
+
/**
 * Reduce checkbox text to a canonical form for fuzzy comparison.
 *
 * Steps, in order: drop bold/code markers (`**`, `__`, backticks), squeeze
 * every whitespace run to a single space, remove one leading list bullet
 * (`-`, `*` or `•`), trim, and lowercase. Reconciliation entries are written
 * by the review agent and may differ from STATUS.md in casing, spacing or
 * light formatting; comparing normalized forms tolerates those differences.
 */
function normalizeCheckboxText(text: string): string {
  const withoutMarkup = text.replace(/\*\*|__|``|`/g, "");
  const singleSpaced = withoutMarkup.replace(/\s+/g, " ");
  const withoutBullet = singleSpaced.replace(/^\s*[-*•]\s*/, "");
  return withoutBullet.trim().toLowerCase();
}
683
+
/**
 * Apply statusReconciliation entries to STATUS.md checkboxes.
 *
 * For each reconciliation entry:
 * - `done`     → ensure checkbox is checked (`[x]`); a stale trailing
 *                "(partial)" annotation is removed when the box gets checked
 * - `not_done` → ensure checkbox is unchecked (`[ ]`)
 * - `partial`  → ensure checkbox is unchecked (`[ ]`) with "(partial)" annotation
 *
 * Matching strategy: both the entry's `checkbox` text and each STATUS.md
 * checkbox label are normalized. An unconsumed line whose normalized label is
 * *equal* to the entry is preferred; only when none exists does the first
 * unconsumed line related by substring containment (either direction) match.
 * Checkbox lines with an empty label never match, because every string
 * "contains" the empty string. Each line is consumed by at most one entry;
 * later duplicates are reported as "unmatched".
 *
 * Idempotency: if a checkbox already has the correct state, no change is made.
 * If no net changes occur, STATUS.md is not rewritten.
 *
 * Line endings: the file is split on "\n"; whatever follows the matched part
 * of a line (for example the "\r" of a CRLF file) is carried over unchanged
 * when the line is rewritten.
 *
 * @param statusPath - Absolute path to STATUS.md
 * @param reconciliations - Array of reconciliation entries from the verdict
 * @returns Summary of changes applied
 */
export function applyStatusReconciliation(
  statusPath: string,
  reconciliations: StatusReconciliation[],
): ReconciliationResult {
  const result: ReconciliationResult = {
    changed: 0,
    alreadyCorrect: 0,
    unmatched: 0,
    actions: [],
  };

  if (!reconciliations || reconciliations.length === 0) {
    return result;
  }

  // Record one entry as unmatched, with the given reason.
  const markUnmatched = (checkbox: string, reason: string): void => {
    result.unmatched++;
    result.actions.push({ checkbox, outcome: "unmatched", reason });
  };

  let content: string;
  try {
    if (!existsSync(statusPath)) {
      // No STATUS.md — mark all as unmatched
      for (const r of reconciliations) markUnmatched(r.checkbox, "STATUS.md not found");
      return result;
    }
    content = readFileSync(statusPath, "utf-8");
  } catch {
    for (const r of reconciliations) markUnmatched(r.checkbox, "STATUS.md unreadable");
    return result;
  }

  const lines = content.split("\n");
  const checkboxRegex = /^(\s*-\s*\[)([ xX])(\]\s*)(.*)/;
  const partialSuffix = /\s*\(partial\)\s*$/;

  // Index checkbox lines once instead of re-matching every line per entry.
  // Lines with an empty normalized label are left out: they would otherwise
  // match every entry through `includes("")`.
  const candidates: { index: number; text: string }[] = [];
  lines.forEach((line, index) => {
    const parsed = line.match(checkboxRegex);
    if (!parsed) return;
    const text = normalizeCheckboxText(parsed[4]);
    if (text) candidates.push({ index, text });
  });

  // Line indices already claimed by an earlier reconciliation entry.
  const consumed = new Set<number>();

  // Locate the line for an entry: exact match first, then containment.
  const findMatch = (wanted: string): number => {
    const available = candidates.filter((c) => !consumed.has(c.index));
    const exact = available.find((c) => c.text === wanted);
    if (exact) return exact.index;
    const loose = available.find(
      (c) => c.text.includes(wanted) || wanted.includes(c.text),
    );
    return loose ? loose.index : -1;
  };

  for (const recon of reconciliations) {
    // Verdicts come from agent-written JSON; tolerate a non-string checkbox.
    const normalizedRecon =
      typeof recon.checkbox === "string" ? normalizeCheckboxText(recon.checkbox) : "";
    if (!normalizedRecon) {
      markUnmatched(recon.checkbox, "Empty checkbox text after normalization");
      continue;
    }

    const matchedIdx = findMatch(normalizedRecon);
    if (matchedIdx === -1) {
      markUnmatched(recon.checkbox, "No matching checkbox found in STATUS.md");
      continue;
    }
    consumed.add(matchedIdx);

    const originalLine = lines[matchedIdx];
    const cbMatch = originalLine.match(checkboxRegex)!;
    const [matchedPart, open, mark, close, currentText] = cbMatch;
    // `.` stops at line terminators, so a trailing "\r" is outside the match.
    const tail = originalLine.slice(matchedPart.length);

    const currentlyChecked = mark.toLowerCase() === "x";
    const shouldBeChecked = recon.actualState === "done";
    const isPartial = recon.actualState === "partial";

    // Rebuild the matched line with a new mark and label, keeping the tail.
    const rewrite = (newMark: string, label: string): void => {
      lines[matchedIdx] = `${open}${newMark}${close}${label}${tail}`;
      result.changed++;
    };

    if (shouldBeChecked) {
      if (currentlyChecked) {
        result.alreadyCorrect++;
        result.actions.push({
          checkbox: recon.checkbox,
          outcome: "no_change",
          reason: "Already checked (done)",
        });
      } else {
        // A checked box must not keep claiming the work is only partial.
        rewrite("x", currentText.replace(partialSuffix, ""));
        result.actions.push({
          checkbox: recon.checkbox,
          outcome: "checked",
          reason: "Work done but box was unchecked",
        });
      }
    } else if (currentlyChecked) {
      // Checked but should not be (not_done or partial).
      // partial → uncheck (conservative: don't claim done)
      const annotation = isPartial ? " (partial)" : "";
      rewrite(" ", `${currentText.replace(partialSuffix, "")}${annotation}`);
      result.actions.push({
        checkbox: recon.checkbox,
        outcome: "unchecked",
        reason: isPartial ? "Unchecked — work partially done" : "Unchecked — work not done",
      });
    } else if (isPartial && !currentText.includes("(partial)")) {
      // Already unchecked, but the partial annotation is missing.
      rewrite(" ", `${currentText} (partial)`);
      result.actions.push({
        checkbox: recon.checkbox,
        outcome: "unchecked",
        reason: "Added (partial) annotation",
      });
    } else {
      result.alreadyCorrect++;
      result.actions.push({
        checkbox: recon.checkbox,
        outcome: "no_change",
        reason: `Already unchecked (${recon.actualState})`,
      });
    }
  }

  // Only rewrite if there were actual changes
  if (result.changed > 0) {
    try {
      writeFileSync(statusPath, lines.join("\n"), "utf-8");
    } catch {
      // Write failed — the file was not updated, so report the intended
      // changes as unmatched rather than claiming they were applied.
      for (const action of result.actions) {
        if (action.outcome === "checked" || action.outcome === "unchecked") {
          action.outcome = "unmatched";
          action.reason += " (write failed)";
          result.changed--;
          result.unmatched++;
        }
      }
    }
  }

  return result;
}
874
+
875
+ // ── Remediation: Feedback & Fix Agent Prompt ─────────────────────────
876
+
/**
 * File name of the review feedback document that is written for the fix agent.
 */
export const FEEDBACK_FILENAME = "REVIEW_FEEDBACK.md";
879
+
/**
 * Render the REVIEW_FEEDBACK.md document for a NEEDS_FIXES verdict.
 *
 * Which severities are listed depends on the pass threshold:
 * - `all_clear`: critical, important and suggestion findings
 * - any other threshold: critical and important findings only
 *
 * Reconciliation entries whose state is not `done` are listed in their own
 * section. When nothing at all is listed, a "No blocking findings" notice is
 * emitted instead. The output depends only on the arguments, so the same
 * verdict always renders the same text.
 *
 * The file is intentionally kept as a task artifact (roadmap 5e:
 * REVIEW_FEEDBACK.md is part of the review audit trail).
 *
 * @param verdict - The NEEDS_FIXES review verdict
 * @param cycleNum - Current remediation cycle number (1-based)
 * @param maxCycles - Maximum review cycles configured
 * @param passThreshold - Configured pass threshold (determines which severities are blocking)
 * @returns Markdown content for REVIEW_FEEDBACK.md
 */
export function generateFeedbackMd(
  verdict: ReviewVerdict,
  cycleNum: number,
  maxCycles: number,
  passThreshold: PassThreshold = "no_critical",
): string {
  const withSeverity = (level: string) =>
    verdict.findings.filter((finding) => finding.severity === level);

  const criticalItems = withSeverity("critical");
  const importantItems = withSeverity("important");
  const suggestionItems = withSeverity("suggestion");
  const openStatusItems = verdict.statusReconciliation.filter(
    (entry) => entry.actualState !== "done",
  );

  // Suggestions only count as blocking under the strictest threshold.
  const suggestionsBlock = passThreshold === "all_clear";

  let blockingLabel = "critical and important";
  if (suggestionsBlock) {
    blockingLabel = "critical, important, and suggestion";
  }

  const out: string[] = [];
  out.push(
    `# Review Feedback — Cycle ${cycleNum}/${maxCycles}`,
    ``,
    `**Verdict:** NEEDS_FIXES`,
    `**Confidence:** ${verdict.confidence}`,
    `**Summary:** ${verdict.summary}`,
    `**Pass Threshold:** \`${passThreshold}\``,
    ``,
    `> This file was generated by the quality gate. Address all ${blockingLabel}`,
    `> findings below, then the review will re-run automatically.`,
    ``,
  );

  // Emit one findings section; writes nothing when the list is empty.
  const emitSection = (
    heading: string,
    tag: string,
    items: typeof criticalItems,
    note?: string,
  ): void => {
    if (items.length === 0) return;
    out.push(heading, ``);
    if (note !== undefined) {
      out.push(note, ``);
    }
    items.forEach((finding, idx) => {
      out.push(`### ${tag}${idx + 1}: ${finding.description}`);
      out.push(``);
      out.push(`- **Category:** ${finding.category}`);
      if (finding.file) out.push(`- **File:** \`${finding.file}\``);
      if (finding.remediation) out.push(`- **Remediation:** ${finding.remediation}`);
      out.push(``);
    });
  };

  emitSection(`## Critical Findings (${criticalItems.length})`, "C", criticalItems);
  emitSection(`## Important Findings (${importantItems.length})`, "I", importantItems);
  if (suggestionsBlock) {
    emitSection(
      `## Suggestion Findings (${suggestionItems.length})`,
      "S",
      suggestionItems,
      `> Under \`all_clear\` threshold, suggestions are also blocking.`,
    );
  }

  if (openStatusItems.length > 0) {
    out.push(`## STATUS.md Reconciliation Issues (${openStatusItems.length})`);
    out.push(``);
    openStatusItems.forEach((entry) => {
      out.push(`- **Checkbox:** ${entry.checkbox}`);
      out.push(` - **Actual state:** ${entry.actualState}`);
      if (entry.evidence) out.push(` - **Evidence:** ${entry.evidence}`);
    });
    out.push(``);
  }

  const listedCount =
    criticalItems.length +
    importantItems.length +
    (suggestionsBlock ? suggestionItems.length : 0) +
    openStatusItems.length;

  if (listedCount === 0) {
    out.push(
      `## No blocking findings`,
      ``,
      `The review returned NEEDS_FIXES but no blocking findings were extracted for threshold \`${passThreshold}\`.`,
      `This may indicate a threshold or verdict-rule mismatch. Review the REVIEW_VERDICT.json for details.`,
      ``,
    );
  }

  return out.join("\n");
}
1004
+
/**
 * Build the prompt for the fix agent that addresses quality gate findings.
 *
 * The prompt embeds three documents as fenced markdown blocks: the review
 * feedback passed in, the task's PROMPT.md and its STATUS.md. A file that is
 * missing or cannot be read is replaced by a "(… not found)" placeholder.
 *
 * Each fence is sized to be longer than the longest run of backticks inside
 * the embedded text, with a minimum of three. Embedded documents often
 * contain their own ``` code fences; a fixed three-backtick wrapper would be
 * closed by the first of them and the rest of the prompt would be
 * mis-delimited. Text without such runs is wrapped exactly as before.
 *
 * @param context - Quality gate context; `taskFolder`, `promptPath` and `taskId` are read
 * @param feedbackContent - Content of REVIEW_FEEDBACK.md
 * @param cycleNum - Current fix cycle number
 * @returns Prompt string for the fix agent
 */
export function buildFixAgentPrompt(
  context: QualityGateContext,
  feedbackContent: string,
  cycleNum: number,
): string {
  const statusPath = join(context.taskFolder, "STATUS.md");

  // Best-effort read: any failure falls back to the placeholder text.
  const readOrPlaceholder = (path: string, placeholder: string): string => {
    try {
      if (existsSync(path)) {
        return readFileSync(path, "utf-8");
      }
    } catch {
      /* proceed without */
    }
    return placeholder;
  };

  const statusContent = readOrPlaceholder(statusPath, "(STATUS.md not found)");
  const promptContent = readOrPlaceholder(context.promptPath, "(PROMPT.md not found)");

  // Wrap text in a markdown fence that the text itself cannot terminate.
  const fenced = (body: string): string[] => {
    let longestRun = 0;
    for (const run of body.match(/`+/g) || []) {
      longestRun = Math.max(longestRun, run.length);
    }
    const fence = "`".repeat(Math.max(3, longestRun + 1));
    return [`${fence}markdown`, body, fence];
  };

  // NOTE(review): the wording below says "critical and important" even though
  // suggestions are also blocking under the `all_clear` threshold — confirm
  // whether the prompt should name the blocking severities dynamically.
  return [
    `# Quality Gate Remediation — Fix Cycle ${cycleNum}`,
    ``,
    `You are a fix agent addressing quality gate findings for task **${context.taskId}**.`,
    ``,
    `The quality gate review found issues that must be fixed before the task can be marked complete.`,
    `Your job is to make targeted, minimal fixes to address the critical and important findings below.`,
    ``,
    `## Rules`,
    ``,
    `1. **Read REVIEW_FEEDBACK.md** below — it lists the blocking findings with specific remediation instructions.`,
    `2. **Fix each finding** — make the minimal code change needed. Do NOT refactor unrelated code.`,
    `3. **Commit your fixes** with message: \`fix(${context.taskId}): address quality gate findings (cycle ${cycleNum})\``,
    `4. **Update STATUS.md** if any checkbox states were flagged as incorrect in the reconciliation section.`,
    `5. **Do NOT create .DONE** — the quality gate will re-run automatically after you exit.`,
    ``,
    `## Task Context`,
    ``,
    `- **Task folder:** ${context.taskFolder}/`,
    `- **PROMPT:** ${context.promptPath}`,
    `- **STATUS:** ${statusPath}`,
    ``,
    `## Review Feedback`,
    ``,
    ...fenced(feedbackContent),
    ``,
    `## Original Task Requirements (PROMPT.md)`,
    ``,
    ...fenced(promptContent),
    ``,
    `## Current STATUS.md`,
    ``,
    ...fenced(statusContent),
    ``,
    `**IMPORTANT:** Focus only on fixing the blocking findings. Do not expand scope or create .DONE.`,
    ``,
  ].join("\n");
}