@pi-agents/orchid 0.1.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -0
- package/LICENSE +21 -0
- package/README.md +246 -0
- package/agents/AGENTS-MANIFEST.md +42 -0
- package/agents/brain.md +42 -0
- package/agents/context-builder.md +46 -0
- package/agents/delegate.md +12 -0
- package/agents/dev-1.md +42 -0
- package/agents/oracle.md +73 -0
- package/agents/planner.md +55 -0
- package/agents/researcher.md +52 -0
- package/agents/reviewer.md +79 -0
- package/agents/scout.md +50 -0
- package/agents/tester.md +45 -0
- package/agents/worker.md +55 -0
- package/extensions/ralph.ts +1 -0
- package/extensions/reviewer-extension.ts +125 -0
- package/extensions/task-orchestrator.ts +28 -0
- package/package.json +63 -0
- package/prompts/gather-context-and-clarify.md +13 -0
- package/prompts/parallel-cleanup.md +59 -0
- package/prompts/parallel-context-build.md +53 -0
- package/prompts/parallel-handoff-plan.md +59 -0
- package/prompts/parallel-research.md +50 -0
- package/prompts/parallel-review.md +54 -0
- package/prompts/review-loop.md +41 -0
- package/skills/orchid/SKILL.md +214 -0
- package/skills/orchid/orchid-cleanup/SKILL.md +122 -0
- package/skills/orchid/orchid-converge/SKILL.md +124 -0
- package/skills/orchid/orchid-decompose/SKILL.md +201 -0
- package/skills/orchid/orchid-doctor/SKILL.md +162 -0
- package/skills/orchid/orchid-investigate/SKILL.md +102 -0
- package/skills/orchid/orchid-launch/SKILL.md +147 -0
- package/skills/ralph/SKILL.md +73 -0
- package/skills/subagents/pi-subagents/SKILL.md +813 -0
- package/src/index.ts +7 -0
- package/src/orchestrator/abort.ts +534 -0
- package/src/orchestrator/agent-bridge-extension.ts +1020 -0
- package/src/orchestrator/agent-host.ts +954 -0
- package/src/orchestrator/cleanup.ts +776 -0
- package/src/orchestrator/config-loader.ts +1412 -0
- package/src/orchestrator/config-schema.ts +690 -0
- package/src/orchestrator/config.ts +81 -0
- package/src/orchestrator/context-window.ts +66 -0
- package/src/orchestrator/diagnostic-reports.ts +475 -0
- package/src/orchestrator/diagnostics.ts +394 -0
- package/src/orchestrator/discovery.ts +1833 -0
- package/src/orchestrator/engine-worker.ts +415 -0
- package/src/orchestrator/engine.ts +5940 -0
- package/src/orchestrator/execution.ts +3104 -0
- package/src/orchestrator/extension.ts +5934 -0
- package/src/orchestrator/formatting.ts +785 -0
- package/src/orchestrator/git.ts +88 -0
- package/src/orchestrator/index.ts +28 -0
- package/src/orchestrator/lane-runner.ts +1787 -0
- package/src/orchestrator/mailbox.ts +780 -0
- package/src/orchestrator/merge.ts +3414 -0
- package/src/orchestrator/messages.ts +1062 -0
- package/src/orchestrator/migrations.ts +278 -0
- package/src/orchestrator/naming.ts +117 -0
- package/src/orchestrator/path-resolver.ts +275 -0
- package/src/orchestrator/persistence.ts +2625 -0
- package/src/orchestrator/process-registry.ts +452 -0
- package/src/orchestrator/quality-gate.ts +1085 -0
- package/src/orchestrator/resume.ts +3488 -0
- package/src/orchestrator/sessions.ts +57 -0
- package/src/orchestrator/settings-loader.ts +136 -0
- package/src/orchestrator/settings-tui.ts +2208 -0
- package/src/orchestrator/sidecar-telemetry.ts +267 -0
- package/src/orchestrator/supervisor.ts +4548 -0
- package/src/orchestrator/task-executor-core.ts +675 -0
- package/src/orchestrator/tmux-compat.ts +37 -0
- package/src/orchestrator/tool-allowlist-constants.ts +37 -0
- package/src/orchestrator/types.ts +4465 -0
- package/src/orchestrator/verification.ts +547 -0
- package/src/orchestrator/waves.ts +1564 -0
- package/src/orchestrator/workspace.ts +707 -0
- package/src/orchestrator/worktree.ts +2725 -0
- package/src/ralph/index.ts +825 -0
- package/src/subagents/agents/agent-management.ts +648 -0
- package/src/subagents/agents/agent-scope.ts +6 -0
- package/src/subagents/agents/agent-selection.ts +23 -0
- package/src/subagents/agents/agent-serializer.ts +86 -0
- package/src/subagents/agents/agents.ts +832 -0
- package/src/subagents/agents/chain-serializer.ts +137 -0
- package/src/subagents/agents/frontmatter.ts +29 -0
- package/src/subagents/agents/identity.ts +30 -0
- package/src/subagents/agents/skills.ts +632 -0
- package/src/subagents/extension/config.ts +16 -0
- package/src/subagents/extension/control-notices.ts +92 -0
- package/src/subagents/extension/doctor.ts +199 -0
- package/src/subagents/extension/fanout-child.ts +170 -0
- package/src/subagents/extension/index.ts +573 -0
- package/src/subagents/extension/schemas.ts +168 -0
- package/src/subagents/intercom/intercom-bridge.ts +379 -0
- package/src/subagents/intercom/result-intercom.ts +377 -0
- package/src/subagents/runs/background/async-execution.ts +712 -0
- package/src/subagents/runs/background/async-job-tracker.ts +310 -0
- package/src/subagents/runs/background/async-resume.ts +345 -0
- package/src/subagents/runs/background/async-status.ts +325 -0
- package/src/subagents/runs/background/completion-dedupe.ts +63 -0
- package/src/subagents/runs/background/notify.ts +108 -0
- package/src/subagents/runs/background/parallel-groups.ts +45 -0
- package/src/subagents/runs/background/result-watcher.ts +307 -0
- package/src/subagents/runs/background/run-id-resolver.ts +83 -0
- package/src/subagents/runs/background/run-status.ts +269 -0
- package/src/subagents/runs/background/stale-run-reconciler.ts +336 -0
- package/src/subagents/runs/background/subagent-runner.ts +1808 -0
- package/src/subagents/runs/background/top-level-async.ts +13 -0
- package/src/subagents/runs/foreground/chain-clarify.ts +1333 -0
- package/src/subagents/runs/foreground/chain-execution.ts +938 -0
- package/src/subagents/runs/foreground/execution.ts +918 -0
- package/src/subagents/runs/foreground/subagent-executor.ts +2527 -0
- package/src/subagents/runs/shared/completion-guard.ts +147 -0
- package/src/subagents/runs/shared/long-running-guard.ts +175 -0
- package/src/subagents/runs/shared/mcp-direct-tool-allowlist.ts +365 -0
- package/src/subagents/runs/shared/model-fallback.ts +103 -0
- package/src/subagents/runs/shared/nested-events.ts +819 -0
- package/src/subagents/runs/shared/nested-path.ts +52 -0
- package/src/subagents/runs/shared/nested-render.ts +115 -0
- package/src/subagents/runs/shared/parallel-utils.ts +109 -0
- package/src/subagents/runs/shared/pi-args.ts +220 -0
- package/src/subagents/runs/shared/pi-spawn.ts +115 -0
- package/src/subagents/runs/shared/run-history.ts +60 -0
- package/src/subagents/runs/shared/single-output.ts +164 -0
- package/src/subagents/runs/shared/subagent-control.ts +226 -0
- package/src/subagents/runs/shared/subagent-prompt-runtime.ts +170 -0
- package/src/subagents/runs/shared/worktree.ts +577 -0
- package/src/subagents/shared/artifacts.ts +98 -0
- package/src/subagents/shared/atomic-json.ts +16 -0
- package/src/subagents/shared/file-coalescer.ts +40 -0
- package/src/subagents/shared/fork-context.ts +76 -0
- package/src/subagents/shared/formatters.ts +133 -0
- package/src/subagents/shared/jsonl-writer.ts +81 -0
- package/src/subagents/shared/model-info.ts +78 -0
- package/src/subagents/shared/post-exit-stdio-guard.ts +85 -0
- package/src/subagents/shared/session-identity.ts +10 -0
- package/src/subagents/shared/session-tokens.ts +44 -0
- package/src/subagents/shared/settings.ts +397 -0
- package/src/subagents/shared/status-format.ts +49 -0
- package/src/subagents/shared/types.ts +822 -0
- package/src/subagents/shared/utils.ts +450 -0
- package/src/subagents/slash/prompt-template-bridge.ts +397 -0
- package/src/subagents/slash/slash-bridge.ts +174 -0
- package/src/subagents/slash/slash-commands.ts +528 -0
- package/src/subagents/slash/slash-live-state.ts +292 -0
- package/src/subagents/tui/render-helpers.ts +80 -0
- package/src/subagents/tui/render.ts +1358 -0
- package/templates/agents/local/supervisor.md +33 -0
- package/templates/agents/local/task-merger.md +27 -0
- package/templates/agents/local/task-reviewer.md +30 -0
- package/templates/agents/local/task-worker.md +34 -0
- package/templates/agents/supervisor-routing.md +92 -0
- package/templates/agents/supervisor.md +229 -0
- package/templates/agents/task-merger.md +214 -0
- package/templates/agents/task-reviewer.md +260 -0
- package/templates/agents/task-worker-segment.md +44 -0
- package/templates/agents/task-worker.md +557 -0
- package/templates/tasks/CONTEXT.md +30 -0
- package/templates/tasks/EXAMPLE-001-hello-world/PROMPT.md +98 -0
- package/templates/tasks/EXAMPLE-001-hello-world/STATUS.md +73 -0
- package/templates/tasks/EXAMPLE-002-parallel-smoke/PROMPT.md +97 -0
- package/templates/tasks/EXAMPLE-002-parallel-smoke/STATUS.md +73 -0
|
@@ -0,0 +1,1085 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quality Gate — structured post-completion review types and verdict evaluation.
|
|
3
|
+
*
|
|
4
|
+
* This module defines the interfaces for quality gate review verdicts and
|
|
5
|
+
* implements the verdict evaluation logic used by the task-runner to decide
|
|
6
|
+
* whether a task passes or needs fixes before `.DONE` creation.
|
|
7
|
+
*
|
|
8
|
+
* Verdict rules (from roadmap Phase 5a):
|
|
9
|
+
* - Any `critical` finding → NEEDS_FIXES
|
|
10
|
+
* - 3+ `important` findings → NEEDS_FIXES (enforced under the `no_important` threshold)
|
|
11
|
+
* - Only `suggestion` findings → PASS (except under the `all_clear` threshold, where any finding fails)
|
|
12
|
+
* - Any `status_mismatch` category → NEEDS_FIXES
|
|
13
|
+
*
|
|
14
|
+
* Fail-open behavior: malformed or missing verdict JSON → PASS
|
|
15
|
+
* (prevents quality gate bugs from blocking task completion)
|
|
16
|
+
*
|
|
17
|
+
* @module quality-gate
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import type { PassThreshold } from "./config-schema.ts";
|
|
21
|
+
import { readFileSync, writeFileSync, existsSync } from "fs";
|
|
22
|
+
import { join } from "path";
|
|
23
|
+
import { spawnSync } from "child_process";
|
|
24
|
+
|
|
25
|
+
// ── Verdict Interfaces ───────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
/**
 * How serious a review finding is.
 * Members are listed from most severe (`critical`) to least severe (`suggestion`).
 */
export type FindingSeverity = "critical" | "important" | "suggestion";

/** The kind of problem a review finding describes. */
export type FindingCategory =
	| "missing_requirement"
	| "incorrect_implementation"
	| "incomplete_work"
	| "status_mismatch";
|
|
36
|
+
|
|
37
|
+
/**
 * One issue reported by the quality gate review.
 */
export interface ReviewFinding {
	/** How serious the issue is. */
	severity: FindingSeverity;

	/** Which kind of issue was found. */
	category: FindingCategory;

	/** Human-readable explanation of the issue. */
	description: string;

	/** Path of the file the finding relates to; may be an empty string. */
	file: string;

	/** Concrete fix instruction intended for the remediation agent. */
	remediation: string;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Result of reconciling a single STATUS.md checkbox against the actual work.
 */
export interface StatusReconciliation {
	/** The checkbox text as it appears in STATUS.md. */
	checkbox: string;

	/** What the review determined the real state of that item to be. */
	actualState: "done" | "not_done" | "partial";

	/** Evidence backing the determined state. */
	evidence: string;
}
|
|
60
|
+
|
|
61
|
+
/**
 * The complete verdict produced by the review agent for a task.
 */
export interface ReviewVerdict {
	/** Overall pass/fail outcome declared by the reviewer. */
	verdict: "PASS" | "NEEDS_FIXES";

	/** How confident the review agent is in its assessment. */
	confidence: "high" | "medium" | "low";

	/** Short overall assessment. */
	summary: string;

	/** The individual findings reported by the review. */
	findings: ReviewFinding[];

	/** Per-checkbox reconciliation results for STATUS.md. */
	statusReconciliation: StatusReconciliation[];
}
|
|
74
|
+
|
|
75
|
+
// ── Verdict Evaluation ───────────────────────────────────────────────
|
|
76
|
+
|
|
77
|
+
/**
 * Explains one reason a verdict was evaluated as NEEDS_FIXES.
 */
export interface VerdictFailReason {
	/** Identifier of the rule that caused the failure. */
	rule:
		| "critical_finding"
		| "important_threshold"
		| "status_mismatch"
		| "verdict_says_needs_fixes";

	/** Human-readable explanation of why the rule fired. */
	detail: string;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Outcome of running the verdict rules over a parsed ReviewVerdict.
 */
export interface VerdictEvaluation {
	/** True when the task passes the quality gate. */
	pass: boolean;

	/** Every rule that caused a failure; empty when `pass` is true. */
	failReasons: VerdictFailReason[];
}
|
|
92
|
+
|
|
93
|
+
/**
 * Decide whether a parsed review verdict passes the quality gate.
 *
 * Checks run in this order, and each one that fires adds a fail reason:
 *   1. At least one finding in category `status_mismatch`.
 *   2. At least one finding with severity `critical`.
 *   3. The `important` count check, which depends on the threshold:
 *      - `no_important`: fails at 3 or more important findings.
 *      - `all_clear`: fails at 1 or more important findings.
 *      - `no_critical`: important findings never fail on their own.
 *   4. The reviewer's own `NEEDS_FIXES` verdict — only recorded when no
 *      earlier check produced a reason.
 *   5. Under `all_clear` only: suggestion-level findings, when nothing else
 *      has failed yet (reported under the `important_threshold` rule).
 *
 * @param verdict - Parsed review verdict
 * @param threshold - Configured pass threshold
 * @returns `pass` (true when no reason fired) together with the collected reasons
 */
export function applyVerdictRules(
	verdict: ReviewVerdict,
	threshold: PassThreshold,
): VerdictEvaluation {
	// Tally everything in a single pass over the findings.
	let mismatchCount = 0;
	let criticalCount = 0;
	let importantCount = 0;
	let suggestionCount = 0;
	for (const finding of verdict.findings) {
		if (finding.category === "status_mismatch") {
			mismatchCount += 1;
		}
		if (finding.severity === "critical") {
			criticalCount += 1;
		} else if (finding.severity === "important") {
			importantCount += 1;
		} else if (finding.severity === "suggestion") {
			suggestionCount += 1;
		}
	}

	const reasons: VerdictFailReason[] = [];
	const zeroTolerance = threshold === "all_clear";

	// 1. Status mismatches always fail.
	if (mismatchCount > 0) {
		reasons.push({
			rule: "status_mismatch",
			detail: `${mismatchCount} status mismatch(es) found — checked boxes don't match actual work`,
		});
	}

	// 2. Critical findings always fail.
	if (criticalCount > 0) {
		reasons.push({
			rule: "critical_finding",
			detail: `${criticalCount} critical finding(s)`,
		});
	}

	// 3. Important findings: the limit depends on the threshold. The two
	//    threshold values are mutually exclusive, so at most one branch fires.
	if (threshold === "no_important") {
		if (importantCount >= 3) {
			reasons.push({
				rule: "important_threshold",
				detail: `${importantCount} important findings (threshold: fewer than 3 required for pass)`,
			});
		}
	} else if (zeroTolerance && importantCount > 0) {
		reasons.push({
			rule: "important_threshold",
			detail: `${importantCount} important finding(s) (all_clear threshold: zero findings required)`,
		});
	}

	// 4. Honour the reviewer's own NEEDS_FIXES when no rule above fired.
	if (reasons.length === 0 && verdict.verdict === "NEEDS_FIXES") {
		reasons.push({
			rule: "verdict_says_needs_fixes",
			detail: `Review agent verdict: NEEDS_FIXES — ${verdict.summary}`,
		});
	}

	// 5. all_clear: suggestions alone are enough to fail.
	if (zeroTolerance && reasons.length === 0 && suggestionCount > 0) {
		reasons.push({
			rule: "important_threshold",
			detail: `${suggestionCount} suggestion(s) found (all_clear threshold: zero findings required)`,
		});
	}

	return {
		pass: reasons.length === 0,
		failReasons: reasons,
	};
}
|
|
181
|
+
|
|
182
|
+
// ── Verdict Parsing ──────────────────────────────────────────────────
|
|
183
|
+
|
|
184
|
+
/**
 * Template for the verdict returned when parsing fails (fail-open).
 *
 * Do not hand this object (or a shallow copy of it) to callers directly:
 * its arrays would then be shared between every fail-open result. Use
 * `buildFailOpenVerdict()` instead.
 */
const FAIL_OPEN_VERDICT: ReviewVerdict = {
	verdict: "PASS",
	confidence: "low",
	summary: "Verdict could not be parsed — fail-open policy applied",
	findings: [],
	statusReconciliation: [],
};

/**
 * Create an independent fail-open PASS verdict carrying the given summary.
 *
 * Object spread is a shallow copy, so the arrays are re-created on every call;
 * otherwise a caller mutating `findings` on one fail-open verdict would
 * silently change every later one.
 */
function buildFailOpenVerdict(summary: string): ReviewVerdict {
	return {
		...FAIL_OPEN_VERDICT,
		summary,
		findings: [],
		statusReconciliation: [],
	};
}

/**
 * Parse a JSON string into a ReviewVerdict, with fail-open behavior.
 *
 * A PASS verdict with `confidence: "low"` and an explanatory summary is
 * returned (rather than an error) when the input is:
 * - missing, empty, or whitespace-only
 * - not valid JSON
 * - valid JSON but not a plain object (e.g. an array, number, or null)
 * - an object whose `verdict` is neither "PASS" nor "NEEDS_FIXES"
 *
 * For a structurally valid verdict:
 * - `confidence` falls back to "medium" when absent or unrecognised
 * - `summary` falls back to "" when it is not a string
 * - `findings` and `statusReconciliation` are passed through
 *   `validateFindings()` / `validateReconciliations()`
 *
 * @param jsonString - Raw JSON string from review agent output
 * @returns Parsed and validated ReviewVerdict (never throws)
 */
export function parseVerdict(jsonString: string | undefined | null): ReviewVerdict {
	if (!jsonString || jsonString.trim() === "") {
		return buildFailOpenVerdict("No verdict provided — fail-open policy applied");
	}

	let raw: unknown;
	try {
		raw = JSON.parse(jsonString);
	} catch {
		return buildFailOpenVerdict("Malformed JSON in verdict — fail-open policy applied");
	}

	if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
		return buildFailOpenVerdict("Verdict is not a JSON object — fail-open policy applied");
	}

	const obj = raw as Record<string, unknown>;

	// Validate verdict field
	const verdict = obj.verdict;
	if (verdict !== "PASS" && verdict !== "NEEDS_FIXES") {
		return buildFailOpenVerdict(`Invalid verdict value "${String(verdict)}" — fail-open policy applied`);
	}

	// Parse confidence with fallback
	const validConfidence = ["high", "medium", "low"];
	const confidence = validConfidence.includes(obj.confidence as string)
		? (obj.confidence as "high" | "medium" | "low")
		: "medium";

	// Parse summary with fallback
	const summary = typeof obj.summary === "string" ? obj.summary : "";

	// Parse and validate findings
	const findings = validateFindings(obj.findings);

	// Parse and validate statusReconciliation
	const statusReconciliation = validateReconciliations(obj.statusReconciliation);

	return {
		verdict,
		confidence,
		summary,
		findings,
		statusReconciliation,
	};
}
|
|
261
|
+
|
|
262
|
+
// ── Internal Validation Helpers ──────────────────────────────────────
|
|
263
|
+
|
|
264
|
+
/** Severity values accepted when validating a finding. */
const VALID_SEVERITIES: FindingSeverity[] = [
	"critical",
	"important",
	"suggestion",
];

/** Category values accepted when validating a finding. */
const VALID_CATEGORIES: FindingCategory[] = [
	"missing_requirement",
	"incorrect_implementation",
	"incomplete_work",
	"status_mismatch",
];

/** `actualState` values accepted when validating a reconciliation entry. */
const VALID_STATES = [
	"done",
	"not_done",
	"partial",
];
|
|
272
|
+
|
|
273
|
+
/**
 * Turn an untrusted `findings` value into a clean ReviewFinding[].
 *
 * Returns [] when the input is not an array. An entry is kept only if it is a
 * non-null object with a recognised `severity`, a recognised `category`, and a
 * non-blank string `description`; anything else is dropped. `file` and
 * `remediation` default to "" when they are not strings.
 */
function validateFindings(raw: unknown): ReviewFinding[] {
	if (!Array.isArray(raw)) {
		return [];
	}

	return raw
		.filter(
			(entry): entry is Record<string, unknown> =>
				typeof entry === "object" && entry !== null,
		)
		.filter(
			(entry) =>
				VALID_SEVERITIES.includes(entry.severity as FindingSeverity) &&
				VALID_CATEGORIES.includes(entry.category as FindingCategory) &&
				typeof entry.description === "string" &&
				entry.description.trim() !== "",
		)
		.map((entry) => ({
			severity: entry.severity as FindingSeverity,
			category: entry.category as FindingCategory,
			description: entry.description as string,
			file: typeof entry.file === "string" ? entry.file : "",
			remediation: typeof entry.remediation === "string" ? entry.remediation : "",
		}));
}
|
|
305
|
+
|
|
306
|
+
/**
 * Turn an untrusted `statusReconciliation` value into a clean
 * StatusReconciliation[].
 *
 * Returns [] when the input is not an array. An entry is kept only if it is a
 * non-null object with a non-blank string `checkbox` and a recognised
 * `actualState`; anything else is dropped. `evidence` defaults to "" when it
 * is not a string.
 */
function validateReconciliations(raw: unknown): StatusReconciliation[] {
	if (!Array.isArray(raw)) {
		return [];
	}

	return raw
		.filter(
			(entry): entry is Record<string, unknown> =>
				typeof entry === "object" && entry !== null,
		)
		.filter(
			(entry) =>
				typeof entry.checkbox === "string" &&
				entry.checkbox.trim() !== "" &&
				VALID_STATES.includes(entry.actualState as string),
		)
		.map((entry) => ({
			checkbox: entry.checkbox as string,
			actualState: entry.actualState as "done" | "not_done" | "partial",
			evidence: typeof entry.evidence === "string" ? entry.evidence : "",
		}));
}
|
|
330
|
+
|
|
331
|
+
// ── Quality Gate Review Prompt ───────────────────────────────────────
|
|
332
|
+
|
|
333
|
+
/**
 * Inputs required to assemble the evidence package for a quality gate review.
 */
export interface QualityGateContext {
	/** Absolute path to the task folder. */
	taskFolder: string;

	/** Absolute path to the task's PROMPT.md. */
	promptPath: string;

	/** Task identifier, e.g. "TP-034". */
	taskId: string;

	/** Project name taken from config. */
	projectName: string;

	/** Pass threshold taken from config. */
	passThreshold: PassThreshold;
}
|
|
346
|
+
|
|
347
|
+
/**
 * File name (not a full path) of the quality gate verdict JSON that the
 * review agent writes.
 */
export const VERDICT_FILENAME = "REVIEW_VERDICT.json";
|
|
349
|
+
|
|
350
|
+
/**
 * Pick the revision to diff against when collecting a task's git changes.
 *
 * Strategy (first success wins):
 *   1. `git merge-base HEAD <ref>` for `main`, `origin/main`, `master`,
 *      `origin/master`, in that order.
 *   2. `HEAD~N`, where N = min(first-parent commit count - 1, 50) — a bounded
 *      fallback for repositories where none of those refs resolve.
 *   3. Empty string, which signals that no base could be determined.
 *
 * The fallback counts commits with `--first-parent` because `HEAD~N` only
 * follows first parents. A plain reachable-commit count can exceed the
 * first-parent depth when the history contains merges, which would produce a
 * revision that does not exist.
 *
 * @param cwd - Working directory in which git is invoked
 * @returns A commit hash, a `HEAD~N` expression, or "" when unavailable
 */
function computeDiffBase(cwd: string): string {
	const opts = { encoding: "utf-8" as const, cwd, timeout: 15000 };

	// Preferred: common ancestor with a well-known mainline ref.
	for (const ref of ["main", "origin/main", "master", "origin/master"]) {
		const result = spawnSync("git", ["merge-base", "HEAD", ref], opts);
		if (result.status === 0 && result.stdout.trim()) {
			return result.stdout.trim();
		}
	}

	// Fallback: walk back along the first-parent chain, bounded to 50 commits.
	const countResult = spawnSync("git", ["rev-list", "--count", "--first-parent", "HEAD"], opts);
	if (countResult.status === 0) {
		// A non-numeric count parses to NaN, which fails the comparison below.
		const count = parseInt(countResult.stdout.trim(), 10);
		if (count > 1) {
			// count includes HEAD itself, so the deepest valid ancestor is HEAD~(count - 1).
			const n = Math.min(count - 1, 50);
			return `HEAD~${n}`;
		}
	}

	return "";
}
|
|
383
|
+
|
|
384
|
+
/**
 * Collect the git diff and changed-file list for the whole task.
 *
 * Uses `computeDiffBase()` to find a baseline, then runs `git diff` over
 * `<base>..HEAD`. Never throws; each failure mode yields a placeholder string:
 * - no base could be determined
 * - `git diff --name-only` exits non-zero (file list placeholder only)
 * - `git diff` exits non-zero for a reason other than output overflow
 * - an unexpected exception
 *
 * When the diff output exceeds the 200KB `maxBuffer`, git is terminated and
 * the call reports ENOBUFS. The output captured up to that point is kept and
 * marked as truncated instead of being discarded.
 *
 * @param cwd - Working directory in which git is invoked
 * @returns The diff text and the newline-separated list of changed files
 */
function buildGitDiff(cwd: string): { diff: string; fileList: string } {
	try {
		const base = computeDiffBase(cwd);
		if (!base) {
			return {
				diff: "(git diff unavailable — could not determine base)",
				fileList: "(file list unavailable)",
			};
		}

		const range = `${base}..HEAD`;

		// Get file list of changed files
		const fileListResult = spawnSync("git", ["diff", "--name-only", range], {
			encoding: "utf-8",
			cwd,
			timeout: 30000,
		});
		// On failure use the same placeholder as the other failure paths, so an
		// error is distinguishable from "no files changed".
		const fileList =
			fileListResult.status === 0 ? fileListResult.stdout.trim() : "(file list unavailable)";

		// Get full diff (bounded so it cannot blow up the review context)
		const diffResult = spawnSync("git", ["diff", range], {
			encoding: "utf-8",
			cwd,
			timeout: 30000,
			maxBuffer: 200 * 1024, // 200KB max
		});

		let diff: string;
		if (diffResult.status === 0) {
			diff = diffResult.stdout.trim();
		} else if (
			(diffResult.error as NodeJS.ErrnoException | undefined)?.code === "ENOBUFS" &&
			typeof diffResult.stdout === "string" &&
			diffResult.stdout.length > 0
		) {
			// Output overflowed maxBuffer: keep the captured prefix of the diff.
			diff = diffResult.stdout.trim() + "\n\n... (diff truncated at 200KB) ...";
		} else {
			diff = "(git diff unavailable)";
		}

		return { diff, fileList };
	} catch {
		return { diff: "(git diff failed)", fileList: "(file list unavailable)" };
	}
}
|
|
425
|
+
|
|
426
|
+
/**
|
|
427
|
+
* Generate the quality gate review prompt that instructs the review agent
|
|
428
|
+
* to produce a structured JSON verdict.
|
|
429
|
+
*
|
|
430
|
+
* The prompt includes:
|
|
431
|
+
* - PROMPT.md content (task requirements)
|
|
432
|
+
* - STATUS.md content (declared progress)
|
|
433
|
+
* - Git diff of all task changes
|
|
434
|
+
* - File change list
|
|
435
|
+
* - JSON schema for the verdict
|
|
436
|
+
* - Instructions for fail criteria
|
|
437
|
+
*
|
|
438
|
+
* @param context - Task context for evidence building
|
|
439
|
+
* @param cwd - Working directory for git commands
|
|
440
|
+
* @returns Review prompt string
|
|
441
|
+
*/
|
|
442
|
+
/**
 * Produce the verdict-rule bullet lines shown to the reviewer for a given
 * pass threshold.
 *
 * The output is: two rules that always apply (status mismatch, critical
 * severity), then the rules specific to the threshold, then a closing
 * "PASS if no findings" rule and a trailing empty line. An unrecognised
 * threshold contributes no threshold-specific lines.
 *
 * Keeping these lines in step with `applyVerdictRules()` avoids the reviewer
 * reporting NEEDS_FIXES for findings the runtime threshold would let through.
 */
function buildThresholdRules(threshold: PassThreshold): string[] {
	const alwaysApplied: string[] = [
		"- **NEEDS_FIXES** if any finding has category `status_mismatch` (checkbox claims work is done but it isn't)",
		"- **NEEDS_FIXES** if any finding has severity `critical`",
	];

	let thresholdSpecific: string[] = [];
	if (threshold === "no_critical") {
		thresholdSpecific = [
			"- **PASS** even if there are `important` or `suggestion` findings (threshold: `no_critical`)",
		];
	} else if (threshold === "no_important") {
		thresholdSpecific = [
			"- **NEEDS_FIXES** if 3 or more findings have severity `important`",
			"- **PASS** if only `suggestion`-level findings remain",
		];
	} else if (threshold === "all_clear") {
		thresholdSpecific = [
			"- **NEEDS_FIXES** if ANY findings exist (including `suggestion`-level)",
		];
	}

	return [
		...alwaysApplied,
		...thresholdSpecific,
		"- **PASS** if no findings at all",
		"",
	];
}
|
|
479
|
+
|
|
480
|
+
/**
 * Assemble the markdown prompt given to the quality gate review agent.
 *
 * The prompt embeds, in order: the task's PROMPT.md, its STATUS.md, the
 * changed-file list and diff returned by `buildGitDiff(cwd)`, review
 * instructions, the threshold-specific verdict rules, and the JSON shape the
 * agent must write to the verdict file inside the task folder.
 *
 * Evidence files are read best-effort: a missing or unreadable file is
 * replaced by a "(… not found)" placeholder instead of raising.
 *
 * @param context - Quality gate context (task folder, prompt path, IDs, threshold)
 * @param cwd - Directory passed to `buildGitDiff` to collect the diff
 * @returns The full prompt as a single newline-joined string
 */
export function generateQualityGatePrompt(context: QualityGateContext, cwd: string): string {
	const FENCE = "```";

	// Best-effort read: any failure (missing file, I/O error) yields the placeholder.
	const readEvidence = (filePath: string, placeholder: string): string => {
		try {
			return existsSync(filePath) ? readFileSync(filePath, "utf-8") : placeholder;
		} catch {
			return placeholder;
		}
	};

	// One fenced block followed by the blank separator line.
	const fenced = (info: string, body: string): string[] => [`${FENCE}${info}`, body, FENCE, ``];

	const statusFile = join(context.taskFolder, "STATUS.md");
	const verdictFile = join(context.taskFolder, VERDICT_FILENAME);

	const promptText = readEvidence(context.promptPath, "(PROMPT.md not found)");
	const statusText = readEvidence(statusFile, "(STATUS.md not found)");

	const gitEvidence = buildGitDiff(cwd);

	// Cap the embedded diff at 100 * 1024 string characters and flag the cut.
	const diffCharLimit = 100 * 1024;
	let diffForPrompt = gitEvidence.diff;
	if (diffForPrompt.length > diffCharLimit) {
		diffForPrompt = diffForPrompt.slice(0, diffCharLimit) + "\n\n... (diff truncated at 100KB) ...";
	}

	const intro: string[] = [
		"# Quality Gate Review",
		"",
		`You are performing a structured post-completion quality gate review for task **${context.taskId}** in project **${context.projectName}**.`,
		"",
		"Your job is to verify that the task was completed correctly by comparing the PROMPT requirements against the actual code changes and STATUS.md progress claims.",
		"",
	];

	const evidence: string[] = [
		"## Task Requirements (PROMPT.md)",
		"",
		...fenced("markdown", promptText),
		"## Declared Progress (STATUS.md)",
		"",
		...fenced("markdown", statusText),
		"## Changed Files",
		"",
		...fenced("", gitEvidence.fileList),
		"## Git Diff",
		"",
		...fenced("diff", diffForPrompt),
	];

	const instructions: string[] = [
		"## Instructions",
		"",
		"1. **Read the PROMPT.md requirements** carefully — identify every deliverable and acceptance criterion.",
		"2. **Cross-check STATUS.md checkboxes** — verify each checked item actually has corresponding code/test changes in the diff.",
		"3. **Review the git diff** — look for missing implementations, incorrect logic, incomplete work.",
		"4. **Use tools** to read actual source files if the diff is unclear.",
		"5. **Produce your verdict** as a JSON object written to the file specified below.",
		"",
	];

	const verdictRules: string[] = [
		"## Verdict Rules",
		"",
		"Report ALL findings you discover with accurate severities. The runtime will",
		`apply the configured pass threshold (\`${context.passThreshold}\`) to decide pass/fail.`,
		"",
		"Use these rules to determine your verdict:",
		...buildThresholdRules(context.passThreshold),
		"",
	];

	// Schema lines are reproduced verbatim; they are prompt text, not parsed here.
	const schemaLines: string[] = [
		'{',
		' "verdict": "PASS" | "NEEDS_FIXES",',
		' "confidence": "high" | "medium" | "low",',
		' "summary": "Brief overall assessment",',
		' "findings": [',
		' {',
		' "severity": "critical" | "important" | "suggestion",',
		' "category": "missing_requirement" | "incorrect_implementation" | "incomplete_work" | "status_mismatch",',
		' "description": "What is wrong",',
		' "file": "path/to/file.ts",',
		' "remediation": "Specific fix instruction"',
		' }',
		' ],',
		' "statusReconciliation": [',
		' {',
		' "checkbox": "Original checkbox text",',
		' "actualState": "done" | "not_done" | "partial",',
		' "evidence": "How you verified"',
		' }',
		' ]',
		'}',
	];

	const outputFormat: string[] = [
		"## Output Format",
		"",
		`Write a JSON file to: \`${verdictFile}\``,
		"",
		"The JSON must conform to this schema:",
		"",
		`${FENCE}json`,
		...schemaLines,
		FENCE,
		"",
		"**IMPORTANT:** Write ONLY valid JSON to the verdict file. No markdown, no explanation — just the JSON object.",
		"",
	];

	return [...intro, ...evidence, ...instructions, ...verdictRules, ...outputFormat].join("\n");
}
|
|
593
|
+
|
|
594
|
+
// ── Quality Gate Result ──────────────────────────────────────────────
|
|
595
|
+
|
|
596
|
+
/**
 * Result of a quality gate review cycle.
 *
 * Bundles the reviewer's verdict with the evaluation of that verdict against
 * the configured pass threshold, plus bookkeeping about how the gate ran.
 */
export interface QualityGateResult {
	/** Whether the task passed the quality gate */
	passed: boolean;
	/** Parsed verdict from the review agent (fail-open sentinel if parsing failed) */
	verdict: ReviewVerdict;
	/** Evaluation of verdict rules against threshold */
	evaluation: VerdictEvaluation;
	/** Number of review cycles consumed so far */
	cyclesUsed: number;
	/** Whether the gate was skipped because it's disabled */
	skipped: boolean;
}
|
|
609
|
+
|
|
610
|
+
/**
 * Load the verdict file from a task folder and evaluate it against the
 * configured pass threshold.
 *
 * The raw file text is handed to `parseVerdict`; when the file is absent or
 * cannot be read, `null` is passed instead of raising. The fail-open
 * behaviour described for this function (missing file, malformed JSON, or an
 * invalid structure all resulting in a synthetic PASS) is implemented by
 * `parseVerdict`, not here.
 *
 * @param taskFolder - Absolute path to task folder
 * @param passThreshold - Configured pass threshold
 * @returns The parsed verdict together with its threshold evaluation
 */
export function readAndEvaluateVerdict(
	taskFolder: string,
	passThreshold: PassThreshold,
): { verdict: ReviewVerdict; evaluation: VerdictEvaluation } {
	const verdictFile = join(taskFolder, VERDICT_FILENAME);

	// Returns the file text, or null when the file is missing or unreadable.
	const loadRawVerdict = (): string | null => {
		try {
			return existsSync(verdictFile) ? readFileSync(verdictFile, "utf-8") : null;
		} catch {
			return null;
		}
	};

	const verdict = parseVerdict(loadRawVerdict());
	return { verdict, evaluation: applyVerdictRules(verdict, passThreshold) };
}
|
|
642
|
+
|
|
643
|
+
// ── STATUS.md Reconciliation ─────────────────────────────────────────
|
|
644
|
+
|
|
645
|
+
/**
 * Result of applying status reconciliation to STATUS.md.
 *
 * Each reconciliation entry is counted in exactly one of `changed`,
 * `alreadyCorrect`, or `unmatched`, and contributes one item to `actions`.
 */
export interface ReconciliationResult {
	/** Number of checkboxes whose line was rewritten (state flipped or "(partial)" annotation added) */
	changed: number;
	/** Number of reconciliation entries that matched but required no change */
	alreadyCorrect: number;
	/**
	 * Number of reconciliation entries that could not be applied: no matching
	 * checkbox, empty text after normalization, STATUS.md missing/unreadable,
	 * or a change that was rolled back because writing STATUS.md failed.
	 */
	unmatched: number;
	/** Details of each action taken */
	actions: ReconciliationAction[];
}
|
|
656
|
+
|
|
657
|
+
/** A single reconciliation action applied (or skipped). */
export interface ReconciliationAction {
	/** The checkbox text from the reconciliation entry */
	checkbox: string;
	/**
	 * What happened.
	 *
	 * - `"checked"`: the box was switched to checked.
	 * - `"unchecked"`: the box was switched to unchecked, or an already-unchecked
	 *   box only received a "(partial)" annotation.
	 * - `"no_change"`: the box already had the desired state.
	 * - `"unmatched"`: the entry could not be applied (also used when the
	 *   STATUS.md write failed).
	 */
	outcome: "checked" | "unchecked" | "no_change" | "unmatched";
	/** Human-readable reason */
	reason: string;
}
|
|
666
|
+
|
|
667
|
+
/**
 * Reduce checkbox text to a canonical form used for fuzzy comparison.
 *
 * Steps, in order:
 * 1. remove bold/underscore/backtick markers (`**`, `__`, backticks),
 * 2. collapse every whitespace run to a single space,
 * 3. drop one leading bullet (`-`, `*`, or `•`) with its surrounding spaces,
 * 4. trim and lowercase.
 *
 * The same normalization is applied to both the reviewer-supplied text and
 * the STATUS.md line, so differences in casing, spacing, or light markdown
 * do not prevent a match.
 */
function normalizeCheckboxText(text: string): string {
	const withoutMarkup = text.replace(/\*\*|__|``|`/g, "");
	const singleSpaced = withoutMarkup.replace(/\s+/g, " ");
	const withoutBullet = singleSpaced.replace(/^\s*[-*•]\s*/, "");
	return withoutBullet.trim().toLowerCase();
}
|
|
683
|
+
|
|
684
|
+
/**
 * Apply statusReconciliation entries to STATUS.md checkboxes.
 *
 * For each reconciliation entry:
 * - `done` → ensure checkbox is checked (`[x]`); a trailing "(partial)"
 *   annotation left over from an earlier cycle is removed
 * - `not_done` → ensure checkbox is unchecked (`[ ]`)
 * - `partial` → ensure checkbox is unchecked (`[ ]`) with "(partial)" annotation
 *
 * Matching strategy: normalize both the reconciliation `checkbox` text and the
 * STATUS.md checkbox text, then match by substring containment (reconciliation
 * text contained in STATUS line or vice versa). First match wins — each
 * STATUS.md line is consumed by at most one entry, so later duplicates are
 * reported as "unmatched". Checkbox lines whose label is empty after
 * normalization are never matched (an empty string is contained in every
 * string and would otherwise match any entry).
 *
 * Line endings: the file is split on "\n". Any remainder of a line that the
 * checkbox pattern does not capture (in practice the "\r" of CRLF files,
 * because `.` does not match line terminators) is re-appended to rewritten
 * lines so the file keeps its original line endings.
 *
 * Idempotency: if a checkbox already has the correct state, no change is made.
 * If no net changes occur, STATUS.md is not rewritten.
 *
 * Failure handling: a missing or unreadable STATUS.md marks every entry as
 * "unmatched". If writing the updated file fails, all "checked"/"unchecked"
 * actions are downgraded to "unmatched" and the counters adjusted.
 *
 * @param statusPath - Absolute path to STATUS.md
 * @param reconciliations - Array of reconciliation entries from the verdict
 * @returns Summary of changes applied
 */
export function applyStatusReconciliation(
	statusPath: string,
	reconciliations: StatusReconciliation[],
): ReconciliationResult {
	const result: ReconciliationResult = {
		changed: 0,
		alreadyCorrect: 0,
		unmatched: 0,
		actions: [],
	};

	if (!reconciliations || reconciliations.length === 0) {
		return result;
	}

	// Record every entry as unmatched when STATUS.md cannot be used at all.
	const markAllUnmatched = (reason: string): ReconciliationResult => {
		for (const r of reconciliations) {
			result.unmatched++;
			result.actions.push({ checkbox: r.checkbox, outcome: "unmatched", reason });
		}
		return result;
	};

	let content: string;
	try {
		if (!existsSync(statusPath)) {
			return markAllUnmatched("STATUS.md not found");
		}
		content = readFileSync(statusPath, "utf-8");
	} catch {
		return markAllUnmatched("STATUS.md unreadable");
	}

	// Parse lines, identify checkbox lines with their indices
	const lines = content.split("\n");
	// Groups: 1 = prefix up to "[", 2 = state char, 3 = "]" plus spacing, 4 = label
	const checkboxRegex = /^(\s*-\s*\[)([ xX])(\]\s*)(.*)/;
	const trailingPartial = /\s*\(partial\)\s*$/;

	// Track which line indices have been consumed by a reconciliation match
	const consumed = new Set<number>();

	for (const recon of reconciliations) {
		const normalizedRecon = normalizeCheckboxText(recon.checkbox);
		if (!normalizedRecon) {
			result.unmatched++;
			result.actions.push({
				checkbox: recon.checkbox,
				outcome: "unmatched",
				reason: "Empty checkbox text after normalization",
			});
			continue;
		}

		// Find the first unconsumed checkbox line whose label matches
		let matchedIdx = -1;
		let cbMatch: RegExpMatchArray | null = null;
		for (let i = 0; i < lines.length; i++) {
			if (consumed.has(i)) continue;
			const candidate = lines[i].match(checkboxRegex);
			if (!candidate) continue;

			const lineText = normalizeCheckboxText(candidate[4]);
			// "" is a substring of everything — an empty label must not match.
			if (!lineText) continue;

			// Match if either contains the other (handles paraphrasing)
			if (lineText.includes(normalizedRecon) || normalizedRecon.includes(lineText)) {
				matchedIdx = i;
				cbMatch = candidate;
				break;
			}
		}

		if (matchedIdx === -1 || cbMatch === null) {
			result.unmatched++;
			result.actions.push({
				checkbox: recon.checkbox,
				outcome: "unmatched",
				reason: "No matching checkbox found in STATUS.md",
			});
			continue;
		}

		consumed.add(matchedIdx);
		const currentlyChecked = cbMatch[2].toLowerCase() === "x";
		const currentText = cbMatch[4];
		// The pattern is anchored at the line start, so everything after the
		// match (e.g. a trailing "\r") is the uncaptured tail to preserve.
		const lineTail = lines[matchedIdx].slice(cbMatch[0].length);

		// Determine desired state
		// partial → uncheck (conservative: don't claim done)
		const shouldBeChecked = recon.actualState === "done";

		if (shouldBeChecked && currentlyChecked) {
			// Already correct
			result.alreadyCorrect++;
			result.actions.push({
				checkbox: recon.checkbox,
				outcome: "no_change",
				reason: "Already checked (done)",
			});
		} else if (!shouldBeChecked && !currentlyChecked) {
			// Already unchecked; a partial entry may still need its annotation
			if (recon.actualState === "partial" && !currentText.includes("(partial)")) {
				lines[matchedIdx] = `${cbMatch[1]} ${cbMatch[3]}${currentText} (partial)${lineTail}`;
				result.changed++;
				result.actions.push({
					checkbox: recon.checkbox,
					outcome: "unchecked",
					reason: "Added (partial) annotation",
				});
			} else {
				result.alreadyCorrect++;
				result.actions.push({
					checkbox: recon.checkbox,
					outcome: "no_change",
					reason: `Already unchecked (${recon.actualState})`,
				});
			}
		} else if (shouldBeChecked && !currentlyChecked) {
			// Need to check; drop a stale "(partial)" marker so the line does
			// not claim both "done" and "partial".
			const cleanText = currentText.replace(trailingPartial, "");
			lines[matchedIdx] = `${cbMatch[1]}x${cbMatch[3]}${cleanText}${lineTail}`;
			result.changed++;
			result.actions.push({
				checkbox: recon.checkbox,
				outcome: "checked",
				reason: "Work done but box was unchecked",
			});
		} else {
			// currentlyChecked but should not be (not_done or partial)
			const annotation = recon.actualState === "partial" ? " (partial)" : "";
			const cleanText = currentText.replace(trailingPartial, "");
			lines[matchedIdx] = `${cbMatch[1]} ${cbMatch[3]}${cleanText}${annotation}${lineTail}`;
			result.changed++;
			const outcomeReason =
				recon.actualState === "partial"
					? "Unchecked — work partially done"
					: "Unchecked — work not done";
			result.actions.push({ checkbox: recon.checkbox, outcome: "unchecked", reason: outcomeReason });
		}
	}

	// Only rewrite if there were actual changes
	if (result.changed > 0) {
		try {
			writeFileSync(statusPath, lines.join("\n"), "utf-8");
		} catch {
			// Write failed — downgrade changes to unmatched for accuracy
			// (the in-memory result says "changed" but file wasn't updated)
			for (const action of result.actions) {
				if (action.outcome === "checked" || action.outcome === "unchecked") {
					action.outcome = "unmatched";
					action.reason += " (write failed)";
					result.changed--;
					result.unmatched++;
				}
			}
		}
	}

	return result;
}
|
|
874
|
+
|
|
875
|
+
// ── Remediation: Feedback & Fix Agent Prompt ─────────────────────────
|
|
876
|
+
|
|
877
|
+
/**
 * File name (not a full path) of the review feedback file written for the
 * fix agent. Presumably resolved against the task folder by the caller —
 * confirm at the write site.
 */
export const FEEDBACK_FILENAME = "REVIEW_FEEDBACK.md";
|
|
879
|
+
|
|
880
|
+
/**
 * Render the markdown body of REVIEW_FEEDBACK.md from a NEEDS_FIXES verdict.
 *
 * Sections are emitted in a fixed order so the output is the same for the
 * same input: header, critical findings, important findings, suggestion
 * findings (only under `all_clear`, where they are blocking too),
 * STATUS.md reconciliation entries whose state is not `done`, and — when
 * none of those produced anything — a "No blocking findings" notice.
 *
 * This file is intentionally staged as a task artifact (aligns with
 * roadmap 5e: REVIEW_FEEDBACK.md is part of the review audit trail).
 *
 * @param verdict - The NEEDS_FIXES review verdict
 * @param cycleNum - Current remediation cycle number (1-based)
 * @param maxCycles - Maximum review cycles configured
 * @param passThreshold - Configured pass threshold (determines which severities are blocking)
 * @returns Markdown content for REVIEW_FEEDBACK.md
 */
export function generateFeedbackMd(
	verdict: ReviewVerdict,
	cycleNum: number,
	maxCycles: number,
	passThreshold: PassThreshold = "no_critical",
): string {
	const withSeverity = (level: string) => verdict.findings.filter((f) => f.severity === level);

	const criticals = withSeverity("critical");
	const importants = withSeverity("important");
	const suggestions = withSeverity("suggestion");
	const mismatches = verdict.statusReconciliation.filter((r) => r.actualState !== "done");

	// Under all_clear, suggestions are also blocking
	const includeSuggestions = passThreshold === "all_clear";

	let blockingLabel = "critical and important";
	if (includeSuggestions) {
		blockingLabel = "critical, important, and suggestion";
	}

	const out: string[] = [];
	out.push(
		`# Review Feedback — Cycle ${cycleNum}/${maxCycles}`,
		``,
		`**Verdict:** NEEDS_FIXES`,
		`**Confidence:** ${verdict.confidence}`,
		`**Summary:** ${verdict.summary}`,
		`**Pass Threshold:** \`${passThreshold}\``,
		``,
		`> This file was generated by the quality gate. Address all ${blockingLabel}`,
		`> findings below, then the review will re-run automatically.`,
		``,
	);

	// Emit one "## <Heading> Findings (n)" section; skipped entirely when empty.
	const pushFindingSection = (
		heading: string,
		tag: string,
		items: typeof verdict.findings,
		preface?: string,
	): void => {
		if (items.length === 0) return;
		out.push(`## ${heading} Findings (${items.length})`, ``);
		if (preface !== undefined) {
			out.push(preface, ``);
		}
		items.forEach((finding, idx) => {
			out.push(`### ${tag}${idx + 1}: ${finding.description}`, ``);
			out.push(`- **Category:** ${finding.category}`);
			if (finding.file) out.push(`- **File:** \`${finding.file}\``);
			if (finding.remediation) out.push(`- **Remediation:** ${finding.remediation}`);
			out.push(``);
		});
	};

	pushFindingSection("Critical", "C", criticals);
	pushFindingSection("Important", "I", importants);
	if (includeSuggestions) {
		pushFindingSection(
			"Suggestion",
			"S",
			suggestions,
			`> Under \`all_clear\` threshold, suggestions are also blocking.`,
		);
	}

	if (mismatches.length > 0) {
		out.push(`## STATUS.md Reconciliation Issues (${mismatches.length})`, ``);
		for (const entry of mismatches) {
			out.push(`- **Checkbox:** ${entry.checkbox}`);
			out.push(` - **Actual state:** ${entry.actualState}`);
			if (entry.evidence) out.push(` - **Evidence:** ${entry.evidence}`);
		}
		out.push(``);
	}

	const suggestionCount = includeSuggestions ? suggestions.length : 0;
	const totalBlocking = criticals.length + importants.length + suggestionCount + mismatches.length;

	// Nothing blocking was rendered although the verdict was NEEDS_FIXES: say so explicitly.
	if (totalBlocking === 0) {
		out.push(
			`## No blocking findings`,
			``,
			`The review returned NEEDS_FIXES but no blocking findings were extracted for threshold \`${passThreshold}\`.`,
			`This may indicate a threshold or verdict-rule mismatch. Review the REVIEW_VERDICT.json for details.`,
			``,
		);
	}

	return out.join("\n");
}
|
|
1004
|
+
|
|
1005
|
+
/**
 * Build the prompt for the fix agent that addresses quality gate findings.
 *
 * The fix agent is spawned in the same worktree as the task and receives
 * the REVIEW_FEEDBACK.md content along with task context. It should make
 * targeted code fixes and commit them.
 *
 * The sentence naming which severities must be fixed follows
 * `context.passThreshold`: under `all_clear` it lists critical, important,
 * and suggestion findings (matching what the feedback file treats as
 * blocking); for every other threshold it lists critical and important
 * findings.
 *
 * PROMPT.md and STATUS.md are read best-effort: a missing or unreadable
 * file is replaced by a "(… not found)" placeholder instead of raising.
 *
 * @param context - Quality gate context (task folder, IDs, etc.)
 * @param feedbackContent - Content of REVIEW_FEEDBACK.md
 * @param cycleNum - Current fix cycle number
 * @returns Prompt string for the fix agent
 */
export function buildFixAgentPrompt(
	context: QualityGateContext,
	feedbackContent: string,
	cycleNum: number,
): string {
	// Best-effort read: any failure yields the placeholder text.
	const readOrPlaceholder = (filePath: string, placeholder: string): string => {
		try {
			return existsSync(filePath) ? readFileSync(filePath, "utf-8") : placeholder;
		} catch {
			return placeholder;
		}
	};

	const statusPath = join(context.taskFolder, "STATUS.md");
	const statusContent = readOrPlaceholder(statusPath, "(STATUS.md not found)");
	const promptContent = readOrPlaceholder(context.promptPath, "(PROMPT.md not found)");

	// Keep the wording aligned with the severities the feedback file marks as blocking.
	const blockingLabel =
		context.passThreshold === "all_clear"
			? "critical, important, and suggestion"
			: "critical and important";

	return [
		`# Quality Gate Remediation — Fix Cycle ${cycleNum}`,
		``,
		`You are a fix agent addressing quality gate findings for task **${context.taskId}**.`,
		``,
		`The quality gate review found issues that must be fixed before the task can be marked complete.`,
		`Your job is to make targeted, minimal fixes to address the ${blockingLabel} findings below.`,
		``,
		`## Rules`,
		``,
		`1. **Read REVIEW_FEEDBACK.md** below — it lists the blocking findings with specific remediation instructions.`,
		`2. **Fix each finding** — make the minimal code change needed. Do NOT refactor unrelated code.`,
		`3. **Commit your fixes** with message: \`fix(${context.taskId}): address quality gate findings (cycle ${cycleNum})\``,
		`4. **Update STATUS.md** if any checkbox states were flagged as incorrect in the reconciliation section.`,
		`5. **Do NOT create .DONE** — the quality gate will re-run automatically after you exit.`,
		``,
		`## Task Context`,
		``,
		`- **Task folder:** ${context.taskFolder}/`,
		`- **PROMPT:** ${context.promptPath}`,
		`- **STATUS:** ${statusPath}`,
		``,
		`## Review Feedback`,
		``,
		`\`\`\`markdown`,
		feedbackContent,
		`\`\`\``,
		``,
		`## Original Task Requirements (PROMPT.md)`,
		``,
		`\`\`\`markdown`,
		promptContent,
		`\`\`\``,
		``,
		`## Current STATUS.md`,
		``,
		`\`\`\`markdown`,
		statusContent,
		`\`\`\``,
		``,
		`**IMPORTANT:** Focus only on fixing the blocking findings. Do not expand scope or create .DONE.`,
		``,
	].join("\n");
}
|