@gethmy/agent 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -6
- package/dist/board-helpers.d.ts +31 -0
- package/dist/board-helpers.js +150 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +2 -11761
- package/dist/completion.d.ts +14 -0
- package/dist/completion.js +142 -0
- package/dist/config.d.ts +23 -0
- package/dist/config.js +91 -0
- package/dist/git-pr.d.ts +25 -0
- package/dist/git-pr.js +305 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +169 -11730
- package/dist/log.d.ts +10 -0
- package/dist/log.js +35 -0
- package/dist/merge-monitor.d.ts +23 -0
- package/dist/merge-monitor.js +167 -0
- package/dist/pm.d.ts +14 -0
- package/dist/pm.js +63 -0
- package/dist/pool.d.ts +40 -0
- package/dist/pool.js +157 -0
- package/dist/progress-tracker.d.ts +64 -0
- package/dist/progress-tracker.js +361 -0
- package/dist/prompt.d.ts +5 -0
- package/dist/prompt.js +40 -0
- package/dist/queue.d.ts +37 -0
- package/dist/queue.js +96 -0
- package/dist/reconcile.d.ts +21 -0
- package/dist/reconcile.js +114 -0
- package/dist/review-completion.d.ts +31 -0
- package/dist/review-completion.js +253 -0
- package/dist/review-knowledge.d.ts +14 -0
- package/dist/review-knowledge.js +89 -0
- package/dist/review-prompt.d.ts +12 -0
- package/dist/review-prompt.js +103 -0
- package/dist/review-worker.d.ts +46 -0
- package/dist/review-worker.js +437 -0
- package/dist/review-worktree.d.ts +12 -0
- package/dist/review-worktree.js +83 -0
- package/dist/stream-parser.d.ts +31 -0
- package/dist/stream-parser.js +95 -0
- package/dist/types.d.ts +76 -0
- package/dist/types.js +56 -0
- package/dist/verification.d.ts +16 -0
- package/dist/verification.js +251 -0
- package/dist/watcher.d.ts +27 -0
- package/dist/watcher.js +74 -0
- package/dist/worker.d.ts +43 -0
- package/dist/worker.js +327 -0
- package/dist/worktree.d.ts +13 -0
- package/dist/worktree.js +115 -0
- package/package.json +8 -7
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { HarmonyApiClient } from "@gethmy/mcp/src/api-client.js";
|
|
2
|
+
import type { Card } from "@harmony/shared";
|
|
3
|
+
import type { AgentConfig } from "./types.js";
|
|
4
|
+
/** A single issue reported by the review agent. */
export interface ReviewFinding {
    /** How serious the issue is; one of the three supported levels. */
    severity: "critical" | "major" | "minor";
    /** Short headline for the issue. */
    title: string;
    /** Longer explanation of the issue. */
    description: string;
    /** Optional free-form category tag. */
    category?: string;
    /** Optional pointer to where the issue was found. */
    location?: string;
}
|
|
11
|
+
/** Outcome of the scope comparison reported alongside a review. */
export interface ScopeCheck {
    /** Scope verdict; one of the three supported states. */
    status: "clean" | "drift" | "missing";
    /** Optional free-form explanation accompanying the status. */
    notes?: string;
}
|
|
15
|
+
/** Structured outcome of a review run. */
export interface ReviewResult {
    /** Overall decision for the reviewed change. */
    verdict: "approved" | "rejected";
    /** Overall assessment text. */
    summary: string;
    /** Scope comparison result, when one was reported. */
    scopeCheck?: ScopeCheck;
    /** Individual issues reported by the review; may be empty. */
    findings: ReviewFinding[];
}
|
|
21
|
+
/**
 * Turn the raw text produced by a Claude review run into a ReviewResult.
 * The structured data is read from a JSON block inside that text.
 *
 * @param stdout Raw output captured from the review run.
 * @returns The structured review result.
 */
export declare function parseReviewOutput(
    stdout: string,
): ReviewResult;
|
|
26
|
+
/**
 * Pipeline executed once a review has produced a result.
 * Acts on the approved/rejected verdict, records findings (including
 * subtasks), and moves the card to the appropriate column.
 *
 * @param client Harmony API client used for all card operations.
 * @param card The card that was reviewed.
 * @param result Parsed review outcome.
 * @param config Agent configuration.
 * @param worktreePath Path of the worktree the review ran in.
 * @param branchName Branch under review, or null when there is none.
 */
export declare function runReviewCompletion(
    client: HarmonyApiClient,
    card: Card,
    result: ReviewResult,
    config: AgentConfig,
    worktreePath: string,
    branchName: string | null,
): Promise<void>;
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
import { addLabelByName, moveCardToColumn } from "./board-helpers.js";
|
|
2
|
+
import { createPullRequest, detectGitProvider, pushBranch } from "./git-pr.js";
|
|
3
|
+
import { log } from "./log.js";
|
|
4
|
+
import { cleanupWorktree } from "./worktree.js";
|
|
5
|
+
// Tag passed to the logger for every message emitted by this module.
const TAG = "review-completion";
// Upper bound applied to each severity bucket when findings are collected.
const MAX_FINDINGS = 10;
// Opening text of the review summary block appended to a card description;
// also the needle used to find and strip a previously appended block.
const REVIEW_MARKER = "---\n**Review:";
|
|
8
|
+
/**
 * Parse Claude's review output into a structured ReviewResult.
 *
 * Every fenced ```json block in the output is a candidate, tried from the
 * last one backwards: the review prompt asks for the verdict as the final
 * block, so JSON quoted earlier in the output (config snippets, examples)
 * must not be mistaken for it. When the output has no fenced block, the whole
 * trimmed output is tried as JSON.
 *
 * Only a JSON object is accepted. Unknown or missing fields are normalised:
 * an unrecognised verdict becomes "rejected", an unrecognised finding severity
 * becomes "major", and an unrecognised scope status becomes "clean".
 *
 * Never throws: when no candidate yields a JSON object the result is a
 * rejection whose summary is the first 500 characters of the raw output.
 *
 * @param stdout Raw output captured from the review run.
 * @returns The normalised review result.
 */
export function parseReviewOutput(stdout) {
    const fenced = Array.from(stdout.matchAll(/```json\s*([\s\S]*?)```/g), (m) => m[1].trim());
    // Newest block first; fall back to the whole output when nothing is fenced.
    const candidates = fenced.length > 0 ? fenced.reverse() : [stdout.trim()];
    for (const rawJson of candidates) {
        try {
            const parsed = JSON.parse(rawJson);
            // Scalars, arrays and null are valid JSON but carry no review
            // payload; skip them so the raw output is kept instead of being
            // reduced to an empty rejection.
            if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
                continue;
            }
            return toReviewResult(parsed);
        }
        catch {
            // Not parseable or not usable — try the next candidate.
        }
    }
    // If we can't parse JSON, treat as rejection with the raw output as summary
    log.warn(TAG, "Failed to parse review JSON output, treating as rejection");
    return {
        verdict: "rejected",
        summary: stdout.slice(0, 500),
        findings: [],
    };
}
/** Normalise an already-parsed JSON object into a ReviewResult. */
function toReviewResult(parsed) {
    const verdict = parsed.verdict === "approved" || parsed.verdict === "rejected"
        ? parsed.verdict
        : "rejected";
    const findings = Array.isArray(parsed.findings)
        ? parsed.findings
            // Entries without a title key are not findings at all.
            .filter((f) => typeof f === "object" && f !== null && "title" in f)
            .map((f) => ({
            severity: f.severity === "critical" || f.severity === "minor"
                ? f.severity
                : "major",
            title: String(f.title ?? "Untitled finding"),
            description: String(f.description ?? ""),
            category: f.category ? String(f.category) : undefined,
            location: f.location ? String(f.location) : undefined,
        }))
        : [];
    const rawScope = parsed.scopeCheck;
    const scopeCheck = rawScope && typeof rawScope === "object" && "status" in rawScope
        ? {
            status: ["clean", "drift", "missing"].includes(rawScope.status)
                ? rawScope.status
                : "clean",
            notes: rawScope.notes ? String(rawScope.notes) : undefined,
        }
        : undefined;
    return {
        verdict,
        // Cap the summary so an oversized model answer cannot bloat the card.
        summary: String(parsed.summary ?? "").slice(0, 2000),
        scopeCheck,
        findings,
    };
}
|
|
65
|
+
/**
 * Read the review cycle counter stored in a card description.
 * The counter is the N in a "Review cycle: N/M" marker; a missing description
 * or a description without the marker counts as cycle 0.
 */
function getReviewCycle(description) {
    if (!description) {
        return 0;
    }
    const [, digits] = description.match(/Review cycle:\s*(\d+)/) ?? [];
    return digits === undefined ? 0 : parseInt(digits, 10);
}
|
|
75
|
+
/**
 * Write the "Review cycle: N/M" marker into a card description.
 * An existing marker is replaced in place; otherwise the marker is appended
 * after a blank line.
 */
function updateReviewCycleMarker(description, cycle, maxCycles) {
    const markerPattern = /Review cycle:\s*\d+\/\d+/;
    const freshMarker = `Review cycle: ${cycle}/${maxCycles}`;
    if (description.search(markerPattern) === -1) {
        return `${description}\n\n${freshMarker}`;
    }
    return description.replace(markerPattern, freshMarker);
}
|
|
86
|
+
/**
 * Remove a previously appended review summary from a description.
 * Everything from the first occurrence of the review marker onwards is
 * dropped, along with the whitespace left at the end; a description without
 * the marker is returned untouched.
 */
function stripReviewSummary(description) {
    const markerAt = description.indexOf(REVIEW_MARKER);
    return markerAt < 0
        ? description
        : description.slice(0, markerAt).trimEnd();
}
|
|
95
|
+
/**
 * Post-review completion pipeline.
 *
 * Approved: pushes the branch (when there is one), optionally opens a PR,
 * adds the approval label, optionally writes an approval summary to the card
 * description and ends the agent session as completed.
 *
 * Rejected: once the cycle limit is reached the card is moved to
 * `config.review.moveToColumn` with a "needs human review" note. Otherwise
 * findings are optionally posted (critical/major as linked cards, minor as
 * subtasks), the review cycle counter is written to the description, the card
 * is moved to `config.review.failColumn` and the session is paused.
 *
 * The cycle counter is persisted even when `config.review.postFindings` is
 * off: it is the only state the max-cycle guard reads, so skipping it would
 * let a card bounce between review and re-implementation indefinitely.
 *
 * @param client Harmony API client used for all card operations.
 * @param card The reviewed card (may be stale; its description is re-fetched).
 * @param result Parsed review outcome.
 * @param config Agent configuration; only `config.review` is read here.
 * @param worktreePath Worktree the review ran in.
 * @param branchName Branch under review, or null in local mode (no push, PR
 *   or worktree cleanup).
 */
export async function runReviewCompletion(client, card, result, config, worktreePath, branchName) {
    // Re-fetch card for fresh description (avoids stale data from enqueue time)
    let freshDesc;
    try {
        const { card: fresh } = (await client.getCard(card.id));
        freshDesc = fresh.description || "";
    }
    catch (err) {
        // Best effort: carry on with the cached copy, but leave a trace because
        // a stale description can carry an outdated cycle counter.
        log.warn(TAG, `Failed to re-fetch #${card.short_id}, using cached description: ${err instanceof Error ? err.message : err}`);
        freshDesc = card.description || "";
    }
    const currentCycle = getReviewCycle(freshDesc) + 1;
    const maxCycles = config.review.maxReviewCycles;
    const scopeLine = result.scopeCheck
        ? `\nScope: ${result.scopeCheck.status}${result.scopeCheck.notes ? ` — ${result.scopeCheck.notes}` : ""}`
        : "";
    if (result.verdict === "approved") {
        // Ensure branch is pushed (skip in local mode — no branch to push)
        let prUrl = null;
        if (branchName) {
            pushBranch(branchName, worktreePath);
            // Create PR if configured
            if (config.review.createPR) {
                const provider = detectGitProvider(worktreePath);
                prUrl = createPullRequest(card, branchName, worktreePath, config, provider);
            }
        }
        // Add "Ready to Merge" label
        await addLabelByName(client, card, config.review.approvedLabel, config.review.approvedLabelColor);
        // Post approval summary (card stays in Review, strip stale cycle marker)
        if (config.review.postFindings) {
            const baseDesc = stripReviewSummary(freshDesc).replace(/\n\nReview cycle:\s*\d+\/\d+/, "");
            // An approved review may still carry non-minor findings, so report
            // the real severity mix instead of calling everything "minor".
            const severityTally = ["critical", "major", "minor"]
                .map((severity) => ({
                severity,
                count: result.findings.filter((f) => f.severity === severity).length,
            }))
                .filter((entry) => entry.count > 0)
                .map((entry) => `${entry.count} ${entry.severity}`)
                .join(", ");
            const summaryParts = [
                `\n\n${REVIEW_MARKER} Approved**`,
                result.summary ? `\n${result.summary}` : "",
                scopeLine,
                severityTally ? `\n${severityTally} finding(s) noted.` : "",
            ];
            if (prUrl) {
                summaryParts.push(`\nPR: ${prUrl}`);
            }
            const summary = summaryParts.join("");
            try {
                await client.updateCard(card.id, { description: baseDesc + summary });
            }
            catch (err) {
                log.error(TAG, `Failed to update description: ${err instanceof Error ? err.message : err}`);
            }
        }
        await client.endAgentSession(card.id, {
            status: "completed",
            progressPercent: 100,
        });
        log.info(TAG, `#${card.short_id} approved${prUrl ? ` — PR: ${prUrl}` : ""} — labeled "${config.review.approvedLabel}"`);
    }
    else {
        // Rejected
        const criticalFindings = result.findings
            .filter((f) => f.severity === "critical")
            .slice(0, MAX_FINDINGS);
        const majorFindings = result.findings
            .filter((f) => f.severity === "major")
            .slice(0, MAX_FINDINGS);
        const linkedFindings = [...criticalFindings, ...majorFindings];
        const minorFindings = result.findings
            .filter((f) => f.severity === "minor")
            .slice(0, MAX_FINDINGS);
        // Check if we've exceeded max review cycles
        if (currentCycle >= maxCycles) {
            log.warn(TAG, `#${card.short_id} reached max review cycles (${maxCycles}), moving to Done with note`);
            await moveCardToColumn(client, card, config.review.moveToColumn);
            const baseDesc = stripReviewSummary(freshDesc);
            const summary = [
                `\n\n${REVIEW_MARKER} Needs human review**`,
                `\nReached max review cycles (${maxCycles}). Please review manually.`,
                result.summary ? `\n${result.summary}` : "",
            ].join("");
            try {
                await client.updateCard(card.id, { description: baseDesc + summary });
            }
            catch (err) {
                log.error(TAG, `Failed to update description: ${err instanceof Error ? err.message : err}`);
            }
            await client.endAgentSession(card.id, { status: "completed" });
            if (branchName) {
                cleanupWorktree(worktreePath, branchName);
            }
            return;
        }
        // Post findings
        if (config.review.postFindings) {
            // Add critical + major findings as new linked cards (parallel)
            await Promise.all(linkedFindings.map(async (finding) => {
                try {
                    const locationLine = finding.location
                        ? `\n**Location:** ${finding.location}`
                        : "";
                    const newCard = await client.createCard(card.project_id, {
                        title: `[Review: ${finding.severity}] ${finding.title}`,
                        description: `Found during review of #${card.short_id} (${finding.severity}):\n\n${finding.description}${locationLine}`,
                    });
                    const newCardId = newCard?.card?.id;
                    if (newCardId) {
                        await client.addLinkToCard(card.id, newCardId, "relates_to");
                    }
                }
                catch (err) {
                    log.error(TAG, `Failed to create finding card: ${err instanceof Error ? err.message : err}`);
                }
            }));
            // Add minor findings as subtasks (parallel)
            await Promise.all(minorFindings.map(async (finding) => {
                try {
                    const title = finding.title.length > 120
                        ? `${finding.title.slice(0, 117)}...`
                        : finding.title;
                    await client.createSubtask(card.id, title);
                }
                catch (err) {
                    log.error(TAG, `Failed to create subtask: ${err instanceof Error ? err.message : err}`);
                }
            }));
        }
        // Always persist the cycle counter; the human-readable summary is
        // appended only when posting findings is enabled.
        const updatedDesc = updateReviewCycleMarker(stripReviewSummary(freshDesc), currentCycle, maxCycles);
        const summary = config.review.postFindings
            ? [
                `\n\n${REVIEW_MARKER} Rejected**`,
                result.summary ? `\n${result.summary}` : "",
                scopeLine,
                `\n${criticalFindings.length} critical, ${majorFindings.length} major, ${minorFindings.length} minor finding(s).`,
            ].join("")
            : "";
        try {
            await client.updateCard(card.id, {
                description: updatedDesc + summary,
            });
        }
        catch (err) {
            log.error(TAG, `Failed to update description: ${err instanceof Error ? err.message : err}`);
        }
        // Move back to failColumn (To Do) for re-implementation
        await moveCardToColumn(client, card, config.review.failColumn);
        await client.endAgentSession(card.id, { status: "paused" });
        log.info(TAG, `#${card.short_id} rejected (cycle ${currentCycle}/${maxCycles}) — moved to "${config.review.failColumn}"`);
    }
    // Cleanup worktree (skip in local mode — no worktree to clean)
    if (branchName) {
        cleanupWorktree(worktreePath, branchName);
    }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedded review and QA knowledge for the review worker.
|
|
3
|
+
* Condensed from the /review checklist and /qa skill.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Static system prompt with review methodology.
 * Covers two-pass review categories, suppressions, and severity classification.
 * Declared with its literal value; the text must stay on a single line because
 * a double-quoted string literal cannot contain a raw line break.
 */
export declare const REVIEW_SYSTEM_PROMPT = "You are a senior code reviewer. Follow this two-pass methodology strictly.\nReport findings; do NOT fix them. This is a read-only review.\n\n## Two-Pass Review\n\n### Pass 1 \u2014 CRITICAL (highest severity)\n\n**SQL & Data Safety**\n- String interpolation in SQL \u2014 use parameterized queries / prepared statements\n- TOCTOU races: check-then-set patterns that should be atomic WHERE + UPDATE\n\n**Race Conditions & Concurrency**\n- Read-check-write without uniqueness constraint or duplicate key handling\n- Status transitions without atomic WHERE old_status UPDATE SET new_status\n- Unsafe HTML rendering (dangerouslySetInnerHTML, v-html) on user-controlled data (XSS)\n\n**LLM Output Trust Boundary**\n- LLM-generated values written to DB without format validation (EMAIL_REGEXP, URI.parse, .trim())\n- Structured tool output accepted without type/shape checks before database writes\n\n**Enum & Value Completeness**\n- When the diff introduces a new enum/status/type value, trace it through every consumer\n- Check allowlists, filter arrays, and case/if-elsif chains for the new value\n- Use Grep to find all references to sibling values and Read each match \u2014 look OUTSIDE the diff\n\n### Pass 2 \u2014 INFORMATIONAL (lower severity)\n\n**Conditional Side Effects**\n- Code paths that branch but forget a side effect on one branch (e.g., promoting without attaching URL)\n\n**Dead Code & Consistency**\n- Variables assigned but never read\n- Comments/docstrings describing old behavior after code changed\n\n**Test Gaps**\n- Missing negative-path tests for new error handling\n- Security enforcement features without integration tests\n\n**Completeness Gaps**\n- Partial enum handling, incomplete error paths, missing edge cases that are straightforward to add\n\n**View/Frontend**\n- O(n*m) lookups in views (Array.find in a loop instead of Map/index)\n- Inline styles re-parsed every render\n\n## Severity Classification\n\n- **critical**: SQL safety, race conditions, XSS, LLM trust boundary violations, enum completeness gaps causing runtime errors\n- **major**: Missing requirements, broken functionality, significant completeness gaps, conditional side effects\n- **minor**: Dead code, stale comments, test gaps, minor view issues, cosmetic completeness gaps\n\n## Suppressions \u2014 DO NOT flag these\n\n- Redundancy that aids readability (e.g., present? redundant with length > 20)\n- \"Add a comment explaining why this threshold was chosen\" \u2014 thresholds change, comments rot\n- Consistency-only changes (wrapping a value to match how another constant is guarded)\n- Regex edge cases when input is constrained and the edge case never occurs in practice\n- Eval threshold changes \u2014 these are tuned empirically\n- Harmless no-ops (e.g., .reject on an element never in the array)\n- ANYTHING already addressed in the diff you are reviewing \u2014 read the FULL diff before flagging";
|
|
10
|
+
/**
 * Checklist text for browser-based visual QA, condensed from the /qa skill's
 * per-page exploration checklist. Declared with its literal value.
 */
export declare const QA_VISUAL_CHECKLIST = "## Visual QA Checklist\n\nFor each page affected by the changes:\n\n1. **Visual scan** \u2014 Screenshot the page. Check for layout breaks, broken images, alignment issues, z-index problems.\n2. **Interactive elements** \u2014 Click every button, link, and control. Does each do what it says?\n3. **Forms** \u2014 Fill and submit. Test empty submission, invalid data, edge cases.\n4. **Navigation** \u2014 Check all paths in/out. Breadcrumbs, back button, deep links.\n5. **States** \u2014 Check empty state, loading state, error state, overflow state.\n6. **Console** \u2014 Check for JS exceptions, failed network requests (4xx/5xx), CORS errors after interactions.\n7. **Responsiveness** \u2014 If the change is visual, check mobile viewport (375px).\n\n### SPA-Specific (React/Vite)\n- Use snapshot for navigation \u2014 client-side routes may not appear in link lists.\n- Check for stale state: navigate away and back \u2014 does data refresh correctly?\n- Test browser back/forward \u2014 does the app handle history correctly?\n- Watch for hydration errors or layout shifts after dynamic content loads.";
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedded review and QA knowledge for the review worker.
|
|
3
|
+
* Condensed from the /review checklist and /qa skill.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Review methodology handed to the review agent as static prompt text:
 * the two review passes and their categories, the severity classification,
 * and the list of things that must not be flagged.
 * Assembled from one entry per output line; empty entries are blank lines.
 */
export const REVIEW_SYSTEM_PROMPT = [
    "You are a senior code reviewer. Follow this two-pass methodology strictly.",
    "Report findings; do NOT fix them. This is a read-only review.",
    "",
    "## Two-Pass Review",
    "",
    "### Pass 1 — CRITICAL (highest severity)",
    "",
    "**SQL & Data Safety**",
    "- String interpolation in SQL — use parameterized queries / prepared statements",
    "- TOCTOU races: check-then-set patterns that should be atomic WHERE + UPDATE",
    "",
    "**Race Conditions & Concurrency**",
    "- Read-check-write without uniqueness constraint or duplicate key handling",
    "- Status transitions without atomic WHERE old_status UPDATE SET new_status",
    "- Unsafe HTML rendering (dangerouslySetInnerHTML, v-html) on user-controlled data (XSS)",
    "",
    "**LLM Output Trust Boundary**",
    "- LLM-generated values written to DB without format validation (EMAIL_REGEXP, URI.parse, .trim())",
    "- Structured tool output accepted without type/shape checks before database writes",
    "",
    "**Enum & Value Completeness**",
    "- When the diff introduces a new enum/status/type value, trace it through every consumer",
    "- Check allowlists, filter arrays, and case/if-elsif chains for the new value",
    "- Use Grep to find all references to sibling values and Read each match — look OUTSIDE the diff",
    "",
    "### Pass 2 — INFORMATIONAL (lower severity)",
    "",
    "**Conditional Side Effects**",
    "- Code paths that branch but forget a side effect on one branch (e.g., promoting without attaching URL)",
    "",
    "**Dead Code & Consistency**",
    "- Variables assigned but never read",
    "- Comments/docstrings describing old behavior after code changed",
    "",
    "**Test Gaps**",
    "- Missing negative-path tests for new error handling",
    "- Security enforcement features without integration tests",
    "",
    "**Completeness Gaps**",
    "- Partial enum handling, incomplete error paths, missing edge cases that are straightforward to add",
    "",
    "**View/Frontend**",
    "- O(n*m) lookups in views (Array.find in a loop instead of Map/index)",
    "- Inline styles re-parsed every render",
    "",
    "## Severity Classification",
    "",
    "- **critical**: SQL safety, race conditions, XSS, LLM trust boundary violations, enum completeness gaps causing runtime errors",
    "- **major**: Missing requirements, broken functionality, significant completeness gaps, conditional side effects",
    "- **minor**: Dead code, stale comments, test gaps, minor view issues, cosmetic completeness gaps",
    "",
    "## Suppressions — DO NOT flag these",
    "",
    "- Redundancy that aids readability (e.g., present? redundant with length > 20)",
    "- \"Add a comment explaining why this threshold was chosen\" — thresholds change, comments rot",
    "- Consistency-only changes (wrapping a value to match how another constant is guarded)",
    "- Regex edge cases when input is constrained and the edge case never occurs in practice",
    "- Eval threshold changes — these are tuned empirically",
    "- Harmless no-ops (e.g., .reject on an element never in the array)",
    "- ANYTHING already addressed in the diff you are reviewing — read the FULL diff before flagging",
].join("\n");
|
|
69
|
+
/**
 * Per-page checklist used for browser-based visual verification, condensed
 * from the /qa skill's exploration checklist.
 * Assembled from one entry per output line; empty entries are blank lines.
 */
export const QA_VISUAL_CHECKLIST = [
    "## Visual QA Checklist",
    "",
    "For each page affected by the changes:",
    "",
    "1. **Visual scan** — Screenshot the page. Check for layout breaks, broken images, alignment issues, z-index problems.",
    "2. **Interactive elements** — Click every button, link, and control. Does each do what it says?",
    "3. **Forms** — Fill and submit. Test empty submission, invalid data, edge cases.",
    "4. **Navigation** — Check all paths in/out. Breadcrumbs, back button, deep links.",
    "5. **States** — Check empty state, loading state, error state, overflow state.",
    "6. **Console** — Check for JS exceptions, failed network requests (4xx/5xx), CORS errors after interactions.",
    "7. **Responsiveness** — If the change is visual, check mobile viewport (375px).",
    "",
    "### SPA-Specific (React/Vite)",
    "- Use snapshot for navigation — client-side routes may not appear in link lists.",
    "- Check for stale state: navigate away and back — does data refresh correctly?",
    "- Test browser back/forward — does the app handle history correctly?",
    "- Watch for hydration errors or layout shifts after dynamic content loads.",
].join("\n");
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { EnrichedCard } from "./types.js";
|
|
2
|
+
/**
 * Produce the static system prompt for the review agent: review
 * methodology, checklist, and QA guidance.
 * The result is handed to the Claude CLI through --append-system-prompt.
 *
 * @returns The complete system prompt text.
 */
export declare function buildReviewSystemPrompt(): string;
|
|
8
|
+
/**
 * Produce the card-specific user prompt for the review agent, containing the
 * diff, the requirements, and the structured review steps.
 *
 * @param enriched Card together with its related data.
 * @param branchName Branch under review, or null when there is none.
 * @param worktreePath Filesystem location of the code being reviewed.
 * @param previewUrl URL the agent is told to open for visual QA.
 * @param diff Diff text to embed in the prompt.
 * @param baseBranch Base branch name used in the diff heading.
 * @returns The complete user prompt text.
 */
export declare function buildReviewUserPrompt(
    enriched: EnrichedCard,
    branchName: string | null,
    worktreePath: string,
    previewUrl: string,
    diff: string,
    baseBranch: string,
): string;
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { QA_VISUAL_CHECKLIST, REVIEW_SYSTEM_PROMPT, } from "./review-knowledge.js";
|
|
2
|
+
/**
 * Assemble the static system prompt for the review agent.
 * A two-line role preamble is followed by the review methodology and the
 * visual QA checklist, each separated by a blank line.
 * Passed via --append-system-prompt to Claude CLI.
 */
export function buildReviewSystemPrompt() {
    const preamble = "You are a code review agent. You review changes made by an implementation agent.\n" +
        "You are thorough, specific, and cite file:line locations for every finding.";
    return [preamble, REVIEW_SYSTEM_PROMPT, QA_VISUAL_CHECKLIST].join("\n\n");
}
|
|
15
|
+
/**
 * Assemble the card-specific user prompt for the review agent.
 *
 * The prompt contains the card header (title, labels, branch or local-mode
 * line), the card description as requirements, the subtasks as a checkbox
 * list, the diff (cut off after 80K characters), the four review steps, the
 * JSON output contract and the decision rules.
 *
 * @param enriched Object carrying `card`, `labels` and `subtasks`.
 * @param branchName Branch under review; a falsy value selects local mode.
 * @param worktreePath Location of the code, quoted in the closing line.
 * @param previewUrl URL the agent is told to browse for visual QA.
 * @param diff Diff text to embed.
 * @param baseBranch Base branch shown in the diff heading when a branch is set.
 * @returns The prompt text.
 */
export function buildReviewUserPrompt(enriched, branchName, worktreePath, previewUrl, diff, baseBranch) {
    const { card, labels, subtasks } = enriched;
    let labelStr = "none";
    if (labels.length > 0) {
        labelStr = labels.map((l) => l.name).join(", ");
    }
    let subtaskStr = "No subtasks defined.";
    if (subtasks.length > 0) {
        subtaskStr = subtasks
            .map((s) => `- [${s.completed ? "x" : " "}] ${s.title}`)
            .join("\n");
    }
    const description = card.description?.trim() || "No description provided.";
    let truncatedDiff = diff;
    if (diff.length > 80_000) {
        truncatedDiff = `${diff.slice(0, 80_000)}\n\n... (diff truncated at 80K characters)`;
    }
    let branchLine;
    let diffScope;
    let contextLine;
    if (branchName) {
        branchLine = `**Branch**: ${branchName}`;
        diffScope = `(origin/${baseBranch}..HEAD)`;
        contextLine = `You are reviewing code in a git worktree at \`${worktreePath}\` on branch \`${branchName}\`.`;
    }
    else {
        branchLine = `**Mode**: Local review (no branch — reviewing working tree changes)`;
        diffScope = "(local changes)";
        contextLine = `You are reviewing local changes in the repository at \`${worktreePath}\`.`;
    }
    return `## Card: #${card.short_id} - ${card.title}
**Labels**: ${labelStr}
${branchLine}

## Original Requirements
${description}

## Subtasks (Acceptance Criteria)
${subtaskStr}

## Diff ${diffScope}
\`\`\`diff
${truncatedDiff}
\`\`\`

## Review Steps

Follow these steps in order:

### Step 1: Scope Check
Compare the diff against the card description and subtasks above.
- Are all requirements from the description addressed?
- Are all subtasks implemented?
- Is there scope creep — changes unrelated to the card requirements?
Flag any missing requirements or scope drift.

### Step 2: Code Review (Two-Pass)
Apply the two-pass review from your system instructions:
- **Pass 1 (CRITICAL)**: SQL safety, race conditions, LLM trust boundary, enum completeness.
- **Pass 2 (INFORMATIONAL)**: Conditional side effects, dead code, test gaps, completeness gaps, view issues.

For enum completeness checks, use Grep and Read to trace new values through consumers OUTSIDE the diff.

### Step 3: Visual QA
Use the \`/browse\` skill to navigate to ${previewUrl} and apply the visual QA checklist:
- Verify the UI changes match the requirements
- Check interactive elements, forms, navigation
- Check console for JS errors and failed network requests
- Test on mobile viewport if the change is visual

### Step 4: Output
After completing all steps, output EXACTLY one JSON block (and nothing else after it):

\`\`\`json
{
  "verdict": "approved" | "rejected",
  "summary": "Brief overall assessment",
  "scopeCheck": {
    "status": "clean" | "drift" | "missing",
    "notes": "Optional explanation of scope issues"
  },
  "findings": [
    {
      "severity": "critical" | "major" | "minor",
      "category": "sql-safety | race-condition | llm-trust | enum-completeness | visual | functional | ux | console | scope | other",
      "title": "Short title",
      "description": "Detailed description of the issue",
      "location": "file:line (if applicable)"
    }
  ]
}
\`\`\`

**Decision rules:**
- **rejected**: Any \`critical\` finding, unaddressed requirements, or 2+ \`major\` findings.
- **approved**: No critical findings, at most 1 major finding with minor findings OK.

**Do NOT modify any code.** This is a read-only review.
${contextLine}`;
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { HarmonyApiClient } from "@gethmy/mcp/src/api-client.js";
|
|
2
|
+
import type { Card, Column, Label, Subtask } from "@harmony/shared";
|
|
3
|
+
import { type AgentConfig, type WorkerState } from "./types.js";
|
|
4
|
+
/**
 * Worker that reviews one card at a time.
 * A review moves through PREPARING → REVIEWING → COMPLETING and back to IDLE.
 */
export declare class ReviewWorker {
    private config;
    private client;
    private onDone;
    /** Numeric identifier supplied at construction. */
    id: number;
    /** Current lifecycle state of the worker. */
    state: WorkerState;
    /** Card identifier associated with the worker, or null. */
    cardId: string | null;
    /** Branch name associated with the worker, or null. */
    branchName: string | null;
    /** Worktree path associated with the worker, or null. */
    worktreePath: string | null;
    /** Numeric start marker, or null (presumably a timestamp — confirm against the implementation). */
    startedAt: number | null;
    private process;
    private devServerProcess;
    private timeoutTimer;
    private progressTracker;
    private aborted;
    /**
     * @param id Numeric identifier for this worker.
     * @param config Agent configuration.
     * @param client Harmony API client.
     * @param _userEmail User email; the underscore prefix suggests it is unused — confirm against the implementation.
     * @param onDone Callback that receives this worker.
     */
    constructor(id: number, config: AgentConfig, client: HarmonyApiClient, _userEmail: string, onDone: (worker: ReviewWorker) => void);
    /** String tag for this worker. */
    get tag(): string;
    /** Whether the worker is idle. */
    get isIdle(): boolean;
    private get reviewPort();
    /** Whether the worker is active. */
    get isActive(): boolean;
    /**
     * Begin reviewing a card and run the whole lifecycle:
     * PREPARING → REVIEWING → COMPLETING → IDLE
     */
    run(card: Card, column: Column, labels: Label[], subtasks: Subtask[]): Promise<void>;
    /**
     * Suspend the running Claude process (SIGSTOP) to pause the review.
     */
    pause(): Promise<void>;
    /**
     * Continue a paused Claude process (SIGCONT).
     */
    resume(): Promise<void>;
    /**
     * Abort the current review, sending escalating signals to both processes.
     */
    cancel(): Promise<void>;
    private checkDevServer;
    private spawnClaude;
    private waitForExit;
    private killDevServer;
    private cleanup;
}
|