@gethmy/agent 1.7.0 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/dist/cli.js +6376 -205
- package/dist/index.js +6206 -341
- package/package.json +2 -2
- package/dist/board-helpers.d.ts +0 -31
- package/dist/board-helpers.js +0 -150
- package/dist/budget.d.ts +0 -47
- package/dist/budget.js +0 -161
- package/dist/cli.d.ts +0 -16
- package/dist/completion.d.ts +0 -32
- package/dist/completion.js +0 -304
- package/dist/config-validation.d.ts +0 -23
- package/dist/config-validation.js +0 -77
- package/dist/config.d.ts +0 -23
- package/dist/config.js +0 -103
- package/dist/episode-writer.d.ts +0 -84
- package/dist/episode-writer.js +0 -232
- package/dist/git-pr.d.ts +0 -38
- package/dist/git-pr.js +0 -399
- package/dist/http-server.d.ts +0 -79
- package/dist/http-server.js +0 -114
- package/dist/index.d.ts +0 -5
- package/dist/log.d.ts +0 -34
- package/dist/log.js +0 -100
- package/dist/merge-monitor.d.ts +0 -23
- package/dist/merge-monitor.js +0 -169
- package/dist/pm.d.ts +0 -14
- package/dist/pm.js +0 -63
- package/dist/pool.d.ts +0 -70
- package/dist/pool.js +0 -258
- package/dist/process-group.d.ts +0 -26
- package/dist/process-group.js +0 -72
- package/dist/progress-tracker.d.ts +0 -79
- package/dist/progress-tracker.js +0 -442
- package/dist/prompt.d.ts +0 -18
- package/dist/prompt.js +0 -117
- package/dist/queue.d.ts +0 -39
- package/dist/queue.js +0 -100
- package/dist/reconcile.d.ts +0 -35
- package/dist/reconcile.js +0 -174
- package/dist/recovery.d.ts +0 -30
- package/dist/recovery.js +0 -141
- package/dist/review-completion.d.ts +0 -40
- package/dist/review-completion.js +0 -474
- package/dist/review-knowledge.d.ts +0 -14
- package/dist/review-knowledge.js +0 -89
- package/dist/review-prompt.d.ts +0 -12
- package/dist/review-prompt.js +0 -103
- package/dist/review-worker.d.ts +0 -56
- package/dist/review-worker.js +0 -638
- package/dist/review-worktree.d.ts +0 -12
- package/dist/review-worktree.js +0 -95
- package/dist/run-log.d.ts +0 -6
- package/dist/run-log.js +0 -19
- package/dist/startup-banner.d.ts +0 -29
- package/dist/startup-banner.js +0 -143
- package/dist/state-store.d.ts +0 -88
- package/dist/state-store.js +0 -239
- package/dist/stream-parser-selftest.d.ts +0 -9
- package/dist/stream-parser-selftest.js +0 -97
- package/dist/stream-parser.d.ts +0 -43
- package/dist/stream-parser.js +0 -174
- package/dist/transitions.d.ts +0 -57
- package/dist/transitions.js +0 -131
- package/dist/types.d.ts +0 -140
- package/dist/types.js +0 -79
- package/dist/verification.d.ts +0 -39
- package/dist/verification.js +0 -317
- package/dist/watcher.d.ts +0 -53
- package/dist/watcher.js +0 -153
- package/dist/worker.d.ts +0 -53
- package/dist/worker.js +0 -464
- package/dist/worktree-gc.d.ts +0 -67
- package/dist/worktree-gc.js +0 -245
- package/dist/worktree.d.ts +0 -18
- package/dist/worktree.js +0 -177
|
@@ -1,474 +0,0 @@
|
|
|
1
|
-
import { closeSync, fstatSync, openSync, readFileSync, readSync, statSync } from "node:fs";
|
|
2
|
-
import { addLabelByName, moveCardToColumn } from "./board-helpers.js";
|
|
3
|
-
import { buildTokenPayload } from "./completion.js";
|
|
4
|
-
import { backfillReviewVerdict, findLatestImplementEpisode, writeEpisode, } from "./episode-writer.js";
|
|
5
|
-
import { createPullRequest, detectGitProvider, getBranchWebUrl, pushBranch, renameRemoteBranch, } from "./git-pr.js";
|
|
6
|
-
import { log } from "./log.js";
|
|
7
|
-
import { NEED_REVIEW_LABEL, NEED_REVIEW_LABEL_COLOR, } from "./types.js";
|
|
8
|
-
import { cleanupWorktree } from "./worktree.js";
|
|
9
|
-
// Tag passed as the first argument to every log call in this module.
const TAG = "review-completion";
// Upper bound applied per severity bucket when a rejected review is posted.
const MAX_FINDINGS = 10;
// Start of the review summary block appended to card descriptions; also the
// anchor used to strip a previous summary.
const REVIEW_MARKER = "---\n**Review:";
// Default number of trailing run-log bytes surfaced on a parse error.
const RUN_LOG_TAIL_BYTES = 2048;
/**
 * Read the last `bytes` bytes of a file as UTF-8.
 *
 * Only the tail is read from disk (positioned read), so a very large run log
 * is never loaded into memory in full. When the tail starts in the middle of
 * a multi-byte UTF-8 sequence, the orphaned continuation bytes are dropped so
 * the result does not begin with a replacement character.
 *
 * @param path  File to read.
 * @param bytes Maximum number of trailing bytes to return.
 * @returns The decoded tail, or null when the file is empty or on any IO
 *          failure — the parse-error surfacing is a best-effort diagnostic
 *          and must not throw.
 */
function tailRunLog(path, bytes = RUN_LOG_TAIL_BYTES) {
    let fd;
    try {
        fd = openSync(path, "r");
        const size = fstatSync(fd).size;
        if (size === 0)
            return null;
        const length = Math.min(size, Math.max(0, Math.floor(bytes)));
        const offset = size - length;
        const buf = Buffer.alloc(length);
        let filled = 0;
        // readSync may return fewer bytes than requested; loop until done or EOF.
        while (filled < length) {
            const n = readSync(fd, buf, filled, length - filled, offset + filled);
            if (n === 0)
                break;
            filled += n;
        }
        let begin = 0;
        if (offset > 0) {
            // 10xxxxxx bytes are UTF-8 continuation bytes; at most 3 can be orphaned.
            while (begin < filled && begin < 3 && (buf[begin] & 0xc0) === 0x80)
                begin++;
        }
        return buf.subarray(begin, filled).toString("utf-8");
    }
    catch {
        return null;
    }
    finally {
        if (fd !== undefined) {
            try {
                closeSync(fd);
            }
            catch {
                // best-effort close; nothing useful to do on failure
            }
        }
    }
}
|
|
30
|
-
/**
 * Extract structured fields from a parsed JSON object into a ReviewResult.
 *
 * Enumerated string fields (verdict, finding severity, scope status) are
 * trimmed and lower-cased before matching, so "Approved" or "CRITICAL" are
 * recognised. This mirrors the case-insensitive regex fallback in
 * parseReviewOutput.
 *
 * Defaults for unrecognised values:
 *  - verdict  → "rejected"
 *  - severity → "major"
 *  - scopeCheck.status → "clean"
 *
 * @param parsed Object produced by JSON.parse on the review output.
 * @returns `{ verdict, summary, scopeCheck, findings }`; `summary` is capped
 *          at 2000 characters, `scopeCheck` is undefined unless the input has
 *          an object with a `status` key, and findings without a `title` key
 *          are dropped.
 */
function extractResult(parsed) {
    const normalize = (value) => typeof value === "string" ? value.trim().toLowerCase() : value;
    const rawVerdict = normalize(parsed.verdict);
    const verdict = rawVerdict === "approved" || rawVerdict === "rejected"
        ? rawVerdict
        : "rejected";
    const toSeverity = (value) => {
        const severity = normalize(value);
        return severity === "critical" || severity === "minor" ? severity : "major";
    };
    const findings = Array.isArray(parsed.findings)
        ? parsed.findings
            .filter((f) => typeof f === "object" && f !== null && "title" in f)
            .map((f) => ({
            severity: toSeverity(f.severity),
            title: String(f.title ?? "Untitled finding"),
            description: String(f.description ?? ""),
            category: f.category ? String(f.category) : undefined,
            location: f.location ? String(f.location) : undefined,
        }))
        : [];
    let scopeCheck;
    if (parsed.scopeCheck &&
        typeof parsed.scopeCheck === "object" &&
        "status" in parsed.scopeCheck) {
        const status = normalize(parsed.scopeCheck.status);
        scopeCheck = {
            status: ["clean", "drift", "missing"].includes(status) ? status : "clean",
            notes: parsed.scopeCheck.notes
                ? String(parsed.scopeCheck.notes)
                : undefined,
        };
    }
    return {
        verdict,
        summary: String(parsed.summary ?? "").slice(0, 2000),
        scopeCheck,
        findings,
    };
}
|
|
71
|
-
/**
 * Parse Claude's review output into a structured ReviewResult.
 *
 * Tries multiple extraction strategies in order; within each strategy the
 * LAST matching occurrence wins:
 *   1. ```json ... ``` fenced block (what the prompt asks for)
 *   2. Any top-level `{ ... }` span that parses as JSON and has a "verdict" key
 *   3. Regex for a bare `"verdict": "approved|rejected"` anywhere — lossy
 *      (findings and summary are dropped) but keeps the pipeline moving
 *   4. Falls back to verdict: "error" — keeps the card in Review instead of
 *      bouncing it to To Do for a parse failure that isn't a code quality signal.
 *
 * Limitation: the brace scan in strategy 2 is not string-aware, so an
 * unbalanced brace inside a JSON string value can hide that object from it;
 * strategy 3 then still recovers the verdict.
 *
 * @param stdout Raw text emitted by the review agent.
 * @returns A ReviewResult; never throws on malformed input.
 */
export function parseReviewOutput(stdout) {
    // Parse `text` and, when it is an object carrying "verdict", convert it.
    const tryExtract = (text, source) => {
        try {
            const parsed = JSON.parse(text);
            if (parsed && typeof parsed === "object" && "verdict" in parsed) {
                log.debug(TAG, `Parsed review output from ${source}`);
                return extractResult(parsed);
            }
        }
        catch {
            // not valid JSON — caller moves on to the next candidate
        }
        return null;
    };
    // Strategy 1: fenced ```json blocks, newest first.
    const fencedBlocks = [...stdout.matchAll(/```json\s*([\s\S]*?)```/g)];
    for (let i = fencedBlocks.length - 1; i >= 0; i--) {
        const fromFence = tryExtract(fencedBlocks[i][1].trim(), "fenced JSON block");
        if (fromFence)
            return fromFence;
    }
    // Strategy 2: collect every balanced top-level { ... } span, then take the
    // last one that parses AND contains "verdict".
    const candidates = [];
    let depth = 0;
    let start = -1;
    for (let i = 0; i < stdout.length; i++) {
        const ch = stdout[i];
        if (ch === "{") {
            if (depth === 0)
                start = i;
            depth++;
        }
        else if (ch === "}") {
            // A stray closer outside any object must not push depth below zero,
            // otherwise no later "{" would ever be seen at depth 0 again.
            if (depth === 0)
                continue;
            depth--;
            if (depth === 0 && start !== -1) {
                candidates.push(stdout.slice(start, i + 1));
                start = -1;
            }
        }
    }
    for (let i = candidates.length - 1; i >= 0; i--) {
        const fromObject = tryExtract(candidates[i], "raw JSON object");
        if (fromObject)
            return fromObject;
    }
    // Strategy 3: bare verdict declaration. Loses findings/summary but preserves
    // the approve/reject signal so the pipeline can make progress instead of
    // looping on "error". Last occurrence wins, consistent with strategies 1-2.
    const verdictMatches = [
        ...stdout.matchAll(/"verdict"\s*:\s*"(approved|rejected)"/gi),
    ];
    const verdictMatch = verdictMatches[verdictMatches.length - 1];
    if (verdictMatch) {
        log.warn(TAG, `Parsed verdict via regex fallback — findings lost (${verdictMatch[1]})`);
        return {
            verdict: verdictMatch[1].toLowerCase(),
            summary: "Parsed via regex fallback — original JSON was malformed. Check run log.",
            findings: [],
        };
    }
    // Strategy 4: nothing parseable — return error verdict so the card stays in Review
    log.warn(TAG, "Failed to parse review JSON output — returning error verdict (card stays in Review)");
    return {
        verdict: "error",
        summary: stdout.slice(0, 500),
        findings: [],
    };
}
|
|
151
|
-
/**
 * Get the current review cycle count from a card description.
 *
 * Looks for the marker `Review cycle: N/M` and returns N. The `/M` part is
 * required so that this reader recognises exactly the markers that
 * updateReviewCycleMarker writes and replaces; free text such as
 * "Review cycle: 3" is ignored.
 *
 * @param description Card description; may be empty, null or undefined.
 * @returns N from the first marker found, or 0 when there is none.
 */
function getReviewCycle(description) {
    if (!description)
        return 0;
    const match = description.match(/Review cycle:\s*(\d+)\/\d+/);
    return match ? parseInt(match[1], 10) : 0;
}
|
|
161
|
-
/**
 * Update the review cycle marker in the card description.
 *
 * Replaces the first `Review cycle: N/M` marker in place, or appends a new
 * one after a blank line when none exists. The "\n\n" prefix on the appended
 * form is intentional: the approved path in runReviewCompletion strips the
 * marker together with that prefix.
 *
 * The replacement is done with a replacer function so that `$` sequences in
 * the interpolated values are never interpreted as replacement patterns, and
 * so the description is scanned only once.
 *
 * @param description Current description text.
 * @param cycle       Cycle number to record (N).
 * @param maxCycles   Configured maximum (M).
 * @returns The description carrying the updated marker.
 */
function updateReviewCycleMarker(description, cycle, maxCycles) {
    const marker = `Review cycle: ${cycle}/${maxCycles}`;
    let replaced = false;
    const updated = description.replace(/Review cycle:\s*\d+\/\d+/, () => {
        replaced = true;
        return marker;
    });
    return replaced ? updated : `${description}\n\n${marker}`;
}
|
|
172
|
-
/**
 * Remove a previously posted review summary from a description.
 *
 * Everything from the first REVIEW_MARKER onward is discarded and trailing
 * whitespace before it is trimmed. A description without the marker is
 * returned untouched (no trimming).
 */
function stripReviewSummary(description) {
    const markerAt = description.indexOf(REVIEW_MARKER);
    return markerAt < 0
        ? description
        : description.slice(0, markerAt).trimEnd();
}
|
|
181
|
-
/** Render a caught value for a log line: an Error's message, otherwise the value itself. */
function describeError(err) {
    return err instanceof Error ? err.message : err;
}
/** Best-effort description write: a failed update is logged, never thrown. */
async function writeCardDescription(client, cardId, description) {
    try {
        await client.updateCard(cardId, { description });
    }
    catch (err) {
        log.error(TAG, `Failed to update description: ${describeError(err)}`);
    }
}
/** Format the optional "Scope: …" line shared by the approved and rejected summaries. */
function formatScopeLine(scopeCheck) {
    if (!scopeCheck)
        return "";
    const notes = scopeCheck.notes ? ` — ${scopeCheck.notes}` : "";
    return `\nScope: ${scopeCheck.status}${notes}`;
}
/** Write the review episode and back-fill its verdict onto the latest implement episode, if one is found. */
async function recordReviewEpisode(client, card, workspaceId, verdict, summary, sessionStats, agentSessionId) {
    const originalEpisodeId = await findLatestImplementEpisode(client, workspaceId, card.project_id, card.short_id);
    const reviewEpisodeId = await writeEpisode(client, {
        kind: "review",
        card,
        workspaceId,
        verdict,
        summary,
        cost: sessionStats?.cost ?? null,
        agentSessionId: agentSessionId ?? null,
        originalEpisodeId,
    });
    if (originalEpisodeId) {
        await backfillReviewVerdict(client, originalEpisodeId, verdict, reviewEpisodeId);
    }
}
/**
 * Post-review completion pipeline.
 *
 * Dispatches on `result.verdict`:
 *  - "error": adds the "Need Review" label, optionally posts the parse error
 *    with a run-log tail, ends the session as "paused", and leaves the card
 *    where it is.
 *  - "approved": pushes the branch, graduates it from the failed-attempt
 *    prefix to the approved prefix when both are configured, optionally opens
 *    a PR, adds the approved label, posts the summary, and ends the session
 *    as "completed".
 *  - otherwise (rejected): when the cycle limit is reached the card is moved
 *    to `config.review.moveToColumn` with a "needs human review" note;
 *    otherwise findings are posted (critical/major as linked cards, minor as
 *    subtasks), the cycle marker is bumped, the card is moved to
 *    `config.review.failColumn`, and the session ends as "failed".
 *
 * For approved/rejected verdicts a review episode is written when
 * `workspaceId` is set. The worktree is cleaned up at the end of every path
 * when `branchName` is truthy (it is falsy in local mode).
 *
 * @param client         Board API client (getCard, updateCard, createCard, …).
 * @param card           Card under review.
 * @param result         Parsed review result (verdict, summary, findings, scopeCheck).
 * @param config         Agent config; reads `config.review.*` and `config.worktree.*`.
 * @param worktreePath   Path of the review worktree.
 * @param branchName     Branch under review; falsy in local mode.
 * @param sessionStats   Session statistics forwarded to buildTokenPayload; `cost` is recorded on the episode.
 * @param runLogPath     Optional path of the run log, surfaced on parse errors.
 * @param workspaceId    Optional workspace id; episode writes are skipped without it.
 * @param agentSessionId Optional agent session id stored on the episode.
 * @param stateStore     Store used to record the failure summary on rejection.
 */
export async function runReviewCompletion(client, card, result, config, worktreePath, branchName, sessionStats, runLogPath, workspaceId, agentSessionId, stateStore) {
    // Re-fetch card for fresh description (avoids stale data from enqueue time)
    let freshDesc;
    try {
        const { card: fresh } = (await client.getCard(card.id));
        freshDesc = fresh.description || "";
    }
    catch {
        freshDesc = card.description || "";
    }
    const currentCycle = getReviewCycle(freshDesc) + 1;
    const maxCycles = config.review.maxReviewCycles;
    if (result.verdict === "error") {
        // Parse failure — not a code quality signal. Keep card in Review and
        // add the "Need Review" label so reconcile stops re-enqueueing it.
        // Without the label, the reconcile loop would respawn the review every
        // cycle and burn budget on the same unparseable output (see #122).
        log.warn(TAG, `#${card.short_id} review output unparseable — labelling "${NEED_REVIEW_LABEL}" for manual inspection`);
        try {
            await addLabelByName(client, card, NEED_REVIEW_LABEL, NEED_REVIEW_LABEL_COLOR);
        }
        catch (err) {
            log.warn(TAG, `Failed to add "${NEED_REVIEW_LABEL}" label: ${describeError(err)}`);
        }
        if (config.review.postFindings) {
            const baseDesc = stripReviewSummary(freshDesc);
            const rawTail = runLogPath ? tailRunLog(runLogPath) : null;
            // Log content routinely contains ```json fences from Claude's own
            // output; embedding it inside a 3-backtick fence would break the card's
            // markdown. Use a 4-backtick fence and downgrade any 4+-backtick runs.
            const runLogTail = rawTail
                ? rawTail.replace(/`{4,}/g, (_m) => "`".repeat(3))
                : null;
            const runLogHint = runLogPath
                ? `\nRun log: \`${runLogPath}\``
                : "\nRun log: (not captured)";
            const summary = [
                `\n\n${REVIEW_MARKER} Parse error**`,
                '\nThe review agent\'s output could not be parsed. Card stays in Review with the "Need Review" label — inspect the run log below to diagnose.',
                runLogHint,
                result.summary ? `\n\nRaw output (truncated):\n${result.summary}` : "",
                runLogTail
                    ? `\n\nRun log tail (last ${RUN_LOG_TAIL_BYTES}B):\n\`\`\`\`\n${runLogTail}\n\`\`\`\``
                    : "",
            ].join("");
            await writeCardDescription(client, card.id, baseDesc + summary);
        }
        await client.endAgentSession(card.id, {
            status: "paused",
            ...buildTokenPayload(sessionStats),
        });
        // Cleanup worktree but do NOT move the card
        if (branchName) {
            cleanupWorktree(worktreePath, branchName);
        }
        return;
    }
    if (result.verdict === "approved") {
        // Ensure branch is pushed (skip in local mode — no branch to push)
        let prUrl = null;
        let approvedBranch = branchName;
        if (branchName) {
            pushBranch(branchName, worktreePath);
            // Graduate the branch from `agent-attempts/*` to `agent/*` so the
            // approved PR opens on a clean ref. Renaming on origin is force-with-
            // lease + delete-old; the old ref is the same SHA, so no work is lost.
            const failedPrefix = config.worktree.failedBranchPrefix;
            const approvedPrefix = config.worktree.approvedBranchPrefix;
            if (failedPrefix &&
                approvedPrefix &&
                branchName.startsWith(failedPrefix)) {
                const newRef = `${approvedPrefix}${branchName.slice(failedPrefix.length)}`;
                try {
                    renameRemoteBranch(branchName, newRef, worktreePath);
                    approvedBranch = newRef;
                }
                catch (err) {
                    log.warn(TAG, `Branch rename failed (continuing on ${branchName}): ${describeError(err)}`);
                }
            }
            // Create PR if configured
            if (config.review.createPR && approvedBranch) {
                const provider = detectGitProvider(worktreePath);
                prUrl = createPullRequest(card, approvedBranch, worktreePath, config, provider);
            }
        }
        // Add "Ready to Merge" label
        await addLabelByName(client, card, config.review.approvedLabel, config.review.approvedLabelColor);
        // Post approval summary (card stays in Review, strip stale cycle marker)
        if (config.review.postFindings) {
            const baseDesc = stripReviewSummary(freshDesc).replace(/\n\nReview cycle:\s*\d+\/\d+/, "");
            const summaryParts = [
                `\n\n${REVIEW_MARKER} Approved**`,
                result.summary ? `\n${result.summary}` : "",
                formatScopeLine(result.scopeCheck),
                result.findings.length > 0
                    ? `\n${result.findings.length} minor finding(s) noted.`
                    : "",
            ];
            if (prUrl) {
                summaryParts.push(`\nPR: ${prUrl}`);
            }
            await writeCardDescription(client, card.id, baseDesc + summaryParts.join(""));
        }
        await client.endAgentSession(card.id, {
            status: "completed",
            progressPercent: 100,
            ...buildTokenPayload(sessionStats),
        });
        log.info(TAG, `#${card.short_id} approved${prUrl ? ` — PR: ${prUrl}` : ""} — labeled "${config.review.approvedLabel}"`);
    }
    else {
        // Rejected
        const criticalFindings = result.findings
            .filter((f) => f.severity === "critical")
            .slice(0, MAX_FINDINGS);
        const majorFindings = result.findings
            .filter((f) => f.severity === "major")
            .slice(0, MAX_FINDINGS);
        const linkedFindings = [...criticalFindings, ...majorFindings];
        const minorFindings = result.findings
            .filter((f) => f.severity === "minor")
            .slice(0, MAX_FINDINGS);
        // Check if this rejection reaches the max review cycle limit
        if (currentCycle >= maxCycles) {
            // The target column is configurable, so name it rather than assuming "Done".
            log.warn(TAG, `#${card.short_id} reached max review cycles (${maxCycles}), moving to "${config.review.moveToColumn}" with note`);
            await moveCardToColumn(client, card, config.review.moveToColumn);
            const baseDesc = stripReviewSummary(freshDesc);
            const summary = [
                `\n\n${REVIEW_MARKER} Needs human review**`,
                `\nReached max review cycles (${maxCycles}). Please review manually.`,
                result.summary ? `\n${result.summary}` : "",
            ].join("");
            await writeCardDescription(client, card.id, baseDesc + summary);
            await client.endAgentSession(card.id, {
                status: "completed",
                ...buildTokenPayload(sessionStats),
            });
            // Max-cycles rejection: the verdict still teaches "this approach kept
            // failing review" — write the episode + back-fill before exiting.
            if (workspaceId) {
                await recordReviewEpisode(client, card, workspaceId, "rejected", `Reached max review cycles (${maxCycles}). ${result.summary}`, sessionStats, agentSessionId);
            }
            if (branchName) {
                cleanupWorktree(worktreePath, branchName);
            }
            return;
        }
        // Post findings
        if (config.review.postFindings) {
            // Add critical + major findings as new linked cards (parallel)
            await Promise.all(linkedFindings.map(async (finding) => {
                try {
                    const locationLine = finding.location
                        ? `\n**Location:** ${finding.location}`
                        : "";
                    const newCard = await client.createCard(card.project_id, {
                        title: `[Review: ${finding.severity}] ${finding.title}`,
                        description: `Found during review of #${card.short_id} (${finding.severity}):\n\n${finding.description}${locationLine}`,
                    });
                    const newCardId = newCard?.card?.id;
                    if (newCardId) {
                        await client.addLinkToCard(card.id, newCardId, "relates_to");
                    }
                }
                catch (err) {
                    log.error(TAG, `Failed to create finding card: ${describeError(err)}`);
                }
            }));
            // Add minor findings as subtasks (parallel)
            await Promise.all(minorFindings.map(async (finding) => {
                try {
                    const title = finding.title.length > 120
                        ? `${finding.title.slice(0, 117)}...`
                        : finding.title;
                    await client.createSubtask(card.id, title);
                }
                catch (err) {
                    log.error(TAG, `Failed to create subtask: ${describeError(err)}`);
                }
            }));
            // Update description with review summary and cycle counter
            const baseDesc = stripReviewSummary(freshDesc);
            const updatedDesc = updateReviewCycleMarker(baseDesc, currentCycle, maxCycles);
            const summary = [
                `\n\n${REVIEW_MARKER} Rejected**`,
                result.summary ? `\n${result.summary}` : "",
                formatScopeLine(result.scopeCheck),
                `\n${criticalFindings.length} critical, ${majorFindings.length} major, ${minorFindings.length} minor finding(s).`,
            ].join("");
            await writeCardDescription(client, card.id, updatedDesc + summary);
        }
        // Move back to failColumn (To Do) for re-implementation
        await moveCardToColumn(client, card, config.review.failColumn);
        const failureSummary = `Review rejected (cycle ${currentCycle}/${maxCycles}): ${criticalFindings.length} critical, ${majorFindings.length} major, ${minorFindings.length} minor`;
        const recoveryBranch = branchName ?? undefined;
        const recoveryUrl = branchName
            ? getBranchWebUrl(branchName, worktreePath)
            : null;
        try {
            await stateStore.recordFailureSummary(card.id, {
                summary: failureSummary,
                reason: "review",
                recoveryBranch,
            });
        }
        catch (err) {
            log.debug(TAG, `recordFailureSummary failed: ${describeError(err)}`);
        }
        if (recoveryBranch) {
            log.info(TAG, `#${card.short_id} recovery branch ${recoveryBranch}${recoveryUrl ? ` (${recoveryUrl})` : ""}`);
        }
        await client.endAgentSession(card.id, {
            status: "failed",
            failureReason: "review",
            failureSummary,
            recoveryBranch,
            ...buildTokenPayload(sessionStats),
        });
        log.info(TAG, `#${card.short_id} rejected (cycle ${currentCycle}/${maxCycles}) — moved to "${config.review.failColumn}"`);
    }
    // Episode write + verdict back-fill (Phase 1.5). Runs for approved or
    // rejected verdicts only — "error" verdicts return early above.
    if (workspaceId &&
        (result.verdict === "approved" || result.verdict === "rejected")) {
        await recordReviewEpisode(client, card, workspaceId, result.verdict, result.summary, sessionStats, agentSessionId);
    }
    // Cleanup worktree (skip in local mode — no worktree to clean)
    if (branchName) {
        cleanupWorktree(worktreePath, branchName);
    }
}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Embedded review and QA knowledge for the review worker.
|
|
3
|
-
* Condensed from the /review checklist and /qa skill.
|
|
4
|
-
*/
|
|
5
|
-
/**
 * Static system prompt with review methodology.
 * Covers two-pass review categories, suppressions, and severity classification.
 *
 * The declaration carries the full literal type of the prompt; the literal
 * must stay on a single physical line because a double-quoted string cannot
 * contain a raw line break.
 */
export declare const REVIEW_SYSTEM_PROMPT = "You are a senior code reviewer. Follow this two-pass methodology strictly.\nReport findings; do NOT fix them. This is a read-only review.\n\n## Two-Pass Review\n\n### Pass 1 \u2014 CRITICAL (highest severity)\n\n**SQL & Data Safety**\n- String interpolation in SQL \u2014 use parameterized queries / prepared statements\n- TOCTOU races: check-then-set patterns that should be atomic WHERE + UPDATE\n\n**Race Conditions & Concurrency**\n- Read-check-write without uniqueness constraint or duplicate key handling\n- Status transitions without atomic WHERE old_status UPDATE SET new_status\n- Unsafe HTML rendering (dangerouslySetInnerHTML, v-html) on user-controlled data (XSS)\n\n**LLM Output Trust Boundary**\n- LLM-generated values written to DB without format validation (EMAIL_REGEXP, URI.parse, .trim())\n- Structured tool output accepted without type/shape checks before database writes\n\n**Enum & Value Completeness**\n- When the diff introduces a new enum/status/type value, trace it through every consumer\n- Check allowlists, filter arrays, and case/if-elsif chains for the new value\n- Use Grep to find all references to sibling values and Read each match \u2014 look OUTSIDE the diff\n\n### Pass 2 \u2014 INFORMATIONAL (lower severity)\n\n**Conditional Side Effects**\n- Code paths that branch but forget a side effect on one branch (e.g., promoting without attaching URL)\n\n**Dead Code & Consistency**\n- Variables assigned but never read\n- Comments/docstrings describing old behavior after code changed\n\n**Test Gaps**\n- Missing negative-path tests for new error handling\n- Security enforcement features without integration tests\n\n**Completeness Gaps**\n- Partial enum handling, incomplete error paths, missing edge cases that are straightforward to add\n\n**View/Frontend**\n- O(n*m) lookups in views (Array.find in a loop instead of Map/index)\n- Inline styles re-parsed every render\n\n## Severity Classification\n\n- **critical**: SQL safety, race conditions, XSS, LLM trust boundary violations, enum completeness gaps causing runtime errors\n- **major**: Missing requirements, broken functionality, significant completeness gaps, conditional side effects\n- **minor**: Dead code, stale comments, test gaps, minor view issues, cosmetic completeness gaps\n\n## Suppressions \u2014 DO NOT flag these\n\n- Redundancy that aids readability (e.g., present? redundant with length > 20)\n- \"Add a comment explaining why this threshold was chosen\" \u2014 thresholds change, comments rot\n- Consistency-only changes (wrapping a value to match how another constant is guarded)\n- Regex edge cases when input is constrained and the edge case never occurs in practice\n- Eval threshold changes \u2014 these are tuned empirically\n- Harmless no-ops (e.g., .reject on an element never in the array)\n- ANYTHING already addressed in the diff you are reviewing \u2014 read the FULL diff before flagging";
|
|
10
|
-
/**
 * Markdown checklist used for browser-based visual verification.
 * A condensed form of the /qa skill's per-page exploration checklist.
 */
export declare const QA_VISUAL_CHECKLIST = "## Visual QA Checklist\n\nFor each page affected by the changes:\n\n1. **Visual scan** \u2014 Screenshot the page. Check for layout breaks, broken images, alignment issues, z-index problems.\n2. **Interactive elements** \u2014 Click every button, link, and control. Does each do what it says?\n3. **Forms** \u2014 Fill and submit. Test empty submission, invalid data, edge cases.\n4. **Navigation** \u2014 Check all paths in/out. Breadcrumbs, back button, deep links.\n5. **States** \u2014 Check empty state, loading state, error state, overflow state.\n6. **Console** \u2014 Check for JS exceptions, failed network requests (4xx/5xx), CORS errors after interactions.\n7. **Responsiveness** \u2014 If the change is visual, check mobile viewport (375px).\n\n### SPA-Specific (React/Vite)\n- Use snapshot for navigation \u2014 client-side routes may not appear in link lists.\n- Check for stale state: navigate away and back \u2014 does data refresh correctly?\n- Test browser back/forward \u2014 does the app handle history correctly?\n- Watch for hydration errors or layout shifts after dynamic content loads.";
|
package/dist/review-knowledge.js
DELETED
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Embedded review and QA knowledge for the review worker.
|
|
3
|
-
* Condensed from the /review checklist and /qa skill.
|
|
4
|
-
*/
|
|
5
|
-
/**
 * Review methodology handed to the review agent as a static system prompt.
 * Sections: the two-pass review categories, the severity classification,
 * and the list of suppressions (things the reviewer must not flag).
 */
export const REVIEW_SYSTEM_PROMPT = `You are a senior code reviewer. Follow this two-pass methodology strictly.
Report findings; do NOT fix them. This is a read-only review.

## Two-Pass Review

### Pass 1 — CRITICAL (highest severity)

**SQL & Data Safety**
- String interpolation in SQL — use parameterized queries / prepared statements
- TOCTOU races: check-then-set patterns that should be atomic WHERE + UPDATE

**Race Conditions & Concurrency**
- Read-check-write without uniqueness constraint or duplicate key handling
- Status transitions without atomic WHERE old_status UPDATE SET new_status
- Unsafe HTML rendering (dangerouslySetInnerHTML, v-html) on user-controlled data (XSS)

**LLM Output Trust Boundary**
- LLM-generated values written to DB without format validation (EMAIL_REGEXP, URI.parse, .trim())
- Structured tool output accepted without type/shape checks before database writes

**Enum & Value Completeness**
- When the diff introduces a new enum/status/type value, trace it through every consumer
- Check allowlists, filter arrays, and case/if-elsif chains for the new value
- Use Grep to find all references to sibling values and Read each match — look OUTSIDE the diff

### Pass 2 — INFORMATIONAL (lower severity)

**Conditional Side Effects**
- Code paths that branch but forget a side effect on one branch (e.g., promoting without attaching URL)

**Dead Code & Consistency**
- Variables assigned but never read
- Comments/docstrings describing old behavior after code changed

**Test Gaps**
- Missing negative-path tests for new error handling
- Security enforcement features without integration tests

**Completeness Gaps**
- Partial enum handling, incomplete error paths, missing edge cases that are straightforward to add

**View/Frontend**
- O(n*m) lookups in views (Array.find in a loop instead of Map/index)
- Inline styles re-parsed every render

## Severity Classification

- **critical**: SQL safety, race conditions, XSS, LLM trust boundary violations, enum completeness gaps causing runtime errors
- **major**: Missing requirements, broken functionality, significant completeness gaps, conditional side effects
- **minor**: Dead code, stale comments, test gaps, minor view issues, cosmetic completeness gaps

## Suppressions — DO NOT flag these

- Redundancy that aids readability (e.g., present? redundant with length > 20)
- "Add a comment explaining why this threshold was chosen" — thresholds change, comments rot
- Consistency-only changes (wrapping a value to match how another constant is guarded)
- Regex edge cases when input is constrained and the edge case never occurs in practice
- Eval threshold changes — these are tuned empirically
- Harmless no-ops (e.g., .reject on an element never in the array)
- ANYTHING already addressed in the diff you are reviewing — read the FULL diff before flagging`;
|
|
69
|
-
/**
 * Markdown checklist used for browser-based visual verification.
 * A condensed form of the /qa skill's per-page exploration checklist.
 */
export const QA_VISUAL_CHECKLIST = `## Visual QA Checklist

For each page affected by the changes:

1. **Visual scan** — Screenshot the page. Check for layout breaks, broken images, alignment issues, z-index problems.
2. **Interactive elements** — Click every button, link, and control. Does each do what it says?
3. **Forms** — Fill and submit. Test empty submission, invalid data, edge cases.
4. **Navigation** — Check all paths in/out. Breadcrumbs, back button, deep links.
5. **States** — Check empty state, loading state, error state, overflow state.
6. **Console** — Check for JS exceptions, failed network requests (4xx/5xx), CORS errors after interactions.
7. **Responsiveness** — If the change is visual, check mobile viewport (375px).

### SPA-Specific (React/Vite)
- Use snapshot for navigation — client-side routes may not appear in link lists.
- Check for stale state: navigate away and back — does data refresh correctly?
- Test browser back/forward — does the app handle history correctly?
- Watch for hydration errors or layout shifts after dynamic content loads.`;
|
package/dist/review-prompt.d.ts
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import type { EnrichedCard } from "./types.js";
|
|
2
|
-
/**
 * Produce the review agent's static system prompt: review methodology,
 * checklist, and QA guidance. The result is handed to the Claude CLI via
 * `--append-system-prompt`.
 */
export declare function buildReviewSystemPrompt(): string;
|
|
8
|
-
/**
 * Produce the per-card user prompt for the review agent. It carries the
 * diff, the card's requirements, and the structured review steps.
 */
export declare function buildReviewUserPrompt(enriched: EnrichedCard, branchName: string | null, worktreePath: string, previewUrl: string, diff: string, baseBranch: string): string;
|