mstro-app 0.4.13 → 0.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/services/file-explorer-ops.d.ts +1 -1
- package/dist/server/services/file-explorer-ops.d.ts.map +1 -1
- package/dist/server/services/file-explorer-ops.js +7 -2
- package/dist/server/services/file-explorer-ops.js.map +1 -1
- package/dist/server/services/plan/composer.d.ts +1 -1
- package/dist/server/services/plan/composer.d.ts.map +1 -1
- package/dist/server/services/plan/composer.js +3 -2
- package/dist/server/services/plan/composer.js.map +1 -1
- package/dist/server/services/plan/executor.d.ts +5 -0
- package/dist/server/services/plan/executor.d.ts.map +1 -1
- package/dist/server/services/plan/executor.js +32 -1
- package/dist/server/services/plan/executor.js.map +1 -1
- package/dist/server/services/plan/parser-core.d.ts.map +1 -1
- package/dist/server/services/plan/parser-core.js +1 -0
- package/dist/server/services/plan/parser-core.js.map +1 -1
- package/dist/server/services/plan/review-gate.d.ts +2 -0
- package/dist/server/services/plan/review-gate.d.ts.map +1 -1
- package/dist/server/services/plan/review-gate.js +25 -3
- package/dist/server/services/plan/review-gate.js.map +1 -1
- package/dist/server/services/plan/types.d.ts +2 -0
- package/dist/server/services/plan/types.d.ts.map +1 -1
- package/dist/server/services/websocket/file-explorer-handlers.js +2 -1
- package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -1
- package/dist/server/services/websocket/git-log-handlers.d.ts.map +1 -1
- package/dist/server/services/websocket/git-log-handlers.js +29 -9
- package/dist/server/services/websocket/git-log-handlers.js.map +1 -1
- package/dist/server/services/websocket/git-worktree-handlers.js +8 -0
- package/dist/server/services/websocket/git-worktree-handlers.js.map +1 -1
- package/dist/server/services/websocket/handler.d.ts.map +1 -1
- package/dist/server/services/websocket/handler.js +5 -3
- package/dist/server/services/websocket/handler.js.map +1 -1
- package/dist/server/services/websocket/plan-execution-handlers.d.ts.map +1 -1
- package/dist/server/services/websocket/plan-execution-handlers.js +4 -1
- package/dist/server/services/websocket/plan-execution-handlers.js.map +1 -1
- package/dist/server/services/websocket/plan-helpers.js +1 -1
- package/dist/server/services/websocket/plan-helpers.js.map +1 -1
- package/dist/server/services/websocket/quality-handlers.d.ts +1 -1
- package/dist/server/services/websocket/quality-handlers.d.ts.map +1 -1
- package/dist/server/services/websocket/quality-handlers.js +67 -14
- package/dist/server/services/websocket/quality-handlers.js.map +1 -1
- package/dist/server/services/websocket/quality-persistence.d.ts +2 -0
- package/dist/server/services/websocket/quality-persistence.d.ts.map +1 -1
- package/dist/server/services/websocket/quality-persistence.js +33 -2
- package/dist/server/services/websocket/quality-persistence.js.map +1 -1
- package/dist/server/services/websocket/quality-review-agent.d.ts +33 -0
- package/dist/server/services/websocket/quality-review-agent.d.ts.map +1 -1
- package/dist/server/services/websocket/quality-review-agent.js +360 -72
- package/dist/server/services/websocket/quality-review-agent.js.map +1 -1
- package/dist/server/services/websocket/quality-types.d.ts +3 -0
- package/dist/server/services/websocket/quality-types.d.ts.map +1 -1
- package/dist/server/services/websocket/quality-types.js.map +1 -1
- package/package.json +1 -1
- package/server/services/file-explorer-ops.ts +7 -2
- package/server/services/plan/composer.ts +3 -1
- package/server/services/plan/executor.ts +32 -1
- package/server/services/plan/parser-core.ts +1 -0
- package/server/services/plan/review-gate.ts +28 -3
- package/server/services/plan/types.ts +2 -0
- package/server/services/websocket/file-explorer-handlers.ts +2 -1
- package/server/services/websocket/git-log-handlers.ts +30 -9
- package/server/services/websocket/git-worktree-handlers.ts +9 -0
- package/server/services/websocket/handler.ts +6 -3
- package/server/services/websocket/plan-execution-handlers.ts +4 -1
- package/server/services/websocket/plan-helpers.ts +1 -1
- package/server/services/websocket/quality-handlers.ts +69 -9
- package/server/services/websocket/quality-persistence.ts +32 -2
- package/server/services/websocket/quality-review-agent.ts +427 -72
- package/server/services/websocket/quality-types.ts +3 -0
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
* Builds the review prompt, runs the agent, parses findings, and persists results.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
11
|
+
import { isAbsolute, join } from 'node:path';
|
|
10
12
|
import { runWithFileLogger } from '../../cli/headless/headless-logger.js';
|
|
11
13
|
import { HeadlessRunner } from '../../cli/headless/index.js';
|
|
12
14
|
import type { ToolUseEvent } from '../../cli/headless/types.js';
|
|
@@ -25,6 +27,9 @@ export interface CodeReviewFinding {
|
|
|
25
27
|
title: string;
|
|
26
28
|
description: string;
|
|
27
29
|
suggestion?: string;
|
|
30
|
+
evidence?: string;
|
|
31
|
+
verified?: boolean;
|
|
32
|
+
verificationNote?: string;
|
|
28
33
|
}
|
|
29
34
|
|
|
30
35
|
// ── Prompt ────────────────────────────────────────────────────
|
|
@@ -66,11 +71,35 @@ ${cliFindingsSection}
|
|
|
66
71
|
### Performance
|
|
67
72
|
- N+1 queries, unnecessary re-renders, missing memoization, blocking I/O in hot paths, unbounded data structures, missing pagination
|
|
68
73
|
|
|
74
|
+
## CRITICAL — Structured Evidence Requirement
|
|
75
|
+
|
|
76
|
+
For EACH finding, you MUST provide structured evidence that grounds the finding in actual code. This is required to prevent false positives.
|
|
77
|
+
|
|
78
|
+
For each finding, use this reasoning process:
|
|
79
|
+
|
|
80
|
+
1. **PREMISE**: State the observable fact from the code. Quote the exact code you see.
|
|
81
|
+
2. **CONTEXT**: What is the surrounding code doing? Are there guards, fixes, or patterns elsewhere that might handle this?
|
|
82
|
+
3. **COUNTER-CHECK**: Actively look for evidence that CONTRADICTS your finding. Check for:
|
|
83
|
+
- Guards or validation earlier in the call chain
|
|
84
|
+
- Error handling wrapping the code
|
|
85
|
+
- Configuration that changes behavior (e.g., NODE_ENV checks)
|
|
86
|
+
- Comments explaining intentional design choices
|
|
87
|
+
4. **CONCLUSION**: Only report the finding if you could not find contradicting evidence.
|
|
88
|
+
|
|
89
|
+
### Common False Positive Patterns to AVOID
|
|
90
|
+
|
|
91
|
+
- Claiming a function uses API X when it actually uses API Y (e.g., claiming Math.random() when code uses crypto.randomInt()) — ALWAYS quote the actual function call
|
|
92
|
+
- Claiming a header/value is leaked when code already deletes/filters it — READ the full function
|
|
93
|
+
- Claiming there's no guard when a condition check exists nearby — READ surrounding lines
|
|
94
|
+
- Claiming N fields/methods when the actual count differs — COUNT explicitly
|
|
95
|
+
- Claiming a resource leaks when cleanup exists in a different handler — SEARCH for the cleanup code
|
|
96
|
+
|
|
69
97
|
## Rules
|
|
70
98
|
|
|
71
|
-
- Only report findings you are >
|
|
99
|
+
- Only report findings you are >90% confident about after completing the counter-check step.
|
|
72
100
|
- Focus on architecture, SOLID violations, bugs, and security over style nits.
|
|
73
101
|
- Each finding MUST reference a specific file and line number. Do not report vague or file-level issues.
|
|
102
|
+
- Each finding MUST include an "evidence" field with the exact code snippet (1-5 lines) proving the issue exists.
|
|
74
103
|
- Limit to the 25 most important findings, ranked by severity.
|
|
75
104
|
- Do NOT modify any files. This is a read-only review.
|
|
76
105
|
- Be HONEST about the overall quality. A codebase with serious issues should score low.
|
|
@@ -103,7 +132,8 @@ After your analysis, output EXACTLY one JSON code block with your findings. No o
|
|
|
103
132
|
"line": 42,
|
|
104
133
|
"title": "Short title describing the issue",
|
|
105
134
|
"description": "What the problem is and why it matters.",
|
|
106
|
-
"suggestion": "How to fix it."
|
|
135
|
+
"suggestion": "How to fix it.",
|
|
136
|
+
"evidence": "const token = Math.random().toString(36) // exact code from file proving the issue"
|
|
107
137
|
}
|
|
108
138
|
],
|
|
109
139
|
"summary": "Brief 1-2 sentence summary of overall code quality."
|
|
@@ -126,6 +156,7 @@ function normalizeFinding(f: Record<string, unknown>): CodeReviewFinding | null
|
|
|
126
156
|
title: f.title as string,
|
|
127
157
|
description: typeof f.description === 'string' ? f.description : '',
|
|
128
158
|
suggestion: typeof f.suggestion === 'string' ? f.suggestion : undefined,
|
|
159
|
+
evidence: typeof f.evidence === 'string' ? f.evidence : undefined,
|
|
129
160
|
};
|
|
130
161
|
}
|
|
131
162
|
|
|
@@ -167,6 +198,276 @@ export function parseCodeReviewResponse(response: string): CodeReviewResult {
|
|
|
167
198
|
}
|
|
168
199
|
}
|
|
169
200
|
|
|
201
|
+
// ── Phase 3: Deterministic post-validation ───────────────────
|
|
202
|
+
//
|
|
203
|
+
// Fast grep/file-based checks that catch hallucinated references
|
|
204
|
+
// before the more expensive LLM verification pass.
|
|
205
|
+
|
|
206
|
+
/** Outcome of the deterministic, file-based checks for one finding. */
interface ValidationResult {
  /** The finding that was checked, passed through unchanged. */
  finding: CodeReviewFinding;
  /** False when one of the checks concluded the finding does not match the file. */
  valid: boolean;
  /** Explanation of the failed check; the checks only set it when `valid` is false. */
  reason?: string;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Turn the file reference of a finding into a path that can be opened.
 *
 * @param dirPath Directory that relative references are resolved against.
 * @param filePath File reference as reported in the finding.
 * @returns `filePath` untouched when it is absolute, otherwise `dirPath` joined with it.
 */
function resolveFilePath(dirPath: string, filePath: string): string {
  return isAbsolute(filePath) ? filePath : join(dirPath, filePath);
}
|
|
216
|
+
|
|
217
|
+
/**
 * English words that often precede "(" in prose and must not be mistaken for
 * function names. Stored lowercase and compared case-insensitively, so that
 * "if (x)" is skipped just like "If (x)".
 */
const PROSE_WORDS_BEFORE_PAREN = new Set([
  'if', 'when', 'while', 'for', 'this', 'that', 'the', 'each',
  'uses', 'has', 'does', 'returns', 'takes', 'calls',
]);

/**
 * Extract keywords/identifiers that the finding's title and description claim exist in the code.
 *
 * Two sources are collected, in this order:
 *  1. names directly followed by "(" (e.g. `foo(`, `Math.random(`), except common
 *     English words / control keywords listed in PROSE_WORDS_BEFORE_PAREN;
 *  2. the contents of backtick-quoted spans, with brackets, braces, parentheses,
 *     semicolons and commas replaced by spaces, kept only when shorter than 60 characters.
 *
 * Duplicates are not removed.
 *
 * @param description Finding description text.
 * @param title Finding title text.
 * @returns The collected identifiers/snippets, call-style names first.
 */
function extractClaimedIdentifiers(description: string, title: string): string[] {
  const identifiers: string[] = [];
  const combined = `${title} ${description}`;

  // Match function calls: functionName(), ClassName.method()
  for (const match of combined.matchAll(/\b([a-zA-Z_$][\w.$]*)\s*\(/g)) {
    const name = match[1];
    // Lowercase before the lookup: "if (", "for (" and "while (" occur in nearly
    // every source file, so keeping them would make the later presence check meaningless.
    if (!PROSE_WORDS_BEFORE_PAREN.has(name.toLowerCase())) {
      identifiers.push(name);
    }
  }

  // Match backtick-quoted code: `someCode`
  for (const match of combined.matchAll(/`([^`]+)`/g)) {
    const inner = match[1].replace(/[()[\]{};,]/g, ' ').trim();
    if (inner && inner.length < 60) {
      identifiers.push(inner);
    }
  }

  return identifiers;
}
|
|
247
|
+
|
|
248
|
+
/**
 * Run the deterministic file-based checks over every finding and split the
 * findings into those that passed and those that failed.
 *
 * Findings are copied, never mutated. Passing findings get `verified: undefined`
 * (the later verification pass decides that flag); failing findings get
 * `verified: false` plus the failure reason in `verificationNote`.
 *
 * @param findings Findings produced by the review pass.
 * @param dirPath Directory that relative file references are resolved against.
 * @returns The two buckets (input order preserved) and their counts.
 */
export function validateFindings(
  findings: CodeReviewFinding[],
  dirPath: string,
): { validated: CodeReviewFinding[]; rejected: CodeReviewFinding[]; stats: { total: number; passed: number; failed: number } } {
  const passed: CodeReviewFinding[] = [];
  const failed: CodeReviewFinding[] = [];

  findings.forEach((candidate) => {
    const { valid, reason } = validateSingleFinding(candidate, dirPath);
    if (!valid) {
      failed.push({ ...candidate, verified: false, verificationNote: reason });
      return;
    }
    // Not marked as verified here; that is left to the verification pass.
    passed.push({ ...candidate, verified: undefined });
  });

  const stats = { total: findings.length, passed: passed.length, failed: failed.length };
  return { validated: passed, rejected: failed, stats };
}
|
|
274
|
+
|
|
275
|
+
/**
 * Read a file as UTF-8 text.
 *
 * @param filePath Path of the file to read.
 * @returns The file's text, or null when the read throws for any reason.
 */
function readFileContent(filePath: string): string | null {
  let text: string | null = null;
  try {
    text = readFileSync(filePath, 'utf-8');
  } catch {
    // Any read failure is reported to the caller as null.
  }
  return text;
}
|
|
282
|
+
|
|
283
|
+
/**
 * Anchored pattern that matches a token only when it is exactly one of the
 * listed JavaScript keywords/literals. Used to discard tokens that carry no
 * information about a specific file.
 */
const COMMON_KEYWORDS = new RegExp(
  `^(${[
    'const', 'let', 'var', 'function', 'return', 'import', 'export', 'from', 'this',
    'true', 'false', 'null', 'undefined', 'new', 'if', 'else', 'for', 'while', 'try', 'catch',
  ].join('|')})$`,
);
|
|
284
|
+
|
|
285
|
+
/**
 * Check that a cited line number falls inside the file.
 *
 * @param content Full text of the file.
 * @param line 1-based line number cited by the finding, or null when none was given.
 * @returns An error message when `line` is past the end of the file; null when the
 *   line is in range or there is nothing to check (null or non-positive line).
 */
function checkLineInRange(content: string, line: number | null): string | null {
  if (line === null || line <= 0) return null;
  // A trailing newline terminates the last line rather than starting another one:
  // "a\nb\n" has two lines even though split('\n') yields three pieces.
  const pieces = content.split('\n');
  const lineCount = content.endsWith('\n') ? pieces.length - 1 : pieces.length;
  if (line > lineCount) return `Line ${line} exceeds file length (${lineCount} lines)`;
  return null;
}
|
|
291
|
+
|
|
292
|
+
/**
 * Check that at least one meaningful token of the quoted evidence occurs in the file.
 *
 * Quotes, backticks, semicolons, braces, brackets and parentheses in the evidence
 * are turned into spaces; the remainder is split on whitespace. Tokens of three
 * characters or fewer and tokens matching COMMON_KEYWORDS are ignored.
 *
 * @param content Full text of the file.
 * @param evidence Code snippet attached to the finding, if any.
 * @returns An error message when none of the remaining tokens is a substring of
 *   `content`; null when at least one is, or when there is nothing to check.
 */
function checkEvidenceTokens(content: string, evidence: string | undefined): string | null {
  if (!evidence) return null;

  const significant: string[] = [];
  for (const piece of evidence.replace(/['"`;{}[\]()]/g, ' ').split(/\s+/)) {
    if (piece.length > 3 && !COMMON_KEYWORDS.test(piece)) significant.push(piece);
  }
  if (significant.length === 0) return null;

  const anyPresent = significant.some((piece) => content.includes(piece));
  return anyPresent
    ? null
    : `Evidence tokens not found in file: ${significant.slice(0, 3).join(', ')}`;
}
|
|
303
|
+
|
|
304
|
+
/**
 * Check that at least one identifier mentioned in the finding's title/description
 * occurs in the file.
 *
 * The check only runs when two or more identifiers were extracted. Identifiers
 * containing a dot are looked up as plain substrings; all others must appear as a
 * whole token, i.e. not directly preceded or followed by a word character or `$`.
 *
 * @param content Full text of the file.
 * @param finding Finding whose title and description are scanned.
 * @returns An error message when none of the identifiers is found; otherwise null.
 */
function checkClaimedIdentifiers(content: string, finding: CodeReviewFinding): string | null {
  const claimedIds = extractClaimedIdentifiers(finding.description, finding.title);
  if (claimedIds.length < 2) return null;

  const foundAny = claimedIds.some((id) => {
    if (id.includes('.')) return content.includes(id);
    const escaped = id.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Lookarounds instead of \b: \b can never match next to a non-word character,
    // so ids such as "$scope" or "<div>" surrounded by whitespace were reported as
    // missing even though the file contains them.
    return new RegExp(`(?<![\\w$])${escaped}(?![\\w$])`).test(content);
  });

  if (!foundAny) return `Claimed identifiers not found in file: ${claimedIds.slice(0, 3).join(', ')}`;
  return null;
}
|
|
314
|
+
|
|
315
|
+
/**
 * Validate one finding against the file it cites.
 *
 * Checks run in this order and stop at the first failure: the file exists, the
 * cited line is within the file, the evidence shares a token with the file, and
 * a claimed identifier occurs in the file. A file that exists but cannot be read
 * is given the benefit of the doubt and the finding is accepted.
 *
 * @param finding Finding to check.
 * @param dirPath Directory that a relative `finding.file` is resolved against.
 * @returns The finding together with the verdict and, on failure, the reason.
 */
function validateSingleFinding(finding: CodeReviewFinding, dirPath: string): ValidationResult {
  const filePath = resolveFilePath(dirPath, finding.file);

  if (!existsSync(filePath)) {
    return { finding, valid: false, reason: `File does not exist: ${finding.file}` };
  }

  const content = readFileContent(filePath);
  // Compare against null explicitly: an empty file reads as "" and is still
  // readable, so it must go through the checks instead of being waved through.
  if (content === null) return { finding, valid: true }; // Can't read — don't reject

  const lineErr = checkLineInRange(content, finding.line);
  if (lineErr) return { finding, valid: false, reason: lineErr };

  const evidenceErr = checkEvidenceTokens(content, finding.evidence);
  if (evidenceErr) return { finding, valid: false, reason: evidenceErr };

  const idErr = checkClaimedIdentifiers(content, finding);
  if (idErr) return { finding, valid: false, reason: idErr };

  return { finding, valid: true };
}
|
|
336
|
+
|
|
337
|
+
// ── Phase 2: LLM verification pass ──────────────────────────
|
|
338
|
+
//
|
|
339
|
+
// Runs a second headless Claude pass that independently verifies
|
|
340
|
+
// each finding against the actual code.
|
|
341
|
+
|
|
342
|
+
/**
 * Build the prompt for the second, independent verification pass.
 *
 * Each finding is reduced to the fields the verifier needs and given a 1-based
 * `id` equal to its position in `findings`; missing or empty evidence is replaced
 * by the placeholder "(none provided)". The reduced list is embedded as
 * pretty-printed JSON, followed by the verification instructions and the required
 * output format.
 *
 * @param dirPath Working directory named in the prompt.
 * @param findings Findings to be verified.
 * @returns The complete prompt text.
 */
export function buildVerificationPrompt(
  dirPath: string,
  findings: CodeReviewFinding[],
): string {
  const summarize = (finding: CodeReviewFinding, index: number) => {
    const { severity, category, file, line, title, description } = finding;
    return {
      id: index + 1,
      severity,
      category,
      file,
      line,
      title,
      description,
      evidence: finding.evidence || '(none provided)',
    };
  };
  const serializedFindings = JSON.stringify(findings.map(summarize), null, 2);

  return `You are an independent code review VERIFIER. A separate reviewer produced the findings below. Your job is to VERIFY each finding against the actual code. You are a skeptic — do NOT trust the original reviewer's claims.

IMPORTANT: Your current working directory is "${dirPath}". Only read files within this directory.

## Findings to Verify

${serializedFindings}

## Verification Process

For EACH finding:

1. **Read the cited file and line** using the Read tool. Read at least 20 lines around the cited line for context.
2. **Check the specific claim** in the description. Does the code actually do what the finding claims?
3. **Search for counter-evidence**:
- If the finding claims something is missing (no validation, no cleanup, no guard): search for it with Grep
- If the finding claims an API is used: verify the actual API call at that line
- If the finding claims a value is leaked/exposed: check if it's filtered/deleted elsewhere in the same function
4. **Verdict**: Mark as "confirmed" or "rejected" with a brief explanation

## Rules

- You MUST actually Read each cited file. Do not rely on memory or assumptions.
- Use Grep to search for patterns the finding claims exist (or don't exist).
- A finding is "rejected" if:
- The code does NOT match what the description claims
- There IS a guard/fix that the finding claims is missing
- The line number doesn't contain the relevant code
- The finding is about a different version of the code than what exists now
- A finding is "confirmed" if you can independently verify the issue exists in the current code.
- Be thorough but efficient — focus verification effort on high/critical severity findings.

## Output

Output EXACTLY one JSON code block. No other text after the JSON block.

\`\`\`json
{
"verifications": [
{
"id": 1,
"verdict": "confirmed|rejected",
"confidence": 0.95,
"note": "Brief explanation of what you found when checking the code"
}
]
}
\`\`\``;
}
|
|
406
|
+
|
|
407
|
+
/** One verdict returned by the verification pass for a single finding. */
interface VerificationVerdict {
  /** 1-based position of the finding in the list that was sent for verification. */
  id: number;
  /** Whether the verifier confirmed or rejected the finding. */
  verdict: 'confirmed' | 'rejected';
  /** Confidence reported by the verifier (the prompt's example value is 0.95). */
  confidence: number;
  /** Short explanation from the verifier; may be an empty string. */
  note: string;
}
|
|
413
|
+
|
|
414
|
+
/**
 * Parse the verifier's response into a list of verdicts.
 *
 * The JSON payload is extracted from the response and its `verifications` array is
 * read. Entries are kept only when they are objects with a numeric `id` and a
 * `verdict` that is "confirmed" or "rejected" (surrounding whitespace and letter
 * case are ignored). Any other entry is dropped, which leaves the corresponding
 * finding without a verdict rather than silently confirming it. A missing or
 * non-numeric `confidence` defaults to 0.5 and a missing `note` to "".
 *
 * @param response Raw assistant response text.
 * @returns The usable verdicts; an empty array when the JSON cannot be parsed or
 *   contains no `verifications` array.
 */
export function parseVerificationResponse(response: string): VerificationVerdict[] {
  const jsonStr = extractJson(response);

  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonStr);
  } catch {
    return [];
  }
  if (typeof parsed !== 'object' || parsed === null) return [];

  const raw = (parsed as { verifications?: unknown }).verifications;
  if (!Array.isArray(raw)) return [];

  const verdicts: VerificationVerdict[] = [];
  for (const entry of raw) {
    // Skip malformed entries one by one so that a single bad element does not
    // discard every other verdict.
    if (typeof entry !== 'object' || entry === null) continue;
    const candidate = entry as Record<string, unknown>;
    if (typeof candidate.id !== 'number' || typeof candidate.verdict !== 'string') continue;

    const verdict = candidate.verdict.trim().toLowerCase();
    if (verdict !== 'confirmed' && verdict !== 'rejected') continue;

    verdicts.push({
      id: candidate.id,
      verdict,
      confidence: typeof candidate.confidence === 'number' ? candidate.confidence : 0.5,
      note: typeof candidate.note === 'string' ? candidate.note : '',
    });
  }
  return verdicts;
}
|
|
432
|
+
|
|
433
|
+
/**
 * Combine findings with the verifier's verdicts.
 *
 * A verdict belongs to the finding whose 1-based position equals the verdict's
 * `id`; when several verdicts share an id, the last one wins.
 *  - No verdict: the finding is kept with `verified: undefined`.
 *  - "confirmed" with confidence of at least 0.6: kept with `verified: true`.
 *  - Anything else: moved to `rejected` with `verified: false`.
 * The verdict's note (if non-empty) is copied into `verificationNote`.
 *
 * @param findings Findings in the order they were sent for verification.
 * @param verdicts Verdicts parsed from the verifier's response.
 * @returns Copies of the findings, split into kept (`verified`) and `rejected`.
 */
export function applyVerification(
  findings: CodeReviewFinding[],
  verdicts: VerificationVerdict[],
): { verified: CodeReviewFinding[]; rejected: CodeReviewFinding[] } {
  const verdictById = new Map<number, VerificationVerdict>();
  for (const entry of verdicts) verdictById.set(entry.id, entry);

  const kept: CodeReviewFinding[] = [];
  const dropped: CodeReviewFinding[] = [];

  findings.forEach((finding, index) => {
    const verdict = verdictById.get(index + 1);
    if (!verdict) {
      // Nothing was said about this finding: keep it, but leave it unverified.
      kept.push({ ...finding, verified: undefined });
      return;
    }

    const accepted = verdict.verdict === 'confirmed' && verdict.confidence >= 0.6;
    const annotated = {
      ...finding,
      verified: accepted,
      verificationNote: verdict.note || undefined,
    };
    (accepted ? kept : dropped).push(annotated);
  });

  return { verified: kept, rejected: dropped };
}
|
|
470
|
+
|
|
170
471
|
// ── Progress tracking ─────────────────────────────────────────
|
|
171
472
|
|
|
172
473
|
const TOOL_START_MESSAGES: Record<string, string> = {
|
|
@@ -202,6 +503,99 @@ function createCodeReviewProgressTracker() {
|
|
|
202
503
|
};
|
|
203
504
|
}
|
|
204
505
|
|
|
506
|
+
// ── Handler helpers ───────────────────────────────────────────
|
|
507
|
+
|
|
508
|
+
/** Callback that accepts one progress message string and returns nothing. */
type ProgressSender = (message: string) => void;
|
|
509
|
+
|
|
510
|
+
/**
 * Create a progress callback bound to one connection and one report.
 *
 * @param ctx Handler context whose `send` method delivers the message.
 * @param ws Connection passed as the first argument to `ctx.send`.
 * @param reportPath Report path included in every progress payload.
 * @returns A function that sends `message` as a `qualityCodeReviewProgress` event.
 */
function makeProgressSender(ctx: HandlerContext, ws: WSContext, reportPath: string): ProgressSender {
  const emit: ProgressSender = (message: string) => {
    const data = { path: reportPath, message };
    ctx.send(ws, { type: 'qualityCodeReviewProgress', data });
  };
  return emit;
}
|
|
515
|
+
|
|
516
|
+
/**
 * Create a tool-use callback that turns tool events into progress messages.
 *
 * Each call creates its own progress tracker. Events for which the tracker
 * yields no message are ignored.
 *
 * @param send Receiver of the progress messages.
 * @param prefix Optional text placed in front of every message.
 * @returns The callback to hand to the runner.
 */
function makeToolCallback(send: ProgressSender, prefix?: string): (event: ToolUseEvent) => void {
  const describeEvent = createCodeReviewProgressTracker();
  return (event: ToolUseEvent) => {
    const message = describeEvent(event);
    if (!message) return;
    send(prefix ? `${prefix}${message}` : message);
  };
}
|
|
523
|
+
|
|
524
|
+
/**
 * Fetch the findings stored in the existing report, if there is one.
 *
 * @param getPersistence Factory returning the persistence layer for a directory.
 * @param workingDir Directory passed to `getPersistence`.
 * @param reportPath Path of the report to load.
 * @returns The stored report's `findings`, or undefined when no report exists or
 *   anything in the lookup throws.
 */
function loadCliFindings(
  getPersistence: (dir: string) => QualityPersistence,
  workingDir: string,
  reportPath: string,
): Array<{ severity: string; category: string; file: string; line: number | null; title: string; description: string }> | undefined {
  try {
    return getPersistence(workingDir).loadReport(reportPath)?.findings;
  } catch {
    // Best effort: the review proceeds without these findings.
    return undefined;
  }
}
|
|
537
|
+
|
|
538
|
+
/**
 * Run the second headless pass that verifies findings against the code.
 *
 * @param dirPath Working directory for the runner and the verification prompt.
 * @param findings Findings to verify.
 * @param send Progress callback; receives a start message, tool progress prefixed
 *   with "Verifying: ", and a summary when findings were rejected.
 * @returns The findings that survive verification. When the response yields no
 *   verdicts at all, the input findings are returned unchanged.
 */
async function runVerificationPass(
  dirPath: string,
  findings: CodeReviewFinding[],
  send: ProgressSender,
): Promise<CodeReviewFinding[]> {
  send(`Verifying ${findings.length} findings against actual code...`);

  const runner = new HeadlessRunner({
    workingDir: dirPath,
    directPrompt: buildVerificationPrompt(dirPath, findings),
    stallWarningMs: 120_000,
    stallKillMs: 300_000,
    stallHardCapMs: 600_000,
    toolUseCallback: makeToolCallback(send, 'Verifying: '),
  });

  const runResult = await runWithFileLogger('code-review-verify', () => runner.run());
  const verdicts = parseVerificationResponse(runResult.assistantResponse || '');

  // No verdicts — keep all findings as they are.
  if (verdicts.length === 0) return findings;

  const outcome = applyVerification(findings, verdicts);
  const rejectedCount = outcome.rejected.length;
  if (rejectedCount > 0) {
    send(`Verification rejected ${rejectedCount} inaccurate finding(s)`);
  }
  return outcome.verified;
}
|
|
565
|
+
|
|
566
|
+
/**
 * Store the review outcome and, when a report already exists, fold it into that report.
 *
 * Without an existing report only the code review itself is saved. With one, the
 * report is updated (using the review's own score and grade when both are
 * present, otherwise recomputing via `recomputeWithAiReview`), then the report,
 * a history entry and the code review are saved, in that order.
 *
 * @param reviewResult Parsed review with its final findings.
 * @param reportPath Path identifying the report.
 * @param getPersistence Factory returning the persistence layer for a directory.
 * @param workingDir Directory passed to `getPersistence`.
 * @returns The updated report, or null when there was no existing report.
 */
function persistReviewResults(
  reviewResult: CodeReviewResult,
  reportPath: string,
  getPersistence: (dir: string) => QualityPersistence,
  workingDir: string,
): import('./quality-service.js').QualityResults | null {
  const store = getPersistence(workingDir);
  const previous = store.loadReport(reportPath);

  const saveReview = (): void => {
    store.saveCodeReview(reportPath, reviewResult.findings as unknown as Record<string, unknown>[], reviewResult.summary);
  };

  if (!previous) {
    saveReview();
    return null;
  }

  let merged: import('./quality-service.js').QualityResults;
  if (reviewResult.score === null || reviewResult.grade === null) {
    // No usable score/grade in the review: fall back to recomputation.
    const recomputed = recomputeWithAiReview(previous, reviewResult.findings);
    merged = { ...recomputed, codeReview: reviewResult.findings as unknown as typeof recomputed.codeReview };
  } else {
    merged = {
      ...previous,
      overall: reviewResult.score,
      grade: reviewResult.grade,
      codeReview: reviewResult.findings as unknown as typeof previous.codeReview,
      scoreRationale: reviewResult.scoreRationale ?? undefined,
    };
  }

  store.saveReport(reportPath, merged);
  store.appendHistory(merged, reportPath);
  saveReview();
  return merged;
}
|
|
598
|
+
|
|
205
599
|
// ── Handler ───────────────────────────────────────────────────
|
|
206
600
|
|
|
207
601
|
export async function handleCodeReview(
|
|
@@ -214,104 +608,65 @@ export async function handleCodeReview(
|
|
|
214
608
|
getPersistence: (dir: string) => QualityPersistence,
|
|
215
609
|
): Promise<void> {
|
|
216
610
|
if (activeReviews.has(dirPath)) {
|
|
217
|
-
ctx.send(ws, {
|
|
218
|
-
type: 'qualityError',
|
|
219
|
-
data: { path: reportPath, error: 'A code review is already running for this directory.' },
|
|
220
|
-
});
|
|
611
|
+
ctx.send(ws, { type: 'qualityError', data: { path: reportPath, error: 'A code review is already running for this directory.' } });
|
|
221
612
|
return;
|
|
222
613
|
}
|
|
223
614
|
|
|
224
615
|
activeReviews.add(dirPath);
|
|
225
|
-
|
|
226
|
-
ctx.send(ws, {
|
|
227
|
-
type: 'qualityCodeReviewProgress',
|
|
228
|
-
data: { path: reportPath, message: 'Starting AI code review...' },
|
|
229
|
-
});
|
|
616
|
+
const send = makeProgressSender(ctx, ws, reportPath);
|
|
230
617
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
const persistence = getPersistence(workingDir);
|
|
235
|
-
const existingReport = persistence.loadReport(reportPath);
|
|
236
|
-
if (existingReport?.findings) {
|
|
237
|
-
cliFindings = existingReport.findings;
|
|
238
|
-
}
|
|
239
|
-
} catch {
|
|
240
|
-
// Continue without CLI findings if persistence fails
|
|
241
|
-
}
|
|
618
|
+
try {
|
|
619
|
+
send('Starting AI code review...');
|
|
620
|
+
const cliFindings = loadCliFindings(getPersistence, workingDir, reportPath);
|
|
242
621
|
|
|
622
|
+
// ── Pass 1: Initial AI code review ──────────────────────
|
|
243
623
|
const runner = new HeadlessRunner({
|
|
244
624
|
workingDir: dirPath,
|
|
245
625
|
directPrompt: buildCodeReviewPrompt(dirPath, cliFindings),
|
|
246
626
|
stallWarningMs: 120_000,
|
|
247
627
|
stallKillMs: 600_000,
|
|
248
628
|
stallHardCapMs: 900_000,
|
|
249
|
-
toolUseCallback: (
|
|
250
|
-
const getProgressMessage = createCodeReviewProgressTracker();
|
|
251
|
-
return (event: ToolUseEvent) => {
|
|
252
|
-
const message = getProgressMessage(event);
|
|
253
|
-
if (message) {
|
|
254
|
-
ctx.send(ws, {
|
|
255
|
-
type: 'qualityCodeReviewProgress',
|
|
256
|
-
data: { path: reportPath, message },
|
|
257
|
-
});
|
|
258
|
-
}
|
|
259
|
-
};
|
|
260
|
-
})(),
|
|
261
|
-
});
|
|
262
|
-
|
|
263
|
-
ctx.send(ws, {
|
|
264
|
-
type: 'qualityCodeReviewProgress',
|
|
265
|
-
data: { path: reportPath, message: 'Claude is analyzing your codebase...' },
|
|
629
|
+
toolUseCallback: makeToolCallback(send),
|
|
266
630
|
});
|
|
267
631
|
|
|
632
|
+
send('Claude is analyzing your codebase...');
|
|
268
633
|
const result = await runWithFileLogger('code-review', () => runner.run());
|
|
634
|
+
const reviewResult = parseCodeReviewResponse(result.assistantResponse || '');
|
|
269
635
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
636
|
+
// ── Phase 3: Deterministic post-validation ──────────────
|
|
637
|
+
send(`Validating ${reviewResult.findings.length} findings against codebase...`);
|
|
638
|
+
const validation = validateFindings(reviewResult.findings, dirPath);
|
|
639
|
+
if (validation.stats.failed > 0) {
|
|
640
|
+
send(`Filtered ${validation.stats.failed} finding(s) with invalid references`);
|
|
641
|
+
}
|
|
274
642
|
|
|
275
|
-
|
|
276
|
-
|
|
643
|
+
// ── Phase 2: LLM verification pass ──────────────────────
|
|
644
|
+
let finalFindings = validation.validated;
|
|
645
|
+
if (finalFindings.length > 0) {
|
|
646
|
+
try {
|
|
647
|
+
finalFindings = await runVerificationPass(dirPath, finalFindings, send);
|
|
648
|
+
} catch {
|
|
649
|
+
send('Verification pass skipped (timeout or error)');
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
// ── Persist and send results ─────────────────────────────
|
|
654
|
+
send('Generating review report...');
|
|
655
|
+
const verifiedReviewResult: CodeReviewResult = { ...reviewResult, findings: finalFindings };
|
|
277
656
|
|
|
278
|
-
// Use AI-determined score if available, otherwise fall back to recomputation
|
|
279
657
|
let updatedResults: import('./quality-service.js').QualityResults | null = null;
|
|
280
658
|
try {
|
|
281
|
-
|
|
282
|
-
const existingReport = persistence.loadReport(reportPath);
|
|
283
|
-
if (existingReport) {
|
|
284
|
-
if (reviewResult.score !== null && reviewResult.grade !== null) {
|
|
285
|
-
// Use the AI-determined score and grade directly
|
|
286
|
-
updatedResults = {
|
|
287
|
-
...existingReport,
|
|
288
|
-
overall: reviewResult.score,
|
|
289
|
-
grade: reviewResult.grade,
|
|
290
|
-
codeReview: reviewResult.findings as unknown as typeof existingReport.codeReview,
|
|
291
|
-
scoreRationale: reviewResult.scoreRationale ?? undefined,
|
|
292
|
-
};
|
|
293
|
-
} else {
|
|
294
|
-
// Fallback: recompute with weighted formula
|
|
295
|
-
updatedResults = recomputeWithAiReview(existingReport, reviewResult.findings);
|
|
296
|
-
updatedResults = { ...updatedResults, codeReview: reviewResult.findings as unknown as typeof updatedResults.codeReview };
|
|
297
|
-
}
|
|
298
|
-
persistence.saveReport(reportPath, updatedResults);
|
|
299
|
-
persistence.appendHistory(updatedResults, reportPath);
|
|
300
|
-
}
|
|
301
|
-
persistence.saveCodeReview(reportPath, reviewResult.findings as unknown as Record<string, unknown>[], reviewResult.summary);
|
|
659
|
+
updatedResults = persistReviewResults(verifiedReviewResult, reportPath, getPersistence, workingDir);
|
|
302
660
|
} catch {
|
|
303
661
|
// Persistence failure should not break the review flow
|
|
304
662
|
}
|
|
305
663
|
|
|
306
664
|
ctx.send(ws, {
|
|
307
665
|
type: 'qualityCodeReview',
|
|
308
|
-
data: { path: reportPath, findings:
|
|
666
|
+
data: { path: reportPath, findings: verifiedReviewResult.findings, summary: verifiedReviewResult.summary, results: updatedResults },
|
|
309
667
|
});
|
|
310
668
|
} catch (error) {
|
|
311
|
-
ctx.send(ws, {
|
|
312
|
-
type: 'qualityError',
|
|
313
|
-
data: { path: reportPath, error: error instanceof Error ? error.message : String(error) },
|
|
314
|
-
});
|
|
669
|
+
ctx.send(ws, { type: 'qualityError', data: { path: reportPath, error: error instanceof Error ? error.message : String(error) } });
|
|
315
670
|
} finally {
|
|
316
671
|
activeReviews.delete(dirPath);
|
|
317
672
|
}
|