mstro-app 0.4.28 → 0.4.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/cli/headless/claude-invoker-process.d.ts.map +1 -1
- package/dist/server/cli/headless/claude-invoker-process.js +5 -1
- package/dist/server/cli/headless/claude-invoker-process.js.map +1 -1
- package/dist/server/cli/headless/haiku-assessments.d.ts.map +1 -1
- package/dist/server/cli/headless/haiku-assessments.js +20 -28
- package/dist/server/cli/headless/haiku-assessments.js.map +1 -1
- package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
- package/dist/server/cli/headless/stall-assessor.js +17 -3
- package/dist/server/cli/headless/stall-assessor.js.map +1 -1
- package/dist/server/cli/prompt-builders.d.ts.map +1 -1
- package/dist/server/cli/prompt-builders.js +35 -19
- package/dist/server/cli/prompt-builders.js.map +1 -1
- package/dist/server/mcp/bouncer-haiku.d.ts.map +1 -1
- package/dist/server/mcp/bouncer-haiku.js +5 -30
- package/dist/server/mcp/bouncer-haiku.js.map +1 -1
- package/dist/server/mcp/security-analysis.d.ts.map +1 -1
- package/dist/server/mcp/security-analysis.js +19 -11
- package/dist/server/mcp/security-analysis.js.map +1 -1
- package/dist/server/services/deploy/headless-session-handler.d.ts.map +1 -1
- package/dist/server/services/deploy/headless-session-handler.js +61 -69
- package/dist/server/services/deploy/headless-session-handler.js.map +1 -1
- package/dist/server/services/pathUtils.d.ts.map +1 -1
- package/dist/server/services/pathUtils.js +46 -38
- package/dist/server/services/pathUtils.js.map +1 -1
- package/dist/server/services/plan/agent-loader.d.ts +20 -4
- package/dist/server/services/plan/agent-loader.d.ts.map +1 -1
- package/dist/server/services/plan/agent-loader.js +85 -16
- package/dist/server/services/plan/agent-loader.js.map +1 -1
- package/dist/server/services/plan/issue-retry.d.ts +0 -8
- package/dist/server/services/plan/issue-retry.d.ts.map +1 -1
- package/dist/server/services/plan/issue-retry.js +72 -63
- package/dist/server/services/plan/issue-retry.js.map +1 -1
- package/dist/server/services/plan/review-gate.js +16 -88
- package/dist/server/services/plan/review-gate.js.map +1 -1
- package/dist/server/services/websocket/git-handlers.d.ts.map +1 -1
- package/dist/server/services/websocket/git-handlers.js +6 -19
- package/dist/server/services/websocket/git-handlers.js.map +1 -1
- package/dist/server/services/websocket/git-pr-handlers.d.ts.map +1 -1
- package/dist/server/services/websocket/git-pr-handlers.js +5 -21
- package/dist/server/services/websocket/git-pr-handlers.js.map +1 -1
- package/dist/server/services/websocket/handlers/deploy-handlers.d.ts.map +1 -1
- package/dist/server/services/websocket/handlers/deploy-handlers.js +28 -33
- package/dist/server/services/websocket/handlers/deploy-handlers.js.map +1 -1
- package/dist/server/services/websocket/plan-board-handlers.d.ts.map +1 -1
- package/dist/server/services/websocket/plan-board-handlers.js +31 -25
- package/dist/server/services/websocket/plan-board-handlers.js.map +1 -1
- package/dist/server/services/websocket/quality-fix-agent.d.ts.map +1 -1
- package/dist/server/services/websocket/quality-fix-agent.js +11 -18
- package/dist/server/services/websocket/quality-fix-agent.js.map +1 -1
- package/dist/server/services/websocket/quality-review-agent.d.ts.map +1 -1
- package/dist/server/services/websocket/quality-review-agent.js +13 -150
- package/dist/server/services/websocket/quality-review-agent.js.map +1 -1
- package/package.json +1 -1
- package/server/cli/headless/claude-invoker-process.ts +5 -1
- package/server/cli/headless/haiku-assessments.ts +21 -28
- package/server/cli/headless/stall-assessor.ts +17 -3
- package/server/cli/prompt-builders.ts +34 -23
- package/server/mcp/bouncer-haiku.ts +5 -30
- package/server/mcp/security-analysis.ts +19 -12
- package/server/services/deploy/headless-session-handler.ts +75 -76
- package/server/services/pathUtils.ts +55 -42
- package/server/services/plan/agent-loader.ts +88 -15
- package/server/services/plan/issue-retry.ts +93 -68
- package/server/services/plan/review-gate.ts +13 -89
- package/server/services/websocket/git-handlers.ts +6 -18
- package/server/services/websocket/git-pr-handlers.ts +5 -20
- package/server/services/websocket/handlers/deploy-handlers.ts +34 -37
- package/server/services/websocket/plan-board-handlers.ts +36 -21
- package/server/services/websocket/quality-fix-agent.ts +10 -17
- package/server/services/websocket/quality-review-agent.ts +12 -149
|
@@ -97,6 +97,40 @@ paused: false
|
|
|
97
97
|
}
|
|
98
98
|
}
|
|
99
99
|
|
|
100
|
+
/** Update front-matter fields in the board.md file. */
|
|
101
|
+
function applyBoardFieldUpdates(
|
|
102
|
+
boardMdPath: string,
|
|
103
|
+
fields: Record<string, unknown>,
|
|
104
|
+
): void {
|
|
105
|
+
let content = readFileSync(boardMdPath, 'utf-8');
|
|
106
|
+
for (const [key, value] of Object.entries(fields)) {
|
|
107
|
+
const yamlKey = key.replace(/([A-Z])/g, '_$1').toLowerCase();
|
|
108
|
+
content = replaceFrontMatterField(content, yamlKey, formatYamlValue(value));
|
|
109
|
+
}
|
|
110
|
+
writeFileSync(boardMdPath, content, 'utf-8');
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/** Sync the review-custom agent file when review criteria change. */
|
|
114
|
+
function syncReviewCriteriaAgent(
|
|
115
|
+
fields: Record<string, unknown>,
|
|
116
|
+
pmDir: string,
|
|
117
|
+
boardId: string,
|
|
118
|
+
): void {
|
|
119
|
+
if (!('reviewCriteria' in fields)) return;
|
|
120
|
+
|
|
121
|
+
const boardDir = join(pmDir, 'boards', boardId);
|
|
122
|
+
const agentsDir = join(boardDir, 'agents');
|
|
123
|
+
const agentPath = join(agentsDir, 'review-custom.md');
|
|
124
|
+
const criteriaValue = String(fields.reviewCriteria ?? '').trim();
|
|
125
|
+
|
|
126
|
+
if (criteriaValue) {
|
|
127
|
+
if (!existsSync(agentsDir)) mkdirSync(agentsDir, { recursive: true });
|
|
128
|
+
writeFileSync(agentPath, buildBoardReviewAgent(criteriaValue), 'utf-8');
|
|
129
|
+
} else if (existsSync(agentPath)) {
|
|
130
|
+
try { unlinkSync(agentPath); } catch { /* non-fatal */ }
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
100
134
|
export function handleUpdateBoard(
|
|
101
135
|
ctx: HandlerContext, ws: WSContext, msg: WebSocketMessage,
|
|
102
136
|
workingDir: string, permission?: 'view',
|
|
@@ -118,30 +152,11 @@ export function handleUpdateBoard(
|
|
|
118
152
|
return;
|
|
119
153
|
}
|
|
120
154
|
|
|
121
|
-
|
|
122
|
-
for (const [key, value] of Object.entries(fields as Record<string, unknown>)) {
|
|
123
|
-
const yamlKey = key.replace(/([A-Z])/g, '_$1').toLowerCase();
|
|
124
|
-
content = replaceFrontMatterField(content, yamlKey, formatYamlValue(value));
|
|
125
|
-
}
|
|
126
|
-
writeFileSync(boardMdPath, content, 'utf-8');
|
|
155
|
+
applyBoardFieldUpdates(boardMdPath, fields as Record<string, unknown>);
|
|
127
156
|
|
|
128
157
|
// When review criteria are set, also write a board-level review agent file
|
|
129
158
|
// so users can discover and edit the full prompt as markdown.
|
|
130
|
-
|
|
131
|
-
if ('reviewCriteria' in typedFields) {
|
|
132
|
-
const boardDir = join(pmDir, 'boards', boardId);
|
|
133
|
-
const agentsDir = join(boardDir, 'agents');
|
|
134
|
-
const agentPath = join(agentsDir, 'review-custom.md');
|
|
135
|
-
const criteriaValue = String(typedFields.reviewCriteria ?? '').trim();
|
|
136
|
-
|
|
137
|
-
if (criteriaValue) {
|
|
138
|
-
if (!existsSync(agentsDir)) mkdirSync(agentsDir, { recursive: true });
|
|
139
|
-
writeFileSync(agentPath, buildBoardReviewAgent(criteriaValue), 'utf-8');
|
|
140
|
-
} else if (existsSync(agentPath)) {
|
|
141
|
-
// Clear the agent file when criteria are removed
|
|
142
|
-
try { unlinkSync(agentPath); } catch { /* non-fatal */ }
|
|
143
|
-
}
|
|
144
|
-
}
|
|
159
|
+
syncReviewCriteriaAgent(fields as Record<string, unknown>, pmDir, boardId);
|
|
145
160
|
|
|
146
161
|
const boardState = parseBoardDirectory(pmDir, boardId);
|
|
147
162
|
if (boardState) {
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
import { runWithFileLogger } from '../../cli/headless/headless-logger.js';
|
|
11
11
|
import { HeadlessRunner } from '../../cli/headless/index.js';
|
|
12
12
|
import type { ToolUseEvent } from '../../cli/headless/types.js';
|
|
13
|
+
import { loadSkillPrompt } from '../plan/agent-loader.js';
|
|
13
14
|
import type { HandlerContext } from './handler-context.js';
|
|
14
15
|
import type { QualityPersistence } from './quality-persistence.js';
|
|
15
16
|
import { detectTools, runQualityScan } from './quality-service.js';
|
|
@@ -58,7 +59,7 @@ export function createToolProgressCallback(ctx: HandlerContext, ws: WSContext, r
|
|
|
58
59
|
|
|
59
60
|
// ── Prompt ────────────────────────────────────────────────────
|
|
60
61
|
|
|
61
|
-
function buildFixPrompt(findings: FindingForFix[], section?: string): string {
|
|
62
|
+
function buildFixPrompt(findings: FindingForFix[], section?: string, workingDir?: string): string {
|
|
62
63
|
const filtered = section ? findings.filter((f) => f.category === section) : findings;
|
|
63
64
|
const sorted = filtered.sort((a, b) => {
|
|
64
65
|
const order: Record<string, number> = { critical: 0, high: 1, medium: 2, low: 3 };
|
|
@@ -73,22 +74,14 @@ function buildFixPrompt(findings: FindingForFix[], section?: string): string {
|
|
|
73
74
|
return parts.join('\n');
|
|
74
75
|
}).join('\n\n');
|
|
75
76
|
|
|
76
|
-
|
|
77
|
+
const fromSkill = loadSkillPrompt('fix-quality', {
|
|
78
|
+
issueList,
|
|
79
|
+
issueCount: String(sorted.length),
|
|
80
|
+
showCount: String(Math.min(30, sorted.length)),
|
|
81
|
+
}, workingDir);
|
|
82
|
+
if (fromSkill) return fromSkill;
|
|
77
83
|
|
|
78
|
-
## Issues to Fix (${sorted.length} total, showing top ${Math.min(30, sorted.length)})
|
|
79
|
-
|
|
80
|
-
${issueList}
|
|
81
|
-
|
|
82
|
-
## Rules
|
|
83
|
-
|
|
84
|
-
- Fix each issue by editing the relevant file at the specified location.
|
|
85
|
-
- For complexity issues: refactor into smaller functions. For long files: split or extract modules. For long functions: break into smaller functions.
|
|
86
|
-
- For security issues: apply the suggested fix or use secure coding best practices.
|
|
87
|
-
- For bugs: fix the root cause, not just the symptom.
|
|
88
|
-
- For linting/formatting: apply the standard for the project.
|
|
89
|
-
- Do NOT introduce new issues. Make minimal, focused changes.
|
|
90
|
-
- After fixing, verify the changes compile/pass linting if tools are available.
|
|
91
|
-
- Work through the issues systematically from most to least severe.`;
|
|
84
|
+
return `You are a code quality fix agent. Fix the following quality issues in the codebase.\n\n## Issues to Fix (${sorted.length} total, showing top ${Math.min(30, sorted.length)})\n\n${issueList}\n\nFix each issue by editing the relevant file. Work from most to least severe. Do NOT introduce new issues.`;
|
|
92
85
|
}
|
|
93
86
|
|
|
94
87
|
// ── Handler ───────────────────────────────────────────────────
|
|
@@ -128,7 +121,7 @@ export async function handleFixIssues(
|
|
|
128
121
|
data: { path: reportPath, message: 'Starting Claude Code to fix issues...' },
|
|
129
122
|
});
|
|
130
123
|
|
|
131
|
-
const prompt = buildFixPrompt(findings, section);
|
|
124
|
+
const prompt = buildFixPrompt(findings, section, workingDir);
|
|
132
125
|
|
|
133
126
|
const runner = new HeadlessRunner({
|
|
134
127
|
workingDir: dirPath,
|
|
@@ -12,6 +12,7 @@ import { isAbsolute, join } from 'node:path';
|
|
|
12
12
|
import { runWithFileLogger } from '../../cli/headless/headless-logger.js';
|
|
13
13
|
import { HeadlessRunner } from '../../cli/headless/index.js';
|
|
14
14
|
import type { ToolUseEvent } from '../../cli/headless/types.js';
|
|
15
|
+
import { loadSkillPrompt } from '../plan/agent-loader.js';
|
|
15
16
|
import type { HandlerContext } from './handler-context.js';
|
|
16
17
|
import type { QualityPersistence } from './quality-persistence.js';
|
|
17
18
|
import { recomputeWithAiReview } from './quality-service.js';
|
|
@@ -39,106 +40,11 @@ export function buildCodeReviewPrompt(dirPath: string, cliFindings?: Array<{ sev
|
|
|
39
40
|
? `\n## CLI Tool Findings (already detected)\n\nThe following issues were found by automated CLI tools (linters, formatters, complexity analyzers). Review these for context — they are already included in the final report. Focus your analysis on DEEPER issues these tools cannot detect.\n\n${cliFindings.slice(0, 50).map((f, i) => `${i + 1}. [${f.severity.toUpperCase()}] ${f.category} — ${f.file}${f.line ? `:${f.line}` : ''} — ${f.title}: ${f.description}`).join('\n')}\n${cliFindings.length > 50 ? `\n...and ${cliFindings.length - 50} more issues from CLI tools.\n` : ''}`
|
|
40
41
|
: '';
|
|
41
42
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
1. **Discover**: Use Glob to find source files (e.g. "**/*.{ts,tsx,js,py,rs,go,java,rb,php}"). Understand the project structure.
|
|
49
|
-
2. **Read**: Read the most important files — entry points, core modules, handlers, services. Prioritize files with recent git changes (\`git diff --name-only HEAD~5\` via Bash if available).
|
|
50
|
-
3. **Analyze**: Look for real, actionable issues across ALL of these categories:
|
|
51
|
-
|
|
52
|
-
### Architecture
|
|
53
|
-
- What is the current architecture (monolith, microservices, layered, etc.)?
|
|
54
|
-
- Are there architectural violations? (e.g., presentation layer directly accessing data layer, circular dependencies between modules)
|
|
55
|
-
- Is there proper separation of concerns?
|
|
56
|
-
- Are there god objects or god modules that do too much?
|
|
57
|
-
|
|
58
|
-
### SOLID / OOP Principles
|
|
59
|
-
- **SRP**: Classes/modules with multiple unrelated responsibilities
|
|
60
|
-
- **OCP**: Code that requires modification instead of extension for new features
|
|
61
|
-
- **LSP**: Subtypes that don't properly substitute for their base types
|
|
62
|
-
- **ISP**: Interfaces/contracts that force implementations to depend on methods they don't use
|
|
63
|
-
- **DIP**: High-level modules directly depending on low-level modules instead of abstractions
|
|
64
|
-
|
|
65
|
-
### Security
|
|
66
|
-
- Injection vulnerabilities (SQL, XSS, command), hardcoded secrets/credentials, auth bypasses, insecure crypto, path traversal, SSRF, unsafe deserialization
|
|
67
|
-
|
|
68
|
-
### Bugs & Logic
|
|
69
|
-
- Null/undefined errors, race conditions, logic errors, unhandled edge cases, off-by-one errors, resource leaks, incorrect error handling, incorrect algorithms
|
|
70
|
-
|
|
71
|
-
### Performance
|
|
72
|
-
- N+1 queries, unnecessary re-renders, missing memoization, blocking I/O in hot paths, unbounded data structures, missing pagination
|
|
73
|
-
|
|
74
|
-
## CRITICAL — Structured Evidence Requirement
|
|
75
|
-
|
|
76
|
-
For EACH finding, you MUST provide structured evidence that grounds the finding in actual code. This is required to prevent false positives.
|
|
77
|
-
|
|
78
|
-
For each finding, use this reasoning process:
|
|
79
|
-
|
|
80
|
-
1. **PREMISE**: State the observable fact from the code. Quote the exact code you see.
|
|
81
|
-
2. **CONTEXT**: What is the surrounding code doing? Are there guards, fixes, or patterns elsewhere that might handle this?
|
|
82
|
-
3. **COUNTER-CHECK**: Actively look for evidence that CONTRADICTS your finding. Check for:
|
|
83
|
-
- Guards or validation earlier in the call chain
|
|
84
|
-
- Error handling wrapping the code
|
|
85
|
-
- Configuration that changes behavior (e.g., NODE_ENV checks)
|
|
86
|
-
- Comments explaining intentional design choices
|
|
87
|
-
4. **CONCLUSION**: Only report the finding if you could not find contradicting evidence.
|
|
88
|
-
|
|
89
|
-
### Common False Positive Patterns to AVOID
|
|
90
|
-
|
|
91
|
-
- Claiming a function uses API X when it actually uses API Y (e.g., claiming Math.random() when code uses crypto.randomInt()) — ALWAYS quote the actual function call
|
|
92
|
-
- Claiming a header/value is leaked when code already deletes/filters it — READ the full function
|
|
93
|
-
- Claiming there's no guard when a condition check exists nearby — READ surrounding lines
|
|
94
|
-
- Claiming N fields/methods when the actual count differs — COUNT explicitly
|
|
95
|
-
- Claiming a resource leaks when cleanup exists in a different handler — SEARCH for the cleanup code
|
|
96
|
-
|
|
97
|
-
## Rules
|
|
98
|
-
|
|
99
|
-
- Only report findings you are >90% confident about after completing the counter-check step.
|
|
100
|
-
- Focus on architecture, SOLID violations, bugs, and security over style nits.
|
|
101
|
-
- Each finding MUST reference a specific file and line number. Do not report vague or file-level issues.
|
|
102
|
-
- Each finding MUST include an "evidence" field with the exact code snippet (1-5 lines) proving the issue exists.
|
|
103
|
-
- Limit to the 25 most important findings, ranked by severity.
|
|
104
|
-
- Do NOT modify any files. This is a read-only review.
|
|
105
|
-
- Be HONEST about the overall quality. A codebase with serious issues should score low.
|
|
106
|
-
|
|
107
|
-
## Scoring Guidelines
|
|
108
|
-
|
|
109
|
-
After your analysis, provide an honest overall quality score (0-100) and letter grade:
|
|
110
|
-
- **A (90-100)**: Excellent — clean architecture, minimal issues, well-tested, follows best practices
|
|
111
|
-
- **B (80-89)**: Good — solid code with minor issues, mostly well-structured
|
|
112
|
-
- **C (70-79)**: Adequate — functional but has notable quality issues that should be addressed
|
|
113
|
-
- **D (60-69)**: Below average — significant issues in architecture, testing, or code quality
|
|
114
|
-
- **F (0-59)**: Poor — serious problems: security vulnerabilities, broken architecture, major bugs, or unmaintainable code
|
|
115
|
-
|
|
116
|
-
Consider ALL findings (both CLI tool findings and your own) when determining the score. The score should reflect the overall state of the codebase honestly. A project with 50+ linting errors, formatting issues, complex functions, AND architectural problems should NOT score above 70.
|
|
117
|
-
|
|
118
|
-
## Output
|
|
119
|
-
|
|
120
|
-
After your analysis, output EXACTLY one JSON code block with your findings. No other text after the JSON block.
|
|
121
|
-
|
|
122
|
-
\`\`\`json
|
|
123
|
-
{
|
|
124
|
-
"score": 72,
|
|
125
|
-
"grade": "C",
|
|
126
|
-
"scoreRationale": "Brief explanation of why this score was given, referencing key issues",
|
|
127
|
-
"findings": [
|
|
128
|
-
{
|
|
129
|
-
"severity": "critical|high|medium|low",
|
|
130
|
-
"category": "architecture|oop|security|bugs|performance|logic",
|
|
131
|
-
"file": "relative/path/to/file.ts",
|
|
132
|
-
"line": 42,
|
|
133
|
-
"title": "Short title describing the issue",
|
|
134
|
-
"description": "What the problem is and why it matters.",
|
|
135
|
-
"suggestion": "How to fix it.",
|
|
136
|
-
"evidence": "const token = Math.random().toString(36) // exact code from file proving the issue"
|
|
137
|
-
}
|
|
138
|
-
],
|
|
139
|
-
"summary": "Brief 1-2 sentence summary of overall code quality."
|
|
140
|
-
}
|
|
141
|
-
\`\`\``;
|
|
43
|
+
const fromSkill = loadSkillPrompt('code-review', { dirPath, cliFindingsSection }, dirPath);
|
|
44
|
+
if (fromSkill) return fromSkill;
|
|
45
|
+
|
|
46
|
+
// Inline fallback when Skill file is not available (e.g., standalone CLI install)
|
|
47
|
+
return `You are a senior staff engineer performing a rigorous code review.\n\nIMPORTANT: Your current working directory is "${dirPath}". Only review files within this directory.\n${cliFindingsSection}\nDiscover source files with Glob, read important files, analyze for architecture, SOLID, security, bugs, and performance issues. Each finding needs file, line, evidence. Output one JSON code block with score, grade, findings array, and summary.`;
|
|
142
48
|
}
|
|
143
49
|
|
|
144
50
|
// ── Response parsing ──────────────────────────────────────────
|
|
@@ -343,7 +249,7 @@ export function buildVerificationPrompt(
|
|
|
343
249
|
dirPath: string,
|
|
344
250
|
findings: CodeReviewFinding[],
|
|
345
251
|
): string {
|
|
346
|
-
const findingsJson = findings.map((f, i) => ({
|
|
252
|
+
const findingsJson = JSON.stringify(findings.map((f, i) => ({
|
|
347
253
|
id: i + 1,
|
|
348
254
|
severity: f.severity,
|
|
349
255
|
category: f.category,
|
|
@@ -352,56 +258,13 @@ export function buildVerificationPrompt(
|
|
|
352
258
|
title: f.title,
|
|
353
259
|
description: f.description,
|
|
354
260
|
evidence: f.evidence || '(none provided)',
|
|
355
|
-
}));
|
|
356
|
-
|
|
357
|
-
return `You are an independent code review VERIFIER. A separate reviewer produced the findings below. Your job is to VERIFY each finding against the actual code. You are a skeptic — do NOT trust the original reviewer's claims.
|
|
358
|
-
|
|
359
|
-
IMPORTANT: Your current working directory is "${dirPath}". Only read files within this directory.
|
|
360
|
-
|
|
361
|
-
## Findings to Verify
|
|
362
|
-
|
|
363
|
-
${JSON.stringify(findingsJson, null, 2)}
|
|
364
|
-
|
|
365
|
-
## Verification Process
|
|
261
|
+
})), null, 2);
|
|
366
262
|
|
|
367
|
-
|
|
263
|
+
const fromSkill = loadSkillPrompt('verify-review', { dirPath, findingsJson }, dirPath);
|
|
264
|
+
if (fromSkill) return fromSkill;
|
|
368
265
|
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
3. **Search for counter-evidence**:
|
|
372
|
-
- If the finding claims something is missing (no validation, no cleanup, no guard): search for it with Grep
|
|
373
|
-
- If the finding claims an API is used: verify the actual API call at that line
|
|
374
|
-
- If the finding claims a value is leaked/exposed: check if it's filtered/deleted elsewhere in the same function
|
|
375
|
-
4. **Verdict**: Mark as "confirmed" or "rejected" with a brief explanation
|
|
376
|
-
|
|
377
|
-
## Rules
|
|
378
|
-
|
|
379
|
-
- You MUST actually Read each cited file. Do not rely on memory or assumptions.
|
|
380
|
-
- Use Grep to search for patterns the finding claims exist (or don't exist).
|
|
381
|
-
- A finding is "rejected" if:
|
|
382
|
-
- The code does NOT match what the description claims
|
|
383
|
-
- There IS a guard/fix that the finding claims is missing
|
|
384
|
-
- The line number doesn't contain the relevant code
|
|
385
|
-
- The finding is about a different version of the code than what exists now
|
|
386
|
-
- A finding is "confirmed" if you can independently verify the issue exists in the current code.
|
|
387
|
-
- Be thorough but efficient — focus verification effort on high/critical severity findings.
|
|
388
|
-
|
|
389
|
-
## Output
|
|
390
|
-
|
|
391
|
-
Output EXACTLY one JSON code block. No other text after the JSON block.
|
|
392
|
-
|
|
393
|
-
\`\`\`json
|
|
394
|
-
{
|
|
395
|
-
"verifications": [
|
|
396
|
-
{
|
|
397
|
-
"id": 1,
|
|
398
|
-
"verdict": "confirmed|rejected",
|
|
399
|
-
"confidence": 0.95,
|
|
400
|
-
"note": "Brief explanation of what you found when checking the code"
|
|
401
|
-
}
|
|
402
|
-
]
|
|
403
|
-
}
|
|
404
|
-
\`\`\``;
|
|
266
|
+
// Inline fallback
|
|
267
|
+
return `You are an independent code review VERIFIER. Verify each finding below against actual code in "${dirPath}".\n\n## Findings to Verify\n\n${findingsJson}\n\nFor each finding: Read the cited file, check the claim, search for counter-evidence. Output one JSON code block with verifications array containing id, verdict (confirmed|rejected), confidence, and note.`;
|
|
405
268
|
}
|
|
406
269
|
|
|
407
270
|
interface VerificationVerdict {
|