@yasserkhanorg/impact-gate 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli/commands/install_skill.d.ts +2 -0
- package/dist/cli/commands/install_skill.d.ts.map +1 -0
- package/dist/cli/commands/install_skill.js +60 -0
- package/dist/cli/parse_args.js +1 -1
- package/dist/cli/types.d.ts +1 -1
- package/dist/cli/types.d.ts.map +1 -1
- package/dist/cli/usage.d.ts.map +1 -1
- package/dist/cli/usage.js +1 -0
- package/dist/cli.js +7 -1
- package/dist/esm/cli/commands/install_skill.js +57 -0
- package/dist/esm/cli/parse_args.js +1 -1
- package/dist/esm/cli/usage.js +1 -0
- package/dist/esm/cli.js +7 -1
- package/dist/esm/qa-agent/cli.js +26 -0
- package/dist/esm/qa-agent/finding_taxonomy.js +102 -0
- package/dist/esm/qa-agent/health_score.js +99 -0
- package/dist/esm/qa-agent/orchestrator.js +67 -9
- package/dist/esm/qa-agent/phase2/agent_loop.js +13 -1
- package/dist/esm/qa-agent/phase2/tools.js +10 -4
- package/dist/esm/qa-agent/phase25/fix_loop.js +238 -0
- package/dist/esm/qa-agent/phase25/fix_tools.js +262 -0
- package/dist/esm/qa-agent/phase25/wtf_heuristic.js +60 -0
- package/dist/esm/qa-agent/phase3/reporter.js +100 -30
- package/dist/esm/qa-agent/phase3/verdict.js +21 -3
- package/dist/esm/qa-agent/regression/baseline.js +89 -0
- package/dist/qa-agent/cli.js +26 -0
- package/dist/qa-agent/finding_taxonomy.d.ts +23 -0
- package/dist/qa-agent/finding_taxonomy.d.ts.map +1 -0
- package/dist/qa-agent/finding_taxonomy.js +108 -0
- package/dist/qa-agent/health_score.d.ts +19 -0
- package/dist/qa-agent/health_score.d.ts.map +1 -0
- package/dist/qa-agent/health_score.js +104 -0
- package/dist/qa-agent/orchestrator.d.ts.map +1 -1
- package/dist/qa-agent/orchestrator.js +67 -9
- package/dist/qa-agent/phase2/agent_loop.d.ts.map +1 -1
- package/dist/qa-agent/phase2/agent_loop.js +13 -1
- package/dist/qa-agent/phase2/tools.d.ts.map +1 -1
- package/dist/qa-agent/phase2/tools.js +10 -4
- package/dist/qa-agent/phase25/fix_loop.d.ts +4 -0
- package/dist/qa-agent/phase25/fix_loop.d.ts.map +1 -0
- package/dist/qa-agent/phase25/fix_loop.js +244 -0
- package/dist/qa-agent/phase25/fix_tools.d.ts +18 -0
- package/dist/qa-agent/phase25/fix_tools.d.ts.map +1 -0
- package/dist/qa-agent/phase25/fix_tools.js +266 -0
- package/dist/qa-agent/phase25/wtf_heuristic.d.ts +27 -0
- package/dist/qa-agent/phase25/wtf_heuristic.d.ts.map +1 -0
- package/dist/qa-agent/phase25/wtf_heuristic.js +64 -0
- package/dist/qa-agent/phase3/reporter.d.ts +2 -2
- package/dist/qa-agent/phase3/reporter.d.ts.map +1 -1
- package/dist/qa-agent/phase3/reporter.js +100 -30
- package/dist/qa-agent/phase3/verdict.d.ts +2 -2
- package/dist/qa-agent/phase3/verdict.d.ts.map +1 -1
- package/dist/qa-agent/phase3/verdict.js +21 -3
- package/dist/qa-agent/regression/baseline.d.ts +14 -0
- package/dist/qa-agent/regression/baseline.d.ts.map +1 -0
- package/dist/qa-agent/regression/baseline.js +94 -0
- package/dist/qa-agent/types.d.ts +65 -2
- package/dist/qa-agent/types.d.ts.map +1 -1
- package/package.json +2 -1
- package/skills/qa/SKILL.md +138 -0
|
@@ -6,6 +6,7 @@ import { AgentBrowser } from './agent_browser.js';
|
|
|
6
6
|
import { TOOL_DEFINITIONS, executeTool } from './tools.js';
|
|
7
7
|
import { createExplorationState, recordAction, recordFinding, markFlowExplored, nextFlow, isStuck, isBudgetExhausted, allFlowsExplored, updateCost, compressActionsLog, } from './exploration_state.js';
|
|
8
8
|
import { analyzeScreenshot } from './vision.js';
|
|
9
|
+
import { computeHealthScore } from '../health_score.js';
|
|
9
10
|
const MAX_ITERATIONS = 200;
|
|
10
11
|
const COMPRESS_EVERY = 20;
|
|
11
12
|
const MAX_LLM_RETRIES = 2;
|
|
@@ -45,11 +46,21 @@ For each flow, pick 3-4 of the most relevant dimensions based on what the flow d
|
|
|
45
46
|
|
|
46
47
|
Pick dimensions that matter for THIS flow. Example: for "channel settings" → permissions + edge cases + state persistence. For "messaging" → happy path + error recovery + console health. Do NOT mechanically follow all 7.
|
|
47
48
|
|
|
49
|
+
## Finding Categories
|
|
50
|
+
When reporting findings, use the most specific category:
|
|
51
|
+
- **visual** — Layout breaks, broken images, z-index issues, alignment, animation glitches, dark mode problems
|
|
52
|
+
- **functional** — Broken links, dead buttons, form validation failures, incorrect redirects, race conditions, state not persisting
|
|
53
|
+
- **ux** — Confusing navigation, missing loading indicators, slow interactions (>500ms), unclear error messages, no confirmation before destructive actions
|
|
54
|
+
- **content** — Typos, grammar errors, placeholder/lorem ipsum left in, truncated text, wrong labels
|
|
55
|
+
- **performance** — Slow page loads (>3s), janky scrolling, layout shifts (CLS), excessive network requests
|
|
56
|
+
- **console** — JavaScript exceptions, failed network requests (4xx/5xx), CORS errors, mixed content warnings
|
|
57
|
+
- **accessibility** — Missing alt text, unlabeled inputs, broken keyboard navigation, focus traps, insufficient contrast
|
|
58
|
+
|
|
48
59
|
## Rules
|
|
49
60
|
1. Use the accessibility snapshot (provided after each action) to understand the page.
|
|
50
61
|
2. Use click/fill/press_key to interact. References look like @e1, @e2, etc.
|
|
51
62
|
3. Use wait_for to wait for elements to appear/disappear or for the page to settle after actions.
|
|
52
|
-
4. Report findings immediately with report_finding — include severity, expected vs actual behavior, and repro steps.
|
|
63
|
+
4. Report findings immediately with report_finding — use the specific category above, include severity, expected vs actual behavior, and repro steps.
|
|
53
64
|
5. When you find a bug: take a screenshot BEFORE triggering the action and AFTER. Include expected vs actual behavior in the finding.
|
|
54
65
|
6. Mark flows done with mark_flow_done when you've tested them thoroughly.
|
|
55
66
|
7. Use take_screenshot sparingly — only for evidence of bugs or new flow entry.
|
|
@@ -329,6 +340,7 @@ export async function runAgentLoop(config, flows) {
|
|
|
329
340
|
tokensUsed: state.tokensUsed,
|
|
330
341
|
costUSD: state.costUSD,
|
|
331
342
|
durationMs: Date.now() - state.startTime,
|
|
343
|
+
healthScore: computeHealthScore(state.findings),
|
|
332
344
|
};
|
|
333
345
|
}
|
|
334
346
|
async function runVisionPass(config, state, browser, screenshotDir) {
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
2
|
// See LICENSE.txt for license information.
|
|
3
|
+
import { normalizeFindingType } from '../finding_taxonomy.js';
|
|
3
4
|
// ---------------------------------------------------------------------------
|
|
4
5
|
// Tool definitions (Anthropic tool_use schema)
|
|
5
6
|
// ---------------------------------------------------------------------------
|
|
@@ -94,11 +95,11 @@ export const TOOL_DEFINITIONS = [
|
|
|
94
95
|
},
|
|
95
96
|
{
|
|
96
97
|
name: 'report_finding',
|
|
97
|
-
description: 'Report a
|
|
98
|
+
description: 'Report a finding. Categories: visual (layout/images/alignment), functional (broken links/buttons/forms/state), ux (navigation/loading/error messages), content (typos/placeholder text/labels), performance (slow loads/layout shifts), console (JS errors/network failures), accessibility (alt text/keyboard nav/ARIA/contrast). Legacy types (bug, visual-regression, ux-issue, gap) are also accepted.',
|
|
98
99
|
input_schema: {
|
|
99
100
|
type: 'object',
|
|
100
101
|
properties: {
|
|
101
|
-
type: { type: 'string', enum: ['bug', 'visual-regression', 'ux-issue', 'gap'] },
|
|
102
|
+
type: { type: 'string', enum: ['visual', 'functional', 'ux', 'content', 'performance', 'console', 'accessibility', 'bug', 'visual-regression', 'ux-issue', 'gap'] },
|
|
102
103
|
severity: { type: 'string', enum: ['critical', 'high', 'medium', 'low', 'info'] },
|
|
103
104
|
summary: { type: 'string', description: 'What you found' },
|
|
104
105
|
repro_steps: {
|
|
@@ -215,13 +216,18 @@ export function executeTool(ctx, name, input) {
|
|
|
215
216
|
return { output: text || '(empty)' };
|
|
216
217
|
}
|
|
217
218
|
case 'report_finding': {
|
|
218
|
-
const VALID_TYPES = new Set([
|
|
219
|
+
const VALID_TYPES = new Set([
|
|
220
|
+
'visual', 'functional', 'ux', 'content', 'performance', 'console', 'accessibility',
|
|
221
|
+
'bug', 'visual-regression', 'ux-issue', 'gap',
|
|
222
|
+
]);
|
|
219
223
|
const VALID_SEVERITIES = new Set(['critical', 'high', 'medium', 'low', 'info']);
|
|
220
224
|
const rawType = String(input.type);
|
|
221
225
|
const rawSeverity = String(input.severity);
|
|
222
226
|
if (!VALID_TYPES.has(rawType)) {
|
|
223
227
|
return { output: `Invalid finding type "${rawType}". Must be one of: ${[...VALID_TYPES].join(', ')}.` };
|
|
224
228
|
}
|
|
229
|
+
// Normalize legacy types to canonical categories for health scoring
|
|
230
|
+
const canonicalType = normalizeFindingType(rawType);
|
|
225
231
|
if (!VALID_SEVERITIES.has(rawSeverity)) {
|
|
226
232
|
return { output: `Invalid severity "${rawSeverity}". Must be one of: ${[...VALID_SEVERITIES].join(', ')}.` };
|
|
227
233
|
}
|
|
@@ -260,7 +266,7 @@ export function executeTool(ctx, name, input) {
|
|
|
260
266
|
}
|
|
261
267
|
const finding = {
|
|
262
268
|
id: `f-${crypto.randomUUID()}`,
|
|
263
|
-
type:
|
|
269
|
+
type: canonicalType,
|
|
264
270
|
severity: rawSeverity,
|
|
265
271
|
summary: String(input.summary),
|
|
266
272
|
flow: ctx.currentFlow,
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
4
|
+
import { logger } from '../../logger.js';
|
|
5
|
+
import { computeHealthScore } from '../health_score.js';
|
|
6
|
+
import { isFixable } from '../finding_taxonomy.js';
|
|
7
|
+
import { FIX_TOOL_DEFINITIONS, executeFixTool } from './fix_tools.js';
|
|
8
|
+
import { WTFTracker } from './wtf_heuristic.js';
|
|
9
|
+
const MAX_ITERATIONS_PER_FIX = 15;
|
|
10
|
+
// Pricing per 1M tokens by model prefix
|
|
11
|
+
const MODEL_PRICING = {
    'claude-sonnet': { input: 3, output: 15 },
    'claude-haiku': { input: 0.25, output: 1.25 },
    'claude-opus': { input: 15, output: 75 },
};
/**
 * Resolve the pricing entry for a model name.
 *
 * The first key of MODEL_PRICING that is a prefix of `model` wins; when no
 * key matches, a fresh `{ input: 3, output: 15 }` object is returned.
 *
 * @param {string} model - Model identifier, e.g. "claude-sonnet-4-5-20250929".
 * @returns {{input: number, output: number}} Prices used by the cost tracker.
 */
function getPricing(model) {
    const matchedPrefix = Object.keys(MODEL_PRICING).find((prefix) => model.startsWith(prefix));
    if (matchedPrefix === undefined) {
        return { input: 3, output: 15 };
    }
    return MODEL_PRICING[matchedPrefix];
}
|
|
23
|
+
/**
 * Build the system prompt that instructs the fix agent how to repair one finding.
 *
 * Tolerates findings with missing evidence: an absent `evidence` object, URL or
 * repro-step list no longer throws (which the caller would have reported as a
 * failed LLM call); the prompt falls back to "Not specified" / the base URL.
 *
 * @param {object} finding - QA finding (reads id, type, severity, summary, evidence).
 * @param {string} baseUrl - Base URL of the application under test.
 * @returns {string} Markdown-formatted system prompt.
 */
function buildFixSystemPrompt(finding, baseUrl) {
    const evidence = finding.evidence || {};
    // Without a recorded URL the best the agent can do is start from the app root.
    const url = evidence.url || baseUrl;
    const steps = Array.isArray(evidence.reproSteps) ? evidence.reproSteps : [];
    const reproSteps = steps.length > 0
        ? steps.map((s, i) => `${i + 1}. ${s}`).join('\n')
        : 'Not specified';
    const consoleErrors = evidence.consoleErrors?.length
        ? `- **Console errors:** ${evidence.consoleErrors.join('; ')}`
        : '';
    return `You are a bug-fix engineer. Fix the following QA finding with the MINIMAL code change needed.

## Finding
- **ID:** ${finding.id}
- **Type:** ${finding.type}
- **Severity:** ${finding.severity}
- **Summary:** ${finding.summary}
- **URL:** ${url}
- **Expected:** ${evidence.expectedBehavior || 'Not specified'}
- **Actual:** ${evidence.actualBehavior || 'Not specified'}
- **Repro steps:** ${reproSteps}
${consoleErrors}

## Workflow
1. Use search_code to find the responsible source file(s)
2. Use read_file to understand the code
3. Use write_file to make the minimal fix
4. Use run_command to check types (npx tsc --noEmit) or lint
5. Use git_commit to create an atomic commit: fix(qa): ${finding.id} — {description}
6. Use verify_in_browser to navigate to ${url} and check the fix worked

## Rules
- Make the SMALLEST change that fixes the issue. Do NOT refactor surrounding code.
- Only modify files directly related to the bug.
- If you can't find the source after 3 search attempts, report that the fix is not possible.
- If type checking fails after your fix, revert with git_revert.
- The base URL is ${baseUrl}.
- When done, respond with text only (no tool use) explaining the result.`;
}
|
|
54
|
+
/**
 * Run the fix phase: attempt an automated fix for every finding that is
 * fixable at the configured tier, most severe first.
 *
 * The loop stops attempting (and marks the remaining findings "skipped") once
 * the WTF heuristic trips or 40% of `config.budgetUSD` has been spent.
 *
 * @param {object} config - Run configuration (reads fixTier, budgetUSD, screenshotDir, baseUrl).
 * @param {Array<object>} findings - Findings produced by the exploration phase.
 * @param {object} browser - Browser handle passed through to the fix tools.
 * @param {string} projectRoot - Root directory the fix tools operate in.
 * @returns {Promise<object>} Summary: per-finding fixes, counters per status,
 *   health score before/after, duration, tokens and cost.
 */
export async function runFixLoop(config, findings, browser, projectRoot) {
    const startTime = Date.now();
    const tier = config.fixTier || 'standard';
    const fixes = [];
    const wtf = new WTFTracker();
    let tokensUsed = 0;
    let costUSD = 0;
    // Budget: 40% of the configured total budget. A missing/non-numeric budget
    // previously produced NaN, which silently disabled the cap; make that
    // "no cap" explicit instead of relying on NaN comparisons.
    const totalBudgetUSD = Number(config.budgetUSD);
    const budgetUSD = Number.isFinite(totalBudgetUSD) ? totalBudgetUSD * 0.4 : Infinity;
    const healthScoreBefore = computeHealthScore(findings);
    // Sort by severity (critical first) and filter by tier
    const fixable = findings
        .filter((f) => isFixable(f, tier))
        .sort((a, b) => severityOrder(a.severity) - severityOrder(b.severity));
    if (fixable.length === 0) {
        logger.info('No fixable findings for tier', { tier });
        return {
            fixes: [],
            fixesAttempted: 0,
            fixesVerified: 0,
            fixesBestEffort: 0,
            fixesReverted: 0,
            fixesSkipped: 0,
            healthScoreBefore,
            healthScoreAfter: healthScoreBefore,
            durationMs: Date.now() - startTime,
            tokensUsed: 0,
            costUSD: 0,
        };
    }
    logger.info(`Fix loop: ${fixable.length} findings to fix (tier: ${tier})`);
    const client = new Anthropic();
    const model = process.env.QA_AGENT_MODEL || 'claude-sonnet-4-5-20250929';
    const screenshotDir = config.screenshotDir || '.e2e-ai-agents/qa-screenshots';
    const toolCtx = {
        projectRoot,
        browser,
        baseUrl: config.baseUrl,
        screenshotDir,
        screenshotCounter: 100, // Start at 100 to avoid collisions with Phase 2 screenshots
    };
    // Both stop conditions are monotonic (score and cost only grow), so the
    // reason is logged once rather than once per remaining finding.
    let stopLogged = false;
    for (const finding of fixable) {
        const wtfTriggered = wtf.shouldStop();
        if (wtfTriggered || costUSD >= budgetUSD) {
            if (!stopLogged) {
                if (wtfTriggered) {
                    logger.warn(`WTF heuristic triggered (score: ${wtf.score}), stopping fix loop`);
                }
                else {
                    logger.info('Fix loop budget exhausted');
                }
                stopLogged = true;
            }
            // Mark remaining as skipped
            fixes.push({ findingId: finding.id, status: 'skipped' });
            continue;
        }
        logger.info(`Fixing: [${finding.severity}] ${finding.summary}`);
        const result = await fixSingleFinding(client, model, config, finding, toolCtx);
        fixes.push(result.fix);
        tokensUsed += result.tokensUsed;
        costUSD += result.costUSD;
        wtf.recordAttempt(result.fix.status, result.fix.filesChanged?.length || 0);
    }
    // Re-score without the findings whose fix was verified. Scoring the
    // unchanged input list again would always equal healthScoreBefore.
    const verifiedIds = new Set(fixes.filter((f) => f.status === 'verified').map((f) => f.findingId));
    const healthScoreAfter = computeHealthScore(findings.filter((f) => !verifiedIds.has(f.id)));
    const countByStatus = (wanted) => fixes.filter((f) => f.status === wanted).length;
    return {
        fixes,
        fixesAttempted: fixes.filter((f) => f.status !== 'skipped').length,
        fixesVerified: countByStatus('verified'),
        fixesBestEffort: countByStatus('best-effort'),
        fixesReverted: countByStatus('reverted'),
        fixesSkipped: countByStatus('skipped'),
        healthScoreBefore,
        healthScoreAfter,
        durationMs: Date.now() - startTime,
        tokensUsed,
        costUSD,
    };
}
|
|
129
|
+
/**
 * Drive one tool-use conversation in which the model tries to fix a single finding.
 *
 * Resulting status:
 *  - "verified":    a commit exists, the agent finished on its own, and a
 *                   verify_in_browser screenshot was taken after the last revert;
 *  - "best-effort": a commit exists but the run was not verified / not finished;
 *  - "reverted":    the last successful git action was a revert;
 *  - "skipped":     nothing was committed or reverted.
 *
 * @param {object} client - Anthropic client (uses client.messages.create).
 * @param {string} model - Model identifier, also used for cost lookup.
 * @param {object} config - Run configuration (reads baseUrl).
 * @param {object} finding - Finding to fix (reads id and evidence.url).
 * @param {object} toolCtx - Shared fix-tool context; screenshotCounter is incremented.
 * @returns {Promise<{fix: object, tokensUsed: number, costUSD: number}>}
 */
async function fixSingleFinding(client, model, config, finding, toolCtx) {
    const messages = [];
    let tokensUsed = 0;
    let costUSD = 0;
    let commitHash;
    let filesChanged = [];
    let beforeScreenshot;
    let afterScreenshot;
    // True while the most recent successful git action was a revert.
    let reverted = false;
    // True when the agent ended the conversation itself with a text-only reply.
    let agentFinished = false;
    // Take "before" screenshot
    try {
        toolCtx.screenshotCounter++;
        const label = `before-fix-${finding.id.slice(-6)}`;
        const path = `${toolCtx.screenshotDir}/${String(toolCtx.screenshotCounter).padStart(3, '0')}-${label}.png`;
        toolCtx.browser.open(finding.evidence.url.startsWith('http') ? finding.evidence.url : `${config.baseUrl}${finding.evidence.url}`);
        toolCtx.browser.screenshot(path);
        beforeScreenshot = path;
    }
    catch {
        // Non-critical: the fix attempt proceeds without a "before" image.
    }
    messages.push({ role: 'user', content: 'Fix the finding described in the system prompt. Start by searching for the relevant source code.' });
    // Pricing depends only on the model, so look it up once.
    const pricing = getPricing(model);
    for (let iteration = 0; iteration < MAX_ITERATIONS_PER_FIX; iteration++) {
        let response;
        try {
            response = await client.messages.create({
                model,
                max_tokens: 4096,
                system: buildFixSystemPrompt(finding, config.baseUrl),
                tools: FIX_TOOL_DEFINITIONS,
                messages,
            });
        }
        catch (err) {
            // Keep whatever was achieved so far (commit / revert); the status
            // is derived from that state after the loop.
            logger.warn('Fix LLM call failed', { error: String(err) });
            break;
        }
        // Track cost
        const usage = response.usage;
        const inputCost = (usage.input_tokens / 1000000) * pricing.input;
        const outputCost = (usage.output_tokens / 1000000) * pricing.output;
        tokensUsed += usage.input_tokens + usage.output_tokens;
        costUSD += inputCost + outputCost;
        const assistantContent = response.content;
        messages.push({ role: 'assistant', content: assistantContent });
        // If no tool use, the agent is done
        const toolUseBlocks = assistantContent.filter((b) => b.type === 'tool_use');
        if (toolUseBlocks.length === 0) {
            agentFinished = true;
            break;
        }
        // Execute tools
        const toolResults = [];
        for (const block of toolUseBlocks) {
            let result;
            try {
                result = executeFixTool(toolCtx, block.name, block.input);
            }
            catch (err) {
                // A throwing tool must not abort the whole fix loop, and every
                // tool_use needs a matching tool_result; report the failure
                // back to the model instead.
                toolResults.push({
                    type: 'tool_result',
                    tool_use_id: block.id,
                    content: `Tool "${block.name}" failed: ${String(err)}`,
                    is_error: true,
                });
                continue;
            }
            if (result.commitHash && block.name === 'git_commit') {
                commitHash = result.commitHash;
                reverted = false;
            }
            if (result.filesChanged) {
                filesChanged = [...filesChanged, ...result.filesChanged];
            }
            if (result.screenshotPath && block.name === 'verify_in_browser') {
                afterScreenshot = result.screenshotPath;
            }
            // git_revert only reports a commitHash when the revert succeeded;
            // a failed revert leaves the fix commit in place.
            if (block.name === 'git_revert' && result.commitHash) {
                reverted = true;
                commitHash = undefined;
                filesChanged = [];
                // Screenshots taken before the revert no longer show the final state.
                afterScreenshot = undefined;
            }
            toolResults.push({
                type: 'tool_result',
                tool_use_id: block.id,
                content: result.output,
            });
        }
        messages.push({ role: 'user', content: toolResults });
    }
    // Derive the status from the final state rather than from the order in
    // which tools happened to be called.
    let status = 'skipped';
    if (commitHash) {
        status = agentFinished && afterScreenshot ? 'verified' : 'best-effort';
    }
    else if (reverted) {
        status = 'reverted';
    }
    return {
        fix: {
            findingId: finding.id,
            status,
            commitHash,
            filesChanged: [...new Set(filesChanged)],
            beforeScreenshot,
            afterScreenshot,
        },
        tokensUsed,
        costUSD,
    };
}
|
|
230
|
+
/**
 * Rank a severity label for sorting; lower ranks sort first.
 *
 * @param {string} severity - 'critical' | 'high' | 'medium' | 'low' | anything else.
 * @returns {number} 0..3 for the four known labels in that order, 4 otherwise.
 */
function severityOrder(severity) {
    const rankedLabels = ['critical', 'high', 'medium', 'low'];
    const rank = rankedLabels.indexOf(severity);
    return rank === -1 ? 4 : rank;
}
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
import { execFileSync } from 'child_process';
|
|
4
|
+
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
5
|
+
import { resolve, relative, sep } from 'path';
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// Tool definitions for the fix agent (Anthropic tool_use schema)
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Tool schemas advertised to the fix agent. Property names here are the exact
// keys executeFixTool reads from `input`, so descriptions must use the same
// spelling (snake_case) the schema declares.
export const FIX_TOOL_DEFINITIONS = [
    {
        name: 'read_file',
        description: 'Read the contents of a source file. Use start_line/end_line for large files.',
        input_schema: {
            type: 'object',
            properties: {
                path: { type: 'string', description: 'Relative path from project root' },
                start_line: { type: 'number', description: 'First line to read (1-based, optional)' },
                end_line: { type: 'number', description: 'Last line to read (inclusive, optional)' },
            },
            required: ['path'],
        },
    },
    {
        name: 'write_file',
        description: 'Write content to a file. For patches, read the file first, modify, and write back.',
        input_schema: {
            type: 'object',
            properties: {
                path: { type: 'string', description: 'Relative path from project root' },
                content: { type: 'string', description: 'Full file content to write' },
            },
            required: ['path', 'content'],
        },
    },
    {
        name: 'search_code',
        description: 'Search for a pattern in the codebase using grep. Returns matching lines with file paths and line numbers.',
        input_schema: {
            type: 'object',
            properties: {
                pattern: { type: 'string', description: 'Search pattern (regex supported)' },
                glob: { type: 'string', description: 'File glob to restrict search (e.g. "*.tsx", "src/**/*.ts")' },
            },
            required: ['pattern'],
        },
    },
    {
        name: 'run_command',
        description: 'Run an allowlisted shell command (e.g. type checking, build, lint). Not for arbitrary commands.',
        input_schema: {
            type: 'object',
            properties: {
                command: { type: 'string', description: 'Command to run (must be allowlisted)' },
            },
            required: ['command'],
        },
    },
    {
        name: 'git_commit',
        description: 'Stage changed files and create an atomic commit.',
        input_schema: {
            type: 'object',
            properties: {
                // Same format the fix system prompt asks for, so the model
                // does not receive two conflicting conventions.
                message: { type: 'string', description: 'Commit message (format: fix(qa): {finding id} — {description})' },
                files: {
                    type: 'array',
                    items: { type: 'string' },
                    description: 'Files to stage (relative paths)',
                },
            },
            required: ['message', 'files'],
        },
    },
    {
        name: 'git_revert',
        description: 'Revert the most recent commit (HEAD).',
        input_schema: {
            type: 'object',
            properties: {},
            required: [],
        },
    },
    {
        name: 'verify_in_browser',
        description: 'Navigate to a URL and take a screenshot to verify a fix. Returns the screenshot path and any console errors.',
        input_schema: {
            type: 'object',
            properties: {
                url: { type: 'string', description: 'URL to navigate to for verification' },
                label: { type: 'string', description: 'Label for the screenshot (e.g. "after-fix-001")' },
            },
            required: ['url', 'label'],
        },
    },
];
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
// Security: path and command validation
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
// Path segments the fix agent may never read, write or stage.
// `.git` is included because a writable `.git/hooks/*` would be executed by
// the next git_commit, turning write_file into arbitrary command execution.
const BLOCKED_PATHS = new Set(['.env', '.env.local', '.env.production', 'node_modules', '.git']);
/**
 * Decide whether a tool-supplied path may be touched by the fix agent.
 *
 * The check is purely lexical (it resolves `..` segments but does not follow
 * symlinks).
 *
 * @param {string} projectRoot - Directory the agent is confined to.
 * @param {string} filePath - Path supplied by the model, relative to projectRoot.
 * @returns {boolean} true when the path stays inside projectRoot and contains
 *   no blocked segment.
 */
function isPathSafe(projectRoot, filePath) {
    const resolved = resolve(projectRoot, filePath);
    const rel = relative(projectRoot, resolved);
    // Must stay within project. Test for a real ".." segment rather than a
    // ".." prefix so in-project names such as "..cache" are not rejected.
    if (rel === '..' || rel.startsWith(`..${sep}`) || rel.startsWith(sep)) {
        return false;
    }
    // Block sensitive files and directories. Compare case-insensitively because
    // ".ENV" and ".env" are the same file on case-insensitive filesystems, and
    // treat every ".env.<suffix>" variant as an env file.
    return rel.split(sep).every((part) => {
        const segment = part.toLowerCase();
        return !BLOCKED_PATHS.has(segment) && !segment.startsWith('.env.');
    });
}
|
|
116
|
+
// Commands the fix agent may run. Each entry must match a whole token:
// `(?=\s|$)` is used instead of `\b` because `\b` also matches before "-" or
// ":", which let e.g. "npx tsc-anything" through and made npx fetch and run
// an arbitrary package.
const COMMAND_ALLOWLIST = [
    /^npx tsc(?=\s|$)/,
    /^npx eslint(?=\s|$)/,
    /^npm run (build|lint|typecheck|check)(?=\s|$)/,
    /^npx playwright test(?=\s|$)/,
];
/**
 * Check a model-supplied command line against the allowlist.
 *
 * @param {string} command - Raw command string; surrounding whitespace is ignored.
 * @returns {boolean} true when the command starts with an allowlisted invocation.
 */
function isCommandAllowed(command) {
    const trimmed = String(command).trim();
    return COMMAND_ALLOWLIST.some((re) => re.test(trimmed));
}
|
|
125
|
+
// ---------------------------------------------------------------------------
|
|
126
|
+
// Tool execution
|
|
127
|
+
// ---------------------------------------------------------------------------
|
|
128
|
+
/** Run a git subcommand in the project root and return its stdout. */
function runGit(ctx, args) {
    return execFileSync('git', args, { cwd: ctx.projectRoot, encoding: 'utf-8' });
}
/**
 * Execute one fix-agent tool call and return its textual result.
 *
 * Failures are reported in `output` rather than thrown, so the caller can
 * always hand a tool_result back to the model.
 *
 * @param {object} ctx - Tool context: projectRoot, browser, baseUrl,
 *   screenshotDir, screenshotCounter (incremented by verify_in_browser) and
 *   lastFixCommit (maintained here; full hash of the last commit created by
 *   git_commit that has not been reverted).
 * @param {string} name - Tool name as declared in FIX_TOOL_DEFINITIONS.
 * @param {object} input - Tool input produced by the model (untrusted).
 * @returns {{output: string, filesChanged?: string[], commitHash?: string, screenshotPath?: string}}
 */
export function executeFixTool(ctx, name, input) {
    switch (name) {
        case 'read_file': {
            const filePath = String(input.path);
            if (!isPathSafe(ctx.projectRoot, filePath)) {
                return { output: `Blocked: "${filePath}" is outside the project or a restricted path.` };
            }
            const fullPath = resolve(ctx.projectRoot, filePath);
            if (!existsSync(fullPath)) {
                return { output: `File not found: ${filePath}` };
            }
            let content;
            try {
                content = readFileSync(fullPath, 'utf-8');
            }
            catch (err) {
                // e.g. the path is a directory or is not readable
                return { output: `Read failed for ${filePath}: ${String(err)}` };
            }
            const lines = content.split('\n');
            const startLine = Math.max(1, Number(input.start_line) || 1);
            const endLine = Math.min(lines.length, Number(input.end_line) || lines.length);
            const slice = lines.slice(startLine - 1, endLine);
            const numbered = slice.map((l, i) => `${startLine + i}: ${l}`).join('\n');
            return { output: numbered };
        }
        case 'write_file': {
            const filePath = String(input.path);
            if (!isPathSafe(ctx.projectRoot, filePath)) {
                return { output: `Blocked: "${filePath}" is outside the project or a restricted path.` };
            }
            const fullPath = resolve(ctx.projectRoot, filePath);
            try {
                writeFileSync(fullPath, String(input.content), 'utf-8');
            }
            catch (err) {
                // e.g. the parent directory does not exist
                return { output: `Write failed for ${filePath}: ${String(err)}` };
            }
            return { output: `Written: ${filePath}`, filesChanged: [filePath] };
        }
        case 'search_code': {
            const pattern = String(input.pattern);
            const glob = input.glob ? String(input.glob) : undefined;
            try {
                // Keep grep out of the paths isPathSafe blocks for read_file,
                // otherwise secrets could be read through search results.
                const args = [
                    '-rn',
                    '--max-count=20',
                    '--exclude-dir=node_modules',
                    '--exclude-dir=.git',
                    '--exclude=.env',
                    '--exclude=.env.*',
                ];
                if (glob) {
                    args.push(`--include=${glob}`);
                }
                // `-e` and `--` stop a pattern that starts with "-" from being
                // parsed as a grep option.
                args.push('-e', pattern, '--', '.');
                const result = execFileSync('grep', args, {
                    cwd: ctx.projectRoot,
                    encoding: 'utf-8',
                    timeout: 10000,
                    maxBuffer: 1024 * 1024,
                });
                return { output: result.trim() || 'No matches found.' };
            }
            catch (err) {
                const error = err;
                // grep exits with status 1 when nothing matched
                if (error.status === 1) {
                    return { output: 'No matches found.' };
                }
                return { output: `Search error: ${String(err)}` };
            }
        }
        case 'run_command': {
            const command = String(input.command).trim();
            if (!isCommandAllowed(command)) {
                return { output: `Blocked: "${command}" is not in the allowlist. Allowed: npx tsc, npx eslint, npm run build/lint/typecheck/check, npx playwright test.` };
            }
            try {
                // No shell is involved: the command is split on whitespace and
                // executed directly, so shell metacharacters have no effect.
                const parts = command.split(/\s+/);
                const result = execFileSync(parts[0], parts.slice(1), {
                    cwd: ctx.projectRoot,
                    encoding: 'utf-8',
                    timeout: 60000,
                    maxBuffer: 2 * 1024 * 1024,
                });
                return { output: result.trim() || '(no output)' };
            }
            catch (err) {
                const error = err;
                const stdout = error.stdout || '';
                const stderr = error.stderr || '';
                return { output: `Command failed:\n${stdout}\n${stderr}`.trim() };
            }
        }
        case 'git_commit': {
            const message = String(input.message);
            const files = Array.isArray(input.files) ? input.files.map(String) : [];
            if (files.length === 0) {
                return { output: 'No files specified for commit.' };
            }
            // Validate all files are safe
            for (const f of files) {
                if (!isPathSafe(ctx.projectRoot, f)) {
                    return { output: `Blocked: "${f}" is outside the project or a restricted path.` };
                }
            }
            try {
                // `--` keeps a file name such as "-A" from being read as an option.
                runGit(ctx, ['add', '--', ...files]);
                runGit(ctx, ['commit', '-m', message]);
                // Remember the commit so git_revert can refuse to touch anything else.
                ctx.lastFixCommit = runGit(ctx, ['rev-parse', 'HEAD']).trim();
                const hash = runGit(ctx, ['rev-parse', '--short', 'HEAD']).trim();
                return { output: `Committed: ${hash} — ${message}`, commitHash: hash, filesChanged: files };
            }
            catch (err) {
                const error = err;
                return { output: `Git commit failed: ${error.stderr || String(err)}` };
            }
        }
        case 'git_revert': {
            try {
                // Only revert a commit this tool created. Otherwise a stray
                // call would revert the user's own last commit, and a second
                // call would revert the revert.
                const head = runGit(ctx, ['rev-parse', 'HEAD']).trim();
                if (!ctx.lastFixCommit || ctx.lastFixCommit !== head) {
                    return { output: 'Nothing to revert: HEAD is not a commit created by git_commit in this session.' };
                }
                runGit(ctx, ['revert', '--no-edit', 'HEAD']);
                ctx.lastFixCommit = undefined;
                const hash = runGit(ctx, ['rev-parse', '--short', 'HEAD']).trim();
                return { output: `Reverted HEAD. New HEAD: ${hash}`, commitHash: hash };
            }
            catch (err) {
                const error = err;
                return { output: `Git revert failed: ${error.stderr || String(err)}` };
            }
        }
        case 'verify_in_browser': {
            const url = String(input.url);
            // Restrict the label to filename-safe characters.
            const label = String(input.label || 'verify').replace(/[^a-zA-Z0-9_-]/g, '_');
            ctx.screenshotCounter++;
            const filename = `${String(ctx.screenshotCounter).padStart(3, '0')}-${label}.png`;
            const screenshotPath = `${ctx.screenshotDir}/${filename}`;
            try {
                ctx.browser.open(url.startsWith('http') ? url : `${ctx.baseUrl}${url}`);
                ctx.browser.screenshot(screenshotPath);
            }
            catch (err) {
                // No screenshotPath is returned, so the caller does not count
                // this as a completed verification.
                return { output: `Browser verification failed: ${String(err)}` };
            }
            // Capture console errors
            let consoleErrors = '';
            try {
                const raw = ctx.browser.evaluateInternal('JSON.stringify(window.__consoleErrors || [])');
                const errors = JSON.parse(raw);
                if (Array.isArray(errors) && errors.length > 0) {
                    consoleErrors = `\nConsole errors: ${errors.slice(-5).join('; ')}`;
                }
            }
            catch {
                // Not available
            }
            return { output: `Screenshot saved: ${screenshotPath}${consoleErrors}`, screenshotPath };
        }
        default:
            return { output: `Unknown fix tool: ${name}` };
    }
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
/**
|
|
4
|
+
* Tracks fix-loop health using a WTF-likelihood heuristic.
|
|
5
|
+
*
|
|
6
|
+
* Accumulates risk based on:
|
|
7
|
+
* - Each revert: +15%
|
|
8
|
+
* - Each fix touching >3 files: +5%
|
|
9
|
+
* - After fix #15: +1% per additional fix
|
|
10
|
+
* - All-low-severity batch: +10%
|
|
11
|
+
* - Touching files outside the affected area: +20%
|
|
12
|
+
*
|
|
13
|
+
* When WTF > 20%, the fix loop should stop.
|
|
14
|
+
*/
|
|
15
|
+
export class WTFTracker {
    constructor() {
        // Accumulated risk score.
        this.wtf = 0;
        // Number of attempts recorded so far.
        this.totalFixes = 0;
        // Length of the current run of back-to-back reverts.
        this.consecutiveReverts = 0;
    }
    /**
     * Record the outcome of one fix attempt.
     * Adds 15 for a revert, 5 when more than 3 files changed, and 1 for every
     * attempt past the 15th.
     */
    recordAttempt(status, filesChanged) {
        this.totalFixes += 1;
        const wasReverted = status === 'reverted';
        this.consecutiveReverts = wasReverted ? this.consecutiveReverts + 1 : 0;
        let penalty = wasReverted ? 15 : 0;
        if (filesChanged > 3) {
            penalty += 5;
        }
        if (this.totalFixes > 15) {
            penalty += 1;
        }
        this.wtf += penalty;
    }
    /** Add 20 to the score. */
    recordUnrelatedFileTouch() {
        this.wtf += 20;
    }
    /** Add 10 to the score. */
    recordAllLowSeverityBatch() {
        this.wtf += 10;
    }
    /**
     * True when the attempt cap is reached, three reverts happened in a row,
     * or the score exceeds 20.
     */
    shouldStop() {
        const capReached = this.totalFixes >= WTFTracker.MAX_FIXES;
        const revertStreak = this.consecutiveReverts >= 3;
        return capReached || revertStreak || this.wtf > 20;
    }
    /** Current risk score. */
    get score() {
        return this.wtf;
    }
    /** Number of attempts recorded. */
    get fixes() {
        return this.totalFixes;
    }
}
/** Hard cap — stop regardless after this many fixes. */
WTFTracker.MAX_FIXES = 50;
|