@yasserkhanorg/impact-gate 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +1 -1
  2. package/dist/cli/commands/install_skill.d.ts +2 -0
  3. package/dist/cli/commands/install_skill.d.ts.map +1 -0
  4. package/dist/cli/commands/install_skill.js +60 -0
  5. package/dist/cli/parse_args.js +1 -1
  6. package/dist/cli/types.d.ts +1 -1
  7. package/dist/cli/types.d.ts.map +1 -1
  8. package/dist/cli/usage.d.ts.map +1 -1
  9. package/dist/cli/usage.js +1 -0
  10. package/dist/cli.js +7 -1
  11. package/dist/esm/cli/commands/install_skill.js +57 -0
  12. package/dist/esm/cli/parse_args.js +1 -1
  13. package/dist/esm/cli/usage.js +1 -0
  14. package/dist/esm/cli.js +7 -1
  15. package/dist/esm/qa-agent/cli.js +26 -0
  16. package/dist/esm/qa-agent/finding_taxonomy.js +102 -0
  17. package/dist/esm/qa-agent/health_score.js +99 -0
  18. package/dist/esm/qa-agent/orchestrator.js +67 -9
  19. package/dist/esm/qa-agent/phase2/agent_loop.js +13 -1
  20. package/dist/esm/qa-agent/phase2/tools.js +10 -4
  21. package/dist/esm/qa-agent/phase25/fix_loop.js +238 -0
  22. package/dist/esm/qa-agent/phase25/fix_tools.js +262 -0
  23. package/dist/esm/qa-agent/phase25/wtf_heuristic.js +60 -0
  24. package/dist/esm/qa-agent/phase3/reporter.js +100 -30
  25. package/dist/esm/qa-agent/phase3/verdict.js +21 -3
  26. package/dist/esm/qa-agent/regression/baseline.js +89 -0
  27. package/dist/qa-agent/cli.js +26 -0
  28. package/dist/qa-agent/finding_taxonomy.d.ts +23 -0
  29. package/dist/qa-agent/finding_taxonomy.d.ts.map +1 -0
  30. package/dist/qa-agent/finding_taxonomy.js +108 -0
  31. package/dist/qa-agent/health_score.d.ts +19 -0
  32. package/dist/qa-agent/health_score.d.ts.map +1 -0
  33. package/dist/qa-agent/health_score.js +104 -0
  34. package/dist/qa-agent/orchestrator.d.ts.map +1 -1
  35. package/dist/qa-agent/orchestrator.js +67 -9
  36. package/dist/qa-agent/phase2/agent_loop.d.ts.map +1 -1
  37. package/dist/qa-agent/phase2/agent_loop.js +13 -1
  38. package/dist/qa-agent/phase2/tools.d.ts.map +1 -1
  39. package/dist/qa-agent/phase2/tools.js +10 -4
  40. package/dist/qa-agent/phase25/fix_loop.d.ts +4 -0
  41. package/dist/qa-agent/phase25/fix_loop.d.ts.map +1 -0
  42. package/dist/qa-agent/phase25/fix_loop.js +244 -0
  43. package/dist/qa-agent/phase25/fix_tools.d.ts +18 -0
  44. package/dist/qa-agent/phase25/fix_tools.d.ts.map +1 -0
  45. package/dist/qa-agent/phase25/fix_tools.js +266 -0
  46. package/dist/qa-agent/phase25/wtf_heuristic.d.ts +27 -0
  47. package/dist/qa-agent/phase25/wtf_heuristic.d.ts.map +1 -0
  48. package/dist/qa-agent/phase25/wtf_heuristic.js +64 -0
  49. package/dist/qa-agent/phase3/reporter.d.ts +2 -2
  50. package/dist/qa-agent/phase3/reporter.d.ts.map +1 -1
  51. package/dist/qa-agent/phase3/reporter.js +100 -30
  52. package/dist/qa-agent/phase3/verdict.d.ts +2 -2
  53. package/dist/qa-agent/phase3/verdict.d.ts.map +1 -1
  54. package/dist/qa-agent/phase3/verdict.js +21 -3
  55. package/dist/qa-agent/regression/baseline.d.ts +14 -0
  56. package/dist/qa-agent/regression/baseline.d.ts.map +1 -0
  57. package/dist/qa-agent/regression/baseline.js +94 -0
  58. package/dist/qa-agent/types.d.ts +65 -2
  59. package/dist/qa-agent/types.d.ts.map +1 -1
  60. package/package.json +2 -1
  61. package/skills/qa/SKILL.md +138 -0
@@ -6,6 +6,7 @@ import { AgentBrowser } from './agent_browser.js';
6
6
  import { TOOL_DEFINITIONS, executeTool } from './tools.js';
7
7
  import { createExplorationState, recordAction, recordFinding, markFlowExplored, nextFlow, isStuck, isBudgetExhausted, allFlowsExplored, updateCost, compressActionsLog, } from './exploration_state.js';
8
8
  import { analyzeScreenshot } from './vision.js';
9
+ import { computeHealthScore } from '../health_score.js';
9
10
  const MAX_ITERATIONS = 200;
10
11
  const COMPRESS_EVERY = 20;
11
12
  const MAX_LLM_RETRIES = 2;
@@ -45,11 +46,21 @@ For each flow, pick 3-4 of the most relevant dimensions based on what the flow d
45
46
 
46
47
  Pick dimensions that matter for THIS flow. Example: for "channel settings" → permissions + edge cases + state persistence. For "messaging" → happy path + error recovery + console health. Do NOT mechanically follow all 7.
47
48
 
49
+ ## Finding Categories
50
+ When reporting findings, use the most specific category:
51
+ - **visual** — Layout breaks, broken images, z-index issues, alignment, animation glitches, dark mode problems
52
+ - **functional** — Broken links, dead buttons, form validation failures, incorrect redirects, race conditions, state not persisting
53
+ - **ux** — Confusing navigation, missing loading indicators, slow interactions (>500ms), unclear error messages, no confirmation before destructive actions
54
+ - **content** — Typos, grammar errors, placeholder/lorem ipsum left in, truncated text, wrong labels
55
+ - **performance** — Slow page loads (>3s), janky scrolling, layout shifts (CLS), excessive network requests
56
+ - **console** — JavaScript exceptions, failed network requests (4xx/5xx), CORS errors, mixed content warnings
57
+ - **accessibility** — Missing alt text, unlabeled inputs, broken keyboard navigation, focus traps, insufficient contrast
58
+
48
59
  ## Rules
49
60
  1. Use the accessibility snapshot (provided after each action) to understand the page.
50
61
  2. Use click/fill/press_key to interact. References look like @e1, @e2, etc.
51
62
  3. Use wait_for to wait for elements to appear/disappear or for the page to settle after actions.
52
- 4. Report findings immediately with report_finding — include severity, expected vs actual behavior, and repro steps.
63
+ 4. Report findings immediately with report_finding — use the specific category above, include severity, expected vs actual behavior, and repro steps.
53
64
  5. When you find a bug: take a screenshot BEFORE triggering the action and AFTER. Include expected vs actual behavior in the finding.
54
65
  6. Mark flows done with mark_flow_done when you've tested them thoroughly.
55
66
  7. Use take_screenshot sparingly — only for evidence of bugs or new flow entry.
@@ -329,6 +340,7 @@ export async function runAgentLoop(config, flows) {
329
340
  tokensUsed: state.tokensUsed,
330
341
  costUSD: state.costUSD,
331
342
  durationMs: Date.now() - state.startTime,
343
+ healthScore: computeHealthScore(state.findings),
332
344
  };
333
345
  }
334
346
  async function runVisionPass(config, state, browser, screenshotDir) {
@@ -1,5 +1,6 @@
1
1
  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
2
  // See LICENSE.txt for license information.
3
+ import { normalizeFindingType } from '../finding_taxonomy.js';
3
4
  // ---------------------------------------------------------------------------
4
5
  // Tool definitions (Anthropic tool_use schema)
5
6
  // ---------------------------------------------------------------------------
@@ -94,11 +95,11 @@ export const TOOL_DEFINITIONS = [
94
95
  },
95
96
  {
96
97
  name: 'report_finding',
97
- description: 'Report a bug, visual issue, UX problem, or gap you discovered. Include expected/actual behavior and repro steps. Take before/after screenshots before calling this.',
98
+ description: 'Report a finding. Categories: visual (layout/images/alignment), functional (broken links/buttons/forms/state), ux (navigation/loading/error messages), content (typos/placeholder text/labels), performance (slow loads/layout shifts), console (JS errors/network failures), accessibility (alt text/keyboard nav/ARIA/contrast). Legacy types (bug, visual-regression, ux-issue, gap) are also accepted.',
98
99
  input_schema: {
99
100
  type: 'object',
100
101
  properties: {
101
- type: { type: 'string', enum: ['bug', 'visual-regression', 'ux-issue', 'gap'] },
102
+ type: { type: 'string', enum: ['visual', 'functional', 'ux', 'content', 'performance', 'console', 'accessibility', 'bug', 'visual-regression', 'ux-issue', 'gap'] },
102
103
  severity: { type: 'string', enum: ['critical', 'high', 'medium', 'low', 'info'] },
103
104
  summary: { type: 'string', description: 'What you found' },
104
105
  repro_steps: {
@@ -215,13 +216,18 @@ export function executeTool(ctx, name, input) {
215
216
  return { output: text || '(empty)' };
216
217
  }
217
218
  case 'report_finding': {
218
- const VALID_TYPES = new Set(['bug', 'visual-regression', 'ux-issue', 'gap']);
219
+ const VALID_TYPES = new Set([
220
+ 'visual', 'functional', 'ux', 'content', 'performance', 'console', 'accessibility',
221
+ 'bug', 'visual-regression', 'ux-issue', 'gap',
222
+ ]);
219
223
  const VALID_SEVERITIES = new Set(['critical', 'high', 'medium', 'low', 'info']);
220
224
  const rawType = String(input.type);
221
225
  const rawSeverity = String(input.severity);
222
226
  if (!VALID_TYPES.has(rawType)) {
223
227
  return { output: `Invalid finding type "${rawType}". Must be one of: ${[...VALID_TYPES].join(', ')}.` };
224
228
  }
229
+ // Normalize legacy types to canonical categories for health scoring
230
+ const canonicalType = normalizeFindingType(rawType);
225
231
  if (!VALID_SEVERITIES.has(rawSeverity)) {
226
232
  return { output: `Invalid severity "${rawSeverity}". Must be one of: ${[...VALID_SEVERITIES].join(', ')}.` };
227
233
  }
@@ -260,7 +266,7 @@ export function executeTool(ctx, name, input) {
260
266
  }
261
267
  const finding = {
262
268
  id: `f-${crypto.randomUUID()}`,
263
- type: rawType,
269
+ type: canonicalType,
264
270
  severity: rawSeverity,
265
271
  summary: String(input.summary),
266
272
  flow: ctx.currentFlow,
@@ -0,0 +1,238 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
+ import Anthropic from '@anthropic-ai/sdk';
4
+ import { logger } from '../../logger.js';
5
+ import { computeHealthScore } from '../health_score.js';
6
+ import { isFixable } from '../finding_taxonomy.js';
7
+ import { FIX_TOOL_DEFINITIONS, executeFixTool } from './fix_tools.js';
8
+ import { WTFTracker } from './wtf_heuristic.js';
9
+ const MAX_ITERATIONS_PER_FIX = 15;
10
// Pricing in USD per 1M tokens, keyed by model-family prefix.
const MODEL_PRICING = {
    'claude-sonnet': { input: 3, output: 15 },
    'claude-haiku': { input: 0.25, output: 1.25 },
    'claude-opus': { input: 15, output: 75 },
};
/**
 * Look up the per-1M-token pricing for a model identifier.
 *
 * Matching happens in two passes:
 *   1. Prefix match against the MODEL_PRICING keys (e.g. "claude-sonnet-4-5-...").
 *   2. Family-token match, so identifiers that put the version before the family
 *      name (e.g. "claude-3-opus-20240229", "claude-3-5-haiku-...") or carry a
 *      provider prefix are still priced by their family instead of silently
 *      falling through to the default.
 *
 * @param {string} model - Model identifier.
 * @returns {{input: number, output: number}} USD per 1M input/output tokens.
 *   Falls back to { input: 3, output: 15 } when the family cannot be determined
 *   (including when `model` is not a string).
 */
function getPricing(model) {
    const id = typeof model === 'string' ? model.toLowerCase() : '';
    const table = Object.entries(MODEL_PRICING);
    for (const [prefix, pricing] of table) {
        if (id.startsWith(prefix)) {
            return pricing;
        }
    }
    // Second pass: look for the bare family name ("sonnet", "haiku", "opus")
    // as a delimited token anywhere in the identifier.
    const tokens = id.split(/[^a-z0-9]+/);
    for (const [prefix, pricing] of table) {
        const family = prefix.slice('claude-'.length);
        if (tokens.includes(family)) {
            return pricing;
        }
    }
    return { input: 3, output: 15 };
}
23
/**
 * Build the system prompt that instructs the fix agent how to repair one finding.
 *
 * Missing evidence is tolerated: an absent `evidence` object, `reproSteps`
 * array or `url` no longer throws or prints "undefined" — repro steps fall back
 * to "Not specified" and the URL falls back to `baseUrl`.
 *
 * @param {object} finding - Finding with `id`, `type`, `severity`, `summary`
 *   and an optional `evidence` object (`url`, `expectedBehavior`,
 *   `actualBehavior`, `reproSteps`, `consoleErrors`).
 * @param {string} baseUrl - Base URL of the application under test.
 * @returns {string} The prompt text.
 */
function buildFixSystemPrompt(finding, baseUrl) {
    const evidence = finding.evidence ?? {};
    const url = evidence.url ?? baseUrl;
    const steps = Array.isArray(evidence.reproSteps) ? evidence.reproSteps : [];
    const reproText = steps.length > 0
        ? steps.map((s, i) => `${i + 1}. ${s}`).join('\n')
        : 'Not specified';
    // Rendered as an empty line when there are no console errors.
    const consoleLine = evidence.consoleErrors?.length
        ? `- **Console errors:** ${evidence.consoleErrors.join('; ')}`
        : '';
    return `You are a bug-fix engineer. Fix the following QA finding with the MINIMAL code change needed.

## Finding
- **ID:** ${finding.id}
- **Type:** ${finding.type}
- **Severity:** ${finding.severity}
- **Summary:** ${finding.summary}
- **URL:** ${url}
- **Expected:** ${evidence.expectedBehavior || 'Not specified'}
- **Actual:** ${evidence.actualBehavior || 'Not specified'}
- **Repro steps:** ${reproText}
${consoleLine}

## Workflow
1. Use search_code to find the responsible source file(s)
2. Use read_file to understand the code
3. Use write_file to make the minimal fix
4. Use run_command to check types (npx tsc --noEmit) or lint
5. Use git_commit to create an atomic commit: fix(qa): ${finding.id} — {description}
6. Use verify_in_browser to navigate to ${url} and check the fix worked

## Rules
- Make the SMALLEST change that fixes the issue. Do NOT refactor surrounding code.
- Only modify files directly related to the bug.
- If you can't find the source after 3 search attempts, report that the fix is not possible.
- If type checking fails after your fix, revert with git_revert.
- The base URL is ${baseUrl}.
- When done, respond with text only (no tool use) explaining the result.`;
}
54
/**
 * Phase 2.5: attempt automated fixes for the fixable findings, most severe first.
 *
 * Stops attempting (and marks the remaining findings as `skipped`) once the
 * WTF heuristic trips or the fix budget is spent.
 *
 * @param {object} config - QA agent config; reads `fixTier`, `budgetUSD`,
 *   `screenshotDir` and `baseUrl`.
 * @param {Array<object>} findings - Findings reported by the exploration phase.
 * @param {object} browser - Browser wrapper handed to the fix tools.
 * @param {string} projectRoot - Root directory the fix tools operate in.
 * @returns {Promise<object>} Summary with per-finding `fixes`, status counters,
 *   health scores before/after, duration, token usage and cost.
 */
export async function runFixLoop(config, findings, browser, projectRoot) {
    const startTime = Date.now();
    const tier = config.fixTier || 'standard';
    const fixes = [];
    const wtf = new WTFTracker();
    let tokensUsed = 0;
    let costUSD = 0;
    // Budget: 40% of the configured total budget (spend from earlier phases is
    // not subtracted here).
    const budgetUSD = config.budgetUSD * 0.4;
    const healthScoreBefore = computeHealthScore(findings);
    // Filter by tier, then sort by severity (critical first)
    const fixable = findings
        .filter((f) => isFixable(f, tier))
        .sort((a, b) => severityOrder(a.severity) - severityOrder(b.severity));
    if (fixable.length === 0) {
        logger.info('No fixable findings for tier', { tier });
        return {
            fixes: [],
            fixesAttempted: 0,
            fixesVerified: 0,
            fixesBestEffort: 0,
            fixesReverted: 0,
            fixesSkipped: 0,
            healthScoreBefore,
            healthScoreAfter: healthScoreBefore,
            durationMs: 0,
            tokensUsed: 0,
            costUSD: 0,
        };
    }
    logger.info(`Fix loop: ${fixable.length} findings to fix (tier: ${tier})`);
    const client = new Anthropic();
    const model = process.env.QA_AGENT_MODEL || 'claude-sonnet-4-5-20250929';
    const screenshotDir = config.screenshotDir || '.e2e-ai-agents/qa-screenshots';
    const toolCtx = {
        projectRoot,
        browser,
        baseUrl: config.baseUrl,
        screenshotDir,
        screenshotCounter: 100, // Start at 100 to avoid collisions with Phase 2 screenshots
    };
    // Once a stop condition trips nothing else is attempted, so the tracker and
    // cost cannot change again: latch it and log the reason only once instead
    // of once per remaining finding.
    let stopped = false;
    for (const finding of fixable) {
        if (!stopped) {
            if (wtf.shouldStop()) {
                logger.warn(`WTF heuristic triggered (score: ${wtf.score}), stopping fix loop`);
                stopped = true;
            }
            else if (costUSD >= budgetUSD) {
                logger.info('Fix loop budget exhausted');
                stopped = true;
            }
        }
        if (stopped) {
            // Mark remaining as skipped
            fixes.push({ findingId: finding.id, status: 'skipped' });
            continue;
        }
        logger.info(`Fixing: [${finding.severity}] ${finding.summary}`);
        const result = await fixSingleFinding(client, model, config, finding, toolCtx);
        fixes.push(result.fix);
        tokensUsed += result.tokensUsed;
        costUSD += result.costUSD;
        wtf.recordAttempt(result.fix.status, result.fix.filesChanged?.length || 0);
    }
    // Score what is left after removing findings whose fix was verified in the
    // browser. Re-scoring the untouched `findings` array would always equal
    // healthScoreBefore. NOTE(review): assumes "verified" is the only status
    // that should count as resolved — confirm against the reporter.
    const verifiedIds = new Set(fixes.filter((f) => f.status === 'verified').map((f) => f.findingId));
    const healthScoreAfter = computeHealthScore(findings.filter((f) => !verifiedIds.has(f.id)));
    const countByStatus = (wanted) => fixes.filter((f) => f.status === wanted).length;
    const fixesSkipped = countByStatus('skipped');
    return {
        fixes,
        fixesAttempted: fixes.length - fixesSkipped,
        fixesVerified: countByStatus('verified'),
        fixesBestEffort: countByStatus('best-effort'),
        fixesReverted: countByStatus('reverted'),
        fixesSkipped,
        healthScoreBefore,
        healthScoreAfter,
        durationMs: Date.now() - startTime,
        tokensUsed,
        costUSD,
    };
}
129
/**
 * Run the tool-using fix agent against a single finding.
 *
 * Takes a best-effort "before" screenshot, then loops (at most
 * MAX_ITERATIONS_PER_FIX times) sending the conversation to the model and
 * executing the tools it requests until it answers with text only.
 *
 * Resulting status:
 *   - `verified`    a commit exists and verify_in_browser produced a screenshot
 *   - `best-effort` a commit exists but no verification screenshot
 *   - `reverted`    the last successful git_revert was not followed by a new
 *                   commit before the agent finished
 *   - `skipped`     nothing was committed (or the LLM call failed)
 *
 * @param {object} client - Anthropic SDK client.
 * @param {string} model - Model identifier.
 * @param {object} config - QA agent config; reads `baseUrl`.
 * @param {object} finding - The finding to fix.
 * @param {object} toolCtx - Shared fix-tool context (mutated: screenshotCounter).
 * @returns {Promise<{fix: object, tokensUsed: number, costUSD: number}>}
 */
async function fixSingleFinding(client, model, config, finding, toolCtx) {
    const messages = [];
    let tokensUsed = 0;
    let costUSD = 0;
    let commitHash;
    let filesChanged = [];
    let beforeScreenshot;
    let afterScreenshot;
    let status = 'skipped';
    // Take "before" screenshot
    try {
        toolCtx.screenshotCounter++;
        const label = `before-fix-${finding.id.slice(-6)}`;
        const path = `${toolCtx.screenshotDir}/${String(toolCtx.screenshotCounter).padStart(3, '0')}-${label}.png`;
        toolCtx.browser.open(finding.evidence.url.startsWith('http') ? finding.evidence.url : `${config.baseUrl}${finding.evidence.url}`);
        toolCtx.browser.screenshot(path);
        beforeScreenshot = path;
    }
    catch {
        // Non-critical
    }
    messages.push({ role: 'user', content: 'Fix the finding described in the system prompt. Start by searching for the relevant source code.' });
    // Loop-invariant: pricing depends only on the model.
    const pricing = getPricing(model);
    // Built lazily inside the try below so a prompt-building failure is
    // handled exactly like a failed LLM call, but only built once.
    let systemPrompt;
    for (let iteration = 0; iteration < MAX_ITERATIONS_PER_FIX; iteration++) {
        let response;
        try {
            if (systemPrompt === undefined) {
                systemPrompt = buildFixSystemPrompt(finding, config.baseUrl);
            }
            response = await client.messages.create({
                model,
                max_tokens: 4096,
                system: systemPrompt,
                tools: FIX_TOOL_DEFINITIONS,
                messages,
            });
        }
        catch (err) {
            logger.warn('Fix LLM call failed', { error: String(err) });
            status = 'skipped';
            break;
        }
        // Track cost
        const usage = response.usage;
        const inputCost = (usage.input_tokens / 1000000) * pricing.input;
        const outputCost = (usage.output_tokens / 1000000) * pricing.output;
        tokensUsed += usage.input_tokens + usage.output_tokens;
        costUSD += inputCost + outputCost;
        const assistantContent = response.content;
        messages.push({ role: 'assistant', content: assistantContent });
        // If no tool use, the agent is done
        const toolUseBlocks = assistantContent.filter((b) => b.type === 'tool_use');
        if (toolUseBlocks.length === 0) {
            // Determine status from what happened
            if (commitHash) {
                status = afterScreenshot ? 'verified' : 'best-effort';
            }
            break;
        }
        // Execute tools
        const toolResults = [];
        for (const block of toolUseBlocks) {
            let result;
            try {
                result = executeFixTool(toolCtx, block.name, block.input);
            }
            catch (err) {
                // A throwing tool (fs or browser failure) must not abort the
                // whole fix loop, and every tool_use still needs a matching
                // tool_result in the next user message.
                logger.warn('Fix tool threw', { tool: block.name, error: String(err) });
                toolResults.push({
                    type: 'tool_result',
                    tool_use_id: block.id,
                    content: `Tool "${block.name}" failed: ${String(err)}`,
                    is_error: true,
                });
                continue;
            }
            if (result.commitHash && block.name === 'git_commit') {
                commitHash = result.commitHash;
            }
            if (result.filesChanged) {
                filesChanged = [...filesChanged, ...result.filesChanged];
            }
            if (result.screenshotPath && block.name === 'verify_in_browser') {
                afterScreenshot = result.screenshotPath;
            }
            // git_revert only returns a commitHash when the revert succeeded;
            // a failed revert leaves the fix commit in place, so keep tracking it.
            if (block.name === 'git_revert' && result.commitHash) {
                status = 'reverted';
                commitHash = undefined;
                filesChanged = [];
                // A screenshot taken before the revert cannot verify a later commit.
                afterScreenshot = undefined;
            }
            toolResults.push({
                type: 'tool_result',
                tool_use_id: block.id,
                content: result.output,
            });
        }
        messages.push({ role: 'user', content: toolResults });
    }
    // If we have a commit but didn't get classified yet
    if (status === 'skipped' && commitHash) {
        status = 'best-effort';
    }
    return {
        fix: {
            findingId: finding.id,
            status,
            commitHash,
            filesChanged: [...new Set(filesChanged)],
            beforeScreenshot,
            afterScreenshot,
        },
        tokensUsed,
        costUSD,
    };
}
230
/**
 * Map a severity label to its sort rank (lower sorts first).
 *
 * @param {string} severity - Severity label.
 * @returns {number} 0 for 'critical', 1 for 'high', 2 for 'medium',
 *   3 for 'low', and 4 for anything else.
 */
function severityOrder(severity) {
    // indexOf compares with strict equality, the same as a switch statement.
    const rank = ['critical', 'high', 'medium', 'low'].indexOf(severity);
    return rank === -1 ? 4 : rank;
}
@@ -0,0 +1,262 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
+ import { execFileSync } from 'child_process';
4
+ import { readFileSync, writeFileSync, existsSync } from 'fs';
5
+ import { resolve, relative, sep } from 'path';
6
+ // ---------------------------------------------------------------------------
7
+ // Tool definitions for the fix agent (Anthropic tool_use schema)
8
+ // ---------------------------------------------------------------------------
9
/**
 * Tool schemas exposed to the fix agent. Each `name` has a matching case in
 * executeFixTool; descriptions are read by the model, so parameter names
 * mentioned in them must match the schema exactly.
 */
export const FIX_TOOL_DEFINITIONS = [
    {
        name: 'read_file',
        // Names the real schema fields (start_line/end_line), not camelCase variants.
        description: 'Read the contents of a source file. Use start_line/end_line for large files.',
        input_schema: {
            type: 'object',
            properties: {
                path: { type: 'string', description: 'Relative path from project root' },
                start_line: { type: 'number', description: 'First line to read (1-based, optional)' },
                end_line: { type: 'number', description: 'Last line to read (inclusive, optional)' },
            },
            required: ['path'],
        },
    },
    {
        name: 'write_file',
        description: 'Write content to a file. For patches, read the file first, modify, and write back.',
        input_schema: {
            type: 'object',
            properties: {
                path: { type: 'string', description: 'Relative path from project root' },
                content: { type: 'string', description: 'Full file content to write' },
            },
            required: ['path', 'content'],
        },
    },
    {
        name: 'search_code',
        description: 'Search for a pattern in the codebase using grep. Returns matching lines with file paths and line numbers.',
        input_schema: {
            type: 'object',
            properties: {
                pattern: { type: 'string', description: 'Search pattern (regex supported)' },
                glob: { type: 'string', description: 'File glob to restrict search (e.g. "*.tsx", "src/**/*.ts")' },
            },
            required: ['pattern'],
        },
    },
    {
        name: 'run_command',
        description: 'Run an allowlisted shell command (e.g. type checking, build, lint). Not for arbitrary commands.',
        input_schema: {
            type: 'object',
            properties: {
                command: { type: 'string', description: 'Command to run (must be allowlisted)' },
            },
            required: ['command'],
        },
    },
    {
        name: 'git_commit',
        description: 'Stage changed files and create an atomic commit.',
        input_schema: {
            type: 'object',
            properties: {
                // Same format the fix system prompt asks for: the finding ID, with no extra prefix.
                message: { type: 'string', description: 'Commit message (format: fix(qa): {finding id} — {description})' },
                files: {
                    type: 'array',
                    items: { type: 'string' },
                    description: 'Files to stage (relative paths)',
                },
            },
            required: ['message', 'files'],
        },
    },
    {
        name: 'git_revert',
        description: 'Revert the most recent commit (HEAD).',
        input_schema: {
            type: 'object',
            properties: {},
            required: [],
        },
    },
    {
        name: 'verify_in_browser',
        description: 'Navigate to a URL and take a screenshot to verify a fix. Returns the screenshot path and any console errors.',
        input_schema: {
            type: 'object',
            properties: {
                url: { type: 'string', description: 'URL to navigate to for verification' },
                label: { type: 'string', description: 'Label for the screenshot (e.g. "after-fix-001")' },
            },
            required: ['url', 'label'],
        },
    },
];
96
+ // ---------------------------------------------------------------------------
97
+ // Security: path and command validation
98
+ // ---------------------------------------------------------------------------
99
// Path segments the fix agent may never read, write or commit.
// `.git` is included so the agent cannot plant hooks or read repository config.
const BLOCKED_PATHS = new Set(['.env', '.env.local', '.env.production', 'node_modules', '.git']);
/**
 * Decide whether an agent-supplied path may be touched.
 *
 * A path is safe when, after resolution against `projectRoot`, it stays inside
 * the project and none of its segments is a blocked name. Blocked names are
 * compared case-insensitively, and every `.env.*` variant is blocked, not only
 * the ones listed in BLOCKED_PATHS.
 *
 * Symlinks are not resolved here, so a link inside the project that points
 * outside of it is not detected.
 *
 * @param {string} projectRoot - Absolute project root.
 * @param {string} filePath - Path supplied by the agent (relative or absolute).
 * @returns {boolean} True when the path is allowed.
 */
function isPathSafe(projectRoot, filePath) {
    const resolved = resolve(projectRoot, filePath);
    const rel = relative(projectRoot, resolved);
    // Must stay within project. Compare against the ".." segment itself so a
    // legitimate entry such as "..config" is not rejected by a prefix test.
    if (rel === '..' || rel.startsWith(`..${sep}`) || rel.startsWith(sep)) {
        return false;
    }
    // Block sensitive files and directories
    return rel.split(sep).every((part) => {
        const name = part.toLowerCase();
        return !BLOCKED_PATHS.has(name) && !name.startsWith('.env.');
    });
}
116
// Commands the fix agent may run. The npx entries require whitespace or
// end-of-string after the tool name: a bare `\b` also matches before "-" or
// ".", which would let "npx tsc-anything" through and make npx fetch and run
// an arbitrary package. `npm run` keeps `\b` so project scripts such as
// "build:prod" remain allowed.
const COMMAND_ALLOWLIST = [
    /^npx tsc(?=\s|$)/,
    /^npx eslint(?=\s|$)/,
    /^npm run (build|lint|typecheck|check)\b/,
    /^npx playwright test(?=\s|$)/,
];
/**
 * Check a command line against the allowlist.
 *
 * @param {string} command - Full command line requested by the agent.
 * @returns {boolean} True when the trimmed command matches an allowlisted prefix.
 */
function isCommandAllowed(command) {
    const trimmed = String(command ?? '').trim();
    return COMMAND_ALLOWLIST.some((re) => re.test(trimmed));
}
125
+ // ---------------------------------------------------------------------------
126
+ // Tool execution
127
+ // ---------------------------------------------------------------------------
128
/**
 * Execute one fix-agent tool call and return its result.
 *
 * Expected failures (blocked paths, missing files, failing commands, fs or
 * browser errors) are reported through `output` rather than thrown, so the
 * agent can see the error and react to it.
 *
 * @param {object} ctx - Tool context: `projectRoot`, `browser`, `baseUrl`,
 *   `screenshotDir`, `screenshotCounter` (incremented by verify_in_browser).
 * @param {string} name - Tool name from FIX_TOOL_DEFINITIONS.
 * @param {object} input - Tool input as produced by the model.
 * @returns {{output: string, filesChanged?: string[], commitHash?: string, screenshotPath?: string}}
 */
export function executeFixTool(ctx, name, input) {
    switch (name) {
        case 'read_file': {
            const filePath = String(input.path);
            if (!isPathSafe(ctx.projectRoot, filePath)) {
                return { output: `Blocked: "${filePath}" is outside the project or a restricted path.` };
            }
            const fullPath = resolve(ctx.projectRoot, filePath);
            if (!existsSync(fullPath)) {
                return { output: `File not found: ${filePath}` };
            }
            let content;
            try {
                content = readFileSync(fullPath, 'utf-8');
            }
            catch (err) {
                // e.g. the path is a directory or is not readable
                return { output: `Read failed for ${filePath}: ${String(err)}` };
            }
            const lines = content.split('\n');
            const startLine = Math.max(1, Number(input.start_line) || 1);
            const endLine = Math.min(lines.length, Number(input.end_line) || lines.length);
            const slice = lines.slice(startLine - 1, endLine);
            const numbered = slice.map((l, i) => `${startLine + i}: ${l}`).join('\n');
            return { output: numbered };
        }
        case 'write_file': {
            const filePath = String(input.path);
            if (!isPathSafe(ctx.projectRoot, filePath)) {
                return { output: `Blocked: "${filePath}" is outside the project or a restricted path.` };
            }
            const fullPath = resolve(ctx.projectRoot, filePath);
            try {
                writeFileSync(fullPath, String(input.content), 'utf-8');
            }
            catch (err) {
                // e.g. the parent directory does not exist
                return { output: `Write failed for ${filePath}: ${String(err)}` };
            }
            return { output: `Written: ${filePath}`, filesChanged: [filePath] };
        }
        case 'search_code': {
            const pattern = String(input.pattern);
            const glob = input.glob ? String(input.glob) : undefined;
            try {
                const args = [
                    '-rn',
                    '--max-count=20',
                    // Keep the search out of dependency/VCS directories and
                    // out of env files, which the path rules also block.
                    '--exclude-dir=node_modules',
                    '--exclude-dir=.git',
                    '--exclude=.env',
                    '--exclude=.env.*',
                ];
                if (glob) {
                    args.push(`--include=${glob}`);
                }
                // -e marks the next argument as the pattern even when it
                // starts with "-", so it can never be parsed as a grep option.
                args.push('-e', pattern, '.');
                const result = execFileSync('grep', args, {
                    cwd: ctx.projectRoot,
                    encoding: 'utf-8',
                    timeout: 10000,
                    maxBuffer: 1024 * 1024,
                });
                return { output: result.trim() || 'No matches found.' };
            }
            catch (err) {
                const error = err;
                // grep exits with status 1 when nothing matched
                if (error.status === 1) {
                    return { output: 'No matches found.' };
                }
                return { output: `Search error: ${String(err)}` };
            }
        }
        case 'run_command': {
            const command = String(input.command).trim();
            if (!isCommandAllowed(command)) {
                return { output: `Blocked: "${command}" is not in the allowlist. Allowed: npx tsc, npx eslint, npm run build/lint/typecheck/check, npx playwright test.` };
            }
            try {
                // No shell is involved: the command is split on whitespace and
                // executed directly, so shell metacharacters have no effect.
                const parts = command.split(/\s+/);
                const result = execFileSync(parts[0], parts.slice(1), {
                    cwd: ctx.projectRoot,
                    encoding: 'utf-8',
                    timeout: 60000,
                    maxBuffer: 2 * 1024 * 1024,
                });
                return { output: result.trim() || '(no output)' };
            }
            catch (err) {
                const error = err;
                const stdout = error.stdout || '';
                const stderr = error.stderr || '';
                return { output: `Command failed:\n${stdout}\n${stderr}`.trim() };
            }
        }
        case 'git_commit': {
            const message = String(input.message);
            const files = Array.isArray(input.files) ? input.files.map(String) : [];
            if (files.length === 0) {
                return { output: 'No files specified for commit.' };
            }
            // Validate all files are safe
            for (const f of files) {
                if (!isPathSafe(ctx.projectRoot, f)) {
                    return { output: `Blocked: "${f}" is outside the project or a restricted path.` };
                }
            }
            try {
                // "--" ends option parsing so a file name starting with "-" is
                // never read as a git option. Passing the paths to `git commit`
                // limits the commit to these files, leaving anything else that
                // was already staged out of the agent's commit.
                execFileSync('git', ['add', '--', ...files], { cwd: ctx.projectRoot, encoding: 'utf-8' });
                execFileSync('git', ['commit', '-m', message, '--', ...files], { cwd: ctx.projectRoot, encoding: 'utf-8' });
                const hash = execFileSync('git', ['rev-parse', '--short', 'HEAD'], { cwd: ctx.projectRoot, encoding: 'utf-8' }).trim();
                return { output: `Committed: ${hash} — ${message}`, commitHash: hash, filesChanged: files };
            }
            catch (err) {
                const error = err;
                return { output: `Git commit failed: ${error.stderr || String(err)}` };
            }
        }
        case 'git_revert': {
            try {
                execFileSync('git', ['revert', '--no-edit', 'HEAD'], { cwd: ctx.projectRoot, encoding: 'utf-8' });
                // commitHash here is the hash of the new revert commit
                const hash = execFileSync('git', ['rev-parse', '--short', 'HEAD'], { cwd: ctx.projectRoot, encoding: 'utf-8' }).trim();
                return { output: `Reverted HEAD. New HEAD: ${hash}`, commitHash: hash };
            }
            catch (err) {
                const error = err;
                return { output: `Git revert failed: ${error.stderr || String(err)}` };
            }
        }
        case 'verify_in_browser': {
            const url = String(input.url);
            const label = String(input.label || 'verify').replace(/[^a-zA-Z0-9_-]/g, '_');
            ctx.screenshotCounter++;
            const filename = `${String(ctx.screenshotCounter).padStart(3, '0')}-${label}.png`;
            const screenshotPath = `${ctx.screenshotDir}/${filename}`;
            try {
                ctx.browser.open(url.startsWith('http') ? url : `${ctx.baseUrl}${url}`);
                ctx.browser.screenshot(screenshotPath);
            }
            catch (err) {
                // No screenshotPath is returned, so the fix is not counted as verified.
                return { output: `Verification failed: ${String(err)}` };
            }
            // Capture console errors
            let consoleErrors = '';
            try {
                const raw = ctx.browser.evaluateInternal('JSON.stringify(window.__consoleErrors || [])');
                const errors = JSON.parse(raw);
                if (Array.isArray(errors) && errors.length > 0) {
                    consoleErrors = `\nConsole errors: ${errors.slice(-5).join('; ')}`;
                }
            }
            catch {
                // Not available
            }
            return { output: `Screenshot saved: ${screenshotPath}${consoleErrors}`, screenshotPath };
        }
        default:
            return { output: `Unknown fix tool: ${name}` };
    }
}
@@ -0,0 +1,60 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
/**
 * Running risk score ("WTF likelihood") for the fix loop.
 *
 * Points are added as fixes are recorded:
 *   - a reverted fix:                          +15
 *   - a fix that changed more than 3 files:    +5
 *   - every fix recorded after the 15th:       +1
 *   - recordAllLowSeverityBatch():             +10
 *   - recordUnrelatedFileTouch():              +20
 *
 * shouldStop() reports true once the score exceeds 20, after three reverts
 * in a row, or when MAX_FIXES attempts have been recorded.
 */
export class WTFTracker {
    constructor() {
        this.wtf = 0;
        this.totalFixes = 0;
        this.consecutiveReverts = 0;
    }
    /**
     * Record the outcome of one fix attempt.
     * @param {string} status - Fix status; only 'reverted' is treated specially.
     * @param {number} filesChanged - Number of files the fix changed.
     */
    recordAttempt(status, filesChanged) {
        this.totalFixes += 1;
        const wasReverted = status === 'reverted';
        // Any non-reverted attempt breaks the streak.
        this.consecutiveReverts = wasReverted ? this.consecutiveReverts + 1 : 0;
        let penalty = wasReverted ? 15 : 0;
        if (filesChanged > 3) {
            penalty += 5;
        }
        if (this.totalFixes > 15) {
            penalty += 1;
        }
        this.wtf += penalty;
    }
    /** Add the penalty for touching files outside the affected area. */
    recordUnrelatedFileTouch() {
        this.wtf += 20;
    }
    /** Add the penalty for a batch made up only of low-severity fixes. */
    recordAllLowSeverityBatch() {
        this.wtf += 10;
    }
    /** @returns {boolean} True when the fix loop should stop attempting fixes. */
    shouldStop() {
        return (this.totalFixes >= WTFTracker.MAX_FIXES ||
            this.consecutiveReverts >= 3 ||
            this.wtf > 20);
    }
    /** Current accumulated score. */
    get score() {
        return this.wtf;
    }
    /** Number of attempts recorded so far. */
    get fixes() {
        return this.totalFixes;
    }
}
/** Hard cap — shouldStop() is always true once this many fixes are recorded. */
WTFTracker.MAX_FIXES = 50;